In my research, my generative model is encapsulated into a software code (i do n…ot have access to the source code). This code reads an input file and writes an output file. I have two choices, i can either run a design of experiments with this code, fit a surrogate model, and then encode this surrogate model inside of pymc3's model definition. Conversely, i would like to be able to make a system call to this software code from within pymc3.
In an effort to test if this mechanism is possible, i have attempted to re-work the simplest example from Pymc3 tutorial where the calculation of mu (as a liner regression fit) is conducted in an external function call.
The following code block is a modified version of the first example from pymc3 website. Instead of having mu = alpha + beta[0]*X1 + beta[1]*X2 inside of the model block, mu is the return value from a sub-routine. This sub-routine takes in the current sampled values for alpha and beta[i], builds an input file, makes a system call to dummy.py, and lastly it reads the output file created by dummy.py and returns the value of mu.
```python
from pymc3 import *
import numpy as np
import matplotlib.pyplot as plt
import theano
import json
from subprocess import call
# X1, X2, and Y were generated from the code on Pymc3 tutorial website. first example
X1 = [-1.0856306, 0.99734545, 0.2829785, -1.50629471, -0.57860025, 1.65143654, -2.42667924, -0.42891263, 1.26593626, -0.8667404, -0.67888615, -0.09470897, 1.49138963, -0.638902, -0.44398196, -0.43435128, 2.20593008, 2.18678609, 1.0040539, 0.3861864, 0.73736858, 1.49073203, -0.93583387, 1.17582904, -1.25388067, -0.6377515, 0.9071052, -1.4286807, -0.14006872, -0.8617549, -0.25561937, -2.79858911, -1.7715331, -0.69987723, 0.92746243, -0.17363568, 0.00284592, 0.68822271, -0.87953634, 0.28362732, -0.80536652, -1.72766949, -0.39089979, 0.57380586, 0.33858905, -0.01183049, 2.39236527, 0.41291216, 0.97873601, 2.23814334, -1.29408532, -1.03878821, 1.74371223, -0.79806274, 0.02968323, 1.06931597, 0.89070639, 1.75488618, 1.49564414, 1.06939267, -0.77270871, 0.79486267, 0.31427199, -1.32626546, 1.41729905, 0.80723653, 0.04549008, -0.23309206, -1.19830114, 0.19952407, 0.46843912, -0.83115498, 1.16220405, -1.09720305, -2.12310035, 1.03972709, -0.40336604, -0.12602959, -0.83751672, -1.60596276, 1.25523737, -0.68886898, 1.66095249, 0.80730819, -0.31475815, -1.0859024, -0.73246199, -1.21252313, 2.08711336, 0.16444123, 1.15020554, -1.26735205, 0.18103513, 1.17786194, -0.33501076, 1.03111446, -1.08456791, -1.36347154, 0.37940061, -0.37917643];
X2 = [0.12841094, -0.39557759, 0.14245293, 0.51966079, -0.0049252, 0.00682843, 0.0359099, -0.37239514, 0.08522933, -0.32108195, -0.08553592, 0.24857391, -0.14704339, 0.1002498, 0.20254781, 0.05574817, -0.27418969, -0.06649506, 0.39188227, -0.40500915, -0.0551572, -0.11042161, 0.02414947, 0.14964312, 0.32173819, -0.05404648, 0.16246827, 0.09994803, 0.09486946, -0.11278479, -0.19946429, -0.22000862, -0.15128744, 0.06433732, 0.15218988, 0.06469377, -0.10979102, 0.36119402, 0.30377312, -0.07080002, -0.16468628, 0.02604299, 0.25345973, 0.066553, 0.11130974, -0.04241602, 0.09125418, 0.30890889, -0.04793376, 0.02866155, 0.0507633, 0.05674507, -0.28237778, -0.37537373, -0.20393101, 0.03358846, 0.11077123, -0.10613491, 0.2754515, -0.02863519, 0.0040632, -0.03879277, 0.02680536, 0.14089481, 0.13313069, -0.17968459, 0.30473276, -0.21900529, 0.0158454, -0.05487931, -0.20979834, -0.01502412, -0.14816275, 0.01458145, 0.08061719, 0.29438587, 0.06147684, -0.12224507, -0.07832396, 0.02799562, 0.01869217, 0.29191785, 0.27907059, -0.07178719, -0.10972843, -0.51141092, -0.10978408, -0.19561154, -0.07096489, 0.07831685, 0.03543847, -0.0059936, 0.03991642, -0.02522355, 0.03940379, -0.646211, -0.0538587, -0.02217014, -0.06825234, -0.04358925];
Y = np.array([9.38706859e-01, 4.10296149e-01,3.83981292e+00, 1.48115418e+00, 4.02779506e-01, 2.46184530e+00, -1.42342679e+00, -1.27520755e+00, 2.38380704e+00, -3.90761758e-01, 6.86815665e-01, 2.10641559e+00, 1.84890360e+00, -8.04359754e-01, 3.93284941e-01, 2.31721220e+00, 3.41651416e+00, 3.39016804e+00, 2.22246532e+00, 3.77308673e-01, 3.43806883e-01, 1.66274112e+00, -1.20663529e-01, 2.18829692e+00, 1.50706675e+00, -1.19159361e+00, 1.44784359e+00, -1.55349860e+00, -1.40248284e-01, -1.96609652e-02, -1.35472064e+00, -1.59474188e+00, -1.39656749e+00, 5.29754386e-01, 2.63051387e+00, 5.53932221e-01, 1.76084808e+00, 2.39686504e+00, 1.47396672e+00, 9.07514885e-01, 7.37921664e-02, -3.82899347e-01, 1.49271947e+00, 7.65880501e-01, 2.05273917e+00, 5.63172455e-01, 4.25098874e+00, 3.26909416e-02, 3.93785393e-01, 3.67324277e+00, 1.69575050e+00, 9.38133214e-01, 1.35531685e+00, -2.42854948e+00, 1.26254192e+00, 2.07270390e+00, 2.75833869e+00, 2.60484762e+00, 3.21391580e+00, 4.95643013e+00, 2.31319324e-01, 1.53863552e+00, 1.25983672e+00, -5.57565140e-01, 3.74025866e+00, 1.00427073e+00, 2.44326467e+00, 5.03997740e-01, 1.06029822e+00, 1.48250538e+00, -2.69441500e-01, -1.19520306e+00, 3.20016631e+00, -6.69460228e-01, -2.24215995e+00, 2.10607318e+00, 2.01495136e+00, -8.51855254e-01, -8.99821832e-01, -1.20278122e+00, 1.05077792e+00, -1.43401688e-01, 1.84052098e+00, 1.16665281e+00, 5.60119574e-02, -2.04696786e+00, -1.66062003e+00, 5.51783963e-01, 1.58062230e+00, 1.63826706e+00, 1.16403511e+00, 3.85980814e-01, 2.23665854e+00, 1.23718947e+00, -1.16021703e+00, 1.11137427e+00, 1.65658589e+00, -3.20236525e-03, 1.36931418e+00, 1.33161104e+00]);
def proc_test(alpha, beta1, beta2, X1, X2):
#### write an input file
params = {};
params['X1'] = X1;
params['X2'] = X2;
# These are the lines of code that I need help with.
# As written below, objects of the Normal class are being stored into the dictonary.
# I need to store the current sampled value for each distribution.
# Do I need to pull these values from a Backend or Trace object?
params['alpha'] = alpha;
params['beta1'] = beta1;
params['beta2'] = beta2;
###
with open("input.txt", 'w') as outfile:
json.dump(params, outfile)
#### make a system call
call ("python dummy.py input.txt");
#### read the output file and return the value
mu = np.loadtxt("output.txt");
return(mu)
# the model
with Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
# Priors for unknown model parameters
alpha = Normal('alpha', mu=0, sd=10);
beta1 = Normal('beta1', mu=0, sd=10);
beta2 = Normal('beta2', mu=0, sd=10);
sigma = HalfNormal('sigma', sd=1);
# Expected value of outcome
mu = proc_test(alpha, beta1, beta2, X1, X2);
# Likelihood (sampling distribution) of observations
Y_obs = Normal('Y_obs', mu=mu, sd=sigma, observed=Y)
# Inference!
start = find_MAP(); # Find starting value by optimization
step = NUTS(scaling=start); # Instantiate MCMC sampling algorithm
trace = sample(2000, step, start=start); # draw 2000 posterior samples using NUTS sampling
#plot results
plt.figure(figsize=(12, 12))
summary(trace)
traceplot(trace)
```
The next code block is the file dummy.py. This code reads its input file, calculates mu = alpha + beta[0]*X1 + beta[1]*X2 and writes mu to an output file.
```python
# file: dummy.py
import sys
import numpy as np
import json
def main(argv):
# provide an input file from the command line
input_file = sys.argv[1];
# read in the input file
params = {};
with open(input_file) as json_data:
params = json.load(json_data)
X1 = np.asarray(params["X1"], dtype=float);
X2 = np.asarray(params["X2"], dtype=float);
# the generative model
mu = float(params["alpha"]) + float(params["beta1"])*X1 + float(params["beta2"])*X2
# write the results to a output file
np.savetxt("output.txt", mu);
return
if __name__ == "__main__":
main(sys.argv[1:])
```
My issue is that I don't know enough about the core data structures that pymc3 uses. So i don't know how to extract the current iteration's sampled values of alpha, and beta[i], so that these values can be written to an input file.
thanks much,
-ian