Thanks for your reply. I have experimented with simple linear models and you are right that everything seems to work. I have attached a slightly more complicated example that is causing me some issues. Despite setting the random seed at the beginning, the final histogram of the trace of a single parameter is different each time I run the code. ADVI produces identical average ELBO each time I run, but the traces for the two models sometimes agree and sometimes catastrophically disagree. Any ideas what’s going on?
import numpy as np
np.random.seed(52)
import pymc3 as pm
import cPickle as pickle
import matplotlib.pyplot as plt
import theano
floatX = theano.config.floatX
# Synthetic regression problem: both outputs are the row-sum of the
# inputs, corrupted by Gaussian noise of known scale.
ndata, ninputs, noutputs = 100, 4, 2

# Design matrix drawn uniformly on [0, 1).
X = np.random.uniform(size=(ndata, ninputs))

# Identical noiseless targets in both output columns.
row_sums = np.sum(X, axis=1)
Y = np.column_stack((row_sums, row_sums))

# Add iid Gaussian noise, then keep a per-point error array of the
# same shape as Y for use as the likelihood sd.
noise_scale = 0.2
Y = Y + np.random.normal(size=(ndata, noutputs)) * noise_scale
Yerr = np.full_like(Y, noise_scale)

pkl_fl = 'test_simple.pkl'
neuronsPerHiddenlayer = 12
def construct_neural_network(inputs, targets, errtargets,
                             neuronsPerHiddenlayer, ninputs, noutputs, ndata):
    """Build a 2-hidden-layer Bayesian NN as a pymc3 Model (not fitted here).

    Parameters
    ----------
    inputs : array, shape (ndata, ninputs) -- design matrix.
    targets : array, shape (ndata, noutputs) -- observed outputs.
    errtargets : array, shape (ndata, noutputs) -- per-point likelihood sd.
    neuronsPerHiddenlayer, ninputs, noutputs, ndata : ints sizing the network.

    Returns
    -------
    pm.Model with Normal weight priors and a Normal likelihood on `targets`.
    """
    # Re-seed numpy so both calls to this function produce identical
    # testvals -- this makes the two Model instances start from the same
    # point, but does NOT seed theano's own RNG streams.
    np.random.seed(42)
    init_1 = np.random.randn(ninputs, neuronsPerHiddenlayer).astype(floatX)
    init_2 = np.random.randn(neuronsPerHiddenlayer, neuronsPerHiddenlayer).astype(floatX)
    init_out = np.random.randn(neuronsPerHiddenlayer, noutputs).astype(floatX)
    with pm.Model() as neural_network:
        # Input -> hidden-1 weights, standard-normal prior.
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1.,
                                 shape=(ninputs, neuronsPerHiddenlayer),
                                 testval=init_1)
        # Hidden-1 -> hidden-2 weights.
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1.,
                                shape=(neuronsPerHiddenlayer, neuronsPerHiddenlayer),
                                testval=init_2)
        # Hidden-2 -> output weights.
        weights_2_out = pm.Normal('w_2_out', 0, sd=1.,
                                  shape=(neuronsPerHiddenlayer, noutputs),
                                  testval=init_out)
        # Output bias.  NOTE(review): shape=(noutputs) is just the int
        # `noutputs`, not a 1-tuple -- pymc3 accepts a scalar shape, but
        # (noutputs,) would be the explicit form.
        b_1_out = pm.Normal('b_1_out', 0, 1,
                            shape=(noutputs))
        # Sigmoid nonlinearity only after the first layer; the second
        # layer is linear (two consecutive dots), so layers 2+3 collapse
        # to a single linear map -- presumably intentional for this test.
        act_1 = pm.math.sigmoid(pm.math.dot(inputs, weights_in_1))
        act_out = pm.math.dot(pm.math.dot(act_1, weights_1_2), weights_2_out) + b_1_out
        # Gaussian likelihood with known, heteroscedastic-capable sd.
        yTrain = pm.Normal('yTrain',
                           mu=act_out,
                           sd=errtargets,
                           observed=targets,
                           total_size=(ndata, noutputs))
    return neural_network
# --- Run 1: fit the model with ADVI and persist the variational params. ---
model = construct_neural_network(
    X, Y, Yerr, neuronsPerHiddenlayer, ninputs, noutputs, ndata)
with model:
    inference = pm.ADVI()
    inference.fit(150000)
    # approx.params are the variational parameters (presumably the mean
    # and rho/softplus-sd vectors -- verify against the pymc3 version in
    # use); save them as (name, value) pairs so run 2 can restore them.
    saveparam = [(param.name, param.eval())
                 for param in inference.approx.params]
    with open(pkl_fl, 'wb') as pkl:
        pickle.dump(saveparam, pkl)
    # NOTE(review): approx.sample() draws via theano's shared random
    # streams, which np.random.seed does NOT seed -- this is the likely
    # source of the run-to-run histogram differences.  pm.set_tt_rng
    # (or the random_seed machinery of the pymc3 version in use) would
    # need to be set for reproducible draws -- confirm.
    t = inference.approx.sample(100)
# --- Run 2: rebuild an identical model, restore the saved variational
# parameters into a fresh (unfitted) ADVI object, and compare samples. ---
model2 = construct_neural_network(
    X, Y, Yerr, neuronsPerHiddenlayer, ninputs, noutputs, ndata)
with model2:
    inference = pm.ADVI()
    with open(pkl_fl, 'rb') as pkl:
        saveparam = pickle.load(pkl)
    # Restore by position: assumes approx.params is ordered the same way
    # in both runs (same pymc3 version, same model build order) -- the
    # saved names in param[0] are not checked against the target.  A
    # name-keyed restore would be safer; verify ordering is stable.
    for i, param in enumerate(saveparam):
        inference.approx.params[i].set_value(param[1])
    # NOTE(review): even with identical variational params, these draws
    # come from theano's unseeded RNG, so t2 will not reproduce t draw
    # for draw; only the distributions should match -- confirm.
    t2 = inference.approx.sample(100)

# Overlay histograms of one weight's posterior draws from both runs.
plt.hist(t2['w_2_out'][:, 0, 0])
plt.hist(t['w_2_out'][:, 0, 0])
plt.show(block=True)