Sure, I'd be happy to do that. The code is nearly identical; the only change is that I replaced ADVI with NUTS. As mentioned earlier in the thread, I haven't changed the prior on the standard deviation.
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm

# Toy data: a noiseless line y = 2x on [0, 1]
X_train = np.linspace(0, 1, 500)
X_train = np.expand_dims(X_train, axis=1)  # shape (500, 1)
print(X_train.shape)
Y_train = (X_train * 2).flatten()  # shape (500,)
print(Y_train.shape)
plt.scatter(X_train, Y_train)
def construct_bnn(x_input, y_input):
    # Network structure: 1 input -> 4 hidden units -> 1 hidden unit -> output
    layer_in = 1
    layer_nodes = [4, 1]
    with pm.Model() as bnn:
        x_data = pm.Data("x_data", x_input)
        y_data = pm.Data("y_data", y_input)
        # Weight and bias priors
        w_1 = pm.Normal("w_1", 0, sigma=1, shape=(layer_in, layer_nodes[0]))
        b_1 = pm.Normal("b_1", 0, sigma=3, shape=1)
        w_2 = pm.Normal("w_2", 0, sigma=1, shape=(layer_nodes[0], layer_nodes[1]))
        b_2 = pm.Normal("b_2", 0, sigma=3, shape=1)
        w_out = pm.Normal("w_out", 0, sigma=1, shape=(layer_nodes[1],))
        b_out = pm.Normal("b_out", 0, sigma=3, shape=1)
        # Activations and forward pass
        act_1 = pm.Deterministic("act_1", pm.math.tanh(pm.math.dot(x_data, w_1) + b_1))
        act_2 = pm.Deterministic("act_2", pm.math.tanh(pm.math.dot(act_1, w_2) + b_2))
        act_out = pm.Deterministic("act_out", pm.math.dot(act_2, w_out) + b_out)
        # Noise prior; the likelihood now actually uses this sigma
        # (the original passed sigma=1, leaving the HalfNormal prior unused).
        sigma = pm.HalfNormal("sigma", sigma=0.1)
        # total_size is dropped: it is only needed when minibatching with ADVI.
        output = pm.Normal("output", act_out, sigma=sigma, observed=y_data)
    return bnn
bnn = construct_bnn(X_train, Y_train)
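Since the standard-deviation prior came up earlier, a quick prior predictive draw is a cheap way to see what the HalfNormal(0.1) noise prior implies before running NUTS. This is just an optional check I'm adding here, not part of the script above:

with bnn:
    # Optional sanity check (my addition): draw from the priors only
    prior_pred = pm.sample_prior_predictive(samples=500)
print(prior_pred["output"].shape)  # (500, 500): prior draws x data points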
num_samples = 5000
burn_in = 1000
with bnn:
    # NUTS sampling; this is the step that replaces the ADVI fit
    trace = pm.sample(num_samples, tune=burn_in)
    samples = pm.sample_posterior_predictive(trace)
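For comparison, the sampling step above is the only structural difference from the ADVI version. A rough sketch of what that step would look like, assuming pm.fit with the default mean-field ADVI (the iteration count here is made up, not taken from the earlier post):

with bnn:
    # Hypothetical ADVI variant of the sampling step, for comparison only
    approx = pm.fit(n=30000, method="advi")
    trace_advi = approx.sample(num_samples)
    samples_advi = pm.sample_posterior_predictive(trace_advi)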
# Posterior predictive mean against the training data
plt.scatter(X_train, Y_train, alpha=0.5)
plt.scatter(X_train, samples["output"].mean(axis=0), alpha=0.5)
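If you also want to visualize the predictive uncertainty rather than just the mean, one option is a percentile band over the posterior predictive draws. A minimal sketch (the 94% interval is an arbitrary choice on my part):

# Percentile band over the posterior predictive draws
lower, upper = np.percentile(samples["output"], [3, 97], axis=0)
plt.fill_between(X_train.flatten(), lower, upper, alpha=0.3)
plt.scatter(X_train, Y_train, alpha=0.5)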