I tried to run this as well and played around with the parameters.
Some remarks:
- you carried around some extra dimensions that might be unnecessary (see code below)
- it might be that your data generation (reg\cdot X) did not match the model (X\cdot reg), but could be that that is just transposed.
- also, there were some hard coded standard deviations, I tried to simplify to match simulation and model.
- in addition to @junpenglao’s comment on standard deviations, maybe the chain failure comes from the fact that your simulated (observed)
y
s are not really normally distributed (this gives the mentioned tail areas where sampling might be stuck).
Hope this helps!
Falk
import numpy as np
import theano as tt
import pymc3 as pm
import matplotlib.pyplot as plt
import sys
n = 2
num_regressors = n ** 2
num_data_samples = 1000
# hyperprior
d0_mean_mu = 0.2
d0_mean_sd = 1.
d1_mean_mu = -0.3
d1_mean_sd = 1.
# sample true regressor vector
true_bii_mean = d0_mean_mu#np.random.normal(d0_mean_mu, 0.1)
true_bij_mean = d1_mean_mu#np.random.normal(d1_mean_mu, 0.1)
true_bii = np.random.normal(true_bii_mean, d0_mean_sd, size=n)
true_bij = np.random.normal(true_bij_mean, d1_mean_sd, size=num_regressors - n)
# generate data
X = np.random.randint(2, size=[num_data_samples, num_regressors])
diag_idxs = np.diag_indices(n)
off_diag_idxs = np.where(~np.eye(n,dtype=bool))
regress = np.zeros((n, n))
regress[diag_idxs] = true_bii
regress[off_diag_idxs] = true_bij
# y = np.zeros((num_data_samples,))
y = X.dot(regress.reshape(-1,))
# for k in range(num_data_samples):
# y[k] = np.dot(regress.reshape(-1,), X[k, :])
plt.hist(y)
plt.show()
# update weights
with pm.Model() as hierarchical_model:
# hyperpriors for group nodes
mu_theta_s = pm.Normal('mu_theta_s', mu=d0_mean_mu, sd=1)
mu_phi_m = pm.Normal('mu_phi_m', mu=d1_mean_mu, sd=1)
# distributions for diagonal weights (b_ii) and off-diagonal weights (b_ij)
b_ii = pm.Normal('b_ii', mu=mu_theta_s, sd=d0_mean_sd, shape=(n))
b_ij = pm.Normal('b_ij', mu=mu_phi_m, sd=d1_mean_sd, shape=(num_regressors - n))
# model error
eps = pm.HalfNormal('eps', 1)
# reward estimate
reg = tt.tensor.zeros((n, n))
reg = tt.tensor.inc_subtensor(reg[diag_idxs], b_ii)
reg = tt.tensor.inc_subtensor(reg[off_diag_idxs], b_ij)
reward_est = pm.math.dot(X, tt.tensor.reshape(reg, (num_regressors, 1)))
# data likelihood
reward_like = pm.Normal('reward_like', mu=reward_est, sd=eps, observed=y)
with hierarchical_model:
trace = pm.sample()
pm.traceplot(trace)
plt.show()