Thanks for your help. Here is an example of a hierarchical model with toy data (attachment):
sample.csv (379.7 KB)
import numpy as np
import pandas as pd
import pymc3 as pm3
import matplotlib.pylab as plt
# Load the toy data set; one row per observation, `idx` identifies the individual.
samples = pd.read_csv('sample.csv', sep=',')
# Some records may have missing values, so skip them during the modelling.
# .copy() makes `masked` an independent frame: the script later assigns new
# columns to it (pred / prctile_95 / prctile_5), which on a filtered view
# would raise SettingWithCopyWarning and could silently fail to write.
masked = samples[~samples.gc.isnull()].copy()
num_ind = len(masked.idx.unique())        # number of distinct individuals
idx = np.asarray(masked.idx.astype(int))  # per-row individual index into the priors
# Hierarchical (partial-pooling) linear regression: each individual gets its
# own intercept and slopes, all drawn from shared population-level hyperpriors.
# NOTE: the body of the `with` block must be indented — the original paste
# had every statement at column 0, which is an IndentationError.
with pm3.Model() as hierarchical_model:
    # Population-level means for the intercept and the `d` slope.
    mu_intercept = pm3.Normal('mu_intercept', mu=0, sd=30)
    mu_d = pm3.Normal('mu_d', mu=0, sd=30)
    # Population-level spreads. Uniform priors work but HalfNormal/HalfCauchy
    # are the usual recommendation for scale parameters — TODO consider.
    sigma_intercept = pm3.Uniform('sigma_intercept', lower=0, upper=10)
    sigma_d = pm3.Uniform('sigma_d', lower=0, upper=500)
    sigma_gc = pm3.Uniform('sigma_gc', lower=0, upper=500)
    sigma_p = pm3.Uniform('sigma_p', lower=0, upper=500)
    # Per-individual coefficients: one entry per unique `idx` value.
    intercept_prior = pm3.Normal('intercept_prior',
                                 mu=mu_intercept,
                                 sd=sigma_intercept,
                                 shape=num_ind)
    d_prior = pm3.Normal('d_prior',
                         mu=mu_d,
                         sd=sigma_d,
                         shape=num_ind)
    # gc coefficient constrained to be non-negative via HalfNormal.
    gc_prior = pm3.HalfNormal('gc_prior',
                              sd=sigma_gc,
                              shape=num_ind)
    # Five p-feature slopes sharing a single population-level scale sigma_p.
    p0_prior = pm3.Normal('p0_prior', mu=0, sd=sigma_p, shape=num_ind)
    p1_prior = pm3.Normal('p1_prior', mu=0, sd=sigma_p, shape=num_ind)
    p2_prior = pm3.Normal('p2_prior', mu=0, sd=sigma_p, shape=num_ind)
    p3_prior = pm3.Normal('p3_prior', mu=0, sd=sigma_p, shape=num_ind)
    p4_prior = pm3.Normal('p4_prior', mu=0, sd=sigma_p, shape=num_ind)
    # Linear predictor: index each coefficient vector by the per-row
    # individual id so every observation uses its own individual's slopes.
    y_est = (
        intercept_prior[idx] +
        d_prior[idx] * masked.d +
        gc_prior[idx] * masked.gc +
        p0_prior[idx] * masked.p0 +
        p1_prior[idx] * masked.p1 +
        p2_prior[idx] * masked.p2 +
        p3_prior[idx] * masked.p3 +
        p4_prior[idx] * masked.p4
    )
    # Record the predictor in the trace so the posterior predictive band
    # can be read out later.
    y_est = pm3.Deterministic('y_est', y_est)
    # Observation noise with a hierarchical prior on its scale.
    sigma = pm3.HalfNormal('sigma', sd=100)
    epsilon = pm3.HalfNormal('epsilon', sd=sigma)
    # Likelihood.
    pm3.Normal('y_pred',
               mu=y_est,
               sd=epsilon,
               observed=masked.y)
# Fit: ADVI provides a variational approximation whose means seed the chain,
# then Metropolis draws the MCMC samples (NUTS was too slow for this model).
# NOTE: the `with` body must be indented — the original paste had it at
# column 0, which is an IndentationError.
with hierarchical_model:
    start, sds, elbo = pm3.variational.advi(n=20000)
    step = pm3.Metropolis()
    hierarchical_trace = pm3.sample(300000, start=start, step=step)
# Diagnostics and posterior summaries on the last 1000 draws
# (earlier draws are treated as burn-in).
varnames = ['mu_intercept', 'sigma_intercept', 'sigma_p', 'epsilon', 'sigma']
burned = hierarchical_trace[-1000:]
pm3.traceplot(burned, varnames)
pm3.plot_posterior(burned, varnames)
pm3.autocorrplot(burned, varnames=varnames)
# Posterior draws of the linear predictor: mean gives the point prediction,
# the HPD interval gives the credible band per observation.
bd = burned['y_est']
bd_hpd = pm3.hpd(bd)
masked['pred'] = bd.mean(axis=0)
masked['prctile_95'] = bd_hpd[:, 1]
masked['prctile_5'] = bd_hpd[:, 0]
# One figure per individual: observed points, posterior-mean prediction,
# and the HPD band.
# FIX 1: loop variable renamed from `idx` — it shadowed the model's row-index
# array defined earlier in the script.
# FIX 2: the artists now carry labels, so plt.legend() actually has entries
# to show instead of warning "No artists with labels found".
for sample_id, grp in masked.groupby('idx'):
    plt.figure(figsize=(20, 15))
    plt.plot(grp.d, grp.y, '*', markersize=30, label='actual')
    plt.plot(grp.d, grp.pred, '-', markersize=20, color='g', label='predicted')
    plt.fill_between(grp.d, grp.prctile_5, grp.prctile_95,
                     color='r', alpha=0.5, label='HPD interval')
    plt.title('Actual and Prediction for sample#{}'.format(sample_id),
              fontsize=30, fontweight='bold')
    plt.yticks(size=20)
    plt.xticks(size=20)
    plt.legend(fontsize=30)
NUTS sampling is too slow for this problem, so I had to use Metropolis.