The 95% interval of PyMC3 trace doesn't cove the real values

Thanks for your help. Here is an example of hierarchal model with toy data (attachment):
sample.csv (379.7 KB)

import numpy as np
import pandas as pd
import pymc3 as pm3
import matplotlib.pylab as plt

samples = pd.read_csv('sample.csv', sep=',')

# some records may have missing values. So, skip them during the modelling
masked = samples[~samples.gc.isnull()]
num_ind = len(masked.idx.unique())
idx = np.asarray(masked.idx.astype(int))

with pm3.Model() as hierarchical_model:
    mu_intercept = pm3.Normal('mu_intercept', mu=0, sd=30)
    mu_d = pm3.Normal('mu_d', mu=0, sd=30)
    sigma_intercept = pm3.Uniform('sigma_intercept', lower=0, upper=10)
    sigma_d = pm3.Uniform('sigma_d', lower=0, upper=500)
    sigma_gc = pm3.Uniform('sigma_gc', lower=0, upper=500)
    sigma_p = pm3.Uniform('sigma_p', lower=0, upper=500)
    intercept_prior = pm3.Normal('intercept_prior',
                                 mu=mu_intercept,
                                 sd=sigma_intercept,
                                 shape=num_ind)
    d_prior = pm3.Normal('d_prior',
                         mu=mu_d,
                         sd=sigma_d,
                         shape=num_ind)
    gc_prior = pm3.HalfNormal('gc_prior',
                              sd=sigma_gc,
                              shape=num_ind)

    p0_prior = pm3.Normal('p0_prior', mu=0, sd=sigma_p, shape=num_ind)
    p1_prior = pm3.Normal('p1_prior', mu=0, sd=sigma_p, shape=num_ind)
    p2_prior = pm3.Normal('p2_prior', mu=0, sd=sigma_p, shape=num_ind)
    p3_prior = pm3.Normal('p3_prior', mu=0, sd=sigma_p, shape=num_ind)
    p4_prior = pm3.Normal('p4_prior', mu=0, sd=sigma_p, shape=num_ind)

    y_est = (
        intercept_prior[idx] +
        d_prior[idx] * masked.d +
        gc_prior[idx] * masked.gc +
        p0_prior[idx] * masked.p0 +
        p1_prior[idx] * masked.p1 +
        p2_prior[idx] * masked.p2 +
        p3_prior[idx] * masked.p3 +
        p4_prior[idx] * masked.p4
    )

    y_est = pm3.Deterministic('y_est', y_est)

    sigma = pm3.HalfNormal('sigma', sd=100)
    epsilon = pm3.HalfNormal('epsilon', sd=sigma)
    pm3.Normal('y_pred',
               mu=y_est,
               sd=epsilon,
               observed=masked.y)

with hierarchical_model:
    start, sds, elbo = pm3.variational.advi(n=20000)
    step = pm3.Metropolis()
    hierarchical_trace = pm3.sample(300000, start=start, step=step)

varnames = ['mu_intercept', 'sigma_intercept', 'sigma_p', 'epsilon', 'sigma']
pm3.traceplot(hierarchical_trace[-1000:], varnames)
pm3.plot_posterior(hierarchical_trace[-1000:], varnames)
pm3.autocorrplot(hierarchical_trace[-1000:], varnames=varnames)

bd = hierarchical_trace[-1000:]['y_est']
bd_hpd = pm3.hpd(bd)
masked['pred'] = bd.mean(axis=0)
masked['prctile_95'] = bd_hpd[:, 1]
masked['prctile_5'] = bd_hpd[:,0]

for idx, sample in masked.groupby('idx'):
    plt.figure(figsize=(20,15))
    plt.plot(sample.d, sample.y, '*', markersize=30)
    plt.plot(sample.d, sample.pred, '-', markersize=20, color='g')
    plt.fill_between(sample.d, sample.prctile_5, sample.prctile_95, color='r', alpha=0.5);
    plt.title('Actual and Prediction for sample#{}'.format(idx), fontsize=30, fontweight='bold')
    plt.yticks(size=20)
    plt.xticks(size=20)
    plt.legend(fontsize=30)

NUTS sampling is too slow for this problem so I had to use Metropolis.