I have an issue where a noncentered hierarchical model suffers from lots of divergences while the centered version works just fine. This is counter to my expectations so I suspect I’m probably implementing the noncentered variables incorrectly. I create a dataset:
np.random.seed(123)
idist = scipy.stats.norm(loc=10, scale=2)
sdist = scipy.stats.norm(loc=10, scale=0.5)
noise = scipy.stats.norm(loc=0, scale=0.1)
nsamples = 4
intercepts = idist.rvs(nsamples)
slopes = sdist.rvs(nsamples)
point_min = 2
point_range = 10
npoints = [int(point_range * np.random.rand()) + point_min for _ in range(nsamples)]
density_min = 0.90
density_range = 0.20
densities = [np.sort(density_range * np.random.rand(n) + density_min) for n in npoints]
density_mean = np.mean(np.hstack(densities))
density_std = np.std(np.hstack(densities))
normdensities = [(d - density_mean)/density_std for d in densities]
moduli = [s * (d - 0.5 * density_range + density_min) + i + noise.rvs(d.size) for d, s, i in zip(densities, slopes, intercepts)]
moduli_mean = np.mean(np.hstack(moduli))
moduli_std = np.std(np.hstack(moduli))
normmoduli = [(m - moduli_mean)/moduli_std for m in moduli]
So I can specify my centered model as
with pymc3.Model() as noncentered_model:
m = pymc3.Normal('m', mu=0, sd=1)
b = pymc3.Normal('b', mu=0, sd=1)
zb = pymc3.Normal('zb', mu=0, sd=1, shape=(nsamples,))
bi = pymc3.Deterministic('bi', b + zb)
noise = pymc3.HalfNormal('noise', sd = 1)
_ynorm = [None for _ in range(nsamples)]
_y = [None for _ in range(nsamples)]
_l = [None for _ in range(nsamples)]
for i in range(nsamples):
with noncentered_model:
_ynorm[i] = pymc3.Deterministic(f'ynorm_{i+1}', m * normdensities[i]+ bi[i])
_y[i] = pymc3.Deterministic(f'y_{i+1}', _ynorm[i] * moduli_std + moduli_mean)
_l[i] = pymc3.Normal(f'l{i+1}', mu = _ynorm[i], sd=noise, observed = normmoduli[i])
This approach fails with MANY divergences. Specifying bi directly (bi = pymc3.Normal(‘bi’, mu=b, sd=1, shape=(nsamples,))) works great.
Does anyone see where I’m going wrong?