The docs state that passing the total_size argument to observed variables should correct the posterior variance when the data are fed in as minibatches, but in my model it appears to have no effect at all.
PS: I’m using Metropolis as my sampler because it is the only sampler that works for this model.
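To illustrate what I understand total_size to do, here is a minimal sketch (the toy data and names are hypothetical, not my actual setup): with total_size set, the observed minibatch log-likelihood term in model.logp should be rescaled by total_size / batch_size, which is easy to check on a toy model.

import numpy as np
import pymc3 as pm

# Hypothetical toy data, only to check the scaling behaviour of total_size
toy = np.random.randn(100000)
mb = pm.Minibatch(toy, batch_size=10000)

with pm.Model() as scaled:
    mu = pm.Normal('mu', mu=0, sd=10)
    pm.Normal('obs', mu=mu, sd=1, observed=mb, total_size=len(toy))

with pm.Model() as unscaled:
    mu = pm.Normal('mu', mu=0, sd=10)
    pm.Normal('obs', mu=mu, sd=1, observed=mb)

# The observed term should be scaled by total_size / batch_size = 10,
# so the first value should come out roughly 10x the second.
print(scaled.logp(scaled.test_point))
print(unscaled.logp(unscaled.test_point))

That scaling is what I expected to see reflected in the posterior variance. My actual model is below: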
# df is the pandas DataFrame holding the RESIDUAL column
import pymc3 as pm
import pymc3.math as pmath

with pm.Model() as residual_model:
    # Setup: feed the residuals to the model in minibatches of 10,000 rows
    batch_size = 10000
    sample_size = len(df)
    residuals = pm.Minibatch(df['RESIDUAL'].values, batch_size)

    # Priors
    bias_all = pm.Normal('bias_all', mu=0, sd=100)
    sd_all = pm.HalfNormal('sd_all', sd=100)
    bias_inlier = pm.Normal('bias_inlier', mu=0, sd=100)
    beta = pm.Normal('beta', mu=0, sd=100)
    beta_0 = pm.Normal('beta_0', mu=0, sd=100)
    sd_inlier = pm.HalfNormal('sd_inlier', sd=100)
    # The outlier scale is the inlier scale plus a non-negative offset
    sd_outlier = pm.HalfNormal('sd_outlier', sd=100) + sd_inlier

    # Likelihood models
    # Pooled model over all residuals; total_size should rescale the minibatch logp
    likelihood_all = pm.Normal('all', mu=bias_all, sd=sd_all,
                               observed=residuals, total_size=sample_size)

    # Inlier/outlier mixture, weighted by a logistic function of |residual|
    outlier_prob = pmath.invlogit(beta * abs(residuals) + beta_0)
    inlier_loglike = pm.Normal.dist(mu=bias_inlier, sd=sd_inlier).logp(residuals)
    outlier_loglike = pm.Normal.dist(mu=bias_inlier, sd=sd_outlier).logp(residuals)
    likelihood_inlier = pm.Potential(
        'inlier',
        ((1 - outlier_prob) * inlier_loglike).sum() + (outlier_prob * outlier_loglike).sum()
    )

    # Posterior
    resid_trace = pm.sample(4000, step=pm.Metropolis(), cores=32)