I have realized that one can also reproduce the same behaviour with an all-zero-intercept model. For the sake of simplicity I will drop the intercept here:
import pymc as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt
def _broadcast_border(border, data):
if isinstance(border, (float,int)):
if data is None:
return np.array([border])
else:
return np.ones(data.shape)*border
elif border.ndim==1:
border = border[:,None]
return border
def get_model(x, noise_mu, noise_sd, a_mu, a_sd, nlines,
              lower, upper, observed=None, centre_data=False, rng=None):
    """Build a censored, zero-intercept linear PyMC model.

    When ``observed`` is None, the function instead samples the prior
    predictive and returns a single random draw of fake data; otherwise
    it returns the ``pm.Model`` ready for posterior sampling.

    ``centre_data`` subtracts each line's observation mean from the data
    (shifting the censoring borders by the same amount) and centres ``x``
    around its mean; the caller's arrays are copied first.

    NOTE(review): ``nlines`` is not used in the body — the per-line shape
    comes from broadcasting ``lower``/``upper``/``observed``.
    """
    if rng is None:
        rng = np.random.default_rng(0)
    # Bring the borders to a column/array form that broadcasts per line.
    lower = _broadcast_border(lower, observed)
    upper = _broadcast_border(upper, observed)
    if centre_data and observed is not None:
        # Copy so the caller's inputs are left untouched.
        observed = observed.copy()
        lower = lower.copy()
        upper = upper.copy()
        x = x.copy()
        # Per-line means, computed once before anything is shifted.
        row_means = observed.mean(axis=-1)[:, None]
        lower -= row_means
        upper -= row_means
        observed -= row_means
        x -= x.mean()
    with pm.Model() as model:
        x = pm.Data("x", x)
        noise = pm.InverseGamma("noise", mu=noise_mu, sigma=noise_sd)
        a = pm.Normal("a", a_mu, a_sd)
        line_mean = (a * x)[None, :]
        dist = pm.Normal.dist(line_mean, noise)
        pm.Censored("obs", dist, lower=lower, upper=upper,
                    observed=observed)
    if observed is None:
        # No data supplied: draw fake observations from the prior
        # predictive and hand back one randomly chosen draw.
        nsamples = 1000
        with model:
            idata = pm.sample_prior_predictive(nsamples, random_seed=rng)
        return idata["prior"]["obs"].to_numpy()[0, rng.integers(0, nsamples), :, :]
    return model
# --- Simulate censored data from a known model, then refit it. ---
seed = 0
rng = np.random.default_rng(seed)
nlines = 100

# Per-line censoring borders (lower well below the data, upper near zero).
lower = rng.integers(-10, -8, size=nlines).astype(float)
upper = rng.integers(-1, 1, size=nlines).astype(float)
x = np.linspace(0, 10, num=10)

# Draw fake data from the prior predictive of the "true" model.
fake_data = get_model(
    x, noise_mu=1, noise_sd=0.1, a_mu=-1, a_sd=0.1,
    nlines=nlines, lower=lower, upper=upper,
)

# Fit a weakly-informed model to the (centred) fake data.
model = get_model(
    x, noise_mu=1, noise_sd=0.5, a_mu=0, a_sd=2,
    nlines=nlines, lower=lower, upper=upper,
    observed=fake_data, centre_data=True,
)
with model:
    idata = pm.sample(draws=1000, tune=1000, chains=4, cores=4,
                      random_seed=rng)
    pm.sample_posterior_predictive(idata, extend_inferencedata=True,
                                   random_seed=rng)
    pm.compute_log_likelihood(idata, extend_inferencedata=True)

# Posterior-predictive check and LOO-PIT diagnostics side by side.
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
az.plot_ppc(idata, ax=ax[0], num_pp_samples=1000)
az.plot_loo_pit(idata, ax=ax[1], y="obs")
In this case the PPC and LOO-PIT plots look like:
But when I set, for instance, just upper = np.inf (while lower still varies per line), the problem is fixed! The same happens when both lower = -np.inf and upper = np.inf.


