Censored linear regression, relatively good PPC, horrible LOO-PIT

I ran into an issue where, for a censored model, the PPC looks quite good but the LOO-PIT looks quite bad (a tilted integral transform, as if there were a bias). After playing around a bit I was able to strip it down to a toy model where similar behaviour happens. It is basically a linear regression with the following differences:

1- It is censored both above and below (the bounds may vary across observations rather than being constant for all data).
2- Each observation line has a different intercept, but all lines share the same slope (see the equations just below).
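
In equation form, the data-generating process I have in mind is roughly (my own notation, not part of the code below):

y^{\text{latent}}_{ij} = a\, x_j + b_i + \epsilon_{ij}, \qquad \epsilon_{ij} \sim \mathcal{N}(0, \text{noise}^2)

y_{ij} = \min\big(\max\big(y^{\text{latent}}_{ij}, \text{lower}_i\big), \text{upper}_i\big)

where i indexes the lines (each with its own intercept b_i and its own bounds) and j indexes the shared x grid.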

The model is below:

import pymc as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt


def _broadcast_border(border, data):
  """Broadcast a scalar or per-line (1D) bound to line up with the observed data."""

  if isinstance(border, (float,int)):
    if data is None:
      return np.array([border])
    else:
      return np.ones(data.shape)*border
  elif border.ndim==1:
    border = border[:,None]

  return border


def get_model(x, noise_mu, noise_sd, a_mu, a_sd, b_mu, b_sd, nlines,
              lower, upper, observed=None, centre_data=False, rng=None):

  if rng is None:
    rng = np.random.default_rng(0)

  lower = _broadcast_border(lower, observed)
  upper = _broadcast_border(upper, observed)

  if centre_data and observed is not None:
    # Centre each line on its own mean and shift the bounds accordingly.

    observed = observed.copy()
    lower = lower.copy()
    upper = upper.copy()
    x = x.copy()

    lower -= observed.mean(axis=-1)[:,None]
    upper -= observed.mean(axis=-1)[:,None]
    observed -= observed.mean(axis=-1)[:,None]
    x -= x.mean()

  with pm.Model() as model:

    x = pm.Data("x", x)

    noise = pm.InverseGamma("noise", mu=noise_mu, sigma=noise_sd)
    a = pm.Normal("a", a_mu, a_sd)
    b = pm.Normal("b", b_mu, b_sd, size=nlines)

    line_mean = (a*x)[None,:] + b[:,None]

    dist = pm.Normal.dist(line_mean, noise)
    pm.Censored("obs", dist, lower=lower, upper=upper,
                observed=observed)

  # With no observations given, return fake data drawn from the prior predictive.
  if observed is None:

    nsamples = 1000
    with model:
      idata=pm.sample_prior_predictive(nsamples, random_seed=rng)

    # Pick one random prior predictive draw to use as the fake dataset.
    obs = idata["prior"]["obs"].to_numpy()[0, rng.integers(0, nsamples), :, :]

    return obs

  return model

So I generated and fitted my data using the following settings:

seed = 0
rng = np.random.default_rng(seed)
nlines = 100

lower = rng.integers(-1,3, size=nlines).astype(float)
upper = rng.integers(9,12, size=nlines).astype(float)

x = np.linspace(0, 10, num=10)

fake_data=\
  get_model(x, noise_mu=1, noise_sd=0.1, a_mu=-1, a_sd=0.1,
            b_mu = 10, b_sd=2, nlines=nlines, lower=lower, upper=upper)


#fit to data
model=\
get_model(x, noise_mu=1, noise_sd=0.5, a_mu=-1, a_sd=0.5,
          b_mu = 10, b_sd=5, nlines=nlines, lower=lower, upper=upper,
          observed=fake_data, centre_data=False)

with model:
  idata = pm.sample(draws=1000, tune=1000, chains=4, cores=4,
                    random_seed=rng)
  pm.sample_posterior_predictive(idata, extend_inferencedata=True,
                                 random_seed=rng)
  pm.compute_log_likelihood(idata, extend_inferencedata=True)

The PPC and LOO-PIT I get from this are:

The PPC looks relatively good but the LOO-PIT is horrible (the parameter estimates are correct). In this case there are a couple of bad Pareto k values.
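
For reference, the figures here and below were produced with the standard ArviZ plotting calls, along the lines of:

fig, ax = plt.subplots(1, 2, figsize=(10, 5))
az.plot_ppc(idata, ax=ax[0], num_pp_samples=1000)
az.plot_loo_pit(idata, ax=ax[1], y="obs")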

Of course, the sane thing to do might be to centre the data with respect to the intercepts first, but that does not help (the option is available in get_model):

It does, I think, improve the PPC, and now there are no observations with bad Pareto k values, yet the LOO-PIT is still horrible (parameter estimates again correct). The LOO-PIT looks a bit like what you would get from a biased model (a tilted noisy line), although not completely.

Lo(o) and behold, when fitting the data I set lower and upper to -infinity and +infinity (and also do the centring), and I get the following:

Bad PPC (it misses the accumulation of data at the borders), but a much better looking LOO-PIT with perhaps only a bit of bias (and again no bad Pareto k values). As expected, the slope estimate is off.
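
Concretely, that last fit is just the same get_model call with infinite bounds and centring switched on, i.e. something like:

model_uncensored = \
  get_model(x, noise_mu=1, noise_sd=0.5, a_mu=-1, a_sd=0.5,
            b_mu=10, b_sd=5, nlines=nlines, lower=-np.inf, upper=np.inf,
            observed=fake_data, centre_data=True)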

So is there something inherently wrong with applying LOO-PIT to models with censoring? I roughly know how it works (a weighted mean of the probability integral transform of an observation with respect to its posterior predictive samples, where the weights are importance weights derived from the pointwise log-likelihoods), but I can't really see why the LOO-PIT should look so bad given the PPC. Perhaps I can try to look at the integral transforms one by one and see if there is an outlier with high weight, but I don't know, it seems like that probably wouldn't be the answer.
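
For reference, my understanding of the quantity being computed per observation y_i is roughly

\text{LOO-PIT}_i \approx \sum_s w_{i,s} \, \mathbb{1}\!\left[\hat{y}_{i,s} \leq y_i\right]

where \hat{y}_{i,s} are the posterior predictive draws and w_{i,s} are the normalised (Pareto-smoothed) leave-one-out importance weights; these values should be approximately uniform over observations for a well-calibrated model.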

PS: I have tried a case where lower and upper are the same for all data (0 and 10 respectively); it still has the same problem.

@OriolAbril

Kudos for this :joy:


I have realized that one can also replicate the same behaviour with a model that has no intercepts (all set to zero). For the sake of simplicity I will drop it here:

import pymc as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt


def _broadcast_border(border, data):

  if isinstance(border, (float,int)):
    if data is None:
      return np.array([border])
    else:
      return np.ones(data.shape)*border
  elif border.ndim==1:
    border = border[:,None]

  return border


def get_model(x, noise_mu, noise_sd, a_mu, a_sd, nlines,
              lower, upper, observed=None, centre_data=False, rng=None):

  if rng is None:
    rng = np.random.default_rng(0)

  lower = _broadcast_border(lower, observed)
  upper = _broadcast_border(upper, observed)

  if centre_data and observed is not None:

    observed = observed.copy()
    lower = lower.copy()
    upper = upper.copy()
    x = x.copy()

    lower -= observed.mean(axis=-1)[:,None]
    upper -= observed.mean(axis=-1)[:,None]
    observed -= observed.mean(axis=-1)[:,None]
    x -= x.mean()

  with pm.Model() as model:

    x = pm.Data("x", x)

    noise = pm.InverseGamma("noise", mu=noise_mu, sigma=noise_sd)
    a = pm.Normal("a", a_mu, a_sd)

    line_mean = (a*x)[None,:]

    dist = pm.Normal.dist(line_mean, noise)
    pm.Censored("obs", dist, lower=lower, upper=upper,
                observed=observed)

  if observed is None:

    nsamples = 1000
    with model:
      idata=pm.sample_prior_predictive(nsamples, random_seed=rng)

    obs = idata["prior"]["obs"].to_numpy()[0, rng.integers(0, nsamples),:,:]

    return obs

  return model


seed = 0
rng = np.random.default_rng(seed)
nlines = 100

lower = rng.integers(-10,-8, size=nlines).astype(float)
upper = rng.integers(-1,1, size=nlines).astype(float)

x = np.linspace(0, 10, num=10)

fake_data=\
  get_model(x, noise_mu=1, noise_sd=0.1, a_mu=-1, a_sd=0.1,
            nlines=nlines, lower=lower, upper=upper)

#fit to data
model=\
get_model(x, noise_mu=1, noise_sd=0.5, a_mu=0, a_sd=2,
          nlines=nlines, lower=lower, upper=upper,
          observed=fake_data, centre_data=True)

with model:
  idata = pm.sample(draws=1000, tune=1000, chains=4, cores=4,
                    random_seed=rng)
  pm.sample_posterior_predictive(idata, extend_inferencedata=True,
                                 random_seed=rng)
  pm.compute_log_likelihood(idata, extend_inferencedata=True)


fig,ax = plt.subplots(1, 2, figsize=(10, 5))
az.plot_ppc(idata, ax=ax[0], num_pp_samples=1000)
az.plot_loo_pit(idata, ax=ax[1], y="obs")

In this case the PPC and LOO-PIT look like:

But when I set, for instance, just upper = np.inf (lower is still variable), then it is fixed!

or when lower = -np.inf and upper = np.inf

That is interesting. I’ll take a good look whenever I have some available time.

I suspect the bad LOO-PIT with the excess density around 1 might be due to the interaction between censoring and the PIT transformation. I don't remember right now which one it is, but it uses \leq or \geq, so one of the bounds would behave like an uncensored bound (with the probability at the bound going to 0 or 1), whereas the other won't: it will have a discrete probability mass exactly at the bound.

The good LOO-PIT has me a bit more puzzled. There is no intuition “speaking” to me (yet?) so I’ll have to carefully check what is going on.


@OriolAbril Thanks for the reply. I assume you are talking about the first line here:

def _loo_pit(y, y_hat, log_weights):
    """Compute LOO-PIT values."""
    sel = y_hat <= y
    if np.sum(sel) > 0:
        value = np.exp(_logsumexp(log_weights[sel]))
        return min(1, value)
    else:
        return 0

where y_hat are the samples and y is the observed value. Indeed, looking at the list of LOO-PIT values, one sees an excess of values equal to 1. These seem to result from observations where sel = y_hat <= y is satisfied by essentially all posterior predictive samples, because y_hat == y holds for almost all of them. So these are observations that are (almost) completely censored (right-censored, I guess?). On the other hand, if you change this to sel = y_hat < y, then very few samples satisfy it and you get an excess of near-zero values instead. Would it be fair to exclude censored observations from such an analysis, or is there a better way to make the LOO-PIT analysis account for such values?
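
A tiny made-up numerical sketch of that tie effect, with equal weights standing in for the actual importance weights, just to make the asymmetry concrete:

import numpy as np

upper = 10.0
y = upper                          # an upper-censored observation
# pretend 9 out of 10 predictive draws are also censored at the bound,
# while one falls below it
y_hat = np.array([upper] * 9 + [8.7])

print(np.mean(y_hat <= y))  # 1.0 : "<=" counts every tie at the bound, PIT pushed to 1
print(np.mean(y_hat < y))   # 0.1 : "<"  drops the ties, PIT pushed towards 0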

@OriolAbril I think the issue with LOO-PIT failing for censored data is indeed a problem at the borders. For instance, for y = upper and \hat{y} mostly equal to upper, we have a situation where the LOO-PIT value is very high (close to 1), because \hat{y} \leq y holds despite the fact that most of the \hat{y} values could still potentially be higher than y. I think the same loss of precision also happens at the lower border, since for a censored y we really don't know what its true value is (although to some extent \hat{y} \leq y is less biased in this case).

I think the most sensible option for now is to create a new InferenceData object where the censored observations and their corresponding samples are removed. I tested removing both upper- and lower-censored observations, as well as each individually. It only seems to get fully fixed when both are removed; however, the biggest bias seems to come from the upper boundary, as would be expected.

nothing removed:

both removed:

upper censored removed:

lower censored removed:

Note that I am not doing as above, where I replaced lower and upper during fitting; here I use the correct lower and upper for fitting and only remove the censored values for the LOO-PIT. I don't know how to do the subsetting when the observation dimension is multidimensional, so I had to write a version where it is flattened:


import pymc as pm
import numpy as np
import matplotlib.pyplot as plt
import pytensor.tensor as pt
import arviz as az

from arviz.stats import loo_pit
from arviz import InferenceData
from xarray import DataArray, Dataset

def get_model(x, noise_mu, noise_sd, a_mu, a_sd, b_mu, b_sd, nlines,
              lower, upper, observed=None, centre_data=False, rng=None):

  if rng is None:
    rng = np.random.default_rng(0)


  if centre_data and observed is not None:

    observed = observed.copy()
    lower = lower.copy()
    upper = upper.copy()
    x = x.copy()

    # Reshape the flat observations back to (nlines, npoints) for per-line centring.
    s0 = observed.shape[0]
    s1 = s0 / nlines

    assert s1 == int(s1)
    s1 = int(s1)

    observed = np.reshape(observed, (nlines, s1))
    lower = np.reshape(lower, (nlines, s1))
    upper = np.reshape(upper, (nlines, s1))


    lower -= observed.mean(axis=-1)[:,None]
    upper -= observed.mean(axis=-1)[:,None]
    observed -= observed.mean(axis=-1)[:,None]
    x -= x.mean()


    lower = lower.flatten()
    upper = upper.flatten()
    observed = observed.flatten()


  with pm.Model() as model:

    x = pm.Data("x", x)

    noise = pm.InverseGamma("noise", mu=noise_mu, sigma=noise_sd)
    a = pm.Normal("a", a_mu, a_sd)
    b = pm.Normal("b", b_mu, b_sd, size=nlines)

    line_mean = (a*x)[None,:] + b[:,None]
    line_mean = pt.flatten(line_mean)

    dist = pm.Normal.dist(line_mean, noise)
    pm.Censored("obs", dist, lower=lower, upper=upper,
                observed=observed)

  if observed is None:

    nsamples = 1000
    with model:
      idata=pm.sample_prior_predictive(nsamples, random_seed=rng)

    obs = idata["prior"]["obs"].to_numpy()[0, rng.integers(0, nsamples),:]

    return obs

  return model


seed = 0
rng = np.random.default_rng(seed)
nlines = 100

lower = rng.integers(-1,3, size=nlines).astype(float)
upper = rng.integers(9,12, size=nlines).astype(float)

upper = np.tile(upper[:,None], (1, 10)).flatten().squeeze()
lower = np.tile(lower[:,None], (1, 10)).flatten().squeeze()
x = np.linspace(0, 10, num=10)

fake_data=\
  get_model(x, noise_mu=1, noise_sd=0.1, a_mu=-1, a_sd=0.1,
            b_mu = 10, b_sd=2, nlines=nlines, lower=lower, upper=upper)


#fit to data
model=\
get_model(x, noise_mu=1, noise_sd=0.5, a_mu=-1, a_sd=0.5,
          b_mu = 10, b_sd=5, nlines=nlines, lower=lower, upper=upper,
          observed=fake_data, centre_data=False)

with model:
  idata = pm.sample(draws=1000, tune=1000, chains=4, cores=4,
                    random_seed=rng)
  pm.sample_posterior_predictive(idata, extend_inferencedata=True,
                                 random_seed=rng)
  pm.compute_log_likelihood(idata, extend_inferencedata=True)



# Build a new InferenceData containing only the uncensored observations.
pp_samples = idata.posterior_predictive.obs.copy()
obs_samples = idata.observed_data.obs.copy()
log_likelihood = idata.log_likelihood["obs"].copy()
pos = idata.posterior

# Indices of observations that are strictly inside the bounds (i.e. not censored).
I = np.argwhere((fake_data != lower) & (fake_data != upper)).flatten()

pp_subset = Dataset({"obs":DataArray(pp_samples.isel({"obs_dim_2":I}))})
obs_subset = Dataset({"obs":DataArray(obs_samples.isel({"obs_dim_0":I}))})
log_subset = Dataset({"obs":DataArray(log_likelihood.isel({"obs_dim_0":I}))})

idata_subset = InferenceData()
idata_subset.add_groups({"posterior_predictive":pp_subset})
idata_subset.add_groups({"observed_data":obs_subset})
idata_subset.add_groups({"log_likelihood":log_subset})
idata_subset.add_groups({"posterior":idata.posterior.copy()})
ax = az.plot_loo_pit(idata_subset, y="obs")

For eyeballing whether or not the amount of censoring is reasonable, I guess the best option is to look at the ends of the PPC and check whether the samples show a similar accumulation at the borders as the observations do (less easy to interpret when there are multiple lower or upper boundaries, though…)
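
Alternatively, one could compare the censoring fractions numerically instead of eyeballing the PPC tails; a quick sketch using the variables from the flattened script above:

pp = idata.posterior_predictive["obs"].to_numpy()  # shape (chains, draws, n_obs)

# fraction of values sitting exactly at each bound (Censored clips, so equality is exact),
# in the data vs. in the posterior predictive
obs_upper_frac = np.mean(fake_data == upper)
obs_lower_frac = np.mean(fake_data == lower)
pp_upper_frac = np.mean(pp == upper)
pp_lower_frac = np.mean(pp == lower)

print(f"upper-censored fraction: observed {obs_upper_frac:.3f} vs predictive {pp_upper_frac:.3f}")
print(f"lower-censored fraction: observed {obs_lower_frac:.3f} vs predictive {pp_lower_frac:.3f}")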
