I’m curious if there is a way to use Bound or Potential to fit a regression model where the response variable is censored. I’ll set up a regression problem, loosely based on this example: https://docs.pymc.io/notebooks/censored_data.html
import pymc3 as pm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import theano.tensor as tt
# Apply the 'seaborn-darkgrid' matplotlib style sheet to the figures drawn below.
# NOTE(review): newer matplotlib releases renamed the seaborn style sheets
# (e.g. 'seaborn-v0_8-darkgrid') - confirm the installed version accepts this name.
plt.style.use('seaborn-darkgrid')
# Seed NumPy's global RNG so every run draws the same synthetic data.
np.random.seed(1618)

# Simulate a linear relationship: response = 1 + 2*x plus Gaussian noise (sd 1).
size = 500
x = np.random.normal(loc=0, scale=3, size=size)
samples = np.random.normal(loc=1 + 2 * x, scale=1, size=size)

# Bounds of the observable window for the response.
low = -5.
high = 7.

# Keep only the responses strictly inside (low, high). Draws outside the
# window are dropped (not clipped to the bounds), and `x` keeps its full
# length, so `censored` is shorter than `x`.
censored = samples[np.logical_and(samples > low, samples < high)]
# Visualize uncensored and censored data side by side.
# Both panels share their x and y axes so the two distributions are directly
# comparable.
_, axarr = plt.subplots(ncols=2, figsize=[16, 4], sharex=True, sharey=True)
panels = [('Uncensored', samples), ('Censored', censored)]
for ax, (title, data) in zip(axarr, panels):
    # NOTE(review): `distplot` is deprecated in seaborn >= 0.11 - confirm the
    # installed version still provides it before upgrading.
    sns.distplot(data, ax=ax)
    ax.set_title(title)
plt.show()
## Baseline with uncensored data:
# Uncensored model: linear regression of the full `samples` vector on `x`.
# Every random variable must be created inside the `with` block so that it is
# registered on `uncensored_model`.
with pm.Model() as uncensored_model:
    # Priors on the intercept (a) and slope (b).
    a = pm.Normal('a', 0, 10)
    b = pm.Normal('b', 0, 2)
    # Linear predictor, stored in the trace under the name 'mu'.
    mu = pm.Deterministic('mu', a + b*x)
    # Positive-only prior on the observation noise scale.
    sigma = pm.HalfNormal('sigma', sigma=3)
    # Likelihood evaluated on all simulated responses (nothing filtered out).
    observed = pm.Normal('observed', mu=mu, sigma=sigma, observed=samples)
# Draw posterior samples from the uncensored model and inspect them.
with uncensored_model:
    trace = pm.sample(tune=1000)  # Increase `tune` to avoid divergences
    # Plot only the three scalar parameters; 'mu' is left out of the list.
    varnames = ['a', 'b', 'sigma']
    # Called inside the model context so the model is available to the
    # plotting call. In a notebook the trailing semicolon suppresses the
    # echoed return value.
    pm.traceplot(trace, varnames);
### Now, how can I fit the same regression to the censored data, using Potential or Bound (or something else)?
Any ideas?