pm.sample_ppc breaks with AttributeError

I have the following code:

#std_range = df.std_range.values[:, np.newaxis]
#shared(std_range, broadcastable=(False, True))
#x1_shift = ppc['obs'][:K]
x1_shift = shared(df['x1'].values[:, np.newaxis], broadcastable=(False, True))
x2obs = df['x2'].values[:, np.newaxis]
#Model for x2
with pm.Model() as x2model:
    alpha = pm.Gamma('alpha', 1, 1)
    beta = pm.Beta('beta', 1, alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))
    #a = pm.Uniform('a', 0, 50, shape=K)
    #b = pm.Uniform('b', 0, 50, shape=K)
    eta = pm.Uniform('eta', 0, 20, shape=K)
    delta = pm.Uniform('delta', 0, 10., shape=K)
    mu = pm.Deterministic('mu', eta + delta * x1_shift)
    bp = [pm.Bound(pm.Poisson, lower=1, upper=70).dist(mu=mu[i]) for i in range(K)] 
    #x = pm.Mixture('obs', w, pm.Poisson.dist(mu), observed=df['x1'].values)
    x2 = pm.Mixture('obsx2', w, bp, observed=x2obs)
    
with x2model:
    x2trace = pm.sample(1000, step=pm.Metropolis(), n_init=1000, random_seed=SEED)
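
For reference, stick_breaking here is the usual stick-breaking helper, roughly as in the PyMC3 Dirichlet process mixture notebook (not shown in the snippet above):

import theano.tensor as tt

def stick_breaking(beta):
    # turn the Beta(1, alpha) draws into mixture weights
    portion_remaining = tt.concatenate([[1], tt.extra_ops.cumprod(1 - beta)[:-1]])
    return beta * portion_remaining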

And now sample_ppc breaks:

with x2model:
    x2ppc = pm.sample_ppc(x2trace, 1000)
    
fig, ax = plt.subplots(figsize=(8,6))
ax.hist(x2ppc['obsx2'], bins=50, normed=True, alpha=0.3, color='green', label='Posterior predictions')
ax.hist(df['x2'], bins=50, normed=True, alpha=0.3, label='Training data')
ax.legend(loc=1)

The error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/mixture.py in _comp_samples(self, point, size, repeat)
    101         try:
--> 102             samples = self.comp_dists.random(point=point, size=size, repeat=repeat)
    103         except AttributeError:

AttributeError: 'list' object has no attribute 'random'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-38-802c9cc44cd5> in <module>()
      1 with x2model:
----> 2     x2ppc = pm.sample_ppc(x2trace, 1000)
      3 
      4 fig, ax = plt.subplots(figsize=(8,6))
      5 ax.hist(x2ppc['obsx2'], bins=50, normed=True, alpha=0.3, color='green', label='Posterior predictions')

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/sampling.py in sample_ppc(trace, samples, model, vars, size, random_seed, progressbar)
    389         for var in vars:
    390             ppc[var.name].append(var.distribution.random(point=param,
--> 391                                                          size=size))
    392 
    393     return {k: np.asarray(v) for k, v in ppc.items()}

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/mixture.py in random(self, point, size, repeat)
    131                                      dist_shape=self.shape,
    132                                      size=size).squeeze()
--> 133         comp_samples = self._comp_samples(point=point, size=size, repeat=repeat)
    134 
    135         if comp_samples.ndim > 1:

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/mixture.py in _comp_samples(self, point, size, repeat)
    103         except AttributeError:
    104             samples = np.column_stack([comp_dist.random(point=point, size=size, repeat=repeat)
--> 105                                        for comp_dist in self.comp_dists])
    106 
    107         return np.squeeze(samples)

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/mixture.py in <listcomp>(.0)
    103         except AttributeError:
    104             samples = np.column_stack([comp_dist.random(point=point, size=size, repeat=repeat)
--> 105                                        for comp_dist in self.comp_dists])
    106 
    107         return np.squeeze(samples)

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/continuous.py in random(self, point, size, repeat)
   1183         return generate_samples(self._random, lower, upper, point,
   1184                                 dist_shape=self.shape,
-> 1185                                 size=size)
   1186 
   1187     def logp(self, value):

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/distribution.py in generate_samples(generator, *args, **kwargs)
    336             samples = generator(size=size, *args, **kwargs)
    337         else:
--> 338             samples = generator(size=1, *args, **kwargs)
    339     else:
    340         if size is not None:

/Users/vmullachery/anaconda/envs/dl3.6/lib/python3.6/site-packages/pymc3/distributions/continuous.py in _random(self, lower, upper, point, size)
   1171             sample = self.dist.random(point=point, size=n)
   1172             select = sample[np.logical_and(sample > lower, sample <= upper)]
-> 1173             samples[i:(i + len(select))] = select[:]
   1174             i += len(select)
   1175             n -= len(select)

ValueError: could not broadcast input array from shape (12) into shape (1)

I don't have your data to reproduce the error, but the following seems to work for me:

K = 3
mu0 = [2, 25, 80]
p = [.1,.7,.2]
nsamp = 100
tmp = np.asarray([np.random.poisson(mu_, size=(nsamp,)) for mu_ in mu0]).T
obs = tmp[range(nsamp), np.random.choice(K, nsamp, p=p)]

obs[obs<1] = 1
obs[obs>70] = 70

boundPoisson = pm.Bound(pm.Poisson, lower=1, upper=70)
with pm.Model() as m:
    w = pm.Dirichlet('w', np.ones(K))
    mu = pm.HalfNormal('mu', shape=K)
    bp = [boundPoisson.dist(mu=mu[i]) for i in range(K)] 
    x2 = pm.Mixture('obsx2', w, bp, observed=obs)
    trace = pm.sample()
    x2ppc = pm.sample_ppc(trace, 1000)

Hi @junpenglao
Thanks for the response. I have sent you the data and the Jupyter notebook separately.

Got it, I will have a look.

OK, having a look at your data, the problem is:
mu = pm.Deterministic('mu', eta + delta * x1_shift)
resulting in a 2D tensor, which does not play well with the Mixture class. In this case I would suggest writing a custom mixture logp function for the Poisson mixture. You can have a look in our docs at the example of a custom NormalMixture logp implementation.

OK, thank you @junpenglao.
So the trick will be to write:
1- a logp function for the PoissonMixture
2- then combine them in a DensityDist (or its discrete equivalent)?

If you are going to reuse it later on, wrapping the logp and the random method into a new distribution might be worth the effort. Otherwise I would just write the logp function, and do the PPC by selecting points from the trace and generating random samples.
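
As a rough illustration of option 2, a DensityDist-based Poisson mixture could look like the sketch below; the priors and the toy data here are only placeholders, not your actual model:

import numpy as np
import pymc3 as pm
import theano.tensor as tt
from pymc3.math import logsumexp

K = 3
x2obs_toy = np.random.poisson(10, size=(100, 1))  # stand-in for your x2obs

def poisson_mixture_logp(w, mu):
    # marginalized (log-sum-exp over the K components) logp of a Poisson mixture
    def logp(value):
        comp_logp = pm.Poisson.dist(mu).logp(value)  # broadcasts to (n, K)
        return tt.sum(logsumexp(tt.log(w) + comp_logp, axis=-1))
    return logp

with pm.Model():
    w = pm.Dirichlet('w', np.ones(K))
    mu = pm.HalfNormal('mu', sd=10., shape=K)
    pm.DensityDist('obsx2', poisson_mixture_logp(w, mu), observed=x2obs_toy)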

Also some tips:

  • Mixture models are difficult to sample; check out the related posts here on Discourse and also my gist for more information on how to model them properly
  • try NUTS when possible; looking at the trace from the notebook, I don't think Metropolis converged
  • n_init is probably not doing what you intend it to do. You should control the number of tuning samples with the tune parameter (see the sketch after this list)
  • in your example, the boundPoisson is not really doing anything, as the observed data is already bounded between 1 and 70.
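
For instance, tips 2 and 3 together would look roughly like this (tune=2000 is just an illustrative value):

with x2model:
    # let pm.sample default to NUTS and control the tuning phase with `tune`
    x2trace = pm.sample(draws=1000, tune=2000, random_seed=SEED)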

So what I would do is something like:

import theano.tensor as tt
from pymc3.math import logsumexp
with pm.Model() as x2model:
    alpha = pm.Gamma('alpha', 1, 1)
    beta = pm.Beta('beta', 1, alpha, shape=K)
    w = pm.Deterministic('w', stick_breaking(beta))
    eta = pm.Uniform('eta', 0, 20, shape=K)
    delta = pm.Uniform('delta', 0, 10., shape=K)
    mu = pm.Deterministic('mu', eta + delta * x1_shift)
    # use a potential to add the mixture logp to the model
    pm.Potential('logp', logsumexp(tt.log(w) +
                                   pm.Poisson.dist(mu).logp(x2obs), axis=-1).sum())

After you do the inference, the posterior predictive check:

from scipy.stats import poisson
wpost = x2trace['w']
mupost = x2trace['mu']
ndraws = wpost.shape[0]
nppc = 100
ppc = np.zeros((nppc, len(x2obs)))
for n in range(nppc):
    idx = np.random.randint(ndraws)
    pchoice = np.random.choice(K, len(x2obs), p=wpost[idx, :])
    ppc[n, :] = poisson.rvs(mupost[idx, :, :])[range(len(x2obs)), pchoice]

Thank you @junpenglao.
That was very helpful. However, not having bounds actually makes the PPC samples poor:

fig, ax = plt.subplots(figsize=(8,6))
ax.hist(x2ppc[80], bins=50, normed=True, alpha=0.3, color='green', label='Posterior predictions')
ax.hist(df['x2'].values, bins=50, normed=True, alpha=0.3, label='Training data')
ax.legend(loc=1)

As an example,

mupost[0].max(), x2ppc[0].max(), x2ppc[10].max(), df['x2'].values.max(), 

yields:

(454.5148963473755, 108.0, 97.0, 56)

I tried bounding mu as below, but that does not work:

mu = pm.Bound(pm.Deterministic, lower=1, upper=70)('mu', eta + delta * x1_shift)
AttributeError: 'function' object has no attribute 'dist'

You cannot bound a Deterministic. It would be easier to bound the PPC samples during the random generation:

ppc[ppc<1] = 1
ppc[ppc>70] = 70

For completeness, if you would like to bound the likelihood:

from pymc3.distributions.dist_math import bound
lower, upper = 1, 70
with pm.Model() as x2model:
    ...
    # apply the bound to the observed values (x2obs)
    pm.Potential('logp', bound(
                               logsumexp(tt.log(w) +
                                         pm.Poisson.dist(mu).logp(x2obs), axis=-1).sum(),
                               x2obs >= lower, x2obs <= upper))

ok @junpenglao

I tried that; however, it throws the distribution off:

# with x2model:
#     x2ppc = pm.sample_ppc(x2trace, 1000)
# A direct sample_ppc is not possible, so we have this contraption from
# @junpenglao at pymc_devs

from scipy.stats import poisson
wpost = x2trace['w']
mupost = x2trace['mu']
ndraws = wpost.shape[0]
nppc = 100
x2ppc = np.zeros((nppc, len(x2obs)))
for n in range(nppc):
    idx = np.random.randint(ndraws)
    pchoice = np.random.choice(K, len(x2obs), p=wpost[idx, :]/wpost[idx, :].sum())
#     mupost[mupost<=1] = 1
#     mupost[mupost>=70] = 70
    x2ppc[n, :] = poisson.rvs(mupost[idx, :, :])[range(len(x2obs)), pchoice]
x2ppc[x2ppc<1] = 1
x2ppc[x2ppc>70] = 70

sns.distplot(np.reshape(sp.stats.mode(x2ppc, axis=0), (-1, 1)), label='mode')
sns.distplot(np.reshape(np.mean(x2ppc, axis=0), (-1, 1)), label='mean')
sns.distplot(df['x2'].values, label='x2')
plt.legend(loc=1)

[image: distplots of the PPC mode, the PPC mean, and the true data labeled 'x2']

Are you sure your trace has converged?
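
For what it is worth, a quick way to look at this (a sketch; the varnames are taken from your model above):

pm.traceplot(x2trace, varnames=['alpha', 'eta', 'delta'])
pm.summary(x2trace, varnames=['alpha', 'eta', 'delta'])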

Since you asked @junpenglao, I reran.
It appears that it did.

It takes a long time to sample, so only 10 samples:
[image]

I don't think you can judge convergence with only 10 samples and without plotting the trace.
If you think the sampling is taking too long and you are happy to settle for an approximation, even if it is a bit biased, you can just use ADVI:

with x2model:
    inference = pm.ADVI()
    approx = inference.fit(n=20000)
    x2trace = approx.sample(3000, include_transformed=True) 
    
    elbos1 = -inference.hist
# check convergence
plt.plot(elbos1)

Also, have a look at http://docs.pymc.io/notebooks/dp_mix.html if you have not done so. I think cells [26] and [37] in that notebook give a much better visualization of how well the model fits and of the posterior predictions than just plotting the mean.
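
For example, overlaying a number of individual posterior predictive draws on the data, roughly in the spirit of those cells, could look like this sketch (reusing x2ppc and df from above):

fig, ax = plt.subplots(figsize=(8, 6))
bins = np.arange(0, 72)
# overlay individual PPC draws as faint step histograms
for n in range(min(50, x2ppc.shape[0])):
    ax.hist(x2ppc[n], bins=bins, histtype='step', normed=True,
            color='green', alpha=0.1)
ax.hist(df['x2'].values, bins=bins, histtype='step', normed=True,
        color='blue', lw=2, label='Training data')
ax.legend(loc=1)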

That notebook is where I started. Austin has a real-valued prediction (a normal mixture of real values). I have a discrete, integer-valued prediction - that's why I took a mixture of Poissons and then the mode().
The challenges I ran into along the way were:

  • discrete mixtures (for x1)
  • bounded discrete mixtures (for x1)
  • dependent bounded discrete mixtures (for x2)
  • non-convergence (with Metropolis)
  • ppc errors

I am waiting for a test run with 50 samples.

Yeah, the bounded discrete case is really the challenge here - did you manage to make an unbounded model work (the same as model 2 in Austin's notebook)?

Yes, unbounded works. If you look at my notebook, that is how I get ‘x1’.
With 50 samples for x2:
[image]