Dear PyMC3 community,
I have a question regarding a Dirichlet-multinomial model I implemented.
I get very different standard deviations for my parameter estimates when I compare the model with explicit nuisance parameters to the model where the nuisance parameters have been integrated out. The mean estimates are nevertheless the same.
%matplotlib inline
import seaborn as sns
import pymc3 as pm
from dirichlet_multinomial import DirichletMultinomial
import numpy as np
from numpy import random as nr
# Data generation
alphas = np.array([5, 7, 1, 0.5])
ndraws = 2000
N = 40
dirichlets = np.array([nr.dirichlet(alphas) for i in range(N)])
data = np.array([nr.multinomial(ndraws, p) for p in dirichlets])
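As a quick plausibility check on the simulated data (my own check, not part of the model), the pooled class frequencies should be close to the normalised alphas:

    # Pooled empirical class frequencies vs. the normalised true alphas.
    print(data.sum(axis=0) / data.sum())  # observed frequencies
    print(alphas / alphas.sum())          # expected: [0.370, 0.519, 0.074, 0.037]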
with pm.Model() as model:
    lowerbound = pm.Bound(pm.Flat, lower=0)
    a = lowerbound('alphas', shape=len(alphas))
    d = pm.Dirichlet('dirichlets', a, shape=(N, len(alphas)))
    obs = pm.Multinomial('data', n=ndraws, p=d, observed=data)
# model with explicit nuisance parameters
with model:
    trace = pm.sample()
pm.traceplot(trace, ['alphas'])
pm.summary(trace, ['alphas'])
This gives the following summary for the alpha parameters:
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  6.062            0.851            0.038            [4.550, 7.803]
  8.400            1.203            0.051            [6.072, 10.692]
  1.032            0.160            0.006            [0.730, 1.339]
  0.595            0.096            0.004            [0.420, 0.775]
  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  4.562          5.488          6.024          6.621          7.843
  6.176          7.532          8.392          9.201          10.878
  0.755          0.922          1.028          1.138          1.366
  0.423          0.524          0.588          0.663          0.782
Whereas if I run the inference with the nuisance parameters integrated out:
with pm.Model() as implicit_model:
    lowerbound = pm.Bound(pm.Flat, lower=0)
    a = lowerbound('alphas', shape=len(alphas))
    obs = DirichletMultinomial('data', n=np.array([ndraws] * N, dtype=np.uint), a=a, observed=data)
with implicit_model:
    implicit_trace = pm.sample()
pm.traceplot(implicit_trace, ['alphas'])
pm.summary(implicit_trace, ['alphas'])
the results are much more confident (the standard deviations of the estimates are much lower):
  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  5.756            0.131            0.008            [5.520, 6.033]
  7.977            0.185            0.011            [7.625, 8.341]
  0.986            0.026            0.001            [0.937, 1.038]
  0.566            0.015            0.001            [0.539, 0.595]
  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  5.486          5.670          5.754          5.848          6.015
  7.610          7.850          7.982          8.105          8.334
  0.934          0.968          0.987          1.003          1.035
  0.539          0.556          0.567          0.578          0.595
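For concreteness, the per-component ratio of the posterior standard deviations between the two parameterisations can be computed directly from the two traces (a quick check, assuming both traces are still in memory):

    # Ratio of posterior SDs: explicit model / integrated-out model.
    sd_ratio = trace['alphas'].std(axis=0) / implicit_trace['alphas'].std(axis=0)
    print(sd_ratio)  # roughly 6-7 for every component, judging from the summaries above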
Traceplot omitted (I am unable to add more than one image per post).
Is this behaviour expected, or do I have an error in my likelihood specification?
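For reference, the Dirichlet-multinomial pmf I am trying to implement (the multinomial with the Dirichlet weights marginalised out) is

$$
P(x \mid n, \alpha) = \frac{n!\,\Gamma(A)}{\Gamma(n + A)} \prod_{k=1}^{K} \frac{\Gamma(x_k + \alpha_k)}{\Gamma(\alpha_k)\, x_k!}, \qquad A = \sum_{k=1}^{K} \alpha_k
$$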
I defined the corresponding DirichletMultinomial log-likelihood as follows:
import numpy as np
import theano.tensor as tt
import pymc3 as pm
# helpers from PyMC3's internals, as used by its built-in distributions
from pymc3.distributions.dist_math import bound, factln
from pymc3.distributions.special import gammaln


class DirichletMultinomial(pm.Discrete):
    def __init__(self, n, a, *args, **kwargs):
        super(DirichletMultinomial, self).__init__(*args, **kwargs)
        self.K = tt.as_tensor_variable(a.shape[-1])
        self.n = tt.as_tensor_variable(n[:, np.newaxis])  # n[#samples, 1]
        if a.ndim == 1:
            self.alphas = tt.as_tensor_variable(a[np.newaxis, :])  # alphas[1, #classes]
            self.A = tt.as_tensor_variable(a.sum(axis=-1))  # A is scalar
        else:
            self.alphas = tt.as_tensor_variable(a)  # alphas[#samples, #classes]
            self.A = tt.as_tensor_variable(a.sum(axis=-1, keepdims=True))  # A[#samples, 1]
        self.mean = self.n * (self.alphas / self.A)
        self.mode = tt.cast(pm.math.tround(self.mean), 'int32')
    def logp(self, value):
        k = self.K
        a = self.alphas
        res = bound(
            tt.sum(
                factln(self.n) + gammaln(self.A) - gammaln(self.A + self.n)
                + tt.sum(gammaln(self.alphas + value) - gammaln(self.alphas) - factln(value), axis=-1),
                axis=-1),
            tt.all(value >= 0),
            tt.all(tt.eq(tt.sum(value, axis=-1, keepdims=True), self.n)),
            tt.all(a > 0),
            k > 1,
            tt.all(tt.ge(self.n, 0), axis=-1),
            broadcast_conditions=False
        )
        return res
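For reference, here is a plain-NumPy sketch of the log-likelihood I intend logp to compute for each row of data (my own cross-check outside of Theano; dm_logpmf is just a helper name I made up):

    import numpy as np
    from scipy.special import gammaln

    def dm_logpmf(x, n, alphas):
        # Dirichlet-multinomial log-pmf for a single vector of counts x.
        A = alphas.sum()
        return (gammaln(n + 1) + gammaln(A) - gammaln(n + A)
                + np.sum(gammaln(x + alphas) - gammaln(alphas) - gammaln(x + 1)))

    # Total log-likelihood of the simulated data at the true alphas:
    print(sum(dm_logpmf(x, ndraws, alphas) for x in data))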
Here also:
Jupyter Notebook with example
I would really appreciate any help regarding this issue.
Best,
Max