I’m confident that this has to be an error in my understanding of what I’m doing, but I’m struggling to even figure out where to go to solve what I believe to be the issue.
I started with this model:
n1_c = 300; n2_c = 300; n3_c = 300
cluster1 = np.random.randn(n1_c) + -1
cluster2 = np.random.randn(n2_c) + 0
cluster3 = np.random.randn(n3_c) + 2
x = np.concatenate((cluster1, cluster2, cluster3))
y = np.concatenate((1*np.ones(n1_c),
2*np.ones(n2_c),
3*np.ones(n3_c))) - 1
with pm.Model() as model:
cutpoints = pm.Normal("cutpoints", mu=[-1,1], sigma=10, shape=2,
transform=pm.distributions.transforms.ordered)
y_ = pm.OrderedLogistic("y", cutpoints=cutpoints, eta=x, observed=y)
idata = pm.sample()
Taken from the documentation of the OrderedLogistic distribution. This runs as expected. I take this and adapt it to my problem with random data:
import numpy as np
import pymc as pm
from scipy.stats import randint
points = randint.rvs(0, 82, size=100)
hfa = randint.rvs(0, 2, size=100)
all_possible_points = np.arange(0,82)
cutpoints_init = np.linspace(all_possible_points.min(), all_possible_points.max(), len(all_possible_points) - 1)
with pm.Model() as model:
cutpoints = pm.Normal("cutpoints", mu=10, sigma=1, shape=len(all_possible_points) - 1, initval=cutpoints_init)
# cutpoints = pm.Normal("cutpoints", mu=0, sigma=1, shape=len(all_possible_points) - 1,
# transform=pm.distributions.transforms.ordered, initval=cutpoints_init)
hfa_effect = pm.Normal('hfa_effect', mu=0, sigma=1)
alpha = pm.Normal('alpha', mu=25, sigma=14)
mu = alpha + hfa_effect * hfa
points_lik = pm.OrderedLogistic('points_lik ', cutpoints=cutpoints, eta=mu, observed=points)
trace = pm.sample()
With or without the initval I get the failure. As far as I can tell, the models are the same, except for the fact that when I run cutpoints.eval() to get a random sample of the distribution the values in the array are unordered unexpectedly whereas when I run the same after the first model the values are ordered. model.debug tells me that the values are associated with non-finite logp, but it’s not clear why and then the debug throws an exception:
IndexError Traceback (most recent call last)
in <cell line: 1>()
----> 1 model.debug()
/usr/local/lib/python3.10/dist-packages/pymc/model/core.py in debug(self, point, fn, verbose)
1824 )
1825 mask = ~np.isfinite(rv_fn_eval)
→ 1826 for value, fn_eval in zip(values[mask], rv_fn_eval[mask]):
1827 print_(f" value = {value} → {fn} = {fn_eval}")
1828 print_()
IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed
In a couple final attempts, I attempted to change up the distribution based on examples I’ve seen in the gallery and in other posts in this forum without success.
with pm.Model() as model:
cutpoints = pm.Dirichlet("cutpoints", a=np.ones(len(all_possible_points) - 1)/81, shape=(len(all_possible_points) - 1))
hfa_effect = pm.Normal('hfa_effect', mu=0, sigma=1)
alpha = pm.Normal('alpha', mu=0, sigma=1)
mu = alpha + hfa_effect * hfa
points_lik = pm.OrderedLogistic('points', cutpoints=cutpoints, eta=mu, observed=points)
trace = pm.sample()
and
def constrainedUniform(N, min=0, max=1):
return pm.Deterministic(
"cutpoints",
pt.concatenate(
[
np.ones(1) * min,
pt.extra_ops.cumsum(pm.Dirichlet("cuts_unknown", a=np.ones(N - 2))) * (max - min)
+ min,
]
),
)
K = len(all_possible_points)
with pm.Model() as model:
cutpoints = constrainedUniform(K, 0, K)
hfa_effect = pm.Normal('hfa_effect', mu=0, sigma=1)
alpha = pm.Normal('alpha', mu=0, sigma=1)
mu = alpha + hfa_effect * hfa
points_lik = pm.OrderedLogistic('points', cutpoints=cutpoints, eta=mu, observed=points)
trace = pm.sample()
And at this point I’ve admitted defeat. Could anyone help point me in the right direction as to where I’m going wrong?