OK, what do you think of this?
import numpy as np
import pymc3 as pm
import theano.tensor as tt

with pm.Model() as m_slope:
    mus_cats = []
    for p in range(Ncategories - 1):
        # Cholesky factor of the 2x2 covariance of (intercept, slope):
        sd_dist = pm.Exponential.dist(1)
        packed_chol = pm.LKJCholeskyCov(f"chol_cov_{p}", eta=4, n=2, sd_dist=sd_dist)
        chol = pm.expand_packed_triangular(2, packed_chol, lower=True)
        cov = pm.math.dot(chol, chol.T)
        # extract rho and sds:
        sigma_ab = pm.Deterministic(f"sigma_cluster_{p}", tt.sqrt(tt.diag(cov)))
        corr = tt.diag(sigma_ab ** -1).dot(cov.dot(tt.diag(sigma_ab ** -1)))
        r = pm.Deterministic(f"Rho_{p}", corr[np.triu_indices(2, k=1)])
        # population means of intercept and slope:
        ab = pm.Normal(
            f"ab_{p}", mu=np.array([-1.8, -0.5]), sd=np.array([0.1, 0.2]), shape=2
        )
        # varying effects:
        ab_cluster = pm.MvNormal(
            f"ab_cluster{p}", mu=ab, chol=chol, shape=(Nclusters, 2)
        )
        # linear predictor for this category:
        mus_cats.append(
            ab_cluster[cluster_id, 0]
            + ab_cluster[cluster_id, 1] * predictor
        )
    mus_cats = tt.as_tensor_variable(mus_cats).T
    # append last (pivot) category, fixed at -2.2:
    vary_pivot = tt.as_tensor_variable(np.full(shape=(Nclusters, 1), fill_value=-2.2))
    mus_cats = tt.horizontal_stack(mus_cats, vary_pivot[cluster_id])
    # preferences in each cluster:
    lat_p = tt.nnet.softmax(mus_cats)
    # keep only preferences for available categories:
    cat_prob = available_cats * lat_p
    # normalize preferences:
    cat_prob = cat_prob / tt.sum(cat_prob, axis=1, keepdims=True)
    R = pm.Multinomial("R", n=N, p=cat_prob, observed=sim_R)
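(Side note: I'm unpacking the sds and the correlation by hand above; if I remember correctly, more recent PyMC3 versions (3.9+, I think) can return them directly from LKJCholeskyCov, roughly like this:)

chol, corr, stds = pm.LKJCholeskyCov(
    f"chol_cov_{p}", eta=4, n=2, sd_dist=sd_dist,
    compute_corr=True,  # returns Cholesky factor, correlations, and sds directly (PyMC3 >= 3.9, I believe)
)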
There are no divergences yet, but sampling is reeeeeaaaaaally slow, so I'm probably doing something wrong.
Note that the intercept-only model samples very fast, so I think it's adding the covariance structure that slows things down.
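In case it's relevant: would a non-centered parameterization of the varying effects help here? Just a sketch of what I mean, replacing the pm.MvNormal line inside the loop (z is a name I made up for the standardized offsets):

# non-centered varying effects (sketch): sample standardized offsets
# and scale them by the Cholesky factor, instead of sampling the
# MvNormal directly
z = pm.Normal(f"z_{p}", mu=0.0, sd=1.0, shape=(2, Nclusters))
ab_cluster = pm.Deterministic(
    f"ab_cluster{p}", ab + tt.dot(chol, z).T  # rows are (intercept, slope) per cluster
)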
Do you see a way to optimize the above implementation?
Thank you very much in advance 