This is what I have running now. But the diagnostics aren’t looking too hot.
with pm.Model() as multinomial_model:
global_alpha_mean = pm.Normal("global_alpha_mean", mu=0, sd=1)
global_alpha_sigma = pm.HalfCauchy("global_alpha_sigma", beta=1)
global_beta_mean = pm.Normal("global_beta_mean", mu=0, sd=1)
global_beta_sigma = pm.HalfCauchy("global_beta_sigma", beta=1)
#Non centered reparametrization
alpha_offset = pm.Normal('a_offset', mu=0, sd=1, shape= n_labels)
local_alphas = pm.Deterministic("local_alphas", global_alpha_mean + alpha_offset * global_alpha_sigma)
#Non centered reparametrization
beta_offset = pm.Normal('beta_offset', mu=0, sd=1, shape=(n_labels,n_cols))
local_betas = pm.Deterministic("local_betas", global_beta_mean + beta_offset * global_beta_sigma)
#centered
#local_alphas = pm.Normal("local_alphas", mu=global_alpha_mean, sd=global_alpha_sigma, shape=n_labels)
#local_betas = pm.Normal("local_betas", mu=global_beta_memodelan, sd=global_beta_sigma, shape=(n_labels,n_cols))
mu = local_alphas[labels] + (local_betas[labels] * input_var).sum(axis=1)
probs = T.nnet.softmax(mu)
y = pm.Categorical('y', p=probs, observed=target_var)
#y = pm.Multinomial('y', n=n_counts, p=probs, observed=target_var)
with multinomial_model:
start=pm.findMAP()
step = pm.Metropolis()
trace = pm.sample(10000, step=step, start=start, tune=1000, njobs=4)[2000:10000:5]