Hierarchical Multinomial Logit Model (conjoint analysis)
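
For reference, the generative model simulated below is: each respondent $r$ has individual partworths $\beta_r \sim \text{MvNormal}(\theta, \Sigma)$, the utility of alternative $c$ in task $i$ is $U_{ic} = x_{ic}^\top \beta_{r(i)}$, and the observed choice follows $P(y_i = c) = \text{softmax}(U_i)_c$.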

import pymc as pm
import pytensor.tensor as pt
import numpy as np
from scipy import special
import arviz as az
from itertools import product
from sklearn.datasets import make_spd_matrix

# ground-truth population partworth means (theta)
theta_syn = np.array([0.5, 1, 0.1])
# possible values of each attribute: binary
attribute_vals = np.array([0, 1])
# 3 attributes
K_syn = 3
# 1000 tasks
N_syn = 1000
# Each task has two alternatives
C_syn = 2
# Generate the possible alternatives: pick one of the two values (1/0) from each attribute
alternatives_syn = np.array(list(product(attribute_vals, repeat=K_syn)))
# All possible choice tasks: combination of two distinct alternatives
tasks_syn = np.array(list(product(alternatives_syn, repeat=C_syn)))
# Drop those identical choice tasks
tasks_syn = np.array([[choice_A, choice_B]
                      for choice_A, choice_B in tasks_syn
                      if not np.all(choice_A == choice_B)])
# Generate choice task (two alternatives each)
## generate the index first
tasks_syn_idx = np.random.choice(np.arange(tasks_syn.shape[0]),
                                 size=N_syn, replace=True)
## indexing
X_syn = tasks_syn[tasks_syn_idx]
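## quick shape check (my addition): N_syn tasks x C_syn alternatives x K_syn attributes
assert X_syn.shape == (N_syn, C_syn, K_syn)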

## Generate 2 respondents
R_syn = 2
R_arr_syn = np.repeat(np.arange(R_syn), repeats=N_syn//R_syn)
## Now generate the beta values
cov_syn = make_spd_matrix(n_dim=theta_syn.size, random_state=0)
## elementwise (Hadamard) product of an SPD matrix with itself stays SPD
## by the Schur product theorem (make_spd_matrix is already SPD, so this
## step mostly just rescales the covariance)
cov_syn = cov_syn.T * cov_syn
## Sample individual partworth
beta_syn = np.random.multivariate_normal(mean=theta_syn, cov=cov_syn, size=R_syn)
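## optional check (my addition): cov_syn must be SPD for the draw above;
## the Cholesky factorization succeeds iff it is
np.linalg.cholesky(cov_syn)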

# compute utility
## vectorized: broadcast each respondent's partworths over the C_syn alternatives
U_syn = (X_syn * beta_syn[R_arr_syn][:, None, :]).sum(axis=2)
## softmax
P_syn = special.softmax(U_syn, axis=1)
## simulate choices
y_syn_multi = np.array([np.random.multinomial(n=1, pvals=p_syn) for p_syn in P_syn])
y_syn = y_syn_multi.argmax(axis=1)
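## quick sanity check (my addition): the empirical share of first-alternative
## choices should be close to the mean simulated probability
print((y_syn == 0).mean(), P_syn[:, 0].mean())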


with pm.Model() as hmnl:
    # population priors
    # K_syn: the number of attributes in an alternative
    theta = pm.Normal('theta', mu=0, sigma=1, shape=K_syn)
    # LKJ Prior
    chol, corr, stds = pm.LKJCholeskyCov(
        "chol", n=K_syn, eta=1,
        sd_dist=pm.Exponential.dist(1.0),
        store_in_trace=False,
        compute_corr=True)
    # individual prior
    beta = pm.MvNormal('beta', mu=theta, chol=chol,
                       shape=(R_syn, K_syn))
    # utility of each alternative, per task, using the respondent's partworths
    U_left = (X_syn[:, 0, :] * beta[R_arr_syn]).sum(axis=1)
    U_right = (X_syn[:, 1, :] * beta[R_arr_syn]).sum(axis=1)
    U = pt.stack((U_left, U_right), axis=1)
    # softmax converts utilities to choice probabilities
    U_normed = pt.special.softmax(U, axis=1)
    # categorical likelihood over the two alternatives
    y_obs = pm.Categorical('y_obs', p=U_normed, observed=y_syn)
    trace = pm.sample(1000, tune=1000, chains=6, cores=6, 
                      target_accept=0.95)
    
    
with hmnl:
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)

az.plot_ppc(trace)
az.plot_trace(trace)
az.plot_posterior(trace)
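
Since the data are synthetic, it is also worth checking parameter recovery directly, e.g. comparing the posterior means of theta against theta_syn (a minimal check I'd add, not in the original code):

az.summary(trace, var_names=['theta'])  # compare against theta_syn = [0.5, 1, 0.1]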

Compared to the code above, I have made the following changes:

1- replaced R_idx with R_arr_syn
2- replaced shape=(R_codes.size, K_syn) with shape=(R_syn, K_syn)
3- removed the coords, because I don't really understand why the dims of beta should be "subject_id". If the aim is to tile beta so it can be multiplied with X_syn, then beta[R_arr_syn] is sufficient.

I can't guarantee this is what the OP intended. Note that there are a lot of divergences and a bad-looking posterior predictive, so you should probably study the model carefully and make sure it aligns with the data-generating process.
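
If the divergences persist, one standard remedy worth trying is a non-centered parameterization of beta. This is only a sketch under the same setup as above; I haven't verified that it fixes the issue here:

with pm.Model() as hmnl_nc:
    theta = pm.Normal('theta', mu=0, sigma=1, shape=K_syn)
    chol, corr, stds = pm.LKJCholeskyCov(
        'chol', n=K_syn, eta=1,
        sd_dist=pm.Exponential.dist(1.0),
        compute_corr=True)
    # non-centered: beta = theta + chol @ z with standard-normal offsets z
    # is distributed as MvNormal(theta, chol @ chol.T)
    z = pm.Normal('z', mu=0, sigma=1, shape=(R_syn, K_syn))
    beta = pm.Deterministic('beta', theta + pt.dot(z, chol.T))
    # the utility and likelihood blocks are unchanged from the model above
    U_left = (X_syn[:, 0, :] * beta[R_arr_syn]).sum(axis=1)
    U_right = (X_syn[:, 1, :] * beta[R_arr_syn]).sum(axis=1)
    U = pt.stack((U_left, U_right), axis=1)
    y_obs = pm.Categorical('y_obs', p=pt.special.softmax(U, axis=1), observed=y_syn)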