How to marginalize latent discrete variables in CDMs (cognitive diagnosis models)?

I recently translated a DINA model from a Stan tutorial (the DINA Independent case study) to PyMC, marginalizing out the latent discrete attribute classes. However, I am seeing a significant difference in the results: the correlation between the estimated ‘slip’ (s) and ‘guess’ (g) parameters and their true values is nearly zero, which diverges from the Stan model’s output.
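
For context, my understanding of the marginalization in the Stan case study is that the latent class is summed out of the likelihood (please correct me if I have misread it):

$$
p(Y_n \mid s, g, \pi) = \sum_{c=1}^{2^K} \pi_c \prod_{i=1}^{I} p(Y_{ni} \mid \eta_{ci}, s_i, g_i),
\qquad
p(Y_{ni} = 1 \mid \eta_{ci}) = \eta_{ci}\,(1 - s_i) + (1 - \eta_{ci})\,g_i,
$$

where $\eta_{ci}$ is the ideal response of latent class $c$ on item $i$ and $\pi_c$ is the probability of belonging to class $c$.

Here is the crux of my PyMC model: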

import numpy as np
import pymc as pm
from pytensor import tensor as pt
import itertools
import arviz as az

# Define the number of students, items, and skills
N = 150   # Total number of students
I = 30    # Total number of items
K = 3     # Total number of skills

# Generate all possible combinations of skill mastery (2^K possible classes)
all_class = np.array(list(itertools.product(*[[0, 1] for _ in range(K)])))

# Randomly assign students to one of the possible classes
class_idx = np.random.randint(0, len(all_class), size=N)
student_attribute = all_class[class_idx]

# Generate a Q-matrix that maps items to the skills they require
Q = np.concatenate([all_class[1:] for _ in range(10)], axis=0)[:I]
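
# (My addition) Every item should require at least one skill, since Q is
# built from the non-empty attribute patterns
assert (Q.sum(axis=-1) > 0).all()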

# Calculate the ideal response for each student based on their attribute mastery and the Q-matrix
eta = (student_attribute.reshape(N, 1, K) >= Q.reshape(1, I, K)).prod(axis=-1)

# Define slip and guess parameters for each item
s = np.random.uniform(0, 0.1, (1, I))
g = np.random.uniform(0, 0.1, (1, I))

# Generate observed data (Y) based on the ideal response with some noise from slip and guess parameters
Y = np.random.binomial(1, eta * (1 - s - g) + g)
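
# (My addition) Quick sanity checks on the simulated data: students who
# master no skills can only guess (eta == 0 everywhere), and full-mastery
# students have ideal response 1 on every item
assert (eta[class_idx == 0] == 0).all()
assert (eta[class_idx == len(all_class) - 1] == 1).all()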

# Compute the ideal response for all possible classes
all_c_eta = (all_class.reshape(-1, 1, K) >= Q.reshape(1, I, K)).prod(axis=-1)
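# (My addition) Sanity check: indexing the per-class ideal responses by each
# student's true class should reproduce the per-student eta computed above
assert (all_c_eta[class_idx] == eta).all()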
all_c_eta = pt.as_tensor(all_c_eta)

# Begin PyMC model definition
with pm.Model() as DINA:

    # Define the prior for the probability of each student belonging to each class using a Dirichlet distribution
    student_class_p = pm.Dirichlet("student_class_p", a=pt.ones(2**K), shape=(1, 2**K))
    
    # Define slip parameter for each item with a Beta distribution
    s_sample = pm.Beta("s", alpha=5, beta=25, shape=(1, I))
    
    # Define guess parameter for each item with a Beta distribution
    g_sample = pm.Beta("g", alpha=5, beta=25, shape=(1, I))
    
    # Calculate the probability for all classes, considering the slip and guess parameters
    all_c_p = pm.Deterministic("all_class_p", all_c_eta * (1 - s_sample - g_sample) + g_sample)
    
    # Log-likelihood of each observed response under every possible class;
    # broadcasting gives shape (2**K, N, I)
    logL_pY = pm.logp(pm.Bernoulli.dist(all_c_p.reshape((2**K, 1, I))), Y.reshape((1, N, I)))
    
    # Add the class-conditional log-likelihoods and the log class prior as
    # potentials; this is how I intended to marginalize over the latent classes
    pm.Potential("logL_Y", logL_pY)
    pm.Potential("logL_C", pt.log(student_class_p))
    
    # Combine the per-student, per-class likelihood (back on the probability
    # scale) with the class prior, then normalize to get class-membership probabilities
    joint_p = pt.exp(logL_pY.sum(axis=-1)).T * student_class_p
    prob_resp_class = pm.Deterministic("prob_resp_class", joint_p / joint_p.sum(axis=-1, keepdims=True))
    
    # Calculate the probability of each student mastering each skill
    pm.Deterministic("att_p", prob_resp_class.dot(all_class))

# Perform sampling from the model
with DINA:
    tr = pm.sample(1000, tune=3000, chains=2, return_inferencedata=True)
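
For completeness, this is roughly how I compare the estimates with the simulated truth (posterior means of s and g against the true values; this is where the near-zero correlations come from):

s_hat = tr.posterior["s"].mean(dim=("chain", "draw")).values.ravel()
g_hat = tr.posterior["g"].mean(dim=("chain", "draw")).values.ravel()
print(np.corrcoef(s_hat, s.ravel())[0, 1])  # close to zero
print(np.corrcoef(g_hat, g.ravel())[0, 1])  # also close to zero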

I got the following result (figure omitted; the estimated s and g values show essentially no correlation with the true values):

Additionally, the prob_resp_class estimates are nearly uniform: every student is assigned roughly the same probability of belonging to each of the 2^K classes, which doesn’t seem right.
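
One thing I’m now wondering about is the pair of pm.Potential terms. My (possibly wrong) reading of the Stan code is that it marginalizes the latent class inside the likelihood with log_sum_exp, which in PyMC would look more like the untested sketch below (replacing the two potentials inside the model block) than like two separate potentials:

# Untested sketch: marginalize the latent class within a single potential,
# log p(Y_n) = logsumexp_c [ log(pi_c) + sum_i log p(Y_ni | class c) ]
per_class_loglik = logL_pY.sum(axis=-1).T  # shape (N, 2**K)
marginal_loglik = pm.math.logsumexp(per_class_loglik + pt.log(student_class_p), axis=-1)
pm.Potential("marginal_logL", marginal_loglik.sum())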

Have I missed something vital in the translation that might account for this discrepancy? I’d be grateful for any advice or insights that could point me toward a resolution.

Thanks in advance for your time and help!