How to deal with missing values when modeling human reinforcement learning data

Dear all,

I’m new to PyMC and have limited experience in cognitive modeling.

Recently, I’ve been working with human reinforcement learning data. In some trials, participants may time out, resulting in missing data for those trials. I’m wondering how to handle such situations.

Below is a portion of the hierarchical model code I’m testing:

with pm.Model(coords=coords) as hbandit:
    # ----- hyper‑priors (non‑centred) -----
    mu_alpha = pm.Normal("mu_alpha", 0.0, 1.0)
    sigma_alpha = pm.HalfNormal("sigma_alpha", 1.0)
    mu_beta = pm.Normal("mu_beta", 0.0, 1.0)
    sigma_beta = pm.HalfNormal("sigma_beta", 1.0)

    # ----- individual parameters -----
    alpha_logit = pm.Normal(
        "alpha_logit", mu=mu_alpha, sigma=sigma_alpha, dims="subject"
    )
    beta_log = pm.Normal("beta_log", mu=mu_beta, sigma=sigma_beta, dims="subject")

    alpha = pm.Deterministic("alpha", pm.math.sigmoid(alpha_logit), dims="subject")
    beta = pm.Deterministic("beta", pm.math.exp(beta_log), dims="subject")

    # ----- observed data -----
    a_data = pm.Data("actions", actions.astype("int64"))
    r_data = pm.Data("rewards", rewards.astype("int64"))

    # ----- trial‑by‑trial scan -----
    def step(a_t, r_t, Q_prev, alpha, beta, n_subj):
        logits = beta[:, None] * Q_prev  # shape (N,2)
        p = pm.math.softmax(logits, axis=1)
        logp = pt.log(p[pt.arange(n_subj), a_t])  # (N,)

        # chosen Q update
        idx = pt.arange(n_subj)
        q_chosen = Q_prev[idx, a_t]
        q_new = q_chosen + alpha * (r_t - q_chosen)
        Q_next = pt.set_subtensor(Q_prev[idx, a_t], q_new)

        return Q_next, pt.sum(logp)  # scalar log‑likelihood at t

    Q0 = pm.math.zeros((n_subj, 2))
    (_, ll_seq), _ = scan(
        fn=step,
        sequences=[a_data.T, r_data.T],
        outputs_info=[Q0, None],
        non_sequences=[alpha, beta, n_subj],
    )

    pm.Potential("likelihood", pt.sum(ll_seq))

    idata = pm.sample(
        1000,
        tune=1000,
        chains=4,
        target_accept=0.9,
        nuts_sampler="nutpie",
        progressbar=True,
        random_seed=42,
    )

Thank you!

Best,
Kun