Hierarchical Bayesian choice modeling with PyMC 4

Hello!

I have been trying to port a model that I had working in PyMC3 (with Theano as the tensor library) to PyMC 4 (which uses Aesara). After I define the model, sampling runs into an error. My code and the error I get are below.
BTW, win_loss is an array defined outside the model that contains the choice data as ones and zeros. It is converted into a tensor before being used in the model definition.
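Roughly, that conversion looks like this (the shapes and the random 0/1 arrays here are only illustrative stand-ins for my actual preprocessing):

import numpy as np
import aesara.tensor as at

# Illustrative stand-ins for the preprocessed choice indicators.
winloss_size, num_items = 100, 5
win_np = np.random.randint(0, 2, size=(winloss_size, num_items + 1)).astype("float64")
loss_np = np.random.randint(0, 2, size=(winloss_size, num_items + 1)).astype("float64")

# Winner rows stacked on top of loser rows, then converted to an Aesara tensor.
win_loss = at.as_tensor_variable(np.concatenate([win_np, loss_np], axis=0))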

My code:

import numpy as np
import pymc as pm
import aesara.tensor as at
# `softmax` and the data objects (win_loss, winloss_size, ind_list, choicedata,
# num_items, num_respondents), plus the sampler settings (tune, draws, chains,
# target_accept), are defined earlier in my script.

basic_model = pm.Model()
with basic_model:
    # Priors for unknown model parameters
    alphas = pm.Normal("alphas", mu=0, sigma=1, shape=num_items)
    sd_distrib = pm.HalfNormal.dist(10)
    L_sigma, corr, stds = pm.LKJCholeskyCov('L_sigma', eta=1, n=num_items, sd_dist=sd_distrib, compute_corr=True, store_in_trace=False)
    cov = pm.Deterministic("cov", L_sigma.dot(L_sigma.T))
    betas_init = pm.MvNormal('betas_init', mu=np.zeros((num_items)), cov=np.eye(num_items), shape=(num_respondents, num_items))

    def logp(win_loss):
        win = win_loss[:winloss_size,]
        loss = win_loss[winloss_size:,]
        betas = at.reshape(at.tile(alphas.T,num_respondents), (num_respondents, num_items)) + at.dot(betas_init,L_sigma)
        betas_effects_coded = at.concatenate([betas,at.reshape(-at.sum(betas, axis = 1),(num_respondents,1))], axis=1)
        winners = at.sum(betas_effects_coded[ind_list,]*win, axis = 1) 
        losers = at.sum(betas_effects_coded[ind_list,]*loss, axis = 1)
        shp = choicedata.shape[0]
        mat_tmp = at.concatenate([at.reshape(winners,(shp,1)),at.reshape(losers,(shp,1))],axis=1)
        prob_tmp = softmax(mat_tmp)
        log_prob_tmp = at.log(prob_tmp)
        return at.reshape(at.bincount(ind_list, weights=log_prob_tmp),(num_respondents,1))
    
    likelihood = pm.DensityDist(
        name="likelihood",
        logp=logp,
        # observed = {"win": win, "loss": loss}
        observed=win_loss,
    )

with basic_model:
    trace_full = pm.sample(
        tune=tune,
        draws=draws,
        chains=chains,
        target_accept=target_accept,
        return_inferencedata=True,
        cores=4,
    )

And here is the error I get:

ValueError                                Traceback (most recent call last)
Input In [71], in <cell line: 2>()
      1 t1 = time.perf_counter()
      2 with basic_model:
----> 3     trace_full = pm.sample(
      4     tune=tune,
      5     draws=draws,
      6     chains=chains,
      7     target_accept=target_accept,
      8     return_inferencedata=True,
      9     cores = 4
     10     #,jobs = 4
     11     )
     12 t2 = time.perf_counter()
     13 sampling_time = (t2 - t1)/60

File ~/anaconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/sampling.py:524, in sample(draws, step, init, n_init, initvals, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, **kwargs)
    521         auto_nuts_init = False
    523 initial_points = None
--> 524 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    526 if isinstance(step, list):
    527     step = CompoundStep(step)

File ~/anaconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/sampling.py:207, in assign_step_methods(model, step, methods, step_kwargs)
    204 # Use competence classmethods to select step methods for remaining
    205 # variables
    206 selected_steps = defaultdict(list)
--> 207 model_logp = model.logp()
    209 for var in model.value_vars:
    210     if var not in assigned_vars:
    211         # determine if a gradient can be computed

File ~/anaconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/model.py:753, in Model.logp(self, vars, jacobian, sum)
    751 rv_logps: List[TensorVariable] = []
    752 if rv_values:
--> 753     rv_logps = joint_logp(list(rv_values.keys()), rv_values, sum=False, jacobian=jacobian)
    754     assert isinstance(rv_logps, list)
    756 # Replace random variables by their value variables in potential terms

File ~/anaconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/distributions/logprob.py:257, in joint_logp(var, rv_values, jacobian, scaling, transformed, sum, **kwargs)
    247 unexpected_rv_nodes = [
    248     node
    249     for node in aesara.graph.ancestors(list(temp_logp_var_dict.values()))
   (...)
    254     )
    255 ]
    256 if unexpected_rv_nodes:
--> 257     raise ValueError(
    258         f"Random variables detected in the logp graph: {unexpected_rv_nodes}.\n"
    259         "This can happen when DensityDist logp or Interval transform functions "
    260         "reference nonlocal variables."
    261     )
    263 # aeppl returns the logp for every single value term we provided to it. This includes
    264 # the extra values we plugged in above, so we filter those we actually wanted in the
    265 # same order they were given in.
    266 logp_var_dict = {}

ValueError: Random variables detected in the logp graph: [alphas, betas_init, L_sigma, normal_rv{0, (0, 0), floatX, False}.out].
This can happen when DensityDist logp or Interval transform functions reference nonlocal variables.

I’m wondering whether “nonlocal variables” refers to my win_loss array and, if so, whether there’s a solution to this problem.

All model variables that are used by your logp function need to be passed as inputs to the DensityDist and to the logp function as well. The constant array should not need to be passed.

See this thread for more information.
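As a minimal sketch of the pattern (a toy model with made-up names, not your model):

import numpy as np
import pymc as pm

data = np.random.normal(size=100)  # stand-in for the observed array

with pm.Model():
    mu = pm.Normal("mu", 0, 1)
    sigma = pm.HalfNormal("sigma", 1)

    # The observed value arrives as the first argument; every random variable the
    # logp uses (here mu and sigma) must also be listed as an explicit input, in
    # the same order as they are passed to DensityDist.
    def logp(value, mu, sigma):
        return pm.logp(pm.Normal.dist(mu=mu, sigma=sigma), value)

    pm.DensityDist("likelihood", mu, sigma, logp=logp, observed=data)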

Thanks for your reply. I understand what you mean. However, the reason I introduced the constant array as an input to the logp function was to be able to use it as the “observed” values in my likelihood. At least, that is what I thought I needed to do in PyMC3, so I repeated it here.
I followed the thread suggested by @cluhmann (in which you also help someone else) and realized that I need to pass the other variables used in my logp function as inputs to both the logp function and the DensityDist (and in the proper order), and that solved my problem. Thank you!


DensityDist is more strict about inputs than in PyMC3. You are free to use nonlocal constants and/or have observed values (which are also constant and will show up as the first input to your logp function).

What you are not allowed to do is use non-constant variables inside the logp function without passing them as explicit inputs to the DensityDist and to the logp function itself.
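Roughly (a toy example to illustrate the rule, not your model):

import numpy as np
import pymc as pm

scale = 2.0                        # a nonlocal *constant* is fine inside logp
data = np.random.normal(size=50)   # observed values are also constant

with pm.Model():
    mu = pm.Normal("mu", 0, 1)

    # OK: `data` arrives as `value`, `scale` is a constant, and `mu` is an explicit input.
    def logp_ok(value, mu):
        return pm.logp(pm.Normal.dist(mu=mu, sigma=scale), value)

    pm.DensityDist("ok", mu, logp=logp_ok, observed=data)

    # Not OK: referencing `mu` from the enclosing scope instead of passing it in
    # triggers the "Random variables detected in the logp graph" error at sampling time.
    # def logp_bad(value):
    #     return pm.logp(pm.Normal.dist(mu=mu, sigma=scale), value)
    # pm.DensityDist("bad", logp=logp_bad, observed=data)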


Thank you, this was helpful!

For the record, here’s the code that solved the issue:

basic_model = pm.Model()

with basic_model:

    # Priors for unknown model parameters
    alphas = pm.Normal("alphas", mu=0, sigma=1, shape=num_items)
    # (I also experimented with Exponential, HalfNormal, ChiSquared, and HalfCauchy sd_dist priors.)
    sd_distrib = pm.Normal.dist(mu=1, sigma=10, shape=num_items)
    L_sigma, corr, stds = pm.LKJCholeskyCov('L_sigma', eta=1, n=num_items, sd_dist=sd_distrib, compute_corr=True, store_in_trace=False)
    cov = pm.Deterministic("cov", L_sigma.dot(L_sigma.T))
    betas_init = pm.MvNormal('betas_init', mu=np.zeros((num_items)), cov=np.eye(num_items), shape=(num_respondents, num_items))
    
    

    def logp(win_loss, alphas, betas_init, L_sigma):
        # The observed array comes in as the first argument; every model variable used
        # below is passed in explicitly, in the same order as in the DensityDist call.
        win = win_loss[:winloss_size,]
        loss = win_loss[winloss_size:,]
        # Respondent-level part-worths: population means plus correlated individual offsets.
        betas = at.reshape(at.tile(alphas.T, num_respondents), (num_respondents, num_items)) + at.dot(betas_init, L_sigma)
        # Effects coding: the last item's utility is the negative sum of the others.
        betas_effects_coded = at.concatenate([betas, at.reshape(-at.sum(betas, axis=1), (num_respondents, 1))], axis=1)
        winners = at.sum(betas_effects_coded[ind_list,] * win, axis=1)
        losers = at.sum(betas_effects_coded[ind_list,] * loss, axis=1)
        shp = choicedata.shape[0]
        mat_tmp = at.concatenate([at.reshape(winners, (shp, 1)), at.reshape(losers, (shp, 1))], axis=1)
        prob_tmp = softmax(mat_tmp)
        log_prob_tmp = at.log(prob_tmp)
        # Aggregate the log probabilities per respondent via bincount over the respondent index.
        return at.reshape(at.bincount(ind_list, weights=log_prob_tmp), (num_respondents, 1))

    likelihood = pm.DensityDist(
        "likelihood",
        alphas, betas_init, L_sigma,  # forwarded to logp after the observed value
        logp=logp,
        observed=win_loss,
    )