Reinforcement learning - help building a model

Thanks for the pointers!
I used the following code, which recovers the parameters by maximum likelihood (MLE). I found that it does so reasonably accurately only when using a Student-t likelihood.

import numpy as np
import scipy.optimize
import scipy.stats

def llik_td(x, *args):
    # Unpack the parameters passed by scipy.optimize.minimize
    alpha, beta = x
    stim, shock, scr = args

    scrSim = np.zeros(len(stim))
    # Initial associative values for the CS+ and CS-
    scrCSp = 0.5
    scrCSm = 0.5
    for i, (s, sh) in enumerate(zip(stim, shock)):
        if s == 1:
            pe = sh - scrCSp            # prediction error for the CS+
            scrCSp = scrCSp + alpha * pe
            scrSim[i] = scrCSp
        elif s == 0:
            pe = sh - scrCSm            # prediction error for the CS-
            scrCSm = scrCSm + alpha * pe
            scrSim[i] = scrCSm
    # Scale the simulated values onto the SCR (beta applied once, outside the loop)
    scrPred = beta * scrSim

    # Student-t log-likelihood of the observed SCR around the model prediction
    # (degrees of freedom fixed at an arbitrary value here; it could also be estimated)
    df = 4
    LL = np.sum(scipy.stats.t.logpdf(scr, df, loc=scrPred))
    # Return the negative log-likelihood for minimization
    return -LL

estLog = []
for i in range(n_subj):
    x0 = [alphalist[i], slopeList[i]]
    estLog.append(scipy.optimize.minimize(
        llik_td, x0,
        args=(stimVec, shockVec, subjects[:, i]),
        method='L-BFGS-B'))
    print(estLog[i].x)

Now I'm not sure how to implement a similar model in PyMC.
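
For reference, here is a rough sketch of what I imagine a PyMC (v5) version might look like, using pytensor.scan for the trial-by-trial Rescorla-Wagner update and a StudentT likelihood. The priors, the noise scale sigma, and the degrees of freedom nu are just my guesses, and I'm not sure the scan part is the right way to express the update:

import numpy as np
import pymc as pm
import pytensor
import pytensor.tensor as pt

# Single-subject data (here subject 0); stimVec, shockVec, subjects as above
stim = np.asarray(stimVec)          # 1 = CS+, 0 = CS-
shock = np.asarray(shockVec)        # 1 = shock delivered, 0 = no shock
scr = np.asarray(subjects[:, 0])    # observed skin-conductance responses

def rw_step(s, sh, v_p, v_m, alpha):
    # One Rescorla-Wagner update: only the value of the presented CS changes
    v_p_new = pt.switch(pt.eq(s, 1), v_p + alpha * (sh - v_p), v_p)
    v_m_new = pt.switch(pt.eq(s, 0), v_m + alpha * (sh - v_m), v_m)
    # Value of whichever CS was shown on this trial (drives the predicted SCR)
    v_trial = pt.switch(pt.eq(s, 1), v_p_new, v_m_new)
    return v_p_new, v_m_new, v_trial

with pm.Model() as rw_model:
    alpha = pm.Beta("alpha", alpha=1.0, beta=1.0)   # learning rate in [0, 1]
    beta = pm.Normal("beta", mu=0.0, sigma=1.0)     # scaling of value onto SCR
    sigma = pm.HalfNormal("sigma", sigma=1.0)       # Student-t scale
    nu = pm.Exponential("nu", lam=0.1)              # Student-t degrees of freedom

    # Trial-by-trial update of the CS+ and CS- values, starting at 0.5
    (_, _, v_trial), _ = pytensor.scan(
        fn=rw_step,
        sequences=[pt.as_tensor_variable(stim), pt.as_tensor_variable(shock)],
        outputs_info=[pt.constant(0.5), pt.constant(0.5), None],
        non_sequences=[alpha],
    )

    mu = beta * v_trial
    pm.StudentT("scr_obs", nu=nu, mu=mu, sigma=sigma, observed=scr)

    idata = pm.sample()

Does this look like a sensible starting point, and how would I extend it to fit all subjects at once?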