Issue passing data to a model with Custom Likelihood

Hi there! Thanks for the suggestion. It sort of worked in the sense that it changed the error. I created an array of arrays:

# Combine the per-group entries of test_dict into one NumPy array.
# NOTE: np.append without an `axis` argument flattens both inputs, so the
# result is a single 1-D array rather than a nested "array of arrays".
# NOTE(review): the likelihood code indexes each element with
# ['survived'] / ['churned'], which suggests every test_dict[i] is a
# dict-like record; in that case this is presumably an object-dtype array,
# not a numeric one -- confirm with test_array.dtype.
test_array = np.array(test_dict[0])
for i in range (1, n_groups):
    test_array = np.append(test_array, test_dict[i])

and then modified the code to accept this and run it:

# Unpooled model: every group gets its own (alpha, beta) pair, and a custom
# log-likelihood is attached through pm.DensityDist.
with pm.Model() as unpooled_sbg_model:
    # One Uniform(0.0001, 100) prior per group for each parameter.
    alpha = pm.Uniform('alpha', 0.0001, 100, shape = n_groups)
    beta = pm.Uniform('beta', 0.0001, 100, shape = n_groups)
    
    def logp(data_array, alpha = alpha, beta = beta):
        """Return the log-likelihood summed over all groups.

        data_array is indexed as data_array[i]['survived'] and
        data_array[i]['churned'] for each group index i; alpha and beta
        default to the per-group priors defined in the enclosing model.
        """
        # Running total, started as a float64 scalar.
        log_like = tt.cast(0., 'float64')
        
        # `group` itself is unused; only the positional index i is needed.
        for i, group in enumerate(group_cats):
    
            survived = data_array[i]['survived']
            churned = data_array[i]['churned']
            
            # Number of observations for this group (length of first axis).
            n_obs = np.shape(survived)[0]
            
            # Wrap the Python index in a theano shared variable before using
            # it to index the alpha/beta vectors.
            ii = theano.shared(i)
            # Calculate the final survival probability (eq. 6)
            ln_surv_prob = betaln(alpha[ii], beta[ii] + n_obs) - betaln(alpha[ii], beta[ii])

            # Find the probability of churn for all values prior
            ln_prob_vec = []
            
            # One log-probability term per period j = 1..n_obs.
            for j in range(1, n_obs + 1):
                ln_prob_vec.append(betaln(alpha[ii] + 1, beta[ii] + j - 1) - betaln(alpha[ii], beta[ii]))
            # Stack the list of scalar expressions into a 1-D tensor.
            ln_prob_vec = tt.as_tensor_variable(ln_prob_vec, 'ln_prob_vec', ndim = 1)
            
            # Churn terms weighted by `churned`, plus the survival term
            # weighted by the last entry of `survived`.
            log_like += pm.math.dot(churned, ln_prob_vec) + survived[-1] * ln_surv_prob 

        return log_like
    
    # NOTE(review): per the traceback in this post, a dict passed as
    # `observed` goes through MultiObservedRV, which runs every value through
    # pandas_to_array (np.isnan) and .astype(dtype). test_array therefore
    # presumably has to be a plain numeric ndarray; an object-dtype array of
    # per-group records raises the TypeError shown -- confirm the dtype.
    likelihood = pm.DensityDist('likelihood', logp, observed = {'data_array': test_array})
    unpooled_sbg_trace = pm.sample(tune = 1000, draws = 200, chains = 4, cores = 4)

Only now I get:

TypeErrorTraceback (most recent call last)
<ipython-input-40-a27502279da0> in <module>
     28         return log_like
     29 
---> 30     likelihood = pm.DensityDist('likelihood', logp, observed = {'data_array': test_array})
     31     unpooled_sbg_trace = pm.sample(tune = 12000, draws = 2000, target_accept = 0.99, chains = 4, cores = 4) # just a sanity check to see if it even runs...

/usr/local/lib/python3.8/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
    120         else:
    121             dist = cls.dist(*args, **kwargs)
--> 122         return model.Var(name, dist, data, total_size, dims=dims)
    123 
    124     def __getnewargs__(self):

/usr/local/lib/python3.8/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size, dims)
   1160         elif isinstance(data, dict):
   1161             with self:
-> 1162                 var = MultiObservedRV(
   1163                     name=name,
   1164                     data=data,

/usr/local/lib/python3.8/site-packages/pymc3/model.py in __init__(self, name, data, distribution, total_size, model)
   1864         """
   1865         self.name = name
-> 1866         self.data = {
   1867             name: as_tensor(data, name, model, distribution) for name, data in data.items()
   1868         }

/usr/local/lib/python3.8/site-packages/pymc3/model.py in <dictcomp>(.0)
   1865         self.name = name
   1866         self.data = {
-> 1867             name: as_tensor(data, name, model, distribution) for name, data in data.items()
   1868         }
   1869 

/usr/local/lib/python3.8/site-packages/pymc3/model.py in as_tensor(data, name, model, distribution)
   1745 def as_tensor(data, name, model, distribution):
   1746     dtype = distribution.dtype
-> 1747     data = pandas_to_array(data).astype(dtype)
   1748 
   1749     if hasattr(data, "mask"):

/usr/local/lib/python3.8/site-packages/pymc3/model.py in pandas_to_array(data)
   1716         else:
   1717             # already a ndarray, but not masked
-> 1718             mask = np.isnan(data)
   1719             if np.any(mask):
   1720                 ret = np.ma.MaskedArray(data, mask)

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

but I have no idea why it’s calling `pandas_to_array`, or why `np.isnan` rejects my data. Do you have any ideas?