Hi there! Thanks for the suggestion. It sort of worked in the sense that it changed the error. I created an array of arrays:
# Collect the per-group entries of test_dict into a single NumPy array.
# NOTE(review): the element type of test_dict is not visible here; if each
# entry is a dict (as the later data_array[i]['survived'] lookups suggest),
# the result is presumably an object-dtype array rather than a numeric one.
# Confirm with test_array.dtype.
test_array = np.array(test_dict[0])
for i in range (1, n_groups):
    # np.append returns a new array, so the result has to be re-assigned.
    test_array = np.append(test_array, test_dict[i])
and then modified the code to accept this and run it:
# Unpooled model: alpha and beta each hold one entry per group.
with pm.Model() as unpooled_sbg_model:
    # Uniform(0.0001, 100) priors, n_groups entries each.
    alpha = pm.Uniform('alpha', 0.0001, 100, shape = n_groups)
    beta = pm.Uniform('beta', 0.0001, 100, shape = n_groups)
    def logp(data_array, alpha = alpha, beta = beta):
        """Return the log-likelihood accumulated over every group in group_cats.

        data_array[i] is looked up with the keys 'survived' and 'churned' for
        the i-th group. alpha and beta default to the prior variables defined
        above, so the caller only has to supply the data.
        """
        # Symbolic float64 zero used as the running total.
        log_like = tt.cast(0., 'float64')
        # Only the position i is used below; the loop variable `group` is not.
        for i, group in enumerate(group_cats):
            survived = data_array[i]['survived']
            churned = data_array[i]['churned']
            # Length of the first axis of this group's 'survived' entry.
            n_obs = np.shape(survived)[0]
            # Wrap the group index in a shared variable to index alpha/beta.
            ii = theano.shared(i)
            # Calculate the final survival probability (eq. 6)
            ln_surv_prob = betaln(alpha[ii], beta[ii] + n_obs) - betaln(alpha[ii], beta[ii])
            # Find the probability of churn for all values prior
            ln_prob_vec = []
            # One symbolic term for each j = 1..n_obs.
            for j in range(1, n_obs + 1):
                ln_prob_vec.append(betaln(alpha[ii] + 1, beta[ii] + j - 1) - betaln(alpha[ii], beta[ii]))
            # Turn the Python list of symbolic terms into a 1-D tensor.
            ln_prob_vec = tt.as_tensor_variable(ln_prob_vec, 'ln_prob_vec', ndim = 1)
            # churned weighted by the per-j terms, plus the last 'survived'
            # entry weighted by the final survival term.
            log_like += pm.math.dot(churned, ln_prob_vec) + survived[-1] * ln_surv_prob
        return log_like
    # NOTE(review): the traceback in this post shows a dict passed as
    # `observed` being routed through as_tensor -> pandas_to_array, which
    # calls np.isnan on each value. np.isnan rejects object-dtype input, so
    # test_array presumably has to be numeric (e.g. separate float arrays for
    # 'survived' and 'churned'). Confirm by checking test_array.dtype.
    likelihood = pm.DensityDist('likelihood', logp, observed = {'data_array': test_array})
    # Sample 4 chains on 4 cores.
    unpooled_sbg_trace = pm.sample(tune = 1000, draws = 200, chains = 4, cores = 4)
Only now I get:
TypeError                                 Traceback (most recent call last)
<ipython-input-40-a27502279da0> in <module>
28 return log_like
29
---> 30 likelihood = pm.DensityDist('likelihood', logp, observed = {'data_array': test_array})
31 unpooled_sbg_trace = pm.sample(tune = 12000, draws = 2000, target_accept = 0.99, chains = 4, cores = 4) # just a sanity check to see if it even runs...
/usr/local/lib/python3.8/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
120 else:
121 dist = cls.dist(*args, **kwargs)
--> 122 return model.Var(name, dist, data, total_size, dims=dims)
123
124 def __getnewargs__(self):
/usr/local/lib/python3.8/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size, dims)
1160 elif isinstance(data, dict):
1161 with self:
-> 1162 var = MultiObservedRV(
1163 name=name,
1164 data=data,
/usr/local/lib/python3.8/site-packages/pymc3/model.py in __init__(self, name, data, distribution, total_size, model)
1864 """
1865 self.name = name
-> 1866 self.data = {
1867 name: as_tensor(data, name, model, distribution) for name, data in data.items()
1868 }
/usr/local/lib/python3.8/site-packages/pymc3/model.py in <dictcomp>(.0)
1865 self.name = name
1866 self.data = {
-> 1867 name: as_tensor(data, name, model, distribution) for name, data in data.items()
1868 }
1869
/usr/local/lib/python3.8/site-packages/pymc3/model.py in as_tensor(data, name, model, distribution)
1745 def as_tensor(data, name, model, distribution):
1746 dtype = distribution.dtype
-> 1747 data = pandas_to_array(data).astype(dtype)
1748
1749 if hasattr(data, "mask"):
/usr/local/lib/python3.8/site-packages/pymc3/model.py in pandas_to_array(data)
1716 else:
1717 # already a ndarray, but not masked
-> 1718 mask = np.isnan(data)
1719 if np.any(mask):
1720 ret = np.ma.MaskedArray(data, mask)
TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
But I have no idea why it’s calling `pandas_to_array`, or why `np.isnan` rejects my data. Do you have any ideas?