Hi! I’m just starting to use PyMC. I was just playing around with the following toy data:
data = pd.DataFrame({
'Keyword': [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3],
'Date': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
'Clicks': [1, 2, 0, 0, 1, 10, 15, 17, 8, 13, 0, 0, 1, 3, 0],
'Conversions': [1, 1, 0, 0, 0, 3, 4, 2, 5, 6, 0, 0, 0, 1, 0]
})
… and the following toy model:
N_dates = 5
N_keywords = 3
N_obs = len(data)
with pm.Model() as model:
clicks = pm.Data('clicks', data['Clicks'])
conversions = pm.Data('conversions', data['Conversions'])
keyword_idx = pm.Data('keyword_idx', data['Keyword'])
date = pm.Data('date', data['Date'])
obs_idx = pm.Data('obs_idx', data['obs_idx'])
μ_cl = pm.Exponential('μ_cl', lam=1, shape=N_keywords)
N_cl = pm.Poisson('N_cl', mu=μ_cl[keyword_idx-1], shape=N_obs,
observed=clicks)
conv_logit_keyword = pm.Normal('conv_logit_kw', mu=0, sigma=1, shape=N_keywords)
conv_logit_date = pm.Normal('conv_logit_date', mu=0, sigma=1, shape=N_dates)
p_conv = pm.Deterministic('p_conv',
pm.math.invlogit(conv_logit_keyword[keyword_idx-1]
+ conv_logit_date[date-1]))
N_conv = pm.Binomial('N_conv', n=N_cl[:, None], p=p_conv[:, None], observed=conversions)
trace = pm.sample(1000)
Output:
---------------------------------------------------------------------------
SamplingError Traceback (most recent call last)
<ipython-input-34-09294434e75c> in <module>
17 N_conv = pm.Binomial('N_conv', n=N_cl[:, None], p=p_conv[:, None], observed=conversions)
18
---> 19 trace = pm.sample(1000)
2 frames
/usr/local/lib/python3.7/dist-packages/pymc3/util.py in check_start_vals(start, model)
238 "Initial evaluation of model at starting point failed!\n"
239 "Starting values:\n{}\n\n"
--> 240 "Initial evaluation results:\n{}".format(elem, str(initial_eval))
241 )
242
SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'μ_cl_log__': array([-0.36651292, -0.36651292, -0.36651292]), 'conv_logit_kw': array([0., 0., 0.]), 'conv_logit_date': array([0., 0., 0., 0., 0.])}
Initial evaluation results:
μ_cl_log__ -3.18
conv_logit_kw -2.76
conv_logit_date -4.59
N_cl -148.57
N_conv -inf
Name: Log-probability of test_point, dtype: float64
What does this mean or how could this be fixed?