I have already checked the other threads related to this error (this one and this one) but they don’t help. I’m pretty new to pymc.
This is my code:
# Independent variables from a pandas dataframe.
# Select this subject's rows once (skipping the first row of the selection)
# and pull every predictor column from that single slice.
subject_rows = data.loc[data['ID'] == str(idx)].iloc[1:]
a1 = subject_rows['prev_stage1_action']
a2 = subject_rows['prev2_stage1_action']
a3 = subject_rows['prev3_stage1_action']
r1 = subject_rows['prev_R']
r2 = subject_rows['prev2_R']
r3 = subject_rows['prev3_R']
trans1 = subject_rows['prev_trans']
trans2 = subject_rows['prev2_trans']
trans3 = subject_rows['prev3_trans']
# Convert response variable to categorical integer codes for the logistic regression model.
# NOTE(review): `.codes` is -1 for missing values and runs 0..k-1 for k categories;
# confirm 'response_stage1' has exactly two levels and no NaN before using it as a 0/1 outcome.
y0 = pd.Categorical(subject_rows['response_stage1']).codes
# Define model
with pm.Model() as model_3:
    ## Define Priors over parameters
    alpha = pm.Normal('alpha', mu=1, sd=0.1)
    gamma = pm.Normal('gamma', mu=0.5, sd=0.1)
    # Linear predictor built from the lagged action / reward / transition columns
    mu = alpha*a1 + a1*r1*trans1 + gamma*a2*r2*trans2 + gamma**2*a3*r3*trans3
    # Logistic transform of the linear predictor, stored in the trace as 'theta'
    theta = pm.Deterministic('theta', 1 / (1 + pm.math.exp(-mu)))
    ## Define Likelihood function
    yl = pm.Bernoulli('yl', theta, observed=y0)
    # These calls pick up model_3 from the enclosing `with` context
    start = pm.find_MAP()
    step = pm.NUTS()
    # Pass by keyword: the third positional parameter of pm.sample is `init`,
    # so `pm.sample(10000, step, start)` would not use the MAP estimate as `start`.
    trace_3 = pm.sample(10000, step=step, start=start)
It returns the following error:
---------------------------------------------------------------------------
SamplingError Traceback (most recent call last)
<ipython-input-152-f102c22ce77e> in <module>
27 yl = pm.Bernoulli('yl', theta, observed=y0)
28
---> 29 start = pm.find_MAP()
30 step = pm.NUTS()
31
/usr/lib/python3.9/site-packages/pymc3/tuning/starting.py in find_MAP(start, vars, method, return_raw, include_transformed, progressbar, maxeval, model, *args, **kwargs)
104 else:
105 update_start_vals(start, model.test_point, model)
--> 106 check_start_vals(start, model)
107
108 start = Point(start, model=model)
/usr/lib/python3.9/site-packages/pymc3/util.py in check_start_vals(start, model)
235
236 if not np.all(np.isfinite(initial_eval)):
--> 237 raise SamplingError(
238 "Initial evaluation of model at starting point failed!\n"
239 "Starting values:\n{}\n\n"
SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'alpha': array(1.), 'gamma': array(0.5)}
Initial evaluation results:
alpha 1.38
gamma 1.38
yl -inf
Name: Log-probability of test_point, dtype: float64
Already I find this weird because when I evaluate the model at that point with the same data, I don’t get such values:
# Manual check of the success probabilities with plain NumPy, outside the PyMC model.
# NOTE(review): 1.38 is taken from the "Initial evaluation results" table, which the
# error output labels as log-probabilities; the reported starting values were
# alpha = 1.0 and gamma = 0.5.
alpha = gamma = 1.38
mu = (
    alpha*a1
    + a1*r1*trans1
    + gamma*a2*r2*trans2
    + gamma**2*a3*r3*trans3
)
theta_manual = 1/(1+np.exp(-mu))
np.min(theta_manual)
returns 0.003 as the smallest value, so nowhere near -inf. Btw I also don’t understand why the error says yl=-inf, when yl is sampled from the Bernoulli distribution and can thus be only 0 and 1.
Next, I tried to initialize the model at custom starting points:
with pm.Model() as model_3:
    ## Define Priors over parameters
    alpha = pm.Normal('alpha', mu=1, sd=0.1)
    gamma = pm.Normal('gamma', mu=0.5, sd=0.1)
    # Linear predictor built from the lagged action / reward / transition columns
    mu = alpha*a1 + a1*r1*trans1 + gamma*a2*r2*trans2 + gamma**2*a3*r3*trans3
    # Logistic transform of the linear predictor, stored in the trace as 'theta'
    theta = pm.Deterministic('theta', 1 / (1 + pm.math.exp(-mu)))
    ## Define Likelihood function
    yl = pm.Bernoulli('yl', theta, observed=y0)
    # User-chosen starting values for the two free parameters
    custom_start = {'alpha': np.array(0.5), 'gamma': np.array(0.5)}
    trace_3 = pm.sample(2000, tune=1000, start=custom_start)
It returns the following error:
---------------------------------------------------------------------------
SamplingError Traceback (most recent call last)
<ipython-input-155-be2d745ccf64> in <module>
27 yl = pm.Bernoulli('yl', theta, observed=y0)
28
---> 29 trace_3 = pm.sample(2000, tune=1000, start={'alpha': np.array(0.5), 'gamma': np.array(0.5)})
/usr/lib/python3.9/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
433 for chain_start_vals in start:
434 update_start_vals(chain_start_vals, model.test_point, model)
--> 435 check_start_vals(start, model)
436
437 if cores is None:
/usr/lib/python3.9/site-packages/pymc3/util.py in check_start_vals(start, model)
235
236 if not np.all(np.isfinite(initial_eval)):
--> 237 raise SamplingError(
238 "Initial evaluation of model at starting point failed!\n"
239 "Starting values:\n{}\n\n"
SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'alpha': array(0.5), 'gamma': array(0.5)}
Initial evaluation results:
alpha -11.12
gamma 1.38
yl -inf
Name: Log-probability of test_point, dtype: float64
This I find strange because it apparently used the initial points alpha = -11.12 and gamma = 1.38, which are not the points that I defined (0.5 and 0.5).
If anyone has any idea what could be causing this problem, that would be highly appreciated!