Hello everyone. I’ve been checking various related posts but so far still unable to perform sample_posterior_predictive.
I have defined a model constructor function
def model_ordered_logistic_partial_pooled(coords, cohort=cohort_):
with pm.Model(coords=coords) as model:
x_0 = pm.Data("x_0", training_data['score_chg'].astype(float).values, mutable=True)
x_1 = pm.Data("x_1", training_data['odds_zs'].values, mutable=True)
y = pm.Data("y", training_data['cohort_4gp'].values - 1, mutable=True)
race_idx = pm.Data("race_idx", race_id_training, dims="obs_id", mutable=True)
# Top level prior
a_mean = pm.Normal('a_mean', 0., 10.)
a_stdev = pm.HalfNormal('a_stddev', 10.)
# Individual race
a = pm.Normal(
'a', a_mean, a_stdev,
transform=pm.distributions.transforms.univariate_ordered,
shape=(len(coords['race']), cohort - 1),
dims="race",
)
a_ = pm.Deterministic('a_', a, dims="race")
a_t = pt.as_tensor_variable(a_)
b_mean = pm.Normal('b_mean', [-0.05, 1., 0., 0.], 4*[5.])
b_stddev = pm.HalfNormal('b_stddev', 4*[10.])
b0 = pm.Normal('b0', b_mean[0], b_stddev[0], dims="race", shape=(len(coords['race'])))
b1 = pm.Normal('b1', b_mean[1], b_stddev[1], dims="race", shape=(len(coords['race'])))
phi = pm.Deterministic("phi", b0[race_idx] * x_0.get_value()
+ b1[race_idx] * x_1.get_value())
resp_obs = pm.OrderedLogistic(
'cohort_obs', phi, a_t[race_idx],
observed=y.get_value(),
dims="obs_id"
)
return model
Sampling is successful
olmpp_4 = model_ordered_logistic_partial_pooled(coords_training, 4)
with olmpp_4:
trace_olmpp_4 = pm.sample(10, tune=10, chains=1, random_seed=42, return_inferencedata=True, idata_kwargs={'log_likelihood':True})
However, I then attempted to do out-of-sample testing. I created new model with new index coord dictionary to act on the trace I got from the training model
race_id_testing, races_testing = testing_data.race_idx.factorize(sort=True)
coords_testing = {
"race": races_testing,
"race_id": race_id_testing,
"obs_id": np.arange(len(testing_data))
}
olmpp_4_oos = model_ordered_logistic_partial_pooled(coords_testing, 4)
with olmpp_4_oos:
pm.set_data({'x_0': testing_data['score_chg'].astype(float).values,
'x_1': testing_data['odds_zs'].values,
'y': testing_data['cohort_4gp'].values - 1,
"race_idx": race_id_testing})
pp_olmpp_4 = pm.sample_posterior_predictive(trace_olmpp_4)
trace_olmpp_4.extend(pp_olmpp_4)
But unfortunately got an error
TypeError: ("The type's shape ((1498,)) is not compatible with the data's ((5949,))", 'Container name "b0"')
where 1498 is the length of testing set’s races_testing and 5949 training set’s races_training. What did I miss? I also tried working with pytensor shared as suggested in here but got similar results…thanks in advance for any suggestion.