num_cohort = 2
data_list = [-1.2083030673141326,
0.5652466185040567,
-0.7851719239971027,
0.20513500717041422,
0.5652466185040567,
-0.1549766041632283,
2.0056930638386268,
-1.2983309701475432,
0.745302424170878,
1.1054140355045206,
-0.42506031266346017,
-0.6951440211636921,
-1.280325389580861,
0.6552745213374673,
-0.7873164649263759,
-0.6484249514565011,
0.5817570249909609,
2.089722028378173,
-0.410325214079573,
-0.8746197019645828,
0.8198567623678892,
-0.410325214079573,
-1.0214478733470218,
0.6611236041166038,
-0.9220549344577036,
-0.8455301825797763,
0.24973032867305697,
0.9193219076049202,
-0.6111731299536243,
0.05841844897823892]
index_list = [0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
3,
3,
3,
3,
3,
3]
output_list = [4,
4,
4,
4,
4,
4,
4,
3,
4,
3,
2,
4,
2,
1,
2,
1,
4,
4,
4,
2,
4,
3,
3,
4,
1,
2,
2,
4,
4,
4]
prediction_len = 30
training_data_lite = pd.DataFrame({'in1':data_list, 'in2':data_list, 'out':output_list, 'race_idx':index_list})
testing_data_lite = pd.DataFrame({'in1':data_list[:prediction_len], 'in2':data_list[:prediction_len],'race_idx':index_list[:prediction_len]})
# Re-assign consecutive index starting from zero
training_race_idx_mapping = dict(zip(set(training_data_lite.race_idx), range(len(set(training_data_lite.race_idx)))))
training_data_lite = training_data_lite.replace({"race_idx": training_race_idx_mapping})
testing_race_idx_mapping = dict(zip(set(testing_data_lite.race_idx), range(len(set(testing_data_lite.race_idx)))))
testing_data_lite = testing_data_lite.replace({"race_idx": testing_race_idx_mapping})
temp_df = training_data_lite
idx = temp_df.race_idx
_, races_full = temp_df.race_idx.factorize(sort=True)
coords = {
"id": races_full,
"obs_idx": list(range(len(idx))),
}
with pm.Model(coords=coords) as olmpp_4:
x_0 = pm.Data("x_0", temp_df['in1'].astype(float).values, mutable=True, dims=("obs_idx",))
x_1 = pm.Data("x_1", temp_df['in2'].values, mutable=True, dims=("obs_idx",))
y = pm.Data("y", temp_df['out'].values - 1, mutable=True, dims=("obs_idx",))
b_mean = pm.Normal('b_mean', [-0.05, 1., 0., 0.], 4*[5.])
b_stddev = pm.HalfNormal('b_stddev', 4*[10.])
b0 = pm.Normal('b0', b_mean[0], b_stddev[0], dims=("id",))
b1 = pm.Normal('b1', b_mean[1], b_stddev[1], dims=("id",))
phi = b0[idx] * x_0\
+ b1[idx] * x_1\
a_mean = pm.Normal('a_mean', 0., 10.)
a_stdev = pm.HalfNormal('a_stddev', 10.)
a = pm.Normal(
'a', a_mean, a_stdev,
transform=pm.distributions.transforms.univariate_ordered,
shape=(len(coords['id']), 4 - 1),
testval=len(coords['id']) * [[-2.512425133191401, -1.1671272329327658, -0.39780512413988334]],
dims=("id",),
)
obs = pm.OrderedLogistic("obs", phi, a[idx], observed=y, dims=("obs_idx",))
idata = pm.sample(10, tune=10, chains=4, return_inferencedata=True, idata_kwargs={'log_likelihood':True})
temp_df2 = testing_data_lite
idx2 = temp_df2.race_idx
_, races_full2 = temp_df2.race_idx.factorize(sort=True)
new_coords = {
"id": races_full2,
"obs_idx": list(range(len(idx2))),
}
with pm.Model(coords=new_coords) as olmpp_4_pre:
x_0 = pm.Data("x_0", temp_df2['in1'].astype(float).values, mutable=True, dims=("obs_idx",))
x_1 = pm.Data("x_1", temp_df2['in2'].values, mutable=True, dims=("obs_idx",))
b_mean = pm.Normal('b_mean', [-0.05, 1., 0., 0.], 4*[5.])
b_stddev = pm.HalfNormal('b_stddev', 4*[10.])
new_b0 = pm.Normal('new_b0', b_mean[0], b_stddev[0], dims=("id",))
new_b1 = pm.Normal('new_b1', b_mean[1], b_stddev[1], dims=("id",))
phi = new_b0[idx] * x_0\
+ new_b1[idx] * x_1\
a_mean = pm.Normal('a_mean', 0., 10.)
a_stdev = pm.HalfNormal('a_stddev', 10.)
new_a = pm.Normal(
'new_a', a_mean, a_stdev,
transform=pm.distributions.transforms.univariate_ordered,
shape=(len(coords['id']), 4 - 1),
dims=("id",),
)
obs = pm.OrderedLogistic("obs", phi, new_a[idx], dims=("obs_idx",))
idata_pre = pm.sample_posterior_predictive(idata, var_names=["obs"], predictions=True, extend_inferencedata=False)
If prediction_len < 30, I got an error