Hello.
I’m trying to sample the posterior predictive with out-of-sample data to test my model. I thought I would have to update the coords, just like the shared variables, to account for the change in the time values, but I’m getting the following error when running the code block below.
# Swap the out-of-sample data into the already-built `model` so posterior
# predictive samples can be drawn for the test period.

# update time, items, and months
# pd.factorize returns (integer codes, unique labels). The index levels are
# ordered (time, location, item), matching the tuple unpacking used for
# `obs_id` further down.
time_idxs_test, times_test = pd.factorize(df_test.index.get_level_values(0))
location_idxs_test, locations_test = pd.factorize(df_test.index.get_level_values(1))
item_idxs_test, items_test = pd.factorize(df_test.index.get_level_values(2))
month_idxs_test, months_test = pd.factorize(df_test.index.get_level_values(0).month)

# update matrix
# Scale the time codes to [0, 1]; the largest code is reused for the Fourier
# period below, so compute it once.
max_time_idx_test = time_idxs_test.max()
t_test = time_idxs_test / max_time_idx_test
n_changepoints_test = 8
# Evenly spaced interior changepoints. Use the *_test names defined in this
# block rather than the identically named training variables, so that the
# 's' and 'A' values handed to the model below agree with each other.
# NOTE(review): the number of changepoints presumably has to equal the length
# of the model's 'changepoints' dimension -- confirm against the model.
s_test = np.linspace(0, np.max(t_test), n_changepoints_test + 2)[1:-1]
# Indicator matrix: entry (i, j) is 1 when observation i lies after changepoint j.
A_test = (t_test[:, None] > s_test) * 1

# update target variable
# NOTE(review): y_test is not passed to pm.set_data below -- confirm whether
# the observed variable also needs to be swapped for the test period.
y_test = np.array(df_test['eaches'])

# update fourier matrix
yearly_fourier_test = create_fourier_features(t_test, n=5, p=12 / max_time_idx_test)

# update model
# Rows of A_test where some indicator column flips mark the changepoints; the
# row positions must come from A_test because they index df_test's own index.
changepoint_rows_test = np.argwhere(np.diff(A_test, axis=0) != 0)[:, 0]
coords_test = {
    "locations": locations_test,
    "items": items_test,
    'months': months_test,
    'changepoints': df_test.index.get_level_values(0)[changepoint_rows_test],
    "yearly_components": [
        f'yearly_{f}_{i+1}'
        for f in ['cos', 'sin']
        for i in range(yearly_fourier_test.shape[1] // 2)
    ],
    "obs_id": [
        f'{loc}_{time.year}_month_{time.month}_item_{item}'
        for time, loc, item in df_test.index.values
    ],
}

# NOTE: set_data can only change the length of a dimension that was registered
# as mutable when the model was built (mutable=True in model.add_coord, or a
# dimension defined through a pm.MutableData variable). For a dimension backed
# by a TensorConstant (here 'obs_id') it raises ShapeError, and that has to be
# fixed in the model definition, not in this block.
pm.set_data(
    new_data={
        't': t_test,
        's': s_test,
        'A': A_test,
        'yearly_season': yearly_fourier_test,
    },
    coords=coords_test,
    model=model,
)
---------------------------------------------------------------------------
ShapeError Traceback (most recent call last)
/tmp/ipykernel_3891/2405461556.py in <module>
28 'yearly_season':yearly_fourier_test},
29 coords=coords_test,
---> 30 model = model)
31
32 # test_ppc = pm.sample_posterior_predictive(trace, model=model)
/opt/conda/lib/python3.7/site-packages/pymc/model.py in set_data(new_data, model, coords)
1873
1874 for variable_name, new_value in new_data.items():
-> 1875 model.set_data(variable_name, new_value, coords=coords)
1876
1877
/opt/conda/lib/python3.7/site-packages/pymc/model.py in set_data(self, name, values, coords)
1261 # definitely lead to shape problems.
1262 raise ShapeError(
-> 1263 f"Resizing dimension '{dname}' is impossible, because "
1264 "a `TensorConstant` stores its length. To be able "
1265 "to change the dimension length, pass `mutable=True` when "
ShapeError: Resizing dimension 'obs_id' is impossible, because a `TensorConstant` stores its length. To be able to change the dimension length, pass `mutable=True` when registering the dimension via `model.add_coord`, or define it via a `pm.MutableData` variable.
At this point I tried the following:
with model:
    # add_coords registers each entry through model.add_coord, which raises
    # ValueError("Duplicate and incompatible coordinate: ...") when a name is
    # already registered with different values -- so it cannot be used to
    # overwrite the training coordinates with the test ones.
    model.add_coords(coords_test)
That threw the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_3891/1802801453.py in <module>
23
24 with model:
---> 25 model.add_coords(coords_test)
26
27 pm.set_data(new_data = {'t':t_test,
/opt/conda/lib/python3.7/site-packages/pymc/model.py in add_coords(self, coords, lengths)
1163
1164 for name, values in coords.items():
-> 1165 self.add_coord(name, values, length=lengths.get(name, None))
1166
1167 def set_dim(self, name: str, new_length: int, coord_values: Optional[Sequence] = None):
/opt/conda/lib/python3.7/site-packages/pymc/model.py in add_coord(self, name, values, mutable, length)
1136 if name in self.coords:
1137 if not np.array_equal(values, self.coords[name]):
-> 1138 raise ValueError(f"Duplicate and incompatible coordinate: {name}.")
1139 if length is not None and not isinstance(length, (int, Variable)):
1140 raise ValueError(
ValueError: Duplicate and incompatible coordinate: items.
So I’m not sure how to update all the coords for the out-of-sample dataset without hitting either the “duplicate and incompatible coordinate” error for `items` or the fixed-length `obs_id` dimension. Is there a proper way to do this?