Hello,
I’m trying to run an out of sample test on fitted model. I’m getting the following error:
---------------------------------------------------------------------------
IncorrectArgumentsError Traceback (most recent call last)
/tmp/ipykernel_27850/1536611220.py in <module>
48 'monthly_m_': test_monthly_fourier,
49 'eaches':eaches_test}, model = partial_pooled_model)
---> 50 test_ppc = pm.sample_posterior_predictive(trace, partial_pooled_model)
/opt/conda/lib/python3.7/site-packages/pymc/sampling.py in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
1857 if keep_size and samples is not None:
1858 raise IncorrectArgumentsError(
-> 1859 "Should not specify both keep_size and samples arguments. "
1860 "See the docstring of the samples argument for more details."
1861 )
IncorrectArgumentsError: Should not specify both keep_size and samples arguments. See the docstring of the samples argument for more details.
Here is the model.
def create_fourier_features(t, n, p=365.25):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
location_idxs, locations = pd.factorize(df_train.index.get_level_values(1))
time_idxs, times = pd.factorize(df_train.index.get_level_values(0))
t = time_idxs/max(time_idxs)
month = pd.get_dummies(df_train.index.get_level_values(0).month)
sept = np.array(month[9])
march = np.array(month[3])
month = np.array(df_train.index.get_level_values(0).month)
n_yearly = 10
n_monthly = 5
# Weekly data
yearly_fourier = create_fourier_features(t, n_yearly, 12)
monthly_fourier = create_fourier_features(t, n_monthly, 1)
location_idxs, locations = pd.factorize(df_train.index.get_level_values(1))
time_idxs, times = pd.factorize(df_train.index.get_level_values(0))
n_changepoints = 8
s = np.linspace(0, np.max(t), n_changepoints+2)[1:-1]
a = (t[:, None] > s)*1
n_components = 10
def create_fourier_features(t, n, p=365.25):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
month = pd.get_dummies(df_train.index.get_level_values(0).month)
sept = np.array(month[9])
march = np.array(month[3])
month = np.array(df_train.index.get_level_values(0).month)
n_yearly = 10
n_monthly = 5
# Weekly data
yearly_fourier = create_fourier_features(t, n_yearly, 12)
monthly_fourier = create_fourier_features(t, n_monthly, 1)
coords_={
"location":locations,
"yearly_components": [f'yearly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_yearly)],
"monthly_components":[f'monthly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_monthly)],
'changepoints':df_train.index.get_level_values(0)[np.argwhere(np.diff(a, axis=0) != 0)[:, 0]],
"obs_id":[f'{loc}_{time.year}_month_{time.month}' for time, loc in df_train.index.values]
}
with pm.Model(coords=coords_) as partial_pooled_model:
location_idxs = pm.ConstantData('location_idx', location_idxs, dims = 'obs_id')
t_ = pm.MutableData('t', t)
a_ = pm.MutableData('a', a)
s_ = pm.MutableData('s', s)
yearly_f_ = pm.MutableData('yearly_f_', yearly_fourier)
monthly_m_ = pm.MutableData('monthly_m_',monthly_fourier)
sept_month = pm.MutableData('sept_month',sept)
march_month = pm.MutableData('march_month', march)
eaches = pm.MutableData('eaches', df_train['eaches'])
mu_k = pm.Normal('mu_k', 0, 1)
sigma_k = pm.HalfCauchy('sigma_k', 5)
k = pm.Normal('k', mu_k, sigma_k, dims = 'location')
mu_m = pm.Normal('mu_m', 0, 1)
sigma_m = pm.HalfCauchy('sigma_m', 5)
m = pm.Normal('m', mu_m, sigma_m, dims = 'location')
delta = pm.Laplace('delta', 0, 0.1, dims=['location', 'changepoints'])
growth = pm.Deterministic('growth', k[location_idxs] + (a_[time_idxs] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
offset = pm.Deterministic('offset', m[location_idxs] + (a_[time_idxs] * -s_[None, :] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
trend = pm.Deterministic('trend', growth[location_idxs] * t_[time_idxs] + offset[location_idxs], dims=['obs_id'])
yearly_mu = pm.Normal('yearly_mu', 0, 0.1)
yearly_sigma = pm.HalfNormal('yearly_sigma', sigma=0.1)
yearly_beta = pm.Normal('yearly_beta', yearly_mu, yearly_sigma, dims=['location', 'yearly_components'])
yearly_seasonality = pm.Deterministic('yearly_seasonality', (yearly_f_[time_idxs] * yearly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
monthly_mu = pm.Normal('monthly_mu', 0, 0.1)
monthly_sigma = pm.HalfNormal('monthly_sigma', sigma=0.1)
monthly_beta = pm.Normal('monthly_beta', monthly_mu, monthly_sigma, dims=['location', 'monthly_components'])
monthly_seasonality = pm.Deterministic('monthly_seasonality', (monthly_m_[time_idxs] * monthly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
sept_mu = pm.Normal('sept_mu', 0, 1)
sept_sigma = pm.HalfCauchy('sept_sigma', 5)
sept_beta = pm.Normal('sept_beta', sept_mu, sept_sigma, dims = 'location')
march_mu = pm.Normal('march_mu', 0, 1)
march_sigma = pm.HalfCauchy('march_sigma', 5)
march_beta = pm.Normal('march_beta', march_mu, march_sigma, dims = 'location')
predictions = pm.Deterministic('predictions', np.exp(trend[location_idxs] + yearly_seasonality[location_idxs] + monthly_seasonality[location_idxs] + (sept_beta[location_idxs]*sept_month) + (march_beta[location_idxs]*march_month)))
# error = pm.HalfCauchy('error', 0.5)
pm.Poisson('obs',
predictions,
observed=eaches,
dims = 'obs_id')
trace = pymc.sampling_jax.sample_numpyro_nuts(tune=2000, draws = 2000)
Here is the code snippet being used to run the out of sample data:
location_idxs, locations = pd.factorize(df_test.index.get_level_values(1))
time_idxs, times = pd.factorize(df_test.index.get_level_values(0))
t_test = time_idxs/max(time_idxs)
test_month = pd.get_dummies(df_test.index.get_level_values(0).month)
test_sept = np.array(test_month[9])
test_march = np.array(test_month[3])
test_month = np.array(df_test.index.get_level_values(0).month)
test_n_yearly = 10
test_n_monthly = 5
def create_fourier_features(t, n, p=365.25):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
n_yearly = 10
n_monthly = 5
# Weekly data
test_yearly_fourier = create_fourier_features(t_test, n_yearly, 12)
test_monthly_fourier = create_fourier_features(t_test, n_monthly, 1)
location_idxs, locations = pd.factorize(df_test.index.get_level_values(1))
time_idxs, times = pd.factorize(df_test.index.get_level_values(0))
n_changepoints = 8
s_test = np.linspace(0, np.max(t_test), n_changepoints+2)[1:-1]
a_test = (t_test[:, None] > s)*1
n_components = 10
eaches_test = np.array(df_test['eaches'])
pm.set_data(new_data = {'sept_month':test_sept,
'march_month':test_march,
't':t_test,
's':s_test,
'a':a_test,
'yearly_f_': test_yearly_fourier,
'monthly_m_': test_monthly_fourier,
'eaches':eaches_test}, model = partial_pooled_model)
test_ppc = pm.sample_posterior_predictive(trace, partial_pooled_model)
I haven’t specified keep or sample sizes so I’m not sure what is throwing this error.