Hello,
I have a prophet-like time series model. I’ve lopped off the year 2021 for an out of sample test data set. When I draw from the posterior, then use arviz to pull the mean values of the predictions, I’m getting 48 values, not 12. I’m sure it has to do with the 4 chains but I’m calculating the dimensions along the chains and draws.
Here is the model and sample code:
n_changepoints = 8
t = np.arange(len(df_sat)) / len(df_sat)
s = np.linspace(0, np.max(t), n_changepoints+2)[1:-1]
a = (t[:, None] > s)*1
n_components = 10
def yearly_X(t, p=365.25, n=n_components):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
monthly_n_components = 5
def monthly_X(t, p=30.5, n=monthly_n_components):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
df_sat['promo_flag_month_before'] = df_sat['promo_flag'].shift(1)
df_sat['promo_flag_month_before'] = df_sat['promo_flag_month_before'].fillna(method = 'bfill')
promo = np.array(df_sat['promo_flag_month_before'])
df_sat_test['promo_flag_month_before'] = df_sat_test['promo_flag'].shift(1)
df_sat_test['promo_flag_month_before'] = df_sat_test['promo_flag_month_before'].fillna(method = 'bfill')
promo_test = np.array(df_sat_test['promo_flag_month_before'])
with pm.Model() as model:
promo_ = pm.MutableData('promo', promo)
t_ = pm.MutableData('t', t)
a_ = pm.MutableData('a', a)
s_ = pm.MutableData('s', s)
k = pm.Normal('k', 0, 1)
m = pm.Normal('m', 0, 5)
delta = pm.Laplace('delta', 0, 0.1, shape=n_changepoints)
growth = k + at.dot(a, delta)
offset = (m + at.dot(a, -s * delta))
trend = pm.Deterministic('trend', growth * t + offset)
yearly_beta = pm.Normal('yearly_beta', 0, 1, shape = n_components*2)
yearly_seasonality = pm.Deterministic('yearly_seasonality',at.dot(yearly_X(t, 365.25/len(t)), yearly_beta))
monthly_beta = pm.Normal('monthly_beta', 0, 1, shape = monthly_n_components*2)
monthly_seasonality = pm.Deterministic('monthly_seasonality',at.dot(monthly_X(t, 30.5/len(t)), monthly_beta))
promo_beta = pm.Normal('promo_beta', 0, 1)
predictions = pm.Deterministic('predictions', np.exp(trend + yearly_seasonality + monthly_seasonality + promo_beta*promo))
# error = pm.HalfCauchy('error', 0.5)
pm.Poisson('obs',
predictions,
observed=df_sat['eaches']
)
trace = pymc.sampling_jax.sample_numpyro_nuts(tune=2000, draws = 2000)
t_test = np.arange(len(df_sat_test)) / len(df_sat_test)
s_test = np.linspace(0, np.max(t_test), n_changepoints+2)[1:-1]
a_test = (t_test[:, None] > s_test)*1
pm.set_data(new_data = {'promo':promo_test,
't':t_test,
's':s_test,
'a':a_test}, model = model)
test_ppc = pm.sample_posterior_predictive(trace, model=model)
test_ppc.posterior_predictive.obs.mean(dim=['draw','chain'])
The length of t_test and a_test and promo_test are all 12. The only other thing I can think of, is that the model was fit on 48 samples (4 years of monthly data). Not sure what to do if that’s the case. If the model is being ran to forecast out, I wouldn’t have observed data at that point.