Hello!
As I’m trying to turn a prophet model into a hierarchical model, I’m having trouble with seasonality. The shape of seasonality should be the dimensions*2 (in the case of yearly, 10 dimensions).
Using PyMC 4, I’m trying to only use dimensions but keep getting the following error:
---------------------------------------------------------------------------
ShapeError Traceback (most recent call last)
/tmp/ipykernel_7820/3842427766.py in <module>
77 predictions,
78 observed=eaches,
---> 79 dims = 'obs_id')
80
81 # trace = pm.sample(tune=2000, draws=2000)
/opt/conda/lib/python3.7/site-packages/pymc/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
592 raise ValueError("Transformations for discrete distributions")
593
--> 594 return super().__new__(cls, name, *args, **kwargs)
595
596
/opt/conda/lib/python3.7/site-packages/pymc/distributions/distribution.py in __new__(cls, name, rng, dims, initval, observed, total_size, transform, *args, **kwargs)
276 dims=dims,
277 transform=transform,
--> 278 initval=initval,
279 )
280
/opt/conda/lib/python3.7/site-packages/pymc/model.py in register_rv(self, rv_var, name, data, total_size, dims, transform, initval)
1311 actual=len(new_coords),
1312 expected=new_length,
-> 1313 )
1314 # store it as tuple for immutability as in add_coord
1315 self._coords[dname] = tuple(new_coords)
/opt/conda/lib/python3.7/site-packages/pymc/model.py in make_obs_var(self, rv_var, data, dims, transform)
1338 initval
1339 The initial value of the random variable.
-> 1340
1341 Returns
1342 -------
ShapeError: Dimensionality of data and RV don't match. (actual 1 != expected 2)
I’ve ran this model fine when only changing the trend
factor into a hierarchical framework. The error started when I tried to change the yearly seasonality into the hierarchical framework. Model below:
t = np.arange(len(df_train)) / len(df_train)
s = np.linspace(0, np.max(t), n_changepoints+2)[1:-1]
a = (t[:, None] > s)*1
n_components = 10
def yearly_X(t, p=365.25, n=n_components):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
monthly_n_components = 5
def monthly_X(t, p=30.5, n=monthly_n_components):
x = 2 * np.pi * (np.arange(n)+1) * t[:, None] / p
return np.concatenate((np.cos(x), np.sin(x)), axis = 1)
month = pd.get_dummies(df_train['month'])
sept = np.array(month[9])
march = np.array(month[3])
month = np.array(df_train['month'])
yearly_fourier = yearly_X(t, 365.25/len(t))
monthly_fourier = monthly_X(t, 30.5/len(t))
location_idxs, locations = pd.factorize(df_train['location'])
coords_={
"location":df_train['location'].unique(),
"n_components": np.arange(n_components*2),
"obs_id":np.arange(len(location_idxs))
}
with pm.Model(coords=coords_) as partial_pooled_model:
location_idxs = pm.ConstantData('location_idx', location_idxs, dims = 'obs_id')
t_ = pm.MutableData('t', t)
a_ = pm.MutableData('a', a)
s_ = pm.MutableData('s', s)
yearly_f_ = pm.MutableData('yearly_f_', yearly_fourier)
monthly_m_ = pm.MutableData('monthly_m_',monthly_fourier)
sept_month = pm.MutableData('sept_month',sept)
march_month = pm.MutableData('march_month', march)
eaches = pm.MutableData('eaches', df_train['eaches'])
mu_k = pm.Normal('mu_k', 0, 1)
sigma_k = pm.HalfCauchy('sigma_k', 3)
k = pm.Normal('k', mu_k, sigma_k, dims = 'location')
mu_m = pm.Normal('mu_m', 0, 1)
sigma_m = pm.HalfCauchy('sigma_m', 3)
m = pm.Normal('m', mu_m, sigma_m, dims = 'location')
delta = pm.Laplace('delta', 0, 0.1, shape=n_changepoints)
growth = pm.Deterministic('growth', k[location_idxs] + at.dot(a_, delta))
offset = pm.Deterministic('offset', (m[location_idxs] + at.dot(a_, -s_ * delta)))
trend = pm.Deterministic('trend', growth[location_idxs] * t_ + offset[location_idxs])
yearly_mu = pm.Normal('yearly_mu',0,1)
yearly_sigma = pm.HalfCauchy('yearly_sigma', 3)
yearly_beta = pm.Normal('yearly_beta', yearly_mu, yearly_sigma, dims=('n_components', 'location'))
yearly_seasonality = pm.Deterministic('yearly_seasonality',at.dot(yearly_f_, yearly_beta[location_idxs]))
monthly_beta = pm.Normal('monthly_beta', 0, 10, shape = monthly_n_components*2)
monthly_seasonality = pm.Deterministic('monthly_seasonality',at.dot(monthly_m_, monthly_beta))
sept_beta = pm.Normal('sept_beta', 0,10)
march_beta = pm.Normal('march_beta', 0,10)
predictions = pm.Deterministic('predictions', np.exp(trend[location_idxs] + yearly_seasonality[location_idxs] + monthly_seasonality + (sept_beta*sept_month) + (march_beta*march_month)))
pm.Poisson('obs',
predictions,
observed=eaches,
dims = 'obs_id')
trace = pymc.sampling_jax.sample_numpyro_nuts(tune=2000, draws = 2000)
That model won’t even print out a graphviz object.
When I change seasonality to
yearly_mu = pm.Normal('yearly_mu',0,1)
yearly_sigma = pm.HalfCauchy('yearly_sigma', 3)
yearly_beta = pm.Normal('yearly_beta', yearly_mu, yearly_sigma, shape = n_components*2, dims=( 'location'))
yearly_seasonality = pm.Deterministic('yearly_seasonality',at.dot(yearly_f_, yearly_beta[location_idxs]))
The graph prints, but does not sample throwing another dimensionality error. How should this variable be coded?