How do I correctly add an additional level to a hierarchical model?

Hello.

With the generous help of many people on this site, I have my first hierarchical model working. It is a sales forecasting model (in the style of prophet/timeseers) for one item sold in multiple stores. I now need to expand it to many items sold in multiple stores.

Here is a toy dataset with the working model:

import scipy.stats as stats
import pandas as pd
import pymc as pm
import numpy as np

import pymc.sampling_jax

import aesara
from aesara import tensor as at
import arviz as az

data = stats.poisson(mu=[5, 2, 1, 7, 2]).rvs([72, 5]).ravel()  # date-major: location varies fastest, matching the index below
dates = pd.date_range('2016-01-01', freq='M', periods=72)
locations = [f'location_{i}' for i in range(5)]
df = pd.DataFrame(
    data,
    index=pd.MultiIndex.from_product([dates, locations], names=['date', 'location']),
    columns=['eaches'],
)
df_train = df.loc[:'2020-12-31']
df_test = df.loc['2021-01-31':]  # hold out the final year

def create_fourier_features(t, n, p=365.25):
    # n harmonics of period p evaluated at times t; columns are [cos..., sin...]
    x = 2 * np.pi * (np.arange(n) + 1) * t[:, None] / p
    return np.concatenate((np.cos(x), np.sin(x)), axis=1)

location_idxs, locations = pd.factorize(df_train.index.get_level_values(1))
time_idxs, times = pd.factorize(df_train.index.get_level_values(0))

t = np.arange(len(times)) / (len(times) - 1)  # scaled time, one entry per unique month; the model expands it with time_idxs

month_dummies = pd.get_dummies(df_train.index.get_level_values(0).month)
sept = np.array(month_dummies[9])    # September indicator, one entry per row
march = np.array(month_dummies[3])   # March indicator, one entry per row
month = np.array(df_train.index.get_level_values(0).month)

n_yearly = 10
n_monthly = 5

# Seasonal Fourier features (the data is monthly)
yearly_fourier = create_fourier_features(t, n_yearly, 12)
monthly_fourier = create_fourier_features(t, n_monthly, 1)

n_changepoints = 8

# changepoint locations, evenly spaced over the scaled time axis
s = np.linspace(0, np.max(t), n_changepoints + 2)[1:-1]

# indicator matrix: entry [i, j] is 1 once t[i] has passed changepoint s[j]
a = (t[:, None] > s) * 1



coords_={
    "location":locations,
    "yearly_components": [f'yearly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_yearly)],
    "monthly_components":[f'monthly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_monthly)],
    'changepoints': times[np.argwhere(np.diff(a, axis=0) != 0)[:, 0]],
    "obs_id":[f'{loc}_{time.year}_month_{time.month}' for time, loc in df_train.index.values]
}

with pm.Model(coords=coords_) as partial_pooled_model:
    
    location_idxs = pm.ConstantData('location_idx', location_idxs, dims = 'obs_id')
    t_ = pm.MutableData('t', t)
    a_ = pm.MutableData('a', a)
    s_ = pm.MutableData('s', s)
    yearly_f_ = pm.MutableData('yearly_f_', yearly_fourier)
    monthly_m_ = pm.MutableData('monthly_m_',monthly_fourier)
    sept_month = pm.MutableData('sept_month',sept)
    march_month = pm.MutableData('march_month', march)
    eaches = pm.MutableData('eaches', df_train['eaches'])

    mu_k = pm.Normal('mu_k', 0, 1)
    sigma_k = pm.HalfCauchy('sigma_k', 5)
    k = pm.Normal('k', mu_k, sigma_k, dims = 'location')
    
    mu_m = pm.Normal('mu_m', 0, 1)
    sigma_m = pm.HalfCauchy('sigma_m', 5)
    m = pm.Normal('m', mu_m, sigma_m, dims = 'location')
    
    delta = pm.Laplace('delta', 0, 0.1, dims=['location', 'changepoints'])

    growth = pm.Deterministic('growth', k[location_idxs] + (a_[time_idxs] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    offset = pm.Deterministic('offset', m[location_idxs] + (a_[time_idxs] * -s_[None, :] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    # growth and offset are already per-observation, so they need no further indexing
    trend = pm.Deterministic('trend', growth * t_[time_idxs] + offset, dims=['obs_id'])

    yearly_mu = pm.Normal('yearly_mu', 0, 0.1)
    yearly_sigma = pm.HalfNormal('yearly_sigma', sigma=0.1)
    yearly_beta = pm.Normal('yearly_beta', yearly_mu, yearly_sigma, dims=['location', 'yearly_components'])
    yearly_seasonality = pm.Deterministic('yearly_seasonality', (yearly_f_[time_idxs] * yearly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])

    monthly_mu = pm.Normal('monthly_mu', 0, 0.1)
    monthly_sigma = pm.HalfNormal('monthly_sigma', sigma=0.1)
    monthly_beta = pm.Normal('monthly_beta', monthly_mu, monthly_sigma, dims=['location', 'monthly_components'])
    monthly_seasonality = pm.Deterministic('monthly_seasonality', (monthly_m_[time_idxs] * monthly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    
    sept_mu = pm.Normal('sept_mu', 0, 1)
    sept_sigma = pm.HalfCauchy('sept_sigma', 5)
    sept_beta = pm.Normal('sept_beta', sept_mu, sept_sigma, dims = 'location')
    
    march_mu = pm.Normal('march_mu', 0, 1)
    march_sigma = pm.HalfCauchy('march_sigma', 5)
    march_beta = pm.Normal('march_beta', march_mu, march_sigma, dims = 'location')

    predictions = pm.Deterministic('predictions',
                                   pm.math.exp(trend + yearly_seasonality + monthly_seasonality
                                               + (sept_beta[location_idxs] * sept_month)
                                               + (march_beta[location_idxs] * march_month)),
                                   dims=['obs_id'])


    pm.Poisson('obs',
               predictions,
               observed=eaches,
               dims = 'obs_id')
    
    # trace = pm.sample(tune=3000, draws=4000)
    trace = pymc.sampling_jax.sample_numpyro_nuts(tune=3000, draws = 4000)

To expand this to all products in multiple locations, do I need to concatenate product and location into a single index, or add a product dim to each piece of the model that uses dims? Just trying to get a sense of where to start; the sketch below shows the two options as I understand them.
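Something like this is what I mean (a sketch; it assumes df_train now has an 'item' level in its index):

# option 1: collapse product and location into one "series" dimension
series_keys = [f'{item}_{loc}' for item, loc
               in zip(df_train.index.get_level_values('item'),
                      df_train.index.get_level_values('location'))]
series_idxs, series = pd.factorize(pd.Index(series_keys))

# option 2: keep separate 'item' and 'location' dims and fancy-index with both,
# e.g. k = pm.Normal('k', mu_k, sigma_k, dims=('item', 'location')),
# used inside the model as k[item_idxs, location_idxs]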

Have you familiarised yourself with the various examples of hierarchical modelling? Posts tagged hierarchical model — PyMC example gallery

If not, it’s very worth doing that before going further 🙂

Thanks. I did try to mimic this particular case: A Primer on Bayesian Methods for Multilevel Modeling — PyMC example gallery. In that example, adding a level simply means adding it to the dims and the integer indexes.
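The pattern there is roughly this (a paraphrased sketch of that example; alpha is the county-level intercept and county_idx is the per-row integer index):

# one named dim per level, one integer index per row;
# alpha[county_idx] broadcasts the county parameters out to the observations
alpha = pm.Normal('alpha', mu_a, sigma_a, dims='county')
theta = alpha[county_idx] + beta * floor

However, when I do the same thing with an item level, I get the following error.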

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_5034/940735004.py in <module>
      1 with pm.Model(coords=coords_) as partial_pooled_model:
      2 
----> 3     location_idxs = pm.MutableData('location_idx', location_idxs, dims = 'obs_id')
      4     item_idxs = pm.MutableData('item_idx', item_idxs, dims = 'obs_id')
      5     t_ = pm.MutableData('t', t)

/opt/conda/lib/python3.7/site-packages/pymc/data.py in MutableData(name, value, dims, coords, export_index_as_coords, **kwargs)
    563         export_index_as_coords=export_index_as_coords,
    564         mutable=True,
--> 565         **kwargs,
    566     )
    567     return cast(SharedVariable, var)

/opt/conda/lib/python3.7/site-packages/pymc/data.py in Data(name, value, dims, coords, export_index_as_coords, mutable, **kwargs)
    673             )
    674     if mutable:
--> 675         x = aesara.shared(arr, name, **kwargs)
    676     else:
    677         x = at.as_tensor_variable(arr, name, **kwargs)

/opt/conda/lib/python3.7/site-packages/aesara/compile/sharedvalue.py in shared(value, name, strict, allow_downcast, **kwargs)
    268         if isinstance(value, Variable):
    269             raise TypeError(
--> 270                 "Shared variable constructor needs numeric "
    271                 "values and not symbolic variables."
    272             )

TypeError: Shared variable constructor needs numeric values and not symbolic variables.

Before I added the item_idxs level, this worked with just the location_idx level. For reference, the new index and coord are built roughly like this (a sketch; it assumes the training frame now carries an 'item' level in its MultiIndex):
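item_idxs, items = pd.factorize(df_train.index.get_level_values('item'))
coords_['item'] = items

Here is my model code.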

with pm.Model(coords=coords_) as partial_pooled_model:
    
    location_idxs = pm.MutableData('location_idx', location_idxs, dims = 'obs_id')
    item_idxs = pm.MutableData('item_idx', item_idxs, dims = 'obs_id')
    t_ = pm.MutableData('t', t)
    a_ = pm.MutableData('a', a)
    s_ = pm.MutableData('s', s)
    eaches = pm.MutableData('eaches', df_train['eaches'])

    mu_k = pm.Normal('mu_k', 0, 1)
    sigma_k = pm.HalfCauchy('sigma_k', 5)
    k = pm.Normal('k', mu_k, sigma_k, dims = ['item', 'location'])
    
    mu_m = pm.Normal('mu_m', 0, 1)
    sigma_m = pm.HalfCauchy('sigma_m', 5)
    m = pm.Normal('m', mu_m, sigma_m, dims = ['item', 'location'])
    
    delta = pm.Laplace('delta', 0, 0.1, dims=['item', 'location', 'changepoints'])

    growth = pm.Deterministic('growth', k[item_idxs, location_idxs] + (a_[time_idxs] * delta[item_idxs, location_idxs, :]).sum(axis=1), dims=['obs_id'])

    offset = pm.Deterministic('offset', m[item_idxs, location_idxs] + (a_[time_idxs] * -s_[None, :] * delta[item_idxs, location_idxs, :]).sum(axis=1), dims=['obs_id'])

    # growth and offset already have one entry per observation, so no further indexing
    trend = pm.Deterministic('trend', growth * t_[time_idxs] + offset, dims=['obs_id'])

    predictions = pm.Deterministic('predictions', pm.math.exp(trend), dims=['obs_id'])

    pm.Poisson('obs',
               predictions,
               observed=eaches,
               dims = 'obs_id')

    # trace = pymc.sampling_jax.sample_numpyro_nuts(tune=3000, draws = 4000)
pm.model_to_graphviz(partial_pooled_model)

I’m not sure why it’s demanding a numeric value here when I didn’t hit this with the location index in the previous version.

So I’m not an expert in Aesara yet, but my tips would be:

  • try using the regular pm.sample for the moment, as JAX sampling is experimental
  • try providing dimensions for the mutable data, just to help surface any implementation issues
  • but the error seems to be pointing at the coords. Maybe double-check what you are feeding into obs_id in the coords_ dict; it could be that what’s going into the f-string is not doing exactly what you intend. A quick check like the sketch below can rule this out
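For example (a sketch; it just inspects the raw inputs before the model ever sees them):

# the TypeError complains about symbolic variables, so confirm these are still
# plain numpy arrays (e.g. not rebound to model variables by an earlier run of
# the model cell), and that their lengths match the obs_id coordinate
print(type(location_idxs), type(item_idxs))
print(len(coords_['obs_id']), len(location_idxs), len(item_idxs))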

It’s not a sampling problem, as I can’t even get the graphviz to render with sampling commented out.

I think the obs_id is doing what I want in coords_. I want an observation for each product, in each store location, for each month in the dataset, which it has; the coord is built as in the sketch below. I’m not exactly sure where to go from here, but I will keep plugging.
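For reference (a sketch; it assumes a (date, location, item) MultiIndex on df_train):

coords_['obs_id'] = [f'{item}_{loc}_{time.year}_month_{time.month}'
                     for time, loc, item in df_train.index.values]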

Thanks for the help thus far.