How do I correctly add an additional level to a hierarchical model?

Hello.

With the generous help of many people on this site, I have my first hierarchical model working. It is a sales forecasting model (in the style of prophet/timeseers) for one item sold in multiple stores. I now need to expand it to many items sold in multiple stores.

Here is a toy dataset with the working model:

import scipy.stats as stats
import pandas as pd
import pymc as pm
import numpy as np

import pymc.sampling_jax

import aesara
from aesara import tensor as at
import arviz as az

data = stats.poisson(mu=[5, 2, 1, 7, 2]).rvs([72, 5]).ravel()  # date-major: location varies fastest, matching the index below
dates = pd.date_range('2016-01-01', freq='M', periods=72)
locations = [f'location_{i}' for i in range(5)]
df = pd.DataFrame(
    data,
    index=pd.MultiIndex.from_product([dates, locations], names=['date', 'location']),
    columns=['eaches'],
)
df_train = df.loc[:'2020-12-31']
df_test = df.loc['2021-01-31':]  # hold out the final year

def create_fourier_features(t, n, p=365.25):
    # n harmonics of period p evaluated at times t; columns are [cos..., sin...]
    x = 2 * np.pi * (np.arange(n) + 1) * t[:, None] / p
    return np.concatenate((np.cos(x), np.sin(x)), axis=1)

location_idxs, locations = pd.factorize(df_train.index.get_level_values(1))
time_idxs, times = pd.factorize(df_train.index.get_level_values(0))

t = np.arange(len(times)) / (len(times) - 1)  # scaled time, one entry per unique month; the model expands it with time_idxs

month_dummies = pd.get_dummies(df_train.index.get_level_values(0).month)
sept = np.array(month_dummies[9])    # September indicator, one entry per row
march = np.array(month_dummies[3])   # March indicator, one entry per row
month = np.array(df_train.index.get_level_values(0).month)

n_yearly = 10
n_monthly = 5

# Seasonal Fourier features (the data is monthly)
yearly_fourier = create_fourier_features(t, n_yearly, 12)
monthly_fourier = create_fourier_features(t, n_monthly, 1)

n_changepoints = 8

# changepoint locations, evenly spaced over the scaled time axis
s = np.linspace(0, np.max(t), n_changepoints + 2)[1:-1]

# indicator matrix: entry [i, j] is 1 once t[i] has passed changepoint s[j]
a = (t[:, None] > s) * 1



coords_={
    "location":locations,
    "yearly_components": [f'yearly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_yearly)],
    "monthly_components":[f'monthly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(n_monthly)],
    'changepoints': times[np.argwhere(np.diff(a, axis=0) != 0)[:, 0]],
    "obs_id":[f'{loc}_{time.year}_month_{time.month}' for time, loc in df_train.index.values]
}

with pm.Model(coords=coords_) as partial_pooled_model:
    
    location_idxs = pm.ConstantData('location_idx', location_idxs, dims = 'obs_id')
    t_ = pm.MutableData('t', t)
    a_ = pm.MutableData('a', a)
    s_ = pm.MutableData('s', s)
    yearly_f_ = pm.MutableData('yearly_f_', yearly_fourier)
    monthly_m_ = pm.MutableData('monthly_m_',monthly_fourier)
    sept_month = pm.MutableData('sept_month',sept)
    march_month = pm.MutableData('march_month', march)
    eaches = pm.MutableData('eaches', df_train['eaches'])

    mu_k = pm.Normal('mu_k', 0, 1)
    sigma_k = pm.HalfCauchy('sigma_k', 5)
    k = pm.Normal('k', mu_k, sigma_k, dims = 'location')
    
    mu_m = pm.Normal('mu_m', 0, 1)
    sigma_m = pm.HalfCauchy('sigma_m', 5)
    m = pm.Normal('m', mu_m, sigma_m, dims = 'location')
    
    delta = pm.Laplace('delta', 0, 0.1, dims=['location', 'changepoints'])

    growth = pm.Deterministic('growth', k[location_idxs] + (a_[time_idxs] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    offset = pm.Deterministic('offset', m[location_idxs] + (a_[time_idxs] * -s_[None, :] * delta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    # growth and offset are already per-observation, so they need no further indexing
    trend = pm.Deterministic('trend', growth * t_[time_idxs] + offset, dims=['obs_id'])

    yearly_mu = pm.Normal('yearly_mu', 0, 0.1)
    yearly_sigma = pm.HalfNormal('yearly_sigma', sigma=0.1)
    yearly_beta = pm.Normal('yearly_beta', yearly_mu, yearly_sigma, dims=['location', 'yearly_components'])
    yearly_seasonality = pm.Deterministic('yearly_seasonality', (yearly_f_[time_idxs] * yearly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])

    monthly_mu = pm.Normal('monthly_mu', 0, 0.1)
    monthly_sigma = pm.HalfNormal('monthly_sigma', sigma=0.1)
    monthly_beta = pm.Normal('monthly_beta', monthly_mu, monthly_sigma, dims=['location', 'monthly_components'])
    monthly_seasonality = pm.Deterministic('monthly_seasonality', (monthly_m_[time_idxs] * monthly_beta[location_idxs, :]).sum(axis=1), dims=['obs_id'])
    
    sept_mu = pm.Normal('sept_mu', 0, 1)
    sept_sigma = pm.HalfCauchy('sept_sigma', 5)
    sept_beta = pm.Normal('sept_beta', sept_mu, sept_sigma, dims = 'location')
    
    march_mu = pm.Normal('march_mu', 0, 1)
    march_sigma = pm.HalfCauchy('march_sigma', 5)
    march_beta = pm.Normal('march_beta', march_mu, march_sigma, dims = 'location')

    predictions = pm.Deterministic('predictions',
                                   pm.math.exp(trend + yearly_seasonality + monthly_seasonality
                                               + (sept_beta[location_idxs] * sept_month)
                                               + (march_beta[location_idxs] * march_month)),
                                   dims=['obs_id'])


    pm.Poisson('obs',
               predictions,
               observed=eaches,
               dims = 'obs_id')
    
    # trace = pm.sample(tune=3000, draws=4000)
    trace = pymc.sampling_jax.sample_numpyro_nuts(tune=3000, draws = 4000)

To expand this to all products in multiple locations, do I need to concatenate product and location into a single index, or add a product dim to each piece of the model that uses dims? Just trying to get a sense of where to start; the sketch below shows the two options as I understand them.
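Something like this is what I mean (a sketch; it assumes df_train now has an 'item' level in its index):

# option 1: collapse product and location into one "series" dimension
series_keys = [f'{item}_{loc}' for item, loc
               in zip(df_train.index.get_level_values('item'),
                      df_train.index.get_level_values('location'))]
series_idxs, series = pd.factorize(pd.Index(series_keys))

# option 2: keep separate 'item' and 'location' dims and fancy-index with both,
# e.g. k = pm.Normal('k', mu_k, sigma_k, dims=('item', 'location')),
# used inside the model as k[item_idxs, location_idxs]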

Have you familiarised yourself with the various examples of hierarchical modelling? Posts tagged hierarchical model — PyMC example gallery

If not, it’s very worth doing that before going further 🙂

Thanks. I did try to mimic this particular case: A Primer on Bayesian Methods for Multilevel Modeling — PyMC example gallery. In that example, adding a level simply means adding it to the dims and the integer indexes.
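The pattern there is roughly this (a paraphrased sketch of that example; alpha is the county-level intercept and county_idx is the per-row integer index):

# one named dim per level, one integer index per row;
# alpha[county_idx] broadcasts the county parameters out to the observations
alpha = pm.Normal('alpha', mu_a, sigma_a, dims='county')
theta = alpha[county_idx] + beta * floor

However, when I do the same thing with an item level, I get the following error.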

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_5034/940735004.py in <module>
      1 with pm.Model(coords=coords_) as partial_pooled_model:
      2 
----> 3     location_idxs = pm.MutableData('location_idx', location_idxs, dims = 'obs_id')
      4     item_idxs = pm.MutableData('item_idx', item_idxs, dims = 'obs_id')
      5     t_ = pm.MutableData('t', t)

/opt/conda/lib/python3.7/site-packages/pymc/data.py in MutableData(name, value, dims, coords, export_index_as_coords, **kwargs)
    563         export_index_as_coords=export_index_as_coords,
    564         mutable=True,
--> 565         **kwargs,
    566     )
    567     return cast(SharedVariable, var)

/opt/conda/lib/python3.7/site-packages/pymc/data.py in Data(name, value, dims, coords, export_index_as_coords, mutable, **kwargs)
    673             )
    674     if mutable:
--> 675         x = aesara.shared(arr, name, **kwargs)
    676     else:
    677         x = at.as_tensor_variable(arr, name, **kwargs)

/opt/conda/lib/python3.7/site-packages/aesara/compile/sharedvalue.py in shared(value, name, strict, allow_downcast, **kwargs)
    268         if isinstance(value, Variable):
    269             raise TypeError(
--> 270                 "Shared variable constructor needs numeric "
    271                 "values and not symbolic variables."
    272             )

TypeError: Shared variable constructor needs numeric values and not symbolic variables.

Before I added the item_idxs level, this worked with just the location_idx level. For reference, the new index and coord are built roughly like this (a sketch; it assumes the training frame now carries an 'item' level in its MultiIndex):
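item_idxs, items = pd.factorize(df_train.index.get_level_values('item'))
coords_['item'] = items

Here is my model code.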

with pm.Model(coords=coords_) as partial_pooled_model:
    
    location_idxs = pm.MutableData('location_idx', location_idxs, dims = 'obs_id')
    item_idxs = pm.MutableData('item_idx', item_idxs, dims = 'obs_id')
    t_ = pm.MutableData('t', t)
    a_ = pm.MutableData('a', a)
    s_ = pm.MutableData('s', s)
    eaches = pm.MutableData('eaches', df_train['eaches'])

    mu_k = pm.Normal('mu_k', 0, 1)
    sigma_k = pm.HalfCauchy('sigma_k', 5)
    k = pm.Normal('k', mu_k, sigma_k, dims = ['item', 'location'])
    
    mu_m = pm.Normal('mu_m', 0, 1)
    sigma_m = pm.HalfCauchy('sigma_m', 5)
    m = pm.Normal('m', mu_m, sigma_m, dims = ['item', 'location'])
    
    delta = pm.Laplace('delta', 0, 0.1, dims=['item', 'location', 'changepoints'])

    growth = pm.Deterministic('growth', k[item_idxs, location_idxs] + (a_[time_idxs] * delta[item_idxs, location_idxs, :]).sum(axis=1), dims=['obs_id'])

    offset = pm.Deterministic('offset', m[item_idxs, location_idxs] + (a_[time_idxs] * -s_[None, :] * delta[item_idxs, location_idxs, :]).sum(axis=1), dims=['obs_id'])

    # growth and offset already have one entry per observation, so no further indexing
    trend = pm.Deterministic('trend', growth * t_[time_idxs] + offset, dims=['obs_id'])

    predictions = pm.Deterministic('predictions', pm.math.exp(trend), dims=['obs_id'])

    pm.Poisson('obs',
               predictions,
               observed=eaches,
               dims = 'obs_id')

    # trace = pymc.sampling_jax.sample_numpyro_nuts(tune=3000, draws = 4000)
pm.model_to_graphviz(partial_pooled_model)

I’m not sure why it’s demanding a numeric value here when I didn’t hit this with the location index in the previous version.

So I’m not an expert in Aesara yet, but my tips would be:

  • try using the regular pm.sample for the moment, as JAX sampling is experimental
  • try providing dimensions for the mutable data, just to help surface any implementation issues
  • but the error seems to be pointing at the coords. Maybe double-check what you are feeding into obs_id in the coords_ dict; it could be that what’s going into the f-string is not doing exactly what you intend. A quick check like the sketch below can rule this out
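For example (a sketch; it just inspects the raw inputs before the model ever sees them):

# the TypeError complains about symbolic variables, so confirm these are still
# plain numpy arrays (e.g. not rebound to model variables by an earlier run of
# the model cell), and that their lengths match the obs_id coordinate
print(type(location_idxs), type(item_idxs))
print(len(coords_['obs_id']), len(location_idxs), len(item_idxs))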

It’s not a sampling problem, as I can’t even get the graphviz to render with sampling commented out.

I think the obs_id is doing what I want in coords_. I want an observation for each product, in each store location, for each month in the dataset, which it has; the coord is built as in the sketch below. I’m not exactly sure where to go from here, but I will keep plugging.
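For reference (a sketch; it assumes a (date, location, item) MultiIndex on df_train):

coords_['obs_id'] = [f'{item}_{loc}_{time.year}_month_{time.month}'
                     for time, loc, item in df_train.index.values]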

Thanks for the help thus far.