MvNormal for covarying intercept & slope in hierarchical model

I have a hierarchical model for a simulated data set whose code I attached in the end. The outcome variable y varies by two variables, “market” and “model”. All observations also have a time variable “t” associated with them and there is some change over time in y related to t.

A varying effects model that separately learns about the intercepts and the time slope separately is not too complicated and works, here’s the definition:

Working varying effects model

coords = {
    'market':np.arange(n_markets),
    'model':np.arange(n_models),
}
with pm.Model(coords=coords) as mdl2:
    market = pm.MutableData("market",train["market"].values)    
    model = pm.MutableData("model",train["model"].values)
    T = pm.MutableData("T",train['t'].values)
    mu_alpha = pm.Normal("mu_alpha", 7, 2)
    alpha = pm.Normal("alpha",mu=mu_alpha, sigma=2, dims=("market","model"))
    bT = pm.Normal("bT", 0, 1, dims=("market","model"))
    mu = pm.Deterministic("mu", alpha[market, model] + bT[market,model]*T)
    sigma = pm.HalfNormal("sigma",1)
    likelihood = pm.StudentT("y", mu=mu, sigma=sigma, nu=3,observed=train['y'].values)
    trace2 = sampling_jax.sample_numpyro_nuts(chains=2)
    assert not trace2['sample_stats']['diverging'].values.any()
    pm.sample_posterior_predictive(trace2,extend_inferencedata=True)

I wanted to use a multivariate normal distribution to allow the market and model level parameters for the intercept and the slope to covary, but I’m apparently struggling handling the dimensionality correctly.

Attempted MvN model

coords["two"] = np.arange(2)
with pm.Model(coords=coords) as mdl4:
    market = pm.MutableData("market", train["market"].values)
    model = pm.MutableData("model", train["model"].values)
    T = pm.MutableData("T", train['t'].values)
    
    mu_alpha = pm.Normal("mu_alpha", 7, 2)
    cov_matrix = pm.LKJCholeskyCov("cov_matrix", n=2, eta=1, sd_dist=pm.HalfNormal.dist(1))
    alpha_bT = pm.MvNormal("alpha_bT", mu=[mu_alpha, 0], chol=cov_matrix[0], dims=("two","market","model"))  
    alpha = pm.Deterministic("alpha", alpha_bT[0,:,:], dims=("market","model"))
    bT = pm.Deterministic("bT", alpha_bT[1,:,:], dims=("market","model"))

    mu = alpha[market, model] + bT[market, model] * T
    sigma = pm.HalfNormal("sigma", 1)
    
    likelihood = pm.StudentT("y", mu=mu, sigma=sigma, nu=3, observed=train['y'].values)
    
    trace4 = pm.sample(chains=2)
    assert not trace4['sample_stats']['diverging'].values.any()
    pm.sample_posterior_predictive(trace4, extend_inferencedata=True)


This fails with ValueError: Invalid dimension for value: 3

alpha_bT.eval().shape returns (2,3,2) , although the dimensionality of the coords in order is (2,3,3), which puzzles me. I assume this is connected to cov_matrix not being specified correctly, because I’ve so far failed to define that in a way that I get a separate LKJCholeskyCov per subgroup, which I assume to be required.

Can someone help me figure out what I’m doing wrong ? I couldn’t find any examples where a MvN is used in conjunction with dims, is that just happenstance or is my approach to this wrong?

Library versions:

numpy: 1.21.6
pandas: 1.3.3
pymc: 4.0.1
arviz: 0.12.1

Data generating code:

import numpy as np
import pandas as pd
from scipy import stats
import pymc as pm
from pymc import sampling_jax
import arviz as az

rng = np.random.default_rng(0)
N = 1000
n_markets = 3
n_models = 4
markets = rng.choice(np.arange(n_markets),size=N, p=[0.5,0.3,0.2])
models = rng.choice(np.arange(n_models),size=N, p=[0.25,0.25, 0.1,0.4])

t = rng.uniform(0,1,size=N)
years = np.zeros(shape=(N,))
years[t<0.2] = 0
years[(t>=0.2)&(t<0.4)] = 1
years[(t>=0.4)&(t<0.6)] = 2
years[(t>=0.6)&(t<0.8)] = 3
years[t>=0.8] = 4
df = pd.DataFrame({'market':markets, 'model':models, 't':t,'year':np.array(years).astype(int), 'y':np.zeros((N,))})

# specify group parameters
mu_model = [7,5,4,3]
mu_market = [2, -1, 0]
b_year = rng.normal(0.0,1,size=(n_markets,n_models))



# make y
y = np.zeros(shape=(N,))
for market in np.arange(n_markets):
    for model in np.arange(n_models):
        df.loc[(df['market']==market)&(df['model']==model),'y'] = stats.norm.rvs(loc=mu_model[model] + mu_market[market] + b_year[market,model]*df.loc[(df['market']==market)&(df['model']==model),'year'], scale=1)
        

# reassign some subgroups with non-linear changes
df.loc[(df['market']==0)&(df['model']==0),'y'] = stats.norm.rvs(loc=mu_model[0] + mu_market[0] + np.sin(df.loc[(df['market']==0)&(df['model']==0),'t']*12), scale=0.5) # sinus trend
df.loc[(df['market']==1)&(df['model']==1),'y'] = stats.norm.rvs(loc=mu_model[1] + mu_market[1] + np.cos(df.loc[(df['market']==1)&(df['model']==1),'t']*15), scale=0.9) # cosinus trend
df.loc[(df['market']==1)&(df['model']==0)&(df['t']>0.6),'y'] = df.loc[(df['market']==1)&(df['model']==0)&(df['t']>0.6),'y']+4 # demand shift

df.loc[(df['market']==n_markets-1)&(df['model']==n_models-1),'y'] = stats.norm.rvs(loc=mu_model[n_models-1] + 
                                                                               mu_market[n_markets-1] + 
                                                                               2 +
                                                                               3*df.loc[(df['market']==n_markets-1)&(df['model']==n_models-1),'t'].values**3+
                                                                               5*df.loc[(df['market']==n_markets-1)&(df['model']==n_models-1),'t'].values**2+
                                                                               -1*df.loc[(df['market']==n_markets-1)&(df['model']==n_models-1),'t'],
                                                                                 scale=0.5) # cubic polynomial trend


# specify train and test sets
train = df.loc[df['t']<0.8].copy()
test = df.loc[df['t']>0.8].copy()