Hello,
I’ve tried a few different things, and here is where my model currently stands. It’s a lot to read, but it is essentially a hierarchical model that starts with a MvGaussianRandomWalk (technically one for the intercept and one for the coefficient) at the first level (across 100 time points), and then adds a level for each of the 4 categorical variables. The 4th variable is the year (I have 4 years of data); the idea is to use the coefficients from the level above it for inference, to avoid overfitting. I am almost certain there’s a better way to do this, as my trace dataframe has 450,000 rows. I’m also concerned because I saw in the post “Using Multivariate distributions for price optimization” that I possibly shouldn’t be using covariance matrices, which I currently am. Any advice would be greatly welcome!
Thanks
N = 100  # length of each time series

# Shared containers so new data can be swapped in without rebuilding the graph.
lagged = theano.shared(df['lagged'].values)  # previous value in the time series
vals = theano.shared(df['vals'].values)      # observed values

with pm.Model() as model:
    # --- Level 0: global innovation means for the random walks.
    # var1_count is 5 (the number of unique categories in the first variable).
    # The walks below take N steps, so mu describes the N-1 innovations,
    # hence shape (N-1, var1_count).
    global_beta_mu = pm.Normal('global_beta_mu', mu=0, sd=100,
                               shape=(N - 1, var1_count))
    global_alpha_mu = pm.Normal('global_alpha_mu', mu=0, sd=100,
                                shape=(N - 1, var1_count))

    # Cholesky factors of the innovation covariance across the 5 categories.
    packed_L_beta = pm.LKJCholeskyCov('packed_L_beta', n=var1_count, eta=2.,
                                      sd_dist=pm.HalfCauchy.dist(2.5))
    L_beta = pm.expand_packed_triangular(var1_count, packed_L_beta)
    packed_L_alpha = pm.LKJCholeskyCov('packed_L_alpha', n=var1_count, eta=2.,
                                       sd_dist=pm.HalfCauchy.dist(2.5))
    L_alpha = pm.expand_packed_triangular(var1_count, packed_L_alpha)

    # --- Level 1: correlated random walks for intercept (alpha) and slope (beta).
    # NOTE(review): indexing the (N-1, var1_count) mu with var1_indexes along
    # axis 1 only keeps the shape valid if var1_indexes has length var1_count —
    # confirm; if it is just arange(var1_count) the indexing is a no-op.
    alpha = pm.MvGaussianRandomWalk('alpha', mu=global_alpha_mu[:, var1_indexes],
                                    shape=(N, var1_count), chol=L_alpha)
    beta = pm.MvGaussianRandomWalk('beta', mu=global_beta_mu[:, var1_indexes],
                                   shape=(N, var1_count), chol=L_beta)

    # --- Level 2: non-centered offsets around the parent (var1) walk.
    # vars12_count is 10 (unique combinations of the first two variables);
    # vars12_indexes maps each combination to its parent var1 category.
    vars12_alpha_sd = pm.HalfCauchy('vars12_alpha_sd', beta=4, shape=(N, vars12_count))
    vars12_beta_sd = pm.HalfCauchy('vars12_beta_sd', beta=4, shape=(N, vars12_count))
    vars12_alpha_offset = pm.Normal('vars12_alpha_offset', mu=0, sd=1,
                                    shape=(N, vars12_count))
    vars12_beta_offset = pm.Normal('vars12_beta_offset', mu=0, sd=1,
                                   shape=(N, vars12_count))
    # FIX: the sd arrays already have the child-level shape (N, vars12_count),
    # so they must not be re-indexed. (The original also misspelled the index
    # name as `var12_indexes`, which would raise a NameError.)
    vars12_alpha_m = pm.Deterministic(
        'vars12_alpha_m',
        alpha[:, vars12_indexes] + vars12_alpha_offset * vars12_alpha_sd)
    vars12_beta_m = pm.Deterministic(
        'vars12_beta_m',
        beta[:, vars12_indexes] + vars12_beta_offset * vars12_beta_sd)

    # --- Level 3: offsets around the vars12 level.
    # vars123_count is 130 (unique combinations of the first three variables);
    # vars123_indexes maps each combination to its parent vars12 combination.
    vars123_alpha_sd = pm.HalfCauchy('vars123_alpha_sd', beta=4, shape=(N, vars123_count))
    vars123_beta_sd = pm.HalfCauchy('vars123_beta_sd', beta=4, shape=(N, vars123_count))
    vars123_alpha_offset = pm.Normal('vars123_alpha_offset', mu=0, sd=1,
                                     shape=(N, vars123_count))
    vars123_beta_offset = pm.Normal('vars123_beta_offset', mu=0, sd=1,
                                    shape=(N, vars123_count))
    # FIX: as above, only the parent mean is gathered; sd keeps its own shape.
    vars123_alpha_m = pm.Deterministic(
        'vars123_alpha_m',
        vars12_alpha_m[:, vars123_indexes] + vars123_alpha_offset * vars123_alpha_sd)
    vars123_beta_m = pm.Deterministic(
        'vars123_beta_m',
        vars12_beta_m[:, vars123_indexes] + vars123_beta_offset * vars123_beta_sd)

    # --- Level 4: offsets around the vars123 level.
    # vars1234_count is 520 (unique combinations of all four variables).
    vars1234_alpha_sd = pm.HalfCauchy('vars1234_alpha_sd', beta=4, shape=(N, vars1234_count))
    vars1234_beta_sd = pm.HalfCauchy('vars1234_beta_sd', beta=4, shape=(N, vars1234_count))
    vars1234_alpha_offset = pm.Normal('vars1234_alpha_offset', mu=0, sd=1,
                                      shape=(N, vars1234_count))
    vars1234_beta_offset = pm.Normal('vars1234_beta_offset', mu=0, sd=1,
                                     shape=(N, vars1234_count))
    # FIX: the original multiplied by `var123_alpha_offset` (undefined) and
    # `occ_month_yr_beta_offset` (leftover name from an earlier draft); this
    # level's own offsets are the correct terms.
    vars1234_alpha_m = pm.Deterministic(
        'vars1234_alpha_m',
        vars123_alpha_m[:, vars1234_indexes] + vars1234_alpha_offset * vars1234_alpha_sd)
    vars1234_beta_m = pm.Deterministic(
        'vars1234_beta_m',
        vars123_beta_m[:, vars1234_indexes] + vars1234_beta_offset * vars1234_beta_sd)

    # --- Likelihood: per-observation AR(1)-style regression.
    # Each row of df selects a (time, leaf-category) coefficient pair.
    # FIX: beta must be indexed by the same time index as alpha; the original
    # indexed its rows with df['vals'] (the observed values) by mistake.
    time_idx = df['dates'].values
    leaf_idx = df['vars1234_index'].values
    regression = pm.Deterministic(
        'regression',
        vars1234_alpha_m[time_idx, leaf_idx] +
        vars1234_beta_m[time_idx, leaf_idx] * lagged)

    obs_error = pm.Uniform('obs_error', lower=0, upper=100)
    likelihood = pm.Normal('likelihood', mu=regression, sd=obs_error, observed=vals)

    trace = pm.sample(1000)