Hierarchical Modeling Index Error

Hello. I’m trying to learn hierarchical modeling with a simple regression model based on the Walmart sales data set found on Kaggle. The data set has 99 different departments, each represented by an integer. I tried following along with the tutorial found at https://docs.pymc.io/notebooks/GLM-hierarchical.html.

I keep getting an out-of-bounds error, though. The model and the error message are below. Thank you.

# Encode the raw department labels as contiguous integer codes 0..n_dept-1.
# The raw labels cannot be used to index a length-n_dept parameter vector:
# any label >= n_dept (e.g. label 79 when only 77 distinct departments are
# present) is out of bounds.
dept_cat = X_train['Dept'].astype('category')
# Sorted distinct labels; dept_numbers[i] is the original label for code i.
dept_numbers = dept_cat.cat.categories.values
# One code per row of X_train, each in [0, n_dept).
dept_idx = dept_cat.cat.codes.values
n_dept = len(dept_numbers)

# Hierarchical regression of sales with per-department intercepts and slopes
# that share group-level hyperpriors.
with pm.Model() as sales_model:

    # Hyperpriors shared by all departments.
    mu_a = pm.Normal('mu_a', mu=0, sd=100)
    sigma_a = pm.HalfCauchy('sigma_a', 5)
    mu_b = pm.Normal('mu_b', mu=0, sd=100)
    sigma_b = pm.HalfCauchy('sigma_b', 5)

    # One intercept and one slope per department.
    alpha = pm.Normal('intercept', mu=mu_a, sd=sigma_a, shape=n_dept)
    beta_1 = pm.Normal('dept', mu=mu_b, sd=sigma_b, shape=n_dept)
    # A single holiday effect shared by every observation. Giving it the
    # shape of the data column would create one free coefficient per row.
    beta_2 = pm.Normal('IsHoliday_T', mu=0, sd=1)
    # Candidate extra predictors, currently disabled:
    # beta_3 = pm.Normal('Week', mu=0, sd=10)
    # beta_4 = pm.Normal('Fuel_Prices', mu=0, sd=10)
    # beta_5 = pm.Normal('Temperature', mu=0, sd=10)
    # beta_6 = pm.Normal('Markdown1', mu=0, sd=10)
    # beta_7 = pm.Normal('Markdown2', mu=0, sd=10)
    # beta_8 = pm.Normal('Markdown4', mu=0, sd=10)
    # beta_9 = pm.Normal('Markdown5', mu=0, sd=10)
    # beta_10 = pm.Normal('CPI', mu=0, sd=10)
    # beta_11 = pm.Normal('Unemployment', mu=0, sd=10)

    # Observation scale. It must be positive, so use a half-normal prior,
    # and it must be passed to the likelihood to have any effect.
    s = pm.HalfNormal('sd', sd=50)

    # Linear predictor. dept_idx must hold 0-based integer codes in
    # [0, n_dept); indexing with raw department labels raises IndexError.
    # NOTE(review): the slope multiplies the raw department number, which is
    # constant within a department and therefore confounded with the
    # intercept -- confirm this is the intended predictor.
    mu = (alpha[dept_idx]
          + beta_1[dept_idx] * X_train['Dept'].values
          + beta_2 * X_train['IsHoliday_True'].values)

    # Likelihood of the observed sales.
    y = pm.StudentT('sales', nu=len(Y_train) - 1, mu=mu, sd=s,
                    observed=Y_train, shape=Y_train.shape)

    # Sampling needs the model context, so it stays inside the with block.
    trace = pm.sample(draws=5000, init='advi', progressbar=True)

print(sales_model.check_test_point())

This is the error:


IndexError Traceback (most recent call last)
in ()
26
27 #define the likelihood
—> 28 mu = alpha[dept_idx] + beta_1[dept_idx]*X_train['Dept'].values + beta_2*X_train['IsHoliday_True'].values
29
30 y = pm.StudentT('sales', nu=len(Y_train)-1, mu = mu, observed = Y_train, shape = Y_train.shape)

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\var.py in __getitem__(self, args)
568 TensorVariable, TensorConstant,
569 theano.tensor.sharedvar.TensorSharedVariable))):
→ 570 return self.take(args[axis], axis)
571 else:
572 return theano.tensor.subtensor.advanced_subtensor(self, *args)

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\var.py in take(self, indices, axis, mode)
612
613 def take(self, indices, axis=None, mode=‘raise’):
→ 614 return theano.tensor.subtensor.take(self, indices, axis, mode)
615
616 # COPYING

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\subtensor.py in take(a, indices, axis, mode)
2429 return advanced_subtensor1(a.flatten(), indices)
2430 elif axis == 0:
→ 2431 return advanced_subtensor1(a, indices)
2432 else:
2433 if axis < 0:

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\op.py in call(self, *inputs, **kwargs)
672 thunk.outputs = [storage_map[v] for v in node.outputs]
673
→ 674 required = thunk()
675 assert not required # We provided all inputs
676

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\op.py in rval()
860
861 def rval():
→ 862 thunk()
863 for o in node.outputs:
864 compute_map[o][0] = True

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\cc.py in call(self)
1733 print(self.error_storage, file=sys.stderr)
1734 raise
→ 1735 reraise(exc_type, exc_value, exc_trace)
1736
1737

~\AppData\Local\Continuum\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.traceback is not tb:
692 raise value.with_traceback(tb)
→ 693 raise value
694 finally:
695 value = None

IndexError: index 79 is out of bounds for size 77

Never mind, I figured it out. I converted the department column to a categorical, took its cat.codes (contiguous integers starting at 0) for each department, and used those as the idx vector.