Hello. I’m trying to learn hierarchical modeling with a simple regeression model based on the Walmart sales data set found on Kaggle. The deptartments are 99 different departments represented by an integer. I tried following along with the tutorial found at https://docs.pymc.io/notebooks/GLM-hierarchical.html.
I keep getting an out of bounds error though. Models and error message below. Thank you.
dept_numbers = X_train[‘Dept’].unique()
dept_idx = X_train[‘Dept’].values
n_dept = len(X_train[‘Dept’].unique())with pm.Model() as sales_model:
#define the priors mu_a = pm.Normal('mu_a', mu = 0, sd = 100) sigma_a = pm.HalfCauchy('sigma_a', 5) mu_b = pm.Normal('mu_b', mu = 0, sd = 100) sigma_b = pm.HalfCauchy('sigma_b', 5) alpha = pm.Normal('intercept', mu= mu_a, sd = sigma_a, shape = n_dept) beta_1 = pm.Normal('dept', mu = mu_b, sd = sigma_b, shape = n_dept) beta_2 = pm.Normal('IsHoliday_T', mu = 0, sd = 1, shape = X_train['IsHoliday_True'].shape) #beta_3 = pm.Normal('Week', mu=0, sd = 10) #beta_4 = pm.Normal('Fuel_Prices', mu=0, sd = 10) #beta_5 = pm.Normal('Temperature', mu=0, sd = 10) #beta_6 = pm.Normal('Markdown1', mu=0, sd = 10) #beta_7 = pm.Normal('Markdown2', mu=0, sd = 10) #beta_8 = pm.Normal('Markdown4', mu=0, sd = 10) #beta_9 = pm.Normal('Markdown5', mu=0, sd = 10) #beta_10 = pm.Normal('CPI', mu=0, sd = 10) #beta_11 = pm.Normal('Unemployment', mu=0, sd = 10) s = pm.Normal('sd', mu = 0, sd = 50) #define the likelihood mu = alpha[dept_idx] + beta_1[dept_idx]*X_train['Dept'].values + beta_2*X_train['IsHoliday_True'].values y = pm.StudentT('sales', nu=len(Y_train)-1, mu = mu, observed = Y_train, shape = Y_train.shape) trace = pm.sample(draws=5000 ,init='advi' ,progressbar=True) print(sales_model.check_test_point())
This is the errror:
IndexError Traceback (most recent call last)
in ()
26
27 #define the likelihood
—> 28 mu = alpha[dept_idx] + beta_1[dept_idx]X_train[‘Dept’].values + beta_2X_train[‘IsHoliday_True’].values
29
30 y = pm.StudentT(‘sales’, nu=len(Y_train)-1, mu = mu, observed = Y_train, shape = Y_train.shape)~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\var.py in getitem(self, args)
568 TensorVariable, TensorConstant,
569 theano.tensor.sharedvar.TensorSharedVariable))):
→ 570 return self.take(args[axis], axis)
571 else:
572 return theano.tensor.subtensor.advanced_subtensor(self, *args)~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\var.py in take(self, indices, axis, mode)
612
613 def take(self, indices, axis=None, mode=‘raise’):
→ 614 return theano.tensor.subtensor.take(self, indices, axis, mode)
615
616 # COPYING~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\tensor\subtensor.py in take(a, indices, axis, mode)
2429 return advanced_subtensor1(a.flatten(), indices)
2430 elif axis == 0:
→ 2431 return advanced_subtensor1(a, indices)
2432 else:
2433 if axis < 0:~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\op.py in call(self, *inputs, **kwargs)
672 thunk.outputs = [storage_map[v] for v in node.outputs]
673
→ 674 required = thunk()
675 assert not required # We provided all inputs
676~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\op.py in rval()
860
861 def rval():
→ 862 thunk()
863 for o in node.outputs:
864 compute_map[o][0] = True~\AppData\Local\Continuum\Anaconda3\lib\site-packages\theano\gof\cc.py in call(self)
1733 print(self.error_storage, file=sys.stderr)
1734 raise
→ 1735 reraise(exc_type, exc_value, exc_trace)
1736
1737~\AppData\Local\Continuum\Anaconda3\lib\site-packages\six.py in reraise(tp, value, tb)
691 if value.traceback is not tb:
692 raise value.with_traceback(tb)
→ 693 raise value
694 finally:
695 value = NoneIndexError: index 79 is out of bounds for size 77