How do I correctly structure a multi-dimensional variable in a regression problem

Hello.

I’m trying to structure a hierarchical regression problem modeling sales by location and the type of item within our marketing hierarchy.

The issues I’m running into is correctly structuring the random variable that accounts for both item and the location that item was sold. I keep getting dimensional errors.

Below is the function I use to make the different levels of each hierarchical RV.

def make_next_level_hierarchy_variable(name, mu, alpha, beta, dims=None):
    with modelcontext(None):
        sigma_ = pm.Gamma(f'{name}_sigma', alpha=alpha, beta=beta)
        offset = pm.Normal(f'{name}_offset', dims=dims)
        
        return pm.Deterministic(f'{name}', mu + sigma_ * offset, dims=dims)

The following is the code designating the RV that throws the dimensional error:

 loc_intercept = pm.Normal('loc_intercept', mu = 0, sigma = 1)
        loc_bl = utility_functions.make_next_level_hierarchy_variable(name='loc_bl', mu=loc_intercept, alpha=2, beta=1, dims=['business_line', 'location'])
        loc_cat = utility_functions.make_next_level_hierarchy_variable(name='loc_cat', mu=loc_bl, alpha=2, beta=1, dims=['category', 'location'])
        loc_subcat = utility_functions.make_next_level_hierarchy_variable(name='loc_subcat', mu=loc_cat, alpha=2, beta=1, dims=['subcategory', 'location'])
        loc_ic = utility_functions.make_next_level_hierarchy_variable(name='loc_ic', mu=loc_subcat, alpha=2, beta=1, dims=['ic', 'location'])
        loc_item = utility_functions.make_next_level_hierarchy_variable(name='loc_item', mu=loc_subcat, alpha=2, beta=1, dims=['item', 'location'])

This is how I tried to use it within the regression formula:

mu = (item_intercept[pm_item_idx]  * t_[pm_time_idx] + cann_*item_cann[pm_item_idx] + **loc_item[pm_item_idx, location_idx]** + dc_discount_*dc_discount_coeff + gwp_*gwp_coeff +  
             count_promo_*count_promo_coeff)

For reference, there are a lot of items but only two locations.

What am I doing wrong? (if it helps…full model is below).

coords = {'business_line':business_lines,
              'category':categories,
              'subcategory':subcats,
              'ic':ic,
              'item':items,
              'time':times,
             'location':locations,
             'yearly_components':[f'yearly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(x_fourier.shape[1] // 2)],
              'weekly_components':[f'weekly_{f}_{i+1}' for f in ['cos', 'sin'] for i in range(x_fourier_week.shape[1] // 2)]}
    
    #TRAINING THE MODEL STARTS HERE
    print("fitting model...")
    with pm.Model(coords=coords) as constant_model:    
        cat_to_bl_map = pm.Data('cat_to_bl_map', cat_to_bl_idx, mutable=False)
        subcat_to_cat_map = pm.Data('subcat_to_cat_map', subcat_to_cat_idx, mutable=False)
        ic_to_subcat_map = pm.Data('ic_to_subcat_map', ic_to_subcat_idx, mutable=False)
        ic_to_item_map = pm.Data('ic_to_item_map', ic_to_item_idx, mutable = False)
        
        pm_loc_idx = pm.Data('loc_idx', location_idx, mutable = True)
        pm_item_idx = pm.Data('item_idx', item_idx, mutable=True)
        pm_time_idx = pm.Data('time_idx', time_idx, mutable=True)
        log_eaches = pm.Data('log_eaches', df.residual, mutable=True)
        t_ = pm.Data('t', t, mutable = True)
        # x_fourier_ = pm.Data('x_fourier', x_fourier, mutable = True)
        # x_fourier_week_ = pm.Data('x_fourier_week', x_fourier_week, mutalbe = True)
        cann_ = pm.Data('cannibalization', cann_idx, mutable = True)
        dc_discount_ = pm.Data('dc_discount', dc_idx, mutable = True)
        gwp_ = pm.Data('gwp', gwp_idx, mutable = True)
        count_promo_ = pm.Data('count_promo', count_idx, mutable = True)
        # month_ = pm.Data('month', month_idx, mutable = True)

        mu_intercept = pm.Normal('mu_intercept', mu = 0, sigma = 10000)
        bl_intercept = utility_functions.make_next_level_hierarchy_variable(name='bl_intercept', mu=mu_intercept, alpha=2, beta=1, dims=['business_line'])
        cat_intercept = utility_functions.make_next_level_hierarchy_variable(name='cat_intercept', mu=bl_intercept[cat_to_bl_map], alpha=2, beta=1,  dims=['category'])
        subcat_intercept = utility_functions.make_next_level_hierarchy_variable(name='subcat_intercept', mu=cat_intercept[subcat_to_cat_map],  alpha=2, beta=1, dims=['subcategory'])
        ic_intercept = utility_functions.make_next_level_hierarchy_variable(name='ic_intercept', mu=subcat_intercept[ic_to_subcat_map],  alpha=2, beta=1, dims=['ic'])
        item_intercept = utility_functions.make_next_level_hierarchy_variable(name='item_intercept', mu=ic_intercept[ic_to_item_map], alpha=2, beta=1,  dims=['item'])
                                             
        mu_cann = pm.Normal('mu_can')
        bl_cann = utility_functions.make_next_level_hierarchy_variable(name='bl_cann', mu=mu_cann, alpha=2, beta=1, dims=['business_line'])
        cat_cann = utility_functions.make_next_level_hierarchy_variable(name='cat_cann', mu=bl_cann[cat_to_bl_map], alpha=2, beta=1,  dims=['category'])
        subcat_cann = utility_functions.make_next_level_hierarchy_variable(name='subcat_cann', mu=cat_cann[subcat_to_cat_map],  alpha=2, beta=1, dims=['subcategory'])
        ic_cann = utility_functions.make_next_level_hierarchy_variable(name='ic_cann', mu=subcat_cann[ic_to_subcat_map],  alpha=2, beta=1, dims=['ic'])
        item_cann = utility_functions.make_next_level_hierarchy_variable(name='item_cann', mu=ic_cann[ic_to_item_map], alpha=2, beta=1,  dims=['item'])
        
        loc_intercept = pm.Normal('loc_intercept', mu = 0, sigma = 1)
        loc_bl = utility_functions.make_next_level_hierarchy_variable(name='loc_bl', mu=loc_intercept, alpha=2, beta=1, dims=['business_line', 'location'])
        loc_cat = utility_functions.make_next_level_hierarchy_variable(name='loc_cat', mu=loc_bl, alpha=2, beta=1, dims=['category', 'location'])
        loc_subcat = utility_functions.make_next_level_hierarchy_variable(name='loc_subcat', mu=loc_cat, alpha=2, beta=1, dims=['subcategory', 'location'])
        loc_ic = utility_functions.make_next_level_hierarchy_variable(name='loc_ic', mu=loc_subcat, alpha=2, beta=1, dims=['ic', 'location'])
        loc_item = utility_functions.make_next_level_hierarchy_variable(name='loc_item', mu=loc_subcat, alpha=2, beta=1, dims=['item', 'location'])
        # location_coeff = (loc_item[pm_item_idx] * loc_intercept[pm_loc_idx]).sum(axis=1)
        
        dc_discount_coeff = pm.Normal('dc_discount_coeff')
        gwp_coeff = pm.Normal('gwp_coeff')
        count_promo_coeff = pm.Normal('count_promo_coeff')
        
        mu = (item_intercept[pm_item_idx]  * t_[pm_time_idx] + cann_*item_cann[pm_item_idx] + loc_item[pm_item_idx, location_idx] + dc_discount_*dc_discount_coeff + gwp_*gwp_coeff +  
             count_promo_*count_promo_coeff)

        sigma = pm.HalfNormal('sigma')
        
        eaches = pm.Normal('predicted_eaches',
                                    mu=mu,
                                    sigma=sigma,
                                    # lower = 0,
                                    observed=log_eaches)

What is the exact error you’re getting?

The model is quite complex. Do you get the error if it’s simpler, e.g. with just a single level in that hierarchy? The .eval() method is quite nice for debugging shape issues. I would comment out lines of the model until you can successfully get it partially compiled, then check shapes of all your variables and make sure they are what you expect.

Error is:

ValueError                                Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/aesara/compile/function/types.py in __call__(self, *args, **kwargs)
    975                 self.vm()
--> 976                 if output_subset is None
    977                 else self.vm(output_subset=output_subset)

ValueError: Input dimension mismatch. One other input has shape[0] = 6, but input[4].shape[0] = 26.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
/tmp/ipykernel_26145/3292791440.py in <module>
    198 
    199 
--> 200 run_korea(df)

/tmp/ipykernel_26145/3292791440.py in run_korea(df)
    149         loc_ic = utility_functions.make_next_level_hierarchy_variable(name='loc_ic', mu=loc_subcat, alpha=2, beta=1, dims=['ic', 'location'])
    150         loc_item = utility_functions.make_next_level_hierarchy_variable(name='loc_item', mu=loc_ic, alpha=2, beta=1, dims=['item', 'location'])
--> 151         loc_item.eval()
    152 
    153 

/opt/conda/lib/python3.7/site-packages/aesara/graph/basic.py in eval(self, inputs_to_values)
    600         args = [inputs_to_values[param] for param in inputs]
    601 
--> 602         rval = self._fn_cache[inputs](*args)
    603 
    604         return rval

/opt/conda/lib/python3.7/site-packages/aesara/compile/function/types.py in __call__(self, *args, **kwargs)
    990                     node=self.vm.nodes[self.vm.position_of_error],
    991                     thunk=thunk,
--> 992                     storage_map=getattr(self.vm, "storage_map", None),
    993                 )
    994             else:

/opt/conda/lib/python3.7/site-packages/aesara/link/utils.py in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    532         # Some exception need extra parameter in inputs. So forget the
    533         # extra long error message in that case.
--> 534     raise exc_value.with_traceback(exc_trace)
    535 
    536 

/opt/conda/lib/python3.7/site-packages/aesara/compile/function/types.py in __call__(self, *args, **kwargs)
    974             outputs = (
    975                 self.vm()
--> 976                 if output_subset is None
    977                 else self.vm(output_subset=output_subset)
    978             )

ValueError: Input dimension mismatch. One other input has shape[0] = 6, but input[4].shape[0] = 26.
Apply node that caused the error: Elemwise{Composite{(i0 + (i1 * i2) + (i3 * i4) + (i5 * i6) + (i7 * i8) + (i9 * i10))}}[(0, 2)](InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,x}.0, loc_bl_offset, InplaceDimShuffle{x,x}.0, loc_cat_offset, InplaceDimShuffle{x,x}.0, loc_subcat_offset, InplaceDimShuffle{x,x}.0, loc_ic_offset, InplaceDimShuffle{x,x}.0, loc_item_offset)
Toposort index: 17
Inputs types: [TensorType(float64, (1, None)), TensorType(float64, (1, 1)), TensorType(float64, (None, None)), TensorType(float64, (1, 1)), TensorType(float64, (None, None)), TensorType(float64, (1, 1)), TensorType(float64, (None, None)), TensorType(float64, (1, 1)), TensorType(float64, (None, None)), TensorType(float64, (1, 1)), TensorType(float64, (None, None))]
Inputs shapes: [(1, 2), (1, 1), (6, 2), (1, 1), (26, 2), (1, 1), (102, 2), (1, 1), (191, 2), (1, 1), (545, 2)]
Inputs strides: [(16, 8), (8, 8), (16, 8), (8, 8), (16, 8), (8, 8), (16, 8), (8, 8), (16, 8), (8, 8), (16, 8)]
Inputs values: [array([[ 2995.44591201, -1565.36841134]]), array([[0.42405651]]), 'not shown', array([[2.14519186]]), 'not shown', array([[1.2296369]]), 'not shown', array([[2.71865771]]), 'not shown', array([[0.48200603]]), 'not shown']
Outputs clients: [['output']]

HINT: Re-running with most Aesara optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the Aesara flag 'optimizer=fast_compile'. If that does not work, Aesara optimizations can be disabled with 'optimizer=None'.
HINT: Use the Aesara flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.

The model eval() returns the following:

{'mu_intercept': (),
 'bl_intercept_sigma_log__': (),
 'bl_intercept_sigma': (),
 'bl_intercept_offset': (6,),
 'cat_intercept_sigma_log__': (),
 'cat_intercept_sigma': (),
 'cat_intercept_offset': (26,),
 'subcat_intercept_sigma_log__': (),
 'subcat_intercept_sigma': (),
 'subcat_intercept_offset': (102,),
 'ic_intercept_sigma_log__': (),
 'ic_intercept_sigma': (),
 'ic_intercept_offset': (191,),
 'item_intercept_sigma_log__': (),
 'item_intercept_sigma': (),
 'item_intercept_offset': (545,),
 'mu_can': (),
 'bl_cann_sigma_log__': (),
 'bl_cann_sigma': (),
 'bl_cann_offset': (6,),
 'cat_cann_sigma_log__': (),
 'cat_cann_sigma': (),
 'cat_cann_offset': (26,),
 'subcat_cann_sigma_log__': (),
 'subcat_cann_sigma': (),
 'subcat_cann_offset': (102,),
 'ic_cann_sigma_log__': (),
 'ic_cann_sigma': (),
 'ic_cann_offset': (191,),
 'item_cann_sigma_log__': (),
 'item_cann_sigma': (),
 'item_cann_offset': (545,),
 'loc_intercept': (2,),
 'loc_bl_sigma_log__': (),
 'loc_bl_sigma': (),
 'loc_bl_offset': (6, 2),
 'loc_cat_sigma_log__': (),
 'loc_cat_sigma': (),
 'loc_cat_offset': (26, 2),
 'loc_subcat_sigma_log__': (),
 'loc_subcat_sigma': (),
 'loc_subcat_offset': (102, 2),
 'loc_ic_sigma_log__': (),
 'loc_ic_sigma': (),
 'loc_ic_offset': (191, 2),
 'loc_item_sigma_log__': (),
 'loc_item_sigma': (),
 'loc_item_offset': (545, 2)}

This worked when I tried to add the old seasonality variables you helped me with in the pat but I was multiplying those RVs by the Fourier transformation at the end of the RV sampling.

It did compile and sample before I tried to add my marketing hierarchy to the loc_intercept. But I’m still not getting what I want and know our locations see different sales for different items.

From the error and the shapes, it looks like the problem is mu + sigma_ * offset inside the make_next_level_hierarchy_variable function. You will need to provide a map that sends mu (the values at the previous level hierarchy, size 6 for bl) to the next size in the hierarchy (size 26 for cat).

ah crap, how did I miss that? Thank you!