Issues with Hierarchical model

I’m reading the book “Bayesian Analysis with Python - Third edition”.

And after the hierarchical chapter, i wanted to have a go at creating one my self.
Don’t mind the choice of priors, parameters etc. Right now i just wanted to test if i could be a model that works.

I have a dataset with lead times from two suppliers and some product_brands and i’d like to be able to created posteriors for each combination, so say i want to see the posterior for a speicifc product brand from a specific supplier. Therefor i did the following:

supplier_codes = np.array(df['supplierNumber'].unique())
supplier_idx = pd.Categorical(df['supplierNumber'], categories=supplier_codes).codes

product_brand_codes = np.array(df['product brand'].unique())
product_brand_idx = pd.Categorical(df['product brand'], categories=product_brand_codes).codes

coords = {'supplier': supplier_codes, 'product_brand': product_brand_codes}

with pm.Model(coords=coords) as lead_time_h:
    # Hyper-priors
    μ = pm.Normal('μ', mu=0, sigma=10)
    σ = pm.HalfNormal('σ', sigma=10)

    # suppliers
    μ_sup = pm.Normal('μ_sup', mu=μ, sigma=σ, dims='supplier')
    σ_sup = pm.HalfNormal('σ_sup', sigma=10, dims='supplier')

    # product_brands
    μ_pb = pm.Normal('μ_pb', mu=μ_sup[supplier_idx], sigma=σ_sup[supplier_idx], dims='product_brand')
    σ_pb = pm.HalfNormal('σ_pb', sigma=10, dims='product_brand')

    # Likelihood
    y = pm.Normal('y', mu=μ_pb, sigma=σ_pb, observed=lead_times)

    idata_lt = pm.sample()

pm.model_to_graphviz(lead_time_h)

but I get this error

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\link\vm.py:407, in Loop.__call__(self)
    404 for thunk, node, old_storage in zip_longest(
    405     self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
    406 ):
-->407     thunk()
    408     for old_s in old_storage:

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\graph\op.py:524, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
    522 @is_thunk_type
    523 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
-->524     r = p(n, [x[0] for x in i], o)
    525     for o in node.outputs:

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\tensor\elemwise.py:753, in Elemwise.perform(self, node, inputs, output_storage)
    751     nout = ufunc.nout
-->753 variables = ufunc(*ufunc_args, **ufunc_kwargs)
    755 if nout == 1:

ValueError: operands could not be broadcast together with shapes (2838,) (3,) 

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[17], line 17
     14     # Likelihood
     15     y = pm.Normal('y', mu=μ_pb, sigma=σ_pb, observed=lead_times)
-->-17     idata_lt = pm.sample()
     19 pm.model_to_graphviz(lead_time_h)

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py:746, in sample(draws, tune, chains, cores, random_seed, progressbar, progressbar_theme, step, var_names, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, blas_cores, model, **kwargs)
    744     _log.info("Auto-assigning NUTS sampler...")
    745     with joined_blas_limiter():
-->746         initial_points, step = init_nuts(
    747             init=init,
    748             chains=chains,
    749             n_init=n_init,
    750             model=model,
    751             random_seed=random_seed_list,
    752             progressbar=progressbar,
    753             jitter_max_retries=jitter_max_retries,
    754             tune=tune,
    755             initvals=initvals,
    756             **kwargs,
    757         )
    759 if initial_points is None:
    760     # Time to draw/evaluate numeric start points for each chain.
    761     ipfns = make_initial_point_fns_per_chain(
    762         model=model,
    763         overrides=initvals,
    764         jitter_rvs=set(),
    765         chains=chains,
    766     )

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py:1427, in init_nuts(init, chains, n_init, model, random_seed, progressbar, jitter_max_retries, tune, initvals, **kwargs)
   1420 _log.info(f"Initializing NUTS using {init}...")
   1422 cb = [
   1423     pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"),
   1424     pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"),
   1425 ]
-1427 initial_points = _init_jitter(
   1428     model,
   1429     initvals,
   1430     seeds=random_seed_list,
   1431     jitter="jitter" in init,
   1432     jitter_max_retries=jitter_max_retries,
   1433 )
   1435 apoints = [DictToArrayBijection.map(point) for point in initial_points]
   1436 apoints_data = [apoint.data for apoint in apoints]

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py:1318, in _init_jitter(model, initvals, seeds, jitter, jitter_max_retries)
   1316 rng = np.random.RandomState(seed)
   1317 for i in range(jitter_max_retries + 1):
-1318     point = ipfn(seed)
   1319     if i < jitter_max_retries:
   1320         try:

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\initial_point.py:169, in make_initial_point_fn.<locals>.make_seeded_function.<locals>.inner(seed, *args, **kwargs)
    166 @functools.wraps(func)
    167 def inner(seed, *args, **kwargs):
    168     reseed_rngs(rngs, seed)
-->169     values = func(*args, **kwargs)
    170     return dict(zip(varnames, values))

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\compile\function\types.py:970, in Function.__call__(self, *args, **kwargs)
    967 t0_fn = time.perf_counter()
    968 try:
    969     outputs = (
-->970         self.vm()
    971         if output_subset is None
    972         else self.vm(output_subset=output_subset)
    973     )
    974 except Exception:
    975     restore_defaults()

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\link\vm.py:411, in Loop.__call__(self)
    409                 old_s[0] = None
    410     except Exception:
-->411         raise_with_op(self.fgraph, node, thunk)
    413 return self.perform_updates()

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\link\utils.py:528, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    523     warnings.warn(
    524         f"{exc_type} error does not allow us to add an extra error message"
    525     )
    526     # Some exception need extra parameter in inputs. So forget the
    527     # extra long error message in that case.
-->528 raise exc_value.with_traceback(exc_trace)

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\link\vm.py:407, in Loop.__call__(self)
    403 try:
    404     for thunk, node, old_storage in zip_longest(
    405         self.thunks, self.nodes, self.post_thunk_clear, fillvalue=()
    406     ):
-->407         thunk()
    408         for old_s in old_storage:
    409             old_s[0] = None

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\graph\op.py:524, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
    522 @is_thunk_type
    523 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
-->524     r = p(n, [x[0] for x in i], o)
    525     for o in node.outputs:
    526         compute_map[o][0] = True

File ~\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pytensor\tensor\elemwise.py:753, in Elemwise.perform(self, node, inputs, output_storage)
    749         ufunc = node.tag.ufunc
    751     nout = ufunc.nout
-->753 variables = ufunc(*ufunc_args, **ufunc_kwargs)
    755 if nout == 1:
    756     variables = [variables]

ValueError: operands could not be broadcast together with shapes (2838,) (3,) 
Apply node that caused the error: Add(AdvancedSubtensor.0, μ_pb_jitter)
Toposort index: 18
Inputs types: [TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,))]
Inputs shapes: [(2838,), (3,)]
Inputs strides: [(8,), (8,)]
Inputs values: ['not shown', array([-0.32735514, -0.96662599, -0.16156075])]
Outputs clients: [['output']]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\k.drejer\AppData\Local\Temp\ipykernel_11976\1987296644.py", line 17, in <module>
    idata_lt = pm.sample()
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py", line 746, in sample
    initial_points, step = init_nuts(
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py", line 1427, in init_nuts
    initial_points = _init_jitter(
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\sampling\mcmc.py", line 1304, in _init_jitter
    ipfns = make_initial_point_fns_per_chain(
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\initial_point.py", line 86, in make_initial_point_fns_per_chain
    make_initial_point_fn(
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\initial_point.py", line 140, in make_initial_point_fn
    initial_values = make_initial_point_expression(
  File "C:\Users\k.drejer\AppData\Local\miniconda3\envs\pymc_env\Lib\site-packages\pymc\initial_point.py", line 262, in make_initial_point_expression
    value = value + jitter

HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.

These are the shapes of my indexes and categories

print(supplier_idx.shape)
print(product_brand_idx.shape)

print(len(supplier_codes))
print(len(product_brand_codes))

print(len(lead_times))

(2838,)
(2838,)
2
3
2838

where do i go wrong?

I believe the issue lies in that you are trying to force μ_pb to be of dimension product_brand, when it should be the dimension of the number of number of observations, 2838. You need to add to your coords, modify the dimensions, and add some additional indexing. The model should look like this:

coords = {"supplier"      : supplier_codes,
          "product_brand" : product_brand_codes,
          "obs_idx"       : df.index.tolist()}

with pm.Model(coords=coords) as lead_time_h:
    # Hyper-priors
    μ = pm.Normal('μ', mu=0, sigma=10)
    σ = pm.HalfNormal('σ', sigma=10)
 
    # suppliers
    μ_sup = pm.Normal('μ_sup', mu=μ, sigma=σ, dims='supplier')
    σ_sup = pm.HalfNormal('σ_sup', sigma=10, dims='supplier')
 
    # product_brands
    μ_pb = pm.Normal('μ_pb', mu=μ_sup[supplier_idx], sigma=σ_sup[supplier_idx], dims='obs_idx') #<--- μ_sup[supplier_idx], σ_sup[supplier_idx] are both of dimension of the rows in df.
 
    # Likelihood
    σ_pb = pm.HalfNormal('σ_pb', sigma=10, dims='product_brand') #<-- Here you are assuming that there the variance is allowed to vary across products so you will need to index it accordingly
    y = pm.Normal('y', mu=μ_pb, sigma=σ_pb[product_brand_idx], observed=lead_times, dims='obs_idx')

     # Sample model
    trace = pm.sample(random_seed=rng, draws=1000, chains=1, nuts_sampler="numpyro")

A few things to note:
1.supplier_idx and product_brand_idx are both the length of the df so when you index the parameter μ_sup it will also be that length, which I added as obs_idx.
2. You are assuming that the noise on the observation varies with product brand, so you need to index it within the variable y.
3. I believe your model assumes that there is a supplier effect on lead time and that products vary within each supplier. I believe if you wanted to there to be an effect that covaries with supplier and product then you would need to change you effect μ_pb to have dimensions of both supplier and product.

with pm.Model(coords=coords) as model:
    # Hyper-priors
    μ_global = pm.Normal('μ_global', mu=0, sigma=10)
    σ_global = pm.HalfNormal('σ_global', sigma=10)
 
    # suppliers, product_brands
    μ = pm.Normal('μ', mu=μ_global, sigma=σ_global, dims=('supplier', 'product_brand'))
    σ = pm.HalfNormal('σ', sigma=10, dims=('supplier', 'product_brand'))
 
    # effect
    effect = pm.Normal('effect', mu=μ[supplier_idx, product_brand_idx], sigma=σ[supplier_idx, product_brand_idx], dims='obs_idx') 

    # Likelihood
    noise  = pm.HalfNormal('noise', sigma=10)
    y = pm.Normal('y', mu=effect, sigma=noise, observed=lead_times, dims='obs_idx')

    # Sample model
    trace = pm.sample(random_seed=rng, draws=1000, chains=1, nuts_sampler="numpyro")

However, here you are assuming that there are observations for every product/sample combo.

Hopefully this helps. Others may be able to weigh in on model structure based on assumptions.

2 Likes

Thanks for the reply Justin,

Both models work, but it would seem model 2 is closest to what i’m trying to achieve.
It run’s but i think it could still use a difference. Let me explain the dataframe which is the source of it all

It has the columns supplierNumber, product brand, actualLeadtime. And each row represent an orderline i received from my supplier.

My idea was to have a hyperprior that could be based on my lead time knowledge about suppliers from the geographical region my products are sourced, lets just say China.
Based on that i want model lead times per supplier, and then use this as a new prior for product brand.

Given that this is still new to me, i’m not certain if that’s what you second model does. I do get separate posteriors pr supplier product brand combination, so that’s fine. But not sure it’s using the hierarchical approach I was thinking of using.

It sounds like you may need to add an additional column for region and then build from there. Does the column supplierNumber contain any information about how product suppliers are grouped? If I understand correctly, if you add some context regarding the grouping of suppliers, you may want something like this:

with pm.Model(coords=coords) as model:

    # Global hyperpriors
    mu_global = pm.Normal("mu_global", mu=1, sigma=5)
    sigma_global = pm.HalfNormal("sigma_global", sigma=5)

    # Region effects
    mu_region = pm.Normal("mu_region", mu=mu_global, sigma=sigma_global, dims="region")
    sigma_region = pm.HalfNormal("sigma_region", sigma=2)

    # Supplier effects within each region
    mu_supplier = pm.Normal("mu_supplier", mu=mu_region[df["region"].values], sigma=sigma_region, dims="supplier")
    sigma_supplier = pm.HalfNormal("sigma_supplier", sigma=1)

    # Final effect accounting for region and supplier
    lead_time_mu = pm.Normal("lead_time_mu", mu=mu_supplier[df["supplierNumber"].values], sigma=sigma_supplier, dims="obs_idx")

    # Noise
    noise = pm.HalfNormal("noise", sigma=2)
    y = pm.Normal("y", mu=lead_time_mu, sigma=noise, observed=df["actualLeadtime"], dims="obs_idx")

That would bias your supplier lead times towards the region. If you also want to do this for separate products, you could add an additional level:

# Product-level effects nested under suppliers
mu_product = pm.Normal("mu_product", mu=mu_supplier[df["supplier"].values], sigma=sigma_supplier, dims="product")

# Final effect accounting for region and supplier
lead_time_mu = pm.Normal("lead_time_mu", mu=mu_product[df["product"].values], sigma=sigma_supplier, dims="obs_idx")

I am not sure how sampling will go, but you may need to consider a non-centered parameterization if you get lots of divergences.