Computing log likelihood in hierarchical model

I have what is probably a silly question, but is it possible to compute the log likelihood in a hierarchical model? I thought it was, but I get the following error in my models once sampling has finished:

TypeError: object of type 'NoneType' has no len()

So I tested this on the radon example to make sure it wasn't just my model. Using the simplest partially pooled model produces the same error. Full code:

with pm.Model(coords=coords) as partial_pooling:
    county_idx = pm.MutableData("county_idx", county, dims="obs_id")

    # Priors
    mu_a = pm.Normal("mu_a", mu=0.0, sigma=10)
    sigma_a = pm.Exponential("sigma_a", 1)

    # Random intercepts
    alpha = pm.Normal("alpha", mu=mu_a, sigma=sigma_a, dims="county")

    # Model error
    sigma_y = pm.Exponential("sigma_y", 1)

    # Expected value
    y_hat = alpha[county_idx]

    # Data likelihood
    y_like = pm.Normal("y_like", mu=y_hat, sigma=sigma_y, observed=log_radon, dims="obs_id")

with partial_pooling:
    partial_pooling_trace = pm.sample(
        tune=2000,
        random_seed=RANDOM_SEED,
        # requesting the pointwise log likelihood here is what triggers the error
        idata_kwargs=dict(log_likelihood=True),
    )
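
Computing the log likelihood after sampling instead, via pm.compute_log_likelihood, hits the same error (see the second traceback below):

with partial_pooling:
    pm.compute_log_likelihood(partial_pooling_trace)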

Any help or direction is much appreciated.

Can you share:

  1. What versions of PyMC and PyTensor do you have installed?
  2. Can you show the whole error message?

The error I got when running my own models was with

pymc version 5.6.0
pytensor version 2.12.3

But when running the radon example I was using

pymc version 5.5.0
pytensor version 2.12.1

Whole error:

TypeError                                 Traceback (most recent call last)
Cell In[42], line 2
      1 with hits_generalized_poisson_pooled_mdl:
----> 2     idata_hits_generalized_poisson_pooled_mdl = pm.sample(init='adapt_diag',
      3                                                           idata_kwargs=dict(log_likelihood=True),
      4                                                           chains=4,
      5                                                           target_accept=0.95)

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pymc/sampling/mcmc.py:791, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    787 t_sampling = time.time() - t_start
    789 # Packaging, validating and returning the result was extracted
    790 # into a function to make it easier to test and refactor.
--> 791 return _sample_return(
    792     run=run,
    793     traces=traces,
    794     tune=tune,
    795     t_sampling=t_sampling,
    796     discard_tuned_samples=discard_tuned_samples,
    797     compute_convergence_checks=compute_convergence_checks,
    798     return_inferencedata=return_inferencedata,
    799     keep_warning_stat=keep_warning_stat,
    800     idata_kwargs=idata_kwargs or {},
    801     model=model,
    802 )

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pymc/sampling/mcmc.py:859, in _sample_return(run, traces, tune, t_sampling, discard_tuned_samples, compute_convergence_checks, return_inferencedata, keep_warning_stat, idata_kwargs, model)
    857 ikwargs: Dict[str, Any] = dict(model=model, save_warmup=not discard_tuned_samples)
    858 ikwargs.update(idata_kwargs)
--> 859 idata = pm.to_inference_data(mtrace, **ikwargs)
    861 if compute_convergence_checks:
    862     warns = run_convergence_checks(idata, model)

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pymc/backends/arviz.py:519, in to_inference_data(trace, prior, posterior_predictive, log_likelihood, coords, dims, sample_dims, model, save_warmup, include_transformed)
    505 if isinstance(trace, InferenceData):
    506     return trace
    508 return InferenceDataConverter(
    509     trace=trace,
    510     prior=prior,
    511     posterior_predictive=posterior_predictive,
    512     log_likelihood=log_likelihood,
    513     coords=coords,
    514     dims=dims,
    515     sample_dims=sample_dims,
    516     model=model,
    517     save_warmup=save_warmup,
    518     include_transformed=include_transformed,
--> 519 ).to_inference_data()

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pymc/backends/arviz.py:442, in InferenceDataConverter.to_inference_data(self)
    439 if self.log_likelihood:
    440     from pymc.stats.log_likelihood import compute_log_likelihood
--> 442     idata = compute_log_likelihood(
    443         idata,
    444         var_names=None if self.log_likelihood is True else self.log_likelihood,
    445         extend_inferencedata=True,
    446         model=self.model,
    447         sample_dims=self.sample_dims,
    448         progressbar=False,
    449     )
    450 return idata

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/pymc/stats/log_likelihood.py:116, in compute_log_likelihood(idata, var_names, extend_inferencedata, model, sample_dims, progressbar)
    111 for key, array in loglike_trace.items():
    112     loglike_trace[key] = array.reshape(
    113         (*[len(coord) for coord in stacked_dims.values()], *array.shape[1:])
    114     )
--> 116 loglike_dataset = dict_to_dataset(
    117     loglike_trace,
    118     library=pymc,
    119     dims={dname: list(dvals) for dname, dvals in model.named_vars_to_dims.items()},
    120     coords={
    121         cname: np.array(cvals) if isinstance(cvals, tuple) else cvals
    122         for cname, cvals in model.coords.items()
    123     },
    124     default_dims=list(sample_dims),
    125     skip_event_dims=True,
    126 )
    128 if extend_inferencedata:
    129     idata.add_groups(dict(log_likelihood=loglike_dataset))

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/arviz/data/base.py:306, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
    303 if dims is None:
    304     dims = {}
--> 306 data_vars = {
    307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/arviz/data/base.py:307, in <dictcomp>(.0)
    303 if dims is None:
    304     dims = {}
    306 data_vars = {
--> 307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/arviz/data/base.py:231, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
    228 else:
    229     ary = utils.one_de(ary)
--> 231 dims, coords = generate_dims_coords(
    232     ary.shape[len(default_dims) :],
    233     var_name,
    234     dims=dims,
    235     coords=coords,
    236     default_dims=default_dims,
    237     index_origin=index_origin,
    238     skip_event_dims=skip_event_dims,
    239 )
    241 # reversed order for default dims: 'chain', 'draw'
    242 if "draw" not in dims and "draw" in default_dims:

File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/arviz/data/base.py:146, in generate_dims_coords(shape, var_name, dims, coords, default_dims, index_origin, skip_event_dims)
    143 if skip_event_dims:
    144     # this is needed in case the reduction keeps the dimension with size 1
    145     for i, (dim, dim_size) in enumerate(zip(dims, shape)):
--> 146         if (dim in coords) and (dim_size != len(coords[dim])):
    147             dims = dims[:i]
    148             break

TypeError: object of type 'NoneType' has no len()

And to be explicit, here's the error from the radon example (which I'm pretty sure is the same):

TypeError                                 Traceback (most recent call last)
Cell In[27], line 2
      1 with partial_pooling:
----> 2     pm.compute_log_likelihood(partial_pooling_trace)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pymc\stats\log_likelihood.py:116, in compute_log_likelihood(idata, var_names, extend_inferencedata, model, sample_dims, progressbar)
    111 for key, array in loglike_trace.items():
    112     loglike_trace[key] = array.reshape(
    113         (*[len(coord) for coord in stacked_dims.values()], *array.shape[1:])
    114     )
--> 116 loglike_dataset = dict_to_dataset(
    117     loglike_trace,
    118     library=pymc,
    119     dims={dname: list(dvals) for dname, dvals in model.named_vars_to_dims.items()},
    120     coords={
    121         cname: np.array(cvals) if isinstance(cvals, tuple) else cvals
    122         for cname, cvals in model.coords.items()
    123     },
    124     default_dims=list(sample_dims),
    125     skip_event_dims=True,
    126 )
    128 if extend_inferencedata:
    129     idata.add_groups(dict(log_likelihood=loglike_dataset))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:306, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
    303 if dims is None:
    304     dims = {}
--> 306 data_vars = {
    307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:307, in <dictcomp>(.0)
    303 if dims is None:
    304     dims = {}
    306 data_vars = {
--> 307     key: numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316     for key, values in data.items()
    317 }
    318 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:231, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
    228 else:
    229     ary = utils.one_de(ary)
--> 231 dims, coords = generate_dims_coords(
    232     ary.shape[len(default_dims) :],
    233     var_name,
    234     dims=dims,
    235     coords=coords,
    236     default_dims=default_dims,
    237     index_origin=index_origin,
    238     skip_event_dims=skip_event_dims,
    239 )
    241 # reversed order for default dims: 'chain', 'draw'
    242 if "draw" not in dims and "draw" in default_dims:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\arviz\data\base.py:146, in generate_dims_coords(shape, var_name, dims, coords, default_dims, index_origin, skip_event_dims)
    143 if skip_event_dims:
    144     # this is needed in case the reduction keeps the dimension with size 1
    145     for i, (dim, dim_size) in enumerate(zip(dims, shape)):
--> 146         if (dim in coords) and (dim_size != len(coords[dim])):
    147             dims = dims[:i]
    148             break

TypeError: object of type 'NoneType' has no len()

What do the coords look like? There shouldn't be a None in them.
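
A quick way to check is to print both the coords dict you passed in and model.coords, which also picks up dims registered inside the model (a minimal sketch):

# Inspect the coords supplied at model creation and the ones the model registered
print(coords)                  # the dict passed to pm.Model
print(partial_pooling.coords)  # also includes dims added via dims="..." on variables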

If you don't have a None in there, it's a PyMC bug. In which case, can you open an issue on our GitHub with a fully reproducible example (as small as you can get it to be): GitHub - pymc-devs/pymc: Bayesian Modeling in Python
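
Something like the following could serve as a starting point for a minimal example (a sketch with fake data and made-up names, not the full radon model):

import numpy as np
import pymc as pm

rng = np.random.default_rng(0)
coords = {"county": ["a", "b", "c"]}

with pm.Model(coords=coords) as m:
    # dims="obs_id" registers a dim without coordinate values
    idx = pm.MutableData("idx", rng.integers(0, 3, size=10), dims="obs_id")
    alpha = pm.Normal("alpha", 0, 1, dims="county")
    pm.Normal("y", mu=alpha[idx], sigma=1, observed=rng.normal(size=10), dims="obs_id")
    pm.sample(draws=10, tune=10, chains=1, idata_kwargs=dict(log_likelihood=True))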

From the radon example

{'county': Index(['AITKIN', 'ANOKA', 'BECKER', 'BELTRAMI', 'BENTON', 'BIG STONE',
        'BLUE EARTH', 'BROWN', 'CARLTON', 'CARVER', 'CASS', 'CHIPPEWA',
        'CHISAGO', 'CLAY', 'CLEARWATER', 'COOK', 'COTTONWOOD', 'CROW WING',
        'DAKOTA', 'DODGE', 'DOUGLAS', 'FARIBAULT', 'FILLMORE', 'FREEBORN',
        'GOODHUE', 'HENNEPIN', 'HOUSTON', 'HUBBARD', 'ISANTI', 'ITASCA',
        'JACKSON', 'KANABEC', 'KANDIYOHI', 'KITTSON', 'KOOCHICHING',
        'LAC QUI PARLE', 'LAKE', 'LAKE OF THE WOODS', 'LE SUEUR', 'LINCOLN',
        'LYON', 'MAHNOMEN', 'MARSHALL', 'MARTIN', 'MCLEOD', 'MEEKER',
        'MILLE LACS', 'MORRISON', 'MOWER', 'MURRAY', 'NICOLLET', 'NOBLES',
        'NORMAN', 'OLMSTED', 'OTTER TAIL', 'PENNINGTON', 'PINE', 'PIPESTONE',
        'POLK', 'POPE', 'RAMSEY', 'REDWOOD', 'RENVILLE', 'RICE', 'ROCK',
        'ROSEAU', 'SCOTT', 'SHERBURNE', 'SIBLEY', 'ST LOUIS', 'STEARNS',
        'STEELE', 'STEVENS', 'SWIFT', 'TODD', 'TRAVERSE', 'WABASHA', 'WADENA',
        'WASECA', 'WASHINGTON', 'WATONWAN', 'WILKIN', 'WINONA', 'WRIGHT',
        'YELLOW MEDICINE'],
       dtype='object')}
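
Note that 'obs_id' is not in that dict even though the model uses it as a dim, so model.coords presumably ends up with 'obs_id': None, which is what the len() call in the traceback chokes on. If so, supplying explicit coordinate values for obs_id might sidestep it while the bug is open (a sketch, assuming the coords setup from the radon notebook):

coords = {
    "county": mn_counties,
    "obs_id": np.arange(len(log_radon)),  # give the dim concrete coordinate values
}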

I’ll open an issue.