Shape issues with sample_posterior_predictive in PyMC 5

Hi,

I’m having issues migrating code to generate posterior predictive samples to version 5. Illustrative example below. In particular, I’m running into dimensionality/shape issues that I’ve unsuccessfully tried to fix using the docs.

import pymc as pm

with pm.Model() as model:
    x = pm.MutableData('x', [1., 2., 3.])
    y = pm.MutableData('y', [1., 2., 3.])
    beta = pm.Normal('beta', 0, 1)
    obs = pm.Normal('obs', x * beta, 1, observed=y, shape=x.shape)
    idata = pm.sample()

with model:
    pm.set_data({'x': [5., 6., 9., 12., 15.]})
    y_test = pm.sample_posterior_predictive(idata)

Returns

ValueError: different number of dimensions on data and dims: 3 vs 2

Any guidance would be very much appreciated.

Thanks

That’s weird. Can you share the full error message? Also, what version of PyMC are you using?

Thanks so much for the quick reply Ricardo. Full error message pasted below - I ran with 5.0… All help appreciated thank you.


ValueError Traceback (most recent call last)
Input In [9], in <cell line: 1>()
1 with model:
2 #pm.set_data({‘x’: [5., 6., 9., 12., 15.]})
----> 3 y_test = pm.sample_posterior_predictive(idata)
5 print(y_test.posterior_predictive[‘obs’].mean((‘chain’, ‘draw’)))

File ~\Anaconda3\lib\site-packages\pymc\sampling\forward.py:673, in sample_posterior_predictive(trace, model, var_names, sample_dims, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
671 ikwargs.setdefault(“inplace”, True)
672 return pm.predictions_to_inference_data(ppc_trace, **ikwargs)
→ 673 idata_pp = pm.to_inference_data(posterior_predictive=ppc_trace, **ikwargs)
675 if extend_inferencedata and idata is not None:
676 idata.extend(idata_pp)

File ~\Anaconda3\lib\site-packages\pymc\backends\arviz.py:498, in to_inference_data(trace, prior, posterior_predictive, log_likelihood, coords, dims, sample_dims, model, save_warmup, include_transformed)
495 if isinstance(trace, InferenceData):
496 return trace
→ 498 return InferenceDataConverter(
499 trace=trace,
500 prior=prior,
501 posterior_predictive=posterior_predictive,
502 log_likelihood=log_likelihood,
503 coords=coords,
504 dims=dims,
505 sample_dims=sample_dims,
506 model=model,
507 save_warmup=save_warmup,
508 include_transformed=include_transformed,
509 ).to_inference_data()

File ~\Anaconda3\lib\site-packages\pymc\backends\arviz.py:419, in InferenceDataConverter.to_inference_data(self)
409 def to_inference_data(self):
410 “”“Convert all available data to an InferenceData object.
411
412 Note that if groups can not be created (e.g., there is no trace, so
413 the posterior and sample_stats can not be extracted), then the InferenceData
414 will not have those groups.
415 “””
416 id_dict = {
417 “posterior”: self.posterior_to_xarray(),
418 “sample_stats”: self.sample_stats_to_xarray(),
→ 419 “posterior_predictive”: self.posterior_predictive_to_xarray(),
420 “predictions”: self.predictions_to_xarray(),
421 **self.priors_to_xarray(),
422 “observed_data”: self.observed_data_to_xarray(),
423 }
424 if self.predictions:
425 id_dict[“predictions_constant_data”] = self.constant_data_to_xarray()

File ~\AppData\Roaming\Python\Python39\site-packages\arviz\data\base.py:65, in requires.__call__.<locals>.wrapped(cls)
63 if all((getattr(cls, prop_i) is None for prop_i in prop)):
64 return None
—> 65 return func(cls)

File ~\Anaconda3\lib\site-packages\pymc\backends\arviz.py:340, in InferenceDataConverter.posterior_predictive_to_xarray(self)
338 data = self.posterior_predictive
339 dims = {var_name: self.sample_dims + self.dims.get(var_name, []) for var_name in data}
→ 340 return dict_to_dataset(
341 data, library=pymc, coords=self.coords, dims=dims, default_dims=self.sample_dims
342 )

File ~\AppData\Roaming\Python\Python39\site-packages\arviz\data\base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
305 data_vars = {}
306 for key, values in data.items():
→ 307 data_vars[key] = numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File ~\AppData\Roaming\Python\Python39\site-packages\arviz\data\base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
252 # filter coords based on the dims
253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
→ 254 return xr.DataArray(ary, coords=coords, dims=dims)

File ~\Anaconda3\lib\site-packages\xarray\core\dataarray.py:422, in DataArray.__init__(self, data, coords, dims, name, attrs, indexes, fastpath)
420 data = _check_data_shape(data, coords, dims)
421 data = as_compatible_data(data)
→ 422 coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
423 variable = Variable(dims, data, attrs, fastpath=True)
424 indexes, coords = _create_indexes_from_coords(coords)

File ~\Anaconda3\lib\site-packages\xarray\core\dataarray.py:137, in _infer_coords_and_dims(shape, coords, dims)
135 dims = tuple(dims)
136 elif len(dims) != len(shape):
→ 137 raise ValueError(
138 “different number of dimensions on data "
139 f"and dims: {len(shape)} vs {len(dims)}”
140 )
141 else:
142 for d in dims:

Update - the above worked fine with v5.5.0, so I’m guessing it was an issue in 5.0.1 that has since been fixed. Thanks again for the reply.

1 Like