Hi,
I got a similar (although slightly different) error. I originally hit the ValueError "conflicting sizes for dimension 'obs_id': length 1650 on the data but length 300 on coordinate 'obs_id'", and tried to get around it by setting the keep_size parameter to False in pm.sample_posterior_predictive() to tell the model that the size of the 'obs_id' coord has changed.
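In code, the pattern I tried is roughly this (a minimal sketch with placeholder names new_nr, new_obs_id and idata; the actual model and call are at the bottom of the post):

with pred_err_model:
    # new_obs_id / new_nr are placeholders for the resized coordinate and data
    pred_err_model.set_data('nr', new_nr, coords={'obs_id': new_obs_id})
    idata = pm.sample_posterior_predictive(
        idata,
        predictions=True,
        extend_inferencedata=True,
        keep_size=False,  # my attempt to tell PyMC that 'obs_id' has changed size
    )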
The error message for me says:
/projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:433: UserWarning: The shape of variables sigma_ff, log_morphscale, pred_err_scale, response in predictions group is not compatible with number of chains and draws. The automatic dimension naming might not have worked. This can also mean that some draws or even whole chains are not represented.
warnings.warn(
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [114], in <cell line: 5>()
13 pred_err_model.set_data('nr', oos_df.nr, coords = {'obs_id': obs_id_2})
15 # update values of predictors:
16 #pm.set_data({"pred": predictors_out_of_sample})
17 # use the updated values and predict outcomes and probabilities:
---> 18 t = pm.sample_posterior_predictive(
19 t,
20 var_names=["sigma_ff", "log_morphscale","pred_err_scale", "response"],
21 return_inferencedata=True,
22 predictions=True,
23 extend_inferencedata=True,
24 random_seed=rng,
25 keep_size=False
26 )
27 exp_prederr_model_dict["oos_highf0_highff"] = t
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/sampling.py:1983, in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
1981 ikwargs.setdefault("idata_orig", trace)
1982 ikwargs.setdefault("inplace", True)
-> 1983 return pm.predictions_to_inference_data(ppc_trace, **ikwargs)
1984 converter = pm.backends.arviz.InferenceDataConverter(posterior_predictive=ppc_trace, **ikwargs)
1985 converter.nchains = nchain
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:656, in predictions_to_inference_data(predictions, posterior_trace, model, coords, dims, idata_orig, inplace)
654 aelem = next(iter(predictions.values()))
655 converter.nchains, converter.ndraws = aelem.shape[:2]
--> 656 new_idata = converter.to_inference_data()
657 if idata_orig is None:
658 return new_idata
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:521, in InferenceDataConverter.to_inference_data(self)
509 def to_inference_data(self):
510 """Convert all available data to an InferenceData object.
511
512 Note that if groups can not be created (e.g., there is no `trace`, so
513 the `posterior` and `sample_stats` can not be extracted), then the InferenceData
514 will not have those groups.
515 """
516 id_dict = {
517 "posterior": self.posterior_to_xarray(),
518 "sample_stats": self.sample_stats_to_xarray(),
519 "log_likelihood": self.log_likelihood_to_xarray(),
520 "posterior_predictive": self.posterior_predictive_to_xarray(),
--> 521 "predictions": self.predictions_to_xarray(),
522 **self.priors_to_xarray(),
523 "observed_data": self.observed_data_to_xarray(),
524 }
525 if self.predictions:
526 id_dict["predictions_constant_data"] = self.constant_data_to_xarray()
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:65, in requires.__call__.<locals>.wrapped(cls)
63 if all((getattr(cls, prop_i) is None for prop_i in prop)):
64 return None
---> 65 return func(cls)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:451, in InferenceDataConverter.predictions_to_xarray(self)
448 @requires(["predictions"])
449 def predictions_to_xarray(self):
450 """Convert predictions (out of sample predictions) to xarray."""
--> 451 return self.translate_posterior_predictive_dict_to_xarray(self.predictions, "predictions")
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:439, in InferenceDataConverter.translate_posterior_predictive_dict_to_xarray(self, dct, kind)
432 if warning_vars:
433 warnings.warn(
434 f"The shape of variables {', '.join(warning_vars)} in {kind} group is not compatible "
435 "with number of chains and draws. The automatic dimension naming might not have worked. "
436 "This can also mean that some draws or even whole chains are not represented.",
437 UserWarning,
438 )
--> 439 return dict_to_dataset(data, library=pymc, coords=self.coords, dims=self.dims)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
305 data_vars = {}
306 for key, values in data.items():
--> 307 data_vars[key] = numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
252 # filter coords based on the dims
253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
--> 254 return xr.DataArray(ary, coords=coords, dims=dims)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:412, in DataArray.__init__(self, data, coords, dims, name, attrs, indexes, fastpath)
410 data = _check_data_shape(data, coords, dims)
411 data = as_compatible_data(data)
--> 412 coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
413 variable = Variable(dims, data, attrs, fastpath=True)
414 indexes, coords = _create_indexes_from_coords(coords)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:160, in _infer_coords_and_dims(shape, coords, dims)
158 for d, s in zip(v.dims, v.shape):
159 if s != sizes[d]:
--> 160 raise ValueError(
161 f"conflicting sizes for dimension {d!r}: "
162 f"length {sizes[d]} on the data but length {s} on "
163 f"coordinate {k!r}"
164 )
166 if k in sizes and v.shape != (sizes[k],):
167 raise ValueError(
168 f"coordinate {k!r} is a DataArray dimension, but "
169 f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} "
170 "matching the dimension size"
171 )
ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 4 on coordinate 'chain'
This happens with the following code and model:
obs_id = np.arange(data_df.shape[0])

with pm.Model() as pred_err_model:
    pred_err_model.add_coord('obs_id', obs_id, mutable=True)

    context_f0 = pm.MutableData('context_f0', data_df.context_f0, dims='obs_id')
    context_ff = pm.MutableData('context_ff', data_df.context_ff, dims='obs_id')
    stimulus_ff = pm.MutableData('stimulus_ff', data_df.ffb, dims='obs_id')
    stimulus_f0 = pm.MutableData('stimulus_f0', data_df.f0b, dims='obs_id')
    morph = pm.MutableData('morph', data_df.morph_scaled, dims='obs_id')
    nr_o = pm.MutableData('nr_o', data_df.nr_o, dims='obs_id')
    nr = pm.MutableData('nr', data_df.nr, dims='obs_id')

    pop_intercept = pm.MutableData('pop_intercept', all_intercept)
    pop_slope = pm.MutableData('pop_slope', all_slope)
    pop_var = pm.MutableData('pop_var', all_var)

    sigma_sq_ff = pm.HalfNormal('sigma_ff', sigma=100)
    pred_err_scale = pm.Normal('pred_err_scale', mu=0, sigma=2)
    log_morphscale = pm.Normal('log_morphscale', mu=0, sigma=3)

    context_ff_from_f0_prediction = pop_intercept + pop_slope * context_f0
    stimulus_ff_from_f0_prediction = pop_intercept + pop_slope * stimulus_f0
    learned_ff_prediction_error = pm.Deterministic(
        'learned_prediction_error', context_ff - context_ff_from_f0_prediction, dims='obs_id'
    )
    context_biased_ff_prediction = pm.Deterministic(
        'context_biased_ff_prediction',
        stimulus_ff_from_f0_prediction + learned_ff_prediction_error * pred_err_scale,
        dims='obs_id',
    )

    s = pm.math.sqrt(1 / ((1 / pop_var) + (1 / sigma_sq_ff)))
    m = ((context_biased_ff_prediction * (1 / pop_var)) + (stimulus_ff * (1 / sigma_sq_ff))) / ((1 / pop_var) + (1 / sigma_sq_ff))
    combined = pm.Normal('combined', mu=m, sigma=s, dims='obs_id')

    x = pm.Deterministic('x', pm.math.exp(log_morphscale) * morph + stimulus_ff, dims='obs_id')
    p = pm.Deterministic('prob', pm.math.exp(pm.logcdf(combined, x)), dims='obs_id')
    n_os = pm.Binomial('response', n=nr, p=p, observed=nr_o, dims='obs_id')
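For clarity on what the model is doing: the s and m lines above are just the standard inverse-variance (precision-weighted) combination of the context-biased prediction and the stimulus (writing pop_var as σ²_pop and sigma_sq_ff as σ²_ff):

$$
m = \frac{\dfrac{\text{context\_biased\_ff\_prediction}}{\sigma^2_{\text{pop}}} + \dfrac{\text{stimulus\_ff}}{\sigma^2_{\text{ff}}}}{\dfrac{1}{\sigma^2_{\text{pop}}} + \dfrac{1}{\sigma^2_{\text{ff}}}},
\qquad
s = \sqrt{\frac{1}{\dfrac{1}{\sigma^2_{\text{pop}}} + \dfrac{1}{\sigma^2_{\text{ff}}}}}
$$

and combined ~ Normal(m, s) then enters the Binomial likelihood through the logcdf term.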
# out of sample predictions for response given all other variables
obs_id = np.arange(oos_df.shape[0])

with pred_err_model:
    pred_err_model.set_data('context_f0', oos_df['context_f0'], coords={'obs_id': obs_id})
    pred_err_model.set_data('context_ff', oos_df.context_ff, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_ff', oos_df.ffb, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_f0', oos_df.f0b, coords={'obs_id': obs_id})
    pred_err_model.set_data('morph', oos_df.morph_scaled, coords={'obs_id': obs_id})
    pred_err_model.set_data('nr', oos_df.nr, coords={'obs_id': obs_id})

    t = pm.sample_posterior_predictive(
        t,
        var_names=["sigma_ff", "log_morphscale", "pred_err_scale", "response"],
        return_inferencedata=True,
        predictions=True,
        extend_inferencedata=True,
        random_seed=rng,
        keep_size=False,
    )