I am trying to use sample_posterior_predictive but am getting this error. As per the threads, I should remove the samples and keep_size arguments, which I did. But I didn't thin any data from the trace for sample_posterior_predictive, so I am not able to understand the thinning concept.
Can you provide the code you are using and the (full) error messages you are getting?
Can you dump the raw text? It’s hard to run the code from screenshots.
Please find the code below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc as pm

def bayesian_lin_reg_fit(x_data_shared, y_data_shared, display_plot=True, Intercept=False):
    with pm.Model() as linear_model:
        sigma = pm.Uniform('Sigma', 0, 100)
        intercept = pm.Uniform('Intercept', 0, 1200)
        x_coeff = pm.Uniform('Slope', -10, 10)
        if Intercept:
            yhat = pm.math.dot(x_data_shared, x_coeff) + intercept
        else:
            yhat = pm.math.dot(x_data_shared, x_coeff)
        # Likelihood
        likelihood = pm.Normal('y', yhat, sigma, observed=y_data_shared)
        print(likelihood)
        # Inference
        trace_linear = pm.sample(500, cores=1, init="auto", tune=500, progressbar=True)
    if display_plot:
        plt.figure(figsize=(7, 7))
        # traceplot(trace_linear[100:])
        plt.tight_layout()
        print(pm.summary(trace_linear))
    return linear_model, trace_linear
def credible_interval_calculation(x_data_shared, trace_linear, linear_model):
    n_samples = 1000
    ppc_CI = pm.sample_posterior_predictive(trace_linear,
                                            model=linear_model,
                                            progressbar=False, keep_size=False)
    predicted = ppc_CI.posterior_predictive['y']
    print(predicted.shape)
    nb_point = x_data_shared.get_value().shape[0]
    print(nb_point)
    predicted = np.reshape(predicted, (n_samples, nb_point))
    predicted_interval = []
    for col in predicted.T:
        predicted_loc = [np.quantile(col, x) for x in [0.0005, 0.005, 0.05]] + \
                        [np.mean(col)] + \
                        [np.quantile(col, x) for x in [0.95, 0.995, 0.9995]]
        predicted_interval += predicted_loc
    predicted_interval = np.array(predicted_interval)
    predicted_interval = np.reshape(predicted_interval, (predicted_interval.size // 7, 7), order='C')
    predicted_interval = pd.DataFrame(predicted_interval, columns=('0.05%', '0.5%', '5%',
                                                                   'mean',
                                                                   '95%', '99.5%', '99.95%'))
    print('predicted_interval:', predicted_interval)
    predicted_interval['x'] = x_data_shared.get_value()  # x_for_interp
    return predicted_interval, predicted
Error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In [148], line 42
37 bayes_reg.limit_y = LSL_table.loc[idx, ].iloc[0, 0]
39 bayes_reg.fit(df_reg[test_for_reg[0]],
40 df_reg[test_for_reg[1]],
41 Intercept = True)
---> 42 bayes_reg.credible_interval()
43 bayes_reg.credible_interval_loc([bayes_reg.limit_x])
44 #bayes_reg.distribution_plot_at_x()
Cell In [124], line 25, in release_test_correlation.credible_interval(self, nb_points)
23 self.x_data_shared.set_value(x_for_interp)
24 self.y_data_shared.set_value(np.zeros_like(x_for_interp))
---> 25 self.credible_interval_result, self.sampled_posterior_CI = credible_interval_calculation(self.x_data_shared,
26 self.trace_linear, self.linear_model)
Cell In [147], line 3, in credible_interval_calculation(x_data_shared, trace_linear, linear_model)
1 def credible_interval_calculation(x_data_shared,trace_linear,linear_model):
2 n_samples=1000
----> 3 ppc_CI = pm.sample_posterior_predictive(trace_linear,
4 model=linear_model,
5 progressbar=False,keep_size=False)
6 predicted = ppc_CI.posterior_predictive['y']
7 print(predicted.shape)
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\sampling.py:1983, in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
1981 converter.nchains = nchain
1982 converter.ndraws = len_trace
-> 1983 idata_pp = converter.to_inference_data()
1984 if extend_inferencedata:
1985 trace.extend(idata_pp)
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:520, in InferenceDataConverter.to_inference_data(self)
509 def to_inference_data(self):
510 """Convert all available data to an InferenceData object.
511
512 Note that if groups can not be created (e.g., there is no `trace`, so
513 the `posterior` and `sample_stats` can not be extracted), then the InferenceData
514 will not have those groups.
515 """
516 id_dict = {
517 "posterior": self.posterior_to_xarray(),
518 "sample_stats": self.sample_stats_to_xarray(),
519 "log_likelihood": self.log_likelihood_to_xarray(),
--> 520 "posterior_predictive": self.posterior_predictive_to_xarray(),
521 "predictions": self.predictions_to_xarray(),
522 **self.priors_to_xarray(),
523 "observed_data": self.observed_data_to_xarray(),
524 }
525 if self.predictions:
526 id_dict["predictions_constant_data"] = self.constant_data_to_xarray()
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:65, in requires.__call__.<locals>.wrapped(cls)
63 if all((getattr(cls, prop_i) is None for prop_i in prop)):
64 return None
---> 65 return func(cls)
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:444, in InferenceDataConverter.posterior_predictive_to_xarray(self)
441 @requires(["posterior_predictive"])
442 def posterior_predictive_to_xarray(self):
443 """Convert posterior_predictive samples to xarray."""
--> 444 return self.translate_posterior_predictive_dict_to_xarray(
445 self.posterior_predictive, "posterior_predictive"
446 )
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:439, in InferenceDataConverter.translate_posterior_predictive_dict_to_xarray(self, dct, kind)
432 if warning_vars:
433 warnings.warn(
434 f"The shape of variables {', '.join(warning_vars)} in {kind} group is not compatible "
435 "with number of chains and draws. The automatic dimension naming might not have worked. "
436 "This can also mean that some draws or even whole chains are not represented.",
437 UserWarning,
438 )
--> 439 return dict_to_dataset(data, library=pymc, coords=self.coords, dims=self.dims)
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
305 data_vars = {}
306 for key, values in data.items():
--> 307 data_vars[key] = numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))
File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
252 # filter coords based on the dims
253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
--> 254 return xr.DataArray(ary, coords=coords, dims=dims)
...
169 f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} "
170 "matching the dimension size"
171 )
ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 2 on coordinate 'chain'
Hi,
I got a similar (although slightly different) error when I tried to escape the ValueError: conflicting sizes for dimension 'obs_id': length 1650 on the data but length 300 on coordinate 'obs_id' by setting the keep_size parameter to False, i.e. pm.sample_posterior_predictive(keep_size=False), to tell the model that the size of the 'obs_id' coord has changed.
The error message for me says:
/projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:433: UserWarning: The shape of variables sigma_ff, log_morphscale, pred_err_scale, response in predictions group is not compatible with number of chains and draws. The automatic dimension naming might not have worked. This can also mean that some draws or even whole chains are not represented.
warnings.warn(
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [114], in <cell line: 5>()
13 pred_err_model.set_data('nr', oos_df.nr, coords = {'obs_id': obs_id_2})
15 # update values of predictors:
16 #pm.set_data({"pred": predictors_out_of_sample})
17 # use the updated values and predict outcomes and probabilities:
---> 18 t = pm.sample_posterior_predictive(
19 t,
20 var_names=["sigma_ff", "log_morphscale","pred_err_scale", "response"],
21 return_inferencedata=True,
22 predictions=True,
23 extend_inferencedata=True,
24 random_seed=rng,
25 keep_size=False
26 )
27 exp_prederr_model_dict["oos_highf0_highff"] = t
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/sampling.py:1983, in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
1981 ikwargs.setdefault("idata_orig", trace)
1982 ikwargs.setdefault("inplace", True)
-> 1983 return pm.predictions_to_inference_data(ppc_trace, **ikwargs)
1984 converter = pm.backends.arviz.InferenceDataConverter(posterior_predictive=ppc_trace, **ikwargs)
1985 converter.nchains = nchain
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:656, in predictions_to_inference_data(predictions, posterior_trace, model, coords, dims, idata_orig, inplace)
654 aelem = next(iter(predictions.values()))
655 converter.nchains, converter.ndraws = aelem.shape[:2]
--> 656 new_idata = converter.to_inference_data()
657 if idata_orig is None:
658 return new_idata
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:521, in InferenceDataConverter.to_inference_data(self)
509 def to_inference_data(self):
510 """Convert all available data to an InferenceData object.
511
512 Note that if groups can not be created (e.g., there is no `trace`, so
513 the `posterior` and `sample_stats` can not be extracted), then the InferenceData
514 will not have those groups.
515 """
516 id_dict = {
517 "posterior": self.posterior_to_xarray(),
518 "sample_stats": self.sample_stats_to_xarray(),
519 "log_likelihood": self.log_likelihood_to_xarray(),
520 "posterior_predictive": self.posterior_predictive_to_xarray(),
--> 521 "predictions": self.predictions_to_xarray(),
522 **self.priors_to_xarray(),
523 "observed_data": self.observed_data_to_xarray(),
524 }
525 if self.predictions:
526 id_dict["predictions_constant_data"] = self.constant_data_to_xarray()
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:65, in requires.__call__.<locals>.wrapped(cls)
63 if all((getattr(cls, prop_i) is None for prop_i in prop)):
64 return None
---> 65 return func(cls)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:451, in InferenceDataConverter.predictions_to_xarray(self)
448 @requires(["predictions"])
449 def predictions_to_xarray(self):
450 """Convert predictions (out of sample predictions) to xarray."""
--> 451 return self.translate_posterior_predictive_dict_to_xarray(self.predictions, "predictions")
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:439, in InferenceDataConverter.translate_posterior_predictive_dict_to_xarray(self, dct, kind)
432 if warning_vars:
433 warnings.warn(
434 f"The shape of variables {', '.join(warning_vars)} in {kind} group is not compatible "
435 "with number of chains and draws. The automatic dimension naming might not have worked. "
436 "This can also mean that some draws or even whole chains are not represented.",
437 UserWarning,
438 )
--> 439 return dict_to_dataset(data, library=pymc, coords=self.coords, dims=self.dims)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
305 data_vars = {}
306 for key, values in data.items():
--> 307 data_vars[key] = numpy_to_data_array(
308 values,
309 var_name=key,
310 coords=coords,
311 dims=dims.get(key),
312 default_dims=default_dims,
313 index_origin=index_origin,
314 skip_event_dims=skip_event_dims,
315 )
316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
252 # filter coords based on the dims
253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
--> 254 return xr.DataArray(ary, coords=coords, dims=dims)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:412, in DataArray.__init__(self, data, coords, dims, name, attrs, indexes, fastpath)
410 data = _check_data_shape(data, coords, dims)
411 data = as_compatible_data(data)
--> 412 coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
413 variable = Variable(dims, data, attrs, fastpath=True)
414 indexes, coords = _create_indexes_from_coords(coords)
File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:160, in _infer_coords_and_dims(shape, coords, dims)
158 for d, s in zip(v.dims, v.shape):
159 if s != sizes[d]:
--> 160 raise ValueError(
161 f"conflicting sizes for dimension {d!r}: "
162 f"length {sizes[d]} on the data but length {s} on "
163 f"coordinate {k!r}"
164 )
166 if k in sizes and v.shape != (sizes[k],):
167 raise ValueError(
168 f"coordinate {k!r} is a DataArray dimension, but "
169 f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} "
170 "matching the dimension size"
171 )
ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 4 on coordinate 'chain'
with the following code and model:
obs_id = np.arange(data_df.shape[0])
with pm.Model() as pred_err_model:
    pred_err_model.add_coord('obs_id', obs_id, mutable=True)

    # observed data containers
    context_f0 = pm.MutableData('context_f0', data_df.context_f0, dims='obs_id')
    context_ff = pm.MutableData('context_ff', data_df.context_ff, dims='obs_id')
    stimulus_ff = pm.MutableData('stimulus_ff', data_df.ffb, dims='obs_id')
    stimulus_f0 = pm.MutableData('stimulus_f0', data_df.f0b, dims='obs_id')
    morph = pm.MutableData('morph', data_df.morph_scaled, dims='obs_id')
    nr_o = pm.MutableData('nr_o', data_df.nr_o, dims='obs_id')
    nr = pm.MutableData('nr', data_df.nr, dims='obs_id')

    # population-level constants
    pop_intercept = pm.MutableData('pop_intercept', all_intercept)
    pop_slope = pm.MutableData('pop_slope', all_slope)
    pop_var = pm.MutableData('pop_var', all_var)

    # priors
    sigma_sq_ff = pm.HalfNormal('sigma_ff', sigma=100)
    pred_err_scale = pm.Normal('pred_err_scale', mu=0, sigma=2)
    log_morphscale = pm.Normal('log_morphscale', mu=0, sigma=3)

    context_ff_from_f0_prediction = pop_intercept + pop_slope * context_f0
    stimulus_ff_from_f0_prediction = pop_intercept + pop_slope * stimulus_f0
    learned_ff_prediction_error = pm.Deterministic('learned_prediction_error',
                                                   context_ff - context_ff_from_f0_prediction,
                                                   dims='obs_id')
    context_biased_ff_prediction = pm.Deterministic('context_biased_ff_prediction',
                                                    stimulus_ff_from_f0_prediction + learned_ff_prediction_error * pred_err_scale,
                                                    dims='obs_id')

    # precision-weighted combination of the two cues
    s = pm.math.sqrt(1 / ((1 / pop_var) + (1 / sigma_sq_ff)))
    m = ((context_biased_ff_prediction * (1 / pop_var)) + (stimulus_ff * (1 / sigma_sq_ff))) / ((1 / pop_var) + (1 / sigma_sq_ff))
    combined = pm.Normal('combined', mu=m, sigma=s, dims='obs_id')

    x = pm.Deterministic('x', pm.math.exp(log_morphscale) * morph + stimulus_ff, dims='obs_id')
    p = pm.Deterministic('prob', pm.math.exp(pm.logcdf(combined, x)), dims='obs_id')
    n_os = pm.Binomial('response', n=nr, p=p, observed=nr_o, dims='obs_id')
# out-of-sample predictions for response given all other variables
obs_id = np.arange(oos_df.shape[0])
with pred_err_model:
    pred_err_model.set_data('context_f0', oos_df['context_f0'], coords={'obs_id': obs_id})
    pred_err_model.set_data('context_ff', oos_df.context_ff, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_ff', oos_df.ffb, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_f0', oos_df.f0b, coords={'obs_id': obs_id})
    pred_err_model.set_data('morph', oos_df.morph_scaled, coords={'obs_id': obs_id})
    pred_err_model.set_data('nr', oos_df.nr, coords={'obs_id': obs_id})

    t = pm.sample_posterior_predictive(
        t,
        var_names=["sigma_ff", "log_morphscale", "pred_err_scale", "response"],
        return_inferencedata=True,
        predictions=True,
        extend_inferencedata=True,
        random_seed=rng,
        keep_size=False,
    )
Try not using the samples nor the keep_size arguments. One of the screenshots has keep_size=False, which is still using the argument.
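For illustration, here is the same call with both arguments dropped (a sketch, reusing trace_linear and linear_model from the first code block above):

    # no samples/keep_size: one predictive draw per posterior sample is returned
    ppc_CI = pm.sample_posterior_predictive(trace_linear,
                                            model=linear_model,
                                            progressbar=False)
    predicted = ppc_CI.posterior_predictive['y']  # dims: (chain, draw, obs)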
I removed both arguments. But the question now is: how do I control the number of samples drawn from the trace in the posterior prediction? And similarly, how do we do it when we have more chains?
You should get as many posterior predictive samples as posterior samples you have.
If that is not feasible due to time or memory constraints, you can generate samples for just a subset of the posterior samples, as shown in the docs: pymc.sample_posterior_predictive — PyMC 0+untagged.345.g2bd0611.dirty documentation (example at the bottom).
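A minimal sketch of that docs pattern, assuming trace_linear is the InferenceData object returned by pm.sample: subset the posterior along the draw dimension with InferenceData.sel before passing the trace in. This also answers the chains question, since the chain dimension is preserved and you get one predictive draw per remaining (chain, draw) pair:

    # thin the posterior: keep every 5th draw from every chain
    thinned_trace = trace_linear.sel(draw=slice(None, None, 5))
    ppc = pm.sample_posterior_predictive(thinned_trace, model=linear_model)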