ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 2 on coordinate 'chain'

I am trying to use sample_posterior_predictive but I am getting this error. As per the existing threads I should remove the samples and keep_size arguments, which I did. But I didn't thin any data from the trace before calling sample_posterior_predictive, so I am not able to understand the thinning concept here.

(screenshot of the error)

Can you provide the code you are using and the (full) error messages you are getting?

Please find the full error:

(screenshot of the traceback)
Can you dump the raw text? It’s hard to run the code from screenshots.

Please find the code:

import numpy as np
import pandas as pd
import pymc as pm
import matplotlib.pyplot as plt

def bayesian_lin_reg_fit(x_data_shared, y_data_shared, display_plot=True, Intercept=False):
    with pm.Model() as linear_model:
        # Priors
        sigma = pm.Uniform('Sigma', 0, 100)
        intercept = pm.Uniform('Intercept', 0, 1200)
        x_coeff = pm.Uniform('Slope', -10, 10)
        if Intercept:
            yhat = pm.math.dot(x_data_shared, x_coeff) + intercept
        else:
            yhat = pm.math.dot(x_data_shared, x_coeff)
        # Likelihood
        likelihood = pm.Normal('y', yhat, sigma, observed=y_data_shared)
        print(likelihood)

        # Inference
        trace_linear = pm.sample(500, cores=1, init="auto", tune=500, progressbar=True)

        if display_plot:
            plt.figure(figsize=(7, 7))
            # traceplot(trace_linear[100:])
            plt.tight_layout()

        print(pm.summary(trace_linear))
        return linear_model, trace_linear

def credible_interval_calculation(x_data_shared, trace_linear, linear_model):
    n_samples = 1000  # hard-coded as chains * draws (2 * 500)
    ppc_CI = pm.sample_posterior_predictive(trace_linear,
                                            model=linear_model,
                                            progressbar=False, keep_size=False)
    predicted = ppc_CI.posterior_predictive['y']
    print(predicted.shape)
    nb_point = x_data_shared.get_value().shape[0]
    print(nb_point)
    predicted = np.reshape(predicted, (n_samples, nb_point))
    predicted_interval = []
    for col in predicted.T:
        predicted_loc = [np.quantile(col, x) for x in [0.0005, 0.005, 0.05]] + \
                        [np.mean(col)] + \
                        [np.quantile(col, x) for x in [0.95, 0.995, 0.9995]]
        predicted_interval += predicted_loc

    predicted_interval = np.array(predicted_interval)
    predicted_interval = np.reshape(predicted_interval, (int(predicted_interval.size / 7), 7), order='C')
    predicted_interval = pd.DataFrame(predicted_interval, columns=('0.05%', '0.5%', '5%',
                                                                   'mean',
                                                                   '95%', '99.5%', '99.95%'))
    print('predicted_interval:', predicted_interval)
    predicted_interval['x'] = x_data_shared.get_value()  # x_for_interp

    return (predicted_interval, predicted)

Error:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In [148], line 42
     37 bayes_reg.limit_y = LSL_table.loc[idx, ].iloc[0, 0] 
     39 bayes_reg.fit(df_reg[test_for_reg[0]], 
     40               df_reg[test_for_reg[1]],
     41               Intercept = True)
---> 42 bayes_reg.credible_interval()
     43 bayes_reg.credible_interval_loc([bayes_reg.limit_x])
     44 #bayes_reg.distribution_plot_at_x()

Cell In [124], line 25, in release_test_correlation.credible_interval(self, nb_points)
     23 self.x_data_shared.set_value(x_for_interp)
     24 self.y_data_shared.set_value(np.zeros_like(x_for_interp))
---> 25 self.credible_interval_result, self.sampled_posterior_CI = credible_interval_calculation(self.x_data_shared, 
     26                                                               self.trace_linear, self.linear_model)

Cell In [147], line 3, in credible_interval_calculation(x_data_shared, trace_linear, linear_model)
      1 def credible_interval_calculation(x_data_shared,trace_linear,linear_model):
      2     n_samples=1000
----> 3     ppc_CI = pm.sample_posterior_predictive(trace_linear,
      4                                             model=linear_model,
      5                                             progressbar=False,keep_size=False)
      6     predicted = ppc_CI.posterior_predictive['y']
      7     print(predicted.shape)

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\sampling.py:1983, in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
   1981 converter.nchains = nchain
   1982 converter.ndraws = len_trace
-> 1983 idata_pp = converter.to_inference_data()
   1984 if extend_inferencedata:
   1985     trace.extend(idata_pp)

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:520, in InferenceDataConverter.to_inference_data(self)
    509 def to_inference_data(self):
    510     """Convert all available data to an InferenceData object.
    511 
    512     Note that if groups can not be created (e.g., there is no `trace`, so
    513     the `posterior` and `sample_stats` can not be extracted), then the InferenceData
    514     will not have those groups.
    515     """
    516     id_dict = {
    517         "posterior": self.posterior_to_xarray(),
    518         "sample_stats": self.sample_stats_to_xarray(),
    519         "log_likelihood": self.log_likelihood_to_xarray(),
--> 520         "posterior_predictive": self.posterior_predictive_to_xarray(),
    521         "predictions": self.predictions_to_xarray(),
    522         **self.priors_to_xarray(),
    523         "observed_data": self.observed_data_to_xarray(),
    524     }
    525     if self.predictions:
    526         id_dict["predictions_constant_data"] = self.constant_data_to_xarray()

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:65, in requires.__call__.<locals>.wrapped(cls)
     63     if all((getattr(cls, prop_i) is None for prop_i in prop)):
     64         return None
---> 65 return func(cls)

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:444, in InferenceDataConverter.posterior_predictive_to_xarray(self)
    441 @requires(["posterior_predictive"])
    442 def posterior_predictive_to_xarray(self):
    443     """Convert posterior_predictive samples to xarray."""
--> 444     return self.translate_posterior_predictive_dict_to_xarray(
    445         self.posterior_predictive, "posterior_predictive"
    446     )

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\pymc\backends\arviz.py:439, in InferenceDataConverter.translate_posterior_predictive_dict_to_xarray(self, dct, kind)
    432 if warning_vars:
    433     warnings.warn(
    434         f"The shape of variables {', '.join(warning_vars)} in {kind} group is not compatible "
    435         "with number of chains and draws. The automatic dimension naming might not have worked. "
    436         "This can also mean that some draws or even whole chains are not represented.",
    437         UserWarning,
    438     )
--> 439 return dict_to_dataset(data, library=pymc, coords=self.coords, dims=self.dims)

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
    305 data_vars = {}
    306 for key, values in data.items():
--> 307     data_vars[key] = numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File c:\ProgramData\MiniforgeEnvs\pymc_env\lib\site-packages\arviz\data\base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
    252 # filter coords based on the dims
    253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
--> 254 return xr.DataArray(ary, coords=coords, dims=dims)
...
    169         f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} "
    170         "matching the dimension size"
    171     )

ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 2 on coordinate 'chain'
<Figure size 700x700 with 0 Axes>

Hi,
I got a similar (although slightly different) error. I was trying to escape the ValueError: conflicting sizes for dimension 'obs_id': length 1650 on the data but length 300 on coordinate 'obs_id', so I set keep_size=False in pm.sample_posterior_predictive to tell the model that the size of the 'obs_id' coord had changed.

The error message for me says:

/projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:433: UserWarning: The shape of variables sigma_ff, log_morphscale, pred_err_scale, response in predictions group is not compatible with number of chains and draws. The automatic dimension naming might not have worked. This can also mean that some draws or even whole chains are not represented.
  warnings.warn(
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Input In [114], in <cell line: 5>()
     13 pred_err_model.set_data('nr', oos_df.nr, coords = {'obs_id': obs_id_2})
     15 # update values of predictors:
     16 #pm.set_data({"pred": predictors_out_of_sample})
     17 # use the updated values and predict outcomes and probabilities:
---> 18 t = pm.sample_posterior_predictive(
     19     t,
     20     var_names=["sigma_ff", "log_morphscale","pred_err_scale", "response"],
     21     return_inferencedata=True,
     22     predictions=True,
     23     extend_inferencedata=True,
     24     random_seed=rng,
     25     keep_size=False
     26 )
     27 exp_prederr_model_dict["oos_highf0_highff"] = t

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/sampling.py:1983, in sample_posterior_predictive(trace, samples, model, var_names, keep_size, random_seed, progressbar, return_inferencedata, extend_inferencedata, predictions, idata_kwargs, compile_kwargs)
   1981         ikwargs.setdefault("idata_orig", trace)
   1982         ikwargs.setdefault("inplace", True)
-> 1983     return pm.predictions_to_inference_data(ppc_trace, **ikwargs)
   1984 converter = pm.backends.arviz.InferenceDataConverter(posterior_predictive=ppc_trace, **ikwargs)
   1985 converter.nchains = nchain

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:656, in predictions_to_inference_data(predictions, posterior_trace, model, coords, dims, idata_orig, inplace)
    654     aelem = next(iter(predictions.values()))
    655     converter.nchains, converter.ndraws = aelem.shape[:2]
--> 656 new_idata = converter.to_inference_data()
    657 if idata_orig is None:
    658     return new_idata

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:521, in InferenceDataConverter.to_inference_data(self)
    509 def to_inference_data(self):
    510     """Convert all available data to an InferenceData object.
    511 
    512     Note that if groups can not be created (e.g., there is no `trace`, so
    513     the `posterior` and `sample_stats` can not be extracted), then the InferenceData
    514     will not have those groups.
    515     """
    516     id_dict = {
    517         "posterior": self.posterior_to_xarray(),
    518         "sample_stats": self.sample_stats_to_xarray(),
    519         "log_likelihood": self.log_likelihood_to_xarray(),
    520         "posterior_predictive": self.posterior_predictive_to_xarray(),
--> 521         "predictions": self.predictions_to_xarray(),
    522         **self.priors_to_xarray(),
    523         "observed_data": self.observed_data_to_xarray(),
    524     }
    525     if self.predictions:
    526         id_dict["predictions_constant_data"] = self.constant_data_to_xarray()

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:65, in requires.__call__.<locals>.wrapped(cls)
     63     if all((getattr(cls, prop_i) is None for prop_i in prop)):
     64         return None
---> 65 return func(cls)

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:451, in InferenceDataConverter.predictions_to_xarray(self)
    448 @requires(["predictions"])
    449 def predictions_to_xarray(self):
    450     """Convert predictions (out of sample predictions) to xarray."""
--> 451     return self.translate_posterior_predictive_dict_to_xarray(self.predictions, "predictions")

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/pymc/backends/arviz.py:439, in InferenceDataConverter.translate_posterior_predictive_dict_to_xarray(self, dct, kind)
    432 if warning_vars:
    433     warnings.warn(
    434         f"The shape of variables {', '.join(warning_vars)} in {kind} group is not compatible "
    435         "with number of chains and draws. The automatic dimension naming might not have worked. "
    436         "This can also mean that some draws or even whole chains are not represented.",
    437         UserWarning,
    438     )
--> 439 return dict_to_dataset(data, library=pymc, coords=self.coords, dims=self.dims)

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:307, in dict_to_dataset(data, attrs, library, coords, dims, default_dims, index_origin, skip_event_dims)
    305 data_vars = {}
    306 for key, values in data.items():
--> 307     data_vars[key] = numpy_to_data_array(
    308         values,
    309         var_name=key,
    310         coords=coords,
    311         dims=dims.get(key),
    312         default_dims=default_dims,
    313         index_origin=index_origin,
    314         skip_event_dims=skip_event_dims,
    315     )
    316 return xr.Dataset(data_vars=data_vars, attrs=make_attrs(attrs=attrs, library=library))

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/arviz/data/base.py:254, in numpy_to_data_array(ary, var_name, coords, dims, default_dims, index_origin, skip_event_dims)
    252 # filter coords based on the dims
    253 coords = {key: xr.IndexVariable((key,), data=np.asarray(coords[key])) for key in dims}
--> 254 return xr.DataArray(ary, coords=coords, dims=dims)

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:412, in DataArray.__init__(self, data, coords, dims, name, attrs, indexes, fastpath)
    410 data = _check_data_shape(data, coords, dims)
    411 data = as_compatible_data(data)
--> 412 coords, dims = _infer_coords_and_dims(data.shape, coords, dims)
    413 variable = Variable(dims, data, attrs, fastpath=True)
    414 indexes, coords = _create_indexes_from_coords(coords)

File /projects/crunchie/miniconda3/envs/pymc_env/lib/python3.10/site-packages/xarray/core/dataarray.py:160, in _infer_coords_and_dims(shape, coords, dims)
    158 for d, s in zip(v.dims, v.shape):
    159     if s != sizes[d]:
--> 160         raise ValueError(
    161             f"conflicting sizes for dimension {d!r}: "
    162             f"length {sizes[d]} on the data but length {s} on "
    163             f"coordinate {k!r}"
    164         )
    166 if k in sizes and v.shape != (sizes[k],):
    167     raise ValueError(
    168         f"coordinate {k!r} is a DataArray dimension, but "
    169         f"it has shape {v.shape!r} rather than expected shape {sizes[k]!r} "
    170         "matching the dimension size"
    171     )

ValueError: conflicting sizes for dimension 'chain': length 1 on the data but length 4 on coordinate 'chain'

with the following code and model:

obs_id = np.arange(data_df.shape[0])

with pm.Model() as pred_err_model:
    pred_err_model.add_coord('obs_id', obs_id, mutable=True)
    
    context_f0 = pm.MutableData('context_f0', data_df.context_f0, dims='obs_id')
    context_ff = pm.MutableData('context_ff', data_df.context_ff, dims='obs_id')
    stimulus_ff = pm.MutableData('stimulus_ff', data_df.ffb, dims='obs_id')
    stimulus_f0 = pm.MutableData('stimulus_f0', data_df.f0b, dims='obs_id')
    morph = pm.MutableData('morph', data_df.morph_scaled, dims='obs_id')
    nr_o = pm.MutableData('nr_o', data_df.nr_o, dims='obs_id')
    nr = pm.MutableData('nr', data_df.nr, dims='obs_id')
    
    pop_intercept = pm.MutableData('pop_intercept', all_intercept)
    pop_slope = pm.MutableData('pop_slope', all_slope)
    pop_var = pm.MutableData('pop_var', all_var)
    
    sigma_sq_ff = pm.HalfNormal('sigma_ff', sigma=100)
    pred_err_scale = pm.Normal('pred_err_scale', mu=0, sigma=2)
    log_morphscale = pm.Normal('log_morphscale', mu=0, sigma=3)
    
    context_ff_from_f0_prediction = pop_intercept + pop_slope * context_f0
    stimulus_ff_from_f0_prediction = pop_intercept + pop_slope * stimulus_f0
    
    learned_ff_prediction_error = pm.Deterministic('learned_prediction_error', context_ff - context_ff_from_f0_prediction, dims='obs_id')
    
    context_biased_ff_prediction = pm.Deterministic('context_biased_ff_prediction', stimulus_ff_from_f0_prediction + learned_ff_prediction_error * pred_err_scale, dims='obs_id')

    s = pm.math.sqrt(1 / ((1 / pop_var) + (1 / sigma_sq_ff)))
    m = ((context_biased_ff_prediction * (1 / pop_var)) + (stimulus_ff * (1 / sigma_sq_ff))) / ((1 / pop_var) + (1 / sigma_sq_ff))

    combined = pm.Normal('combined', mu=m, sigma=s, dims='obs_id')
    
    x = pm.Deterministic('x', pm.math.exp(log_morphscale) * morph + stimulus_ff, dims='obs_id')

    p = pm.Deterministic('prob', pm.math.exp(pm.logcdf(combined, x)), dims='obs_id')
    

    n_os = pm.Binomial('response', n=nr, p=p, observed=nr_o, dims='obs_id')


# out of sample predictions for response given all other variables
obs_id = np.arange(oos_df.shape[0])

with pred_err_model:
    pred_err_model.set_data('context_f0', oos_df.context_f0, coords={'obs_id': obs_id})
    pred_err_model.set_data('context_ff', oos_df.context_ff, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_ff', oos_df.ffb, coords={'obs_id': obs_id})
    pred_err_model.set_data('stimulus_f0', oos_df.f0b, coords={'obs_id': obs_id})
    pred_err_model.set_data('morph', oos_df.morph_scaled, coords={'obs_id': obs_id})
    pred_err_model.set_data('nr', oos_df.nr, coords={'obs_id': obs_id})
                 
    t = pm.sample_posterior_predictive(
        t,
        var_names=["sigma_ff", "log_morphscale","pred_err_scale", "response"],
        return_inferencedata=True,
        predictions=True,
        extend_inferencedata=True,
        random_seed=rng,
        keep_size=False
    )

Try not using the samples or keep_size arguments at all. One of the screenshots still has keep_size=False, which counts as using the argument.
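
For the first snippet above, the call would then look something like this (a minimal sketch reusing trace_linear and linear_model from the posted code):

ppc_CI = pm.sample_posterior_predictive(trace_linear,
                                        model=linear_model,
                                        progressbar=False)
# PyMC keeps the full structure itself: one array per variable
# with dims (chain, draw, point).
predicted = ppc_CI.posterior_predictive['y']
print(predicted.shape)  # e.g. (2, 500, n_points) for 2 chains x 500 draws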

I removed both arguments. But the question now is: how do I control the number of samples taken from the trace in posterior prediction? And similarly, how do we do it when we have more chains?

You should get as many posterior predictive samples as you have posterior samples.
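
With 2 chains × 500 draws that is an array of shape (2, 500, n_points) for y. If downstream code expects a single flat sample axis, you can collapse chain and draw with xarray instead of a hard-coded reshape (a sketch, assuming ppc_CI is the InferenceData from the call suggested above):

predicted = ppc_CI.posterior_predictive['y']
# Stack (chain, draw) into one "sample" dimension; this works for any
# number of chains, unlike reshaping to a fixed n_samples.
flat = predicted.stack(sample=("chain", "draw")).transpose("sample", ...)
print(flat.shape)  # (n_chains * n_draws, n_points)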

If this is not possible due to time or memory constraints, you can generate samples for a subset of the posterior samples, as shown in the pymc.sample_posterior_predictive documentation (example at the bottom).
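
The pattern in that example is to thin the posterior before passing it in, along these lines (a sketch reusing the names from the posted code; the thinning factor of 5 is arbitrary):

# Keep every 5th draw of each chain, then predict only for that subset.
thinned_trace = trace_linear.sel(draw=slice(None, None, 5))
with linear_model:
    ppc = pm.sample_posterior_predictive(thinned_trace)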