Hello all,
I was having a great deal of trouble getting some models to run because of dimensionality issues, which I was able to work around by turning the observed y values into a NumPy array and then taking its first column (`exit_velos[:, 0]`), as in:
# Encode the 'BatterId' column with the factorize helper: the first result is
# used to index EV_prior per observation, the second labels the 'batter' coord.
# NOTE(review): presumably `batters` holds one unique label per batter and
# `batter_idx` is a 1-D integer array -- confirm against factorize().
batter_idx, batters = factorize(in_play_velos, 'BatterId')

# One 'obs_id' label per row of the 'BatExitVelo' column.
n_obs = in_play_velos.select(pl.col('BatExitVelo')).shape[0]
coords = {
    'batter': batters,
    'obs_id': np.arange(n_obs),
}

with pm.Model(coords=coords) as model:
    # Normal(80, 2.5) prior with one entry per 'batter' label.
    EV_prior = pm.Normal('EV_prior', mu=80, sigma=2.5, dims='batter')
    sigma = pm.Exponential('sigma', 2.5)

    # Likelihood: each observation is centred on the EV_prior entry selected
    # by batter_idx; column 0 of exit_velos supplies the observed values.
    # NOTE(review): assumes exit_velos is 2-D with one row per obs_id -- confirm.
    y = pm.Normal(
        'y',
        mu=EV_prior[batter_idx],
        sigma=sigma,
        observed=exit_velos[:, 0],
        dims='obs_id',
    )

    # Prior predictive first, then extend the same object with the posterior
    # draws and finally the posterior predictive samples.
    trace = pm.sample_prior_predictive(1000)
    posterior = pm.sample(chains=4, cores=4, random_seed=0)
    trace.extend(posterior)
    pm.sample_posterior_predictive(trace, extend_inferencedata=True)
But when I try to get any `az.summary` information, even when specifying the `group` and `var_names` as in [this example](https://discourse.pymc.io/t/arviz-summary-valueerror/12558/3), it throws the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~\anaconda3\Lib\site-packages\pandas\core\internals\blocks.py:1429, in Block.setitem(self, indexer, value, using_cow)
1428 try:
-> 1429 values[indexer] = casted
1430 except (TypeError, ValueError) as err:
ValueError: could not broadcast input array from shape (9,2) into shape (9,)
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
Cell In[18], line 1
----> 1 az.summary(trace)
File ~\anaconda3\Lib\site-packages\arviz\stats\stats.py:1509, in summary(data, var_names, filter_vars, group, fmt, kind, round_to, circ_var_names, stat_focus, stat_funcs, extend, hdi_prob, skipna, labeller, coords, index_origin, order)
1505 indices = []
1506 for i, (var_name, sel, isel, values) in enumerate(
1507 xarray_var_iter(joined, skip_dims={"metric"})
1508 ):
-> 1509 summary_df.iloc[i] = values
1510 indices.append(labeller.make_label_flat(var_name, sel, isel))
1511 summary_df.index = indices
File ~\anaconda3\Lib\site-packages\pandas\core\indexing.py:911, in _LocationIndexer.__setitem__(self, key, value)
908 self._has_valid_setitem_indexer(key)
910 iloc = self if self.name == "iloc" else self.obj.iloc
--> 911 iloc._setitem_with_indexer(indexer, value, self.name)
File ~\anaconda3\Lib\site-packages\pandas\core\indexing.py:1944, in _iLocIndexer._setitem_with_indexer(self, indexer, value, name)
1942 self._setitem_with_indexer_split_path(indexer, value, name)
1943 else:
-> 1944 self._setitem_single_block(indexer, value, name)
File ~\anaconda3\Lib\site-packages\pandas\core\indexing.py:2218, in _iLocIndexer._setitem_single_block(self, indexer, value, name)
2215 self.obj._check_is_chained_assignment_possible()
2217 # actually do the set
-> 2218 self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
2219 self.obj._maybe_update_cacher(clear=True, inplace=True)
File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:415, in BaseBlockManager.setitem(self, indexer, value, warn)
411 # No need to split if we either set all columns or on a single block
412 # manager
413 self = self.copy()
--> 415 return self.apply("setitem", indexer=indexer, value=value)
File ~\anaconda3\Lib\site-packages\pandas\core\internals\managers.py:363, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
361 applied = b.apply(f, **kwargs)
362 else:
--> 363 applied = getattr(b, f)(**kwargs)
364 result_blocks = extend_blocks(applied, result_blocks)
366 out = type(self).from_blocks(result_blocks, self.axes)
File ~\anaconda3\Lib\site-packages\pandas\core\internals\blocks.py:1432, in Block.setitem(self, indexer, value, using_cow)
1430 except (TypeError, ValueError) as err:
1431 if is_list_like(casted):
-> 1432 raise ValueError(
1433 "setting an array element with a sequence."
1434 ) from err
1435 raise
1436 return self
ValueError: setting an array element with a sequence.
I’m not sure whether I actually fixed the dimensionality problems in my model or not! I am essentially trying to get a posterior for each batter, where each batter has a different number of observations, all stacked together as the observed y. I have read all about data containers, dimensionality and so on, but I’m still extremely stuck.