Hello,
I am trying to model the price of properties by block, zone, municipality and state. As it sounds, there is a natural hierarchy where block should follow the mean of the zone, the zone the mean of the municipality and so on. I have strong arguments to model the data like this.
I model the prices as follows:
# multi hierarchical model with cvegeo as the finest level, id_sepomex as the second finest level, cve_mun as the third finest level, and cve_ent as the coarsest level
with pm.Model(
coords_mutable={
'cveent': df_work['cveent_index'].unique(),
'cvemun': df_work['cvemun_index'].unique(),
'sepomex': df_work['sepomex_index'].unique(),
'cvegeo': df_work['cvegeo_index'].unique(),
'obs_id': df_work.index
}
) as hierarchical_model:
# Data
idx_cveent = pm.MutableData('idx_cveent', df_work['cveent_index'].values)
idx_cvemun = pm.MutableData('idx_cvemun', df_work['cvemun_index'].values)
idx_sepomex = pm.MutableData('idx_sepomex', df_work['sepomex_index'].values)
idx_cvegeo = pm.MutableData('idx_cvegeo', df_work['cvegeo_index'].values)
# Hyperpriors
# coarsest level
mu_cve_ent = pm.Normal('mu_cve_ent', mu=0, sigma=1, dims='cveent')
sigma_cve_ent = pm.Exponential('sigma_cve_ent', 10, dims='cveent')
# third finest level
mu_cve_mun = pm.Normal('mu_cve_mun', mu=mu_cve_ent[idx_cveent], sigma=sigma_cve_ent[idx_cveent], dims='cvemun')
sigma_cve_mun = pm.Exponential('sigma_cve_mun', 10, dims='cvemun')
# second finest level
mu_sepomex = pm.Normal('mu_sepomex', mu=mu_cve_mun[idx_cvemun], sigma=sigma_cve_mun[idx_cvemun], dims='sepomex')
sigma_sepomex = pm.Exponential('sigma_sepomex', 10, dims='sepomex')
# finest level
mu_cvegeo = pm.Normal('mu_cvegeo', mu=mu_sepomex[idx_sepomex], sigma=sigma_sepomex[idx_sepomex], dims='cvegeo')
# Priors
sigma_price = pm.Exponential('sigma_price', 100, dims='obs_id')
mu_price = pm.Deterministic('mu_price', mu_cvegeo[idx_cvegeo], dims='obs_id')
# Likelihood
price = pm.LogNormal('price', mu=mu_price, sigma=sigma_price, observed=df_work['price'], dims='obs_id')
However, when trying to sample from the prior I get the following error
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
968 try:
969 outputs = (
--> 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/graph/op.py:543, in Op.make_py_thunk.<locals>.rval(p, i, o, n, params)
539 @is_thunk_type
540 def rval(
541 p=p, i=node_input_storage, o=node_output_storage, n=node, params=None
542 ):
--> 543 r = p(n, [x[0] for x in i], o)
544 for o in node.outputs:
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:378, in RandomVariable.perform(self, node, inputs, outputs)
376 rng_var_out[0] = rng
--> 378 smpl_val = self.rng_fn(rng, *(args + [size]))
380 if (
381 not isinstance(smpl_val, np.ndarray)
382 or str(smpl_val.dtype) != out_var.type.dtype
383 ):
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:164, in RandomVariable.rng_fn(self, rng, *args, **kwargs)
163 """Sample a numeric random variate."""
--> 164 return getattr(rng, self.name)(*args, **kwargs)
File _generator.pyx:1220, in numpy.random._generator.Generator.normal()
File _common.pyx:600, in numpy.random._common.cont()
File _common.pyx:517, in numpy.random._common.cont_broadcast_2()
File __init__.pxd:738, in numpy.PyArray_MultiIterNew3()
ValueError: shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg 0 with shape (3,) and arg 1 with shape (2530,).
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[13], line 3
1 with hierarchical_model:
2 # Sample prior
----> 3 prior = pm.sample_prior_predictive(100, random_seed=3)
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/forward.py:425, in sample_prior_predictive(samples, model, var_names, random_seed, return_inferencedata, idata_kwargs, compile_kwargs)
423 # All model variables have a name, but mypy does not know this
424 _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}") # type: ignore
--> 425 values = zip(*(sampler_fn() for i in range(samples)))
427 data = {k: np.stack(v) for k, v in zip(names, values)}
428 if data is None:
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/forward.py:425, in <genexpr>(.0)
423 # All model variables have a name, but mypy does not know this
424 _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}") # type: ignore
--> 425 values = zip(*(sampler_fn() for i in range(samples)))
427 data = {k: np.stack(v) for k, v in zip(names, values)}
428 if data is None:
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/compile/function/types.py:983, in Function.__call__(self, *args, **kwargs)
981 if hasattr(self.vm, "thunks"):
982 thunk = self.vm.thunks[self.vm.position_of_error]
--> 983 raise_with_op(
984 self.maker.fgraph,
985 node=self.vm.nodes[self.vm.position_of_error],
986 thunk=thunk,
987 storage_map=getattr(self.vm, "storage_map", None),
988 )
989 else:
990 # old-style linkers raise their own exceptions
991 raise
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/link/utils.py:535, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
530 warnings.warn(
531 f"{exc_type} error does not allow us to add an extra error message"
532 )
533 # Some exception need extra parameter in inputs. So forget the
534 # extra long error message in that case.
--> 535 raise exc_value.with_traceback(exc_trace)
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
967 t0_fn = time.perf_counter()
968 try:
969 outputs = (
--> 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
975 restore_defaults()
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/graph/op.py:543, in Op.make_py_thunk.<locals>.rval(p, i, o, n, params)
539 @is_thunk_type
540 def rval(
541 p=p, i=node_input_storage, o=node_output_storage, n=node, params=None
542 ):
--> 543 r = p(n, [x[0] for x in i], o)
544 for o in node.outputs:
545 compute_map[o][0] = True
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:378, in RandomVariable.perform(self, node, inputs, outputs)
374 rng = copy(rng)
376 rng_var_out[0] = rng
--> 378 smpl_val = self.rng_fn(rng, *(args + [size]))
380 if (
381 not isinstance(smpl_val, np.ndarray)
382 or str(smpl_val.dtype) != out_var.type.dtype
383 ):
384 smpl_val = _asarray(smpl_val, dtype=out_var.type.dtype)
File ~/opt/anaconda3/envs/pymc_env/lib/python3.11/site-packages/pytensor/tensor/random/op.py:164, in RandomVariable.rng_fn(self, rng, *args, **kwargs)
162 def rng_fn(self, rng, *args, **kwargs) -> Union[int, float, np.ndarray]:
163 """Sample a numeric random variate."""
--> 164 return getattr(rng, self.name)(*args, **kwargs)
File _generator.pyx:1220, in numpy.random._generator.Generator.normal()
File _common.pyx:600, in numpy.random._common.cont()
File _common.pyx:517, in numpy.random._common.cont_broadcast_2()
File __init__.pxd:738, in numpy.PyArray_MultiIterNew3()
ValueError: shape mismatch: objects cannot be broadcast to a single shape. Mismatch is between arg 0 with shape (3,) and arg 1 with shape (2530,).
Apply node that caused the error: normal_rv{0, (0, 0), floatX, True}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x1755A2CE0>), MakeVector{dtype='int64'}.0, 11, AdvancedSubtensor1.0, AdvancedSubtensor1.0)
Toposort index: 6
Inputs types: [RandomGeneratorType, TensorType(int64, shape=(1,)), TensorType(int64, shape=()), TensorType(float64, shape=(None,)), TensorType(float64, shape=(None,))]
Inputs shapes: ['No shapes', (1,), (), (2530,), (2530,)]
Inputs strides: ['No strides', (8,), (), (8,), (8,)]
Inputs values: [Generator(PCG64) at 0x1755A2CE0, array([3]), array(11), 'not shown', 'not shown']
Outputs clients: [['output'], ['output', AdvancedSubtensor1(mu_cve_mun, idx_cvemun)]]
HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.
In my data there is only 1 entity, 3 municipalities, 24 neighborhoods and 701 blocks.
Can someone please help me?
Regards,
RAVJ
PD: this is the graph of my model