Lkjcorr returning error

Hi all,

I am manually creating a covariance matrix using similar methods discussed here because I have grouped-level standard deviations, and therefore, cannot use pm.LKJCholeskyCov as this can only accept .dist() for the sd_dist argument.

I can see there is already an issue raised. Is there any quick workaround for the time being? I really do need to use the pm.LKJCorr and I’m not Python-savvy enough to make the changes suggested mentioned on the previous link.

See codes and error messages below. Note that I am getting this error message on an actual scenario I’m working on where I’m using an pm.MvNormal, with the manually computed covariance matrix and observed data.

N = 100
predictors = ["A", "B", "C"]

coords = {
    "obs_id": np.arange(N),
    "predictors": predictors,
}

with pm.Model(coords=coords) as m:
    corr = pm.LKJCorr("corr", eta=1, n=len(predictors))
    idata = pm.sample(tune=1000, draws=1000)
{
	"name": "NotImplementedError",
	"message": "Univariate transform Interval cannot be applied to multivariate lkjcorr_rv{1, (0, 0), floatX, False}",
	"stack": "---------------------------------------------------------------------------
NotImplementedError                       Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 3 line 1
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=14'>15</a> with pm.Model(coords=coords) as m:
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=15'>16</a>     corr = pm.LKJCorr(\"corr\", eta=1, n=len(predictors))
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=16'>17</a>     idata = pm.sample(tune=1000, draws=1000)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:689, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    686         auto_nuts_init = False
    688 initial_points = None
--> 689 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    691 if nuts_sampler != \"pymc\":
    692     if not isinstance(step, NUTS):

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:217, in assign_step_methods(model, step, methods, step_kwargs)
    215 methods_list: List[Type[BlockedStep]] = list(methods or pm.STEP_METHODS)
    216 selected_steps: Dict[Type[BlockedStep], List] = {}
--> 217 model_logp = model.logp()
    219 for var in model.value_vars:
    220     if var not in assigned_vars:
    221         # determine if a gradient can be computed

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:727, in Model.logp(self, vars, jacobian, sum)
    725 rv_logps: List[TensorVariable] = []
    726 if rvs:
--> 727     rv_logps = transformed_conditional_logp(
    728         rvs=rvs,
    729         rvs_to_values=self.rvs_to_values,
    730         rvs_to_transforms=self.rvs_to_transforms,
    731         jacobian=jacobian,
    732     )
    733     assert isinstance(rv_logps, list)
    735 # Replace random variables by their value variables in potential terms

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/logprob/basic.py:611, in transformed_conditional_logp(rvs, rvs_to_values, rvs_to_transforms, jacobian, **kwargs)
    608     transform_rewrite = TransformValuesRewrite(values_to_transforms)  # type: ignore
    610 kwargs.setdefault(\"warn_rvs\", False)
--> 611 temp_logp_terms = conditional_logp(
    612     rvs_to_values,
    613     extra_rewrites=transform_rewrite,
    614     use_jacobian=jacobian,
    615     **kwargs,
    616 )
    618 # The function returns the logp for every single value term we provided to it.
    619 # This includes the extra values we plugged in above, so we filter those we
    620 # actually wanted in the same order they were given in.
    621 logp_terms = {}

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/logprob/basic.py:541, in conditional_logp(rv_values, warn_rvs, ir_rewriter, extra_rewrites, **kwargs)
    538 q_values = remapped_vars[: len(q_values)]
    539 q_rv_inputs = remapped_vars[len(q_values) :]
--> 541 q_logprob_vars = _logprob(
    542     node.op,
    543     q_values,
    544     *q_rv_inputs,
    545     **kwargs,
    546 )
    548 if not isinstance(q_logprob_vars, (list, tuple)):
    549     q_logprob_vars = [q_logprob_vars]

File ~/miniconda3/envs/pymc/lib/python3.11/functools.py:909, in singledispatch.<locals>.wrapper(*args, **kw)
    905 if not args:
    906     raise TypeError(f'{funcname} requires at least '
    907                     '1 positional argument')
--> 909 return dispatch(args[0].__class__)(*args, **kw)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/logprob/transform_value.py:125, in transformed_value_logprob(op, values, use_jacobian, *rv_outs, **kwargs)
    122 # This case is sometimes, but not always, trivial to accomodate depending on the \"space rank\" of the
    123 # multivariate distribution. See https://proceedings.mlr.press/v130/radul21a.html
    124 elif log_jac_det.ndim > logp.ndim:
--> 125     raise NotImplementedError(
    126         f\"Univariate transform {transform} cannot be applied to multivariate {rv_op}\"
    127     )
    128 else:
    129     # Check there is no broadcasting between logp and jacobian
    130     if logp.type.broadcastable != log_jac_det.type.broadcastable:

NotImplementedError: Univariate transform Interval cannot be applied to multivariate lkjcorr_rv{1, (0, 0), floatX, False}"
}

pymc version: 5.10.1

What do you mean?

Asking because you can pass different prior from the same family like sd_dist=pm.Exponential.dist(lam=[lam1, lam2, lam3,...]). Would that fit your need?

Otherwise the easiest solution is to wait or to downgrade pymc a couple of versions when the Interval transform worked fine still.

Example of group-level standard deviations with 2 groups is below. I don’t think a prior would work in my case because sd_dist.ndim == 2

n_groups = 2

coords = {
    "predictors": predictors,
    "predictors_I": predictors,
    "groups": np.arange(n_groups)
}

with pm.Model(coords=coords) as m:
    
    ...

    corr = pm.LKJCorr("corr", n=len(predictors), eta=1, dims=("predictors", "predictors_I"))
    sd_dist = pm.HalfCauchy("sigma", 1, dims=("groups", "predictors"))
    cov = pm.Deterministic("cov", 
                           pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] \
                               for i in range(n_groups)]), 
                           dims=("groups", "predictors", "predictors_I"))



I tried this as a workaround, the pm.sample_prior_predictive() works but not pm.sample() unfortunately…

Here is the code to generate the dummy data in case you do want to try it:

import string

def generate_dummy_data():

    # dummy data:
    rng = np.random.default_rng(seed=88)
    n_predictors = 7
    predictors = [pred.upper() for pred in string.ascii_letters[:n_predictors]]
    N = 100
    n_groups = 2
    group = rng.binomial(n=1, p=.5, size=N)

    # manually make corr matrix
    sigma = pm.HalfCauchy.dist(1, shape=(n_predictors, n_groups)).eval()
    corr_packed = pm.LKJCorr.dist(n=n_predictors, eta=1).eval()
    triu_idx = np.triu_indices(n_predictors, k=1)
    corr_upper = pt.set_subtensor(pt.zeros((n_predictors, n_predictors))[triu_idx], corr_packed)
    corr = pt.eye(n_predictors) + corr_upper + corr_upper.T
    sigma_diag = (pt.eye(n_predictors)[:,:,None] * sigma).T
    cov = pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] for i in range(n_groups)]).eval()
    obs = pm.MvNormal.dist(0, cov=cov[group]).eval()
    return obs

obs = generate_dummy_data()

Run model on dummy data:

coords = {
    "obs_id": np.arange(N),
    "predictors": predictors,
    "predictors_I": predictors,
    "groups": np.arange(n_groups)
}

def lkj(n, eta, size: TensorVariable,) -> TensorVariable:
    return pm.LKJCorr.dist(n=n, eta=eta, size=size)

with pm.Model(coords=coords) as m:
    
    corr_packed = pm.CustomDist("corr_packed", n_predictors, 1, dist=lkj)    
    sigma = pm.HalfCauchy("sigma", 1, dims=("groups", "predictors"))
    triu_idx = pt.triu_indices(n_predictors, k=1)
    corr_upper = pt.set_subtensor(pt.zeros((n_predictors, n_predictors))[triu_idx], corr_packed)
    corr = pm.Deterministic("corr", pt.eye(n_predictors) + corr_upper + corr_upper.T, dims=("predictors", "predictors_I"))
    
    # n_groups x n_predictors x n_predictors
    sigma_diag = pt.stack([pt.eye(n_predictors) * sigma[i] for i in range(n_groups)])
    cov = pm.Deterministic("cov", 
                           pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] \
                               for i in range(n_groups)]), 
                           dims=("groups", "predictors", "predictors_I"))

    # batched dim on the far left
    # cov[group]: N x n_predictors x n_predictors (n_groups disappear)
    # obs: N x n_predictors (n_groups disappear)
    likelihood = pm.MvNormal("likelihood", mu=0, cov=cov[group], observed=obs, dims=("obs_id", "predictors"))

with m:
    idata_prior = pm.sample_prior_predictive()
    idata = pm.sample(tune=1000, draws=1000, nuts_sampler="numpyro")    

And now getting this error:

{
	"name": "ImportError",
	"message": "/home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpl0m7osww/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 72
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.",
	"stack": "---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1230 location = dlimport_workdir(self.dirname)
-> 1231 module = lnk.compile_cmodule(location)
   1232 name = module.__file__

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpl0m7osww/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_

During handling of the above exception, another exception occurred:

ImportError                               Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 4 line 3
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=30'>31</a> with m:
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=31'>32</a>     idata_prior = pm.sample_prior_predictive()
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#W1sdnNjb2RlLXJlbW90ZQ%3D%3D?line=32'>33</a>     idata = pm.sample(tune=1000, draws=1000, nuts_sampler=\"numpyro\")    

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:689, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    686         auto_nuts_init = False
    688 initial_points = None
--> 689 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    691 if nuts_sampler != \"pymc\":
    692     if not isinstance(step, NUTS):

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:239, in assign_step_methods(model, step, methods, step_kwargs)
    231         selected = max(
    232             methods_list,
    233             key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence(  # type: ignore
    234                 var, has_gradient
    235             ),
    236         )
    237         selected_steps.setdefault(selected, []).append(var)
--> 239 return instantiate_steppers(model, steps, selected_steps, step_kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:140, in instantiate_steppers(model, steps, selected_steps, step_kwargs)
    138         args = step_kwargs.get(name, {})
    139         used_keys.add(name)
--> 140         step = step_class(vars=vars, model=model, **args)
    141         steps.append(step)
    143 unused_args = set(step_kwargs).difference(used_keys)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/nuts.py:180, in NUTS.__init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    122 def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs):
    123     r\"\"\"Set up the No-U-Turn sampler.
    124 
    125     Parameters
   (...)
    178     `pm.sample` to the desired number of tuning steps.
    179     \"\"\"
--> 180     super().__init__(vars, **kwargs)
    182     self.max_treedepth = max_treedepth
    183     self.early_max_treedepth = early_max_treedepth

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/base_hmc.py:109, in BaseHMC.__init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **pytensor_kwargs)
    107 else:
    108     vars = get_value_vars_from_user_vars(vars, self._model)
--> 109 super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs)
    111 self.adapt_step_size = adapt_step_size
    112 self.Emax = Emax

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/arraystep.py:164, in GradientSharedStep.__init__(self, vars, model, blocked, dtype, logp_dlogp_func, **pytensor_kwargs)
    161 model = modelcontext(model)
    163 if logp_dlogp_func is None:
--> 164     func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs)
    165 else:
    166     func = logp_dlogp_func

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:618, in Model.logp_dlogp_function(self, grad_vars, tempered, **kwargs)
    612 ip = self.initial_point(0)
    613 extra_vars_and_values = {
    614     var: ip[var.name]
    615     for var in self.value_vars
    616     if var in input_vars and var not in grad_vars
    617 }
--> 618 return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:350, in ValueGradFunction.__init__(self, costs, grad_vars, extra_vars_and_values, dtype, casting, compute_grads, **kwargs)
    346     outputs = [cost]
    348 inputs = grad_vars
--> 350 self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/pytensorf.py:991, in compile_pymc(inputs, outputs, random_seed, mode, **kwargs)
    989 opt_qry = mode.provided_optimizer.including(\"random_make_inplace\", check_parameter_opt)
    990 mode = Mode(linker=mode.linker, optimizer=opt_qry)
--> 991 pytensor_function = pytensor.function(
    992     inputs,
    993     outputs,
    994     updates={**rng_updates, **kwargs.pop(\"updates\", {})},
    995     mode=mode,
    996     **kwargs,
    997 )
    998 return pytensor_function

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/__init__.py:315, in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    309     fn = orig_function(
    310         inputs, outputs, mode=mode, accept_inplace=accept_inplace, name=name
    311     )
    312 else:
    313     # note: pfunc will also call orig_function -- orig_function is
    314     #      a choke point that all compilation must pass through
--> 315     fn = pfunc(
    316         params=inputs,
    317         outputs=outputs,
    318         mode=mode,
    319         updates=updates,
    320         givens=givens,
    321         no_default_updates=no_default_updates,
    322         accept_inplace=accept_inplace,
    323         name=name,
    324         rebuild_strict=rebuild_strict,
    325         allow_input_downcast=allow_input_downcast,
    326         on_unused_input=on_unused_input,
    327         profile=profile,
    328         output_keys=output_keys,
    329     )
    330 return fn

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/pfunc.py:469, in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys, fgraph)
    455     profile = ProfileStats(message=profile)
    457 inputs, cloned_outputs = construct_pfunc_ins_and_outs(
    458     params,
    459     outputs,
   (...)
    466     fgraph=fgraph,
    467 )
--> 469 return orig_function(
    470     inputs,
    471     cloned_outputs,
    472     mode,
    473     accept_inplace=accept_inplace,
    474     name=name,
    475     profile=profile,
    476     on_unused_input=on_unused_input,
    477     output_keys=output_keys,
    478     fgraph=fgraph,
    479 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1762, in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys, fgraph)
   1750     m = Maker(
   1751         inputs,
   1752         outputs,
   (...)
   1759         fgraph=fgraph,
   1760     )
   1761     with config.change_flags(compute_test_value=\"off\"):
-> 1762         fn = m.create(defaults)
   1763 finally:
   1764     if profile and fn:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1654, in FunctionMaker.create(self, input_storage, storage_map)
   1651 start_import_time = pytensor.link.c.cmodule.import_time
   1653 with config.change_flags(traceback__limit=config.traceback__compile_limit):
-> 1654     _fn, _i, _o = self.linker.make_thunk(
   1655         input_storage=input_storage_lists, storage_map=storage_map
   1656     )
   1658 end_linker = time.perf_counter()
   1660 linker_time = end_linker - start_linker

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/basic.py:245, in LocalLinker.make_thunk(self, input_storage, output_storage, storage_map, **kwargs)
    238 def make_thunk(
    239     self,
    240     input_storage: Optional[\"InputStorageType\"] = None,
   (...)
    243     **kwargs,
    244 ) -> tuple[\"BasicThunkType\", \"InputStorageType\", \"OutputStorageType\"]:
--> 245     return self.make_all(
    246         input_storage=input_storage,
    247         output_storage=output_storage,
    248         storage_map=storage_map,
    249     )[:3]

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1244, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1242             thunks[-1].lazy = False
   1243     except Exception:
-> 1244         raise_with_op(fgraph, node)
   1246 t1 = time.perf_counter()
   1248 if self.profile:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    526     warnings.warn(
    527         f\"{exc_type} error does not allow us to add an extra error message\"
    528     )
    529     # Some exception need extra parameter in inputs. So forget the
    530     # extra long error message in that case.
--> 531 raise exc_value.with_traceback(exc_trace)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1230 thunk_start = time.perf_counter()
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start
   1238 if not hasattr(thunks[-1], \"lazy\"):
   1239     # We don't want all ops maker to think about lazy Ops.
   1240     # So if they didn't specify that its lazy or not, it isn't.
   1241     # If this member isn't present, it will crash later.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    115 self.prepare_node(
    116     node, storage_map=storage_map, compute_map=compute_map, impl=\"c\"
    117 )
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.
    122     if impl == \"c\":

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     82         print(f\"Disabling C code for {self} due to unsupported float16\")
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs
     89 @is_cthunk_wrapper_type
     90 def rval():

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1174 \"\"\"Compile this linker's `self.fgraph` and return a function that performs the computations.
   1175 
   1176 The return values can be used as follows:
   (...)
   1206 
   1207 \"\"\"
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
   1214 res.nodes = self.node_order

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1127 input_storage = tuple(input_storage)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1651     if cache is None:
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans
   1656 # List of indices that should be ignored when passing the arguments
   1657 # (basically, everything that the previous call to uniq eliminated)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1229 try:
   1230     location = dlimport_workdir(self.dirname)
-> 1231     module = lnk.compile_cmodule(location)
   1232     name = module.__file__
   1233     assert name.startswith(location)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1550 try:
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:
   1562     e.args += (str(self.fgraph),)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2650     pass
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    330 with warnings.catch_warnings():
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()
    334 import_time += t1 - t0

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpl0m7osww/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 72
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node."
}

I can try to downgrade pymc to another version as a last resort after everything else fails

That by itself is not a limitation if the LKJ also has a batch dimension (groups in your case)

I’ve tried doing this with LKJCholeskyCov before, but it didn’t work for me. What am I doing wrong here?

def lkj(n, eta, size: TensorVariable,) -> TensorVariable:
    return pm.LKJCorr.dist(n=n, eta=eta, size=size)

with pm.Model(coords=coords) as m:
     
    sd_dist = pm.HalfCauchy.dist(1, shape=(n_groups, n_predictors))
    chol, corr, sigmas = pm.LKJCholeskyCov("L", eta=1, n=n_predictors, sd_dist=sd_dist)
    # chol, corr, sigmas = pm.LKJCholeskyCov("L", eta=1, n=n_predictors, sd_dist=sd_dist, shape=(n_groups,))
    likelihood = pm.MvNormal("likelihood", mu=0, chol=chol[group], observed=obs, dims=("obs_id", "predictors"))

with m:
    idata_prior = pm.sample_prior_predictive()
    # idata = pm.sample(tune=1000, draws=1000, nuts_sampler="numpyro")
{
	"name": "ValueError",
	"message": "Packed triangular is not one dimensional.",
	"stack": "---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 6 line 1
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#X61sdnNjb2RlLXJlbW90ZQ%3D%3D?line=10'>11</a> with pm.Model(coords=coords) as m:
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#X61sdnNjb2RlLXJlbW90ZQ%3D%3D?line=12'>13</a>     sd_dist = pm.HalfCauchy.dist(1, shape=(n_groups, n_predictors))
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#X61sdnNjb2RlLXJlbW90ZQ%3D%3D?line=13'>14</a>     chol, corr, sigmas = pm.LKJCholeskyCov(\"L\", eta=1, n=n_predictors, sd_dist=sd_dist)
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#X61sdnNjb2RlLXJlbW90ZQ%3D%3D?line=14'>15</a>     likelihood = pm.MvNormal(\"likelihood\", mu=0, chol=chol[group], observed=obs, dims=(\"obs_id\", \"predictors\"))
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#X61sdnNjb2RlLXJlbW90ZQ%3D%3D?line=16'>17</a> with m:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/distributions/multivariate.py:1434, in LKJCholeskyCov.__new__(cls, name, eta, n, sd_dist, compute_corr, store_in_trace, **kwargs)
   1432     return packed_chol
   1433 else:
-> 1434     chol, corr, stds = cls.helper_deterministics(n, packed_chol)
   1435     if store_in_trace:
   1436         corr = pm.Deterministic(f\"{name}_corr\", corr)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/distributions/multivariate.py:1451, in LKJCholeskyCov.helper_deterministics(cls, n, packed_chol)
   1449 @classmethod
   1450 def helper_deterministics(cls, n, packed_chol):
-> 1451     chol = pm.expand_packed_triangular(n, packed_chol, lower=True)
   1452     # compute covariance matrix
   1453     cov = pt.dot(chol, chol.T)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/math.py:445, in expand_packed_triangular(n, packed, lower, diagonal_only)
    422 r\"\"\"Convert a packed triangular matrix into a two dimensional array.
    423 
    424 Triangular matrices can be stored with better space efficiency by
   (...)
    442     If true, return only the diagonal of the matrix.
    443 \"\"\"
    444 if packed.ndim != 1:
--> 445     raise ValueError(\"Packed triangular is not one dimensional.\")
    446 if not isinstance(n, int):
    447     raise TypeError(\"n must be an integer\")

ValueError: Packed triangular is not one dimensional."
}

My bad I didn’t know it is still restricted to the vector case… Increase support for batched multivariate distributions · Issue #5383 · pymc-devs/pymc · GitHub

How many groups do you have? If it’s a small group you could create n variables and stack them together.

Anyway, here is a quick fix for the time being:

import pymc as pm
from pymc.distributions.transforms import Interval


class MultivariateIntervalTransform(Interval):
    name = "interval"
        
    def log_jac_det(self, *args):
        return super().log_jac_det(*args).sum(-1)
    
tr = MultivariateIntervalTransform(-1.0, 1.0)

with pm.Model() as m:
    x = pm.LKJCorr("x", n=3, eta=1, transform=tr)
m.logp()  # Should not fail

Thanks for this. Its now letting me work with the LKJcorr, however I can’t seem to insert the covariance into MvNormal. From the link you sent, it looks like batched pm.MvNormal is not yet working?
In this post, you mentioned that pm.MvNormal accepts batched params.

From my attempt below, am I doing something wrong? Or is the batched MvNormal not yet working?

from pymc.distributions.transforms import Interval

class MultivariateIntervalTransform(Interval):
    name = "interval"
        
    def log_jac_det(self, *args):
        return super().log_jac_det(*args).sum(-1)
    
tr = MultivariateIntervalTransform(-1.0, 1.0)

with pm.Model(coords=coords) as m:
    
    corr_packed = pm.LKJCorr("x", n=n_groups, eta=1, transform=tr)
    sigma = pm.HalfCauchy("sigma", 1, dims=("groups", "predictors"))
    triu_idx = pt.triu_indices(n_predictors, k=1)
    corr_upper = pt.set_subtensor(pt.zeros((n_predictors, n_predictors))[triu_idx], corr_packed)
    corr = pm.Deterministic("corr", pt.eye(n_predictors) + corr_upper + corr_upper.T, dims=("predictors", "predictors_I"))
    
    # n_groups x n_predictors x n_predictors
    sigma_diag = pt.stack([pt.eye(n_predictors) * sigma[i] for i in range(n_groups)])
    cov = pm.Deterministic("cov", 
                           pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] \
                               for i in range(n_groups)]), 
                           dims=("groups", "predictors", "predictors_I"))
    
    # batched dim on the far left
    # cov[group]: N x n_predictors x n_predictors (n_groups disappear)
    # obs: N x n_predictors (n_groups disappear)
    likelihood = pm.MvNormal("likelihood", mu=0, cov=cov[group], observed=obs, dims=("obs_id", "predictors"))
    
with m:
    idata_prior = pm.sample_prior_predictive()
    # idata = pm.sample(tune=1000, draws=1000, nuts_sampler="numpyro")
{
	"name": "IndexError",
	"message": "boolean index did not match indexed array along dimension 0; dimension is 2 but corresponding boolean dimension is 100
Apply node that caused the error: AdvancedSubtensor(cov, [ True Fal ... lse  True])
Toposort index: 22
Inputs types: [TensorType(float64, shape=(2, None, 7)), TensorType(bool, shape=(100,))]
Inputs shapes: [(2, 7, 7), (100,)]
Inputs strides: [(392, 56, 8), (1,)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[multivariate_normal_rv{1, (1, 2), floatX, True}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FEA9C0FDC40>), [53], 11, [[0 0 0 0 ... 0 0 0 0]], AdvancedSubtensor.0)]]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3048, in run_cell
    result = self._run_cell(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3103, in _run_cell
    result = runner(coro)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/async_helpers.py\", line 129, in _pseudo_sync_runner
    coro.send(None)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3308, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3490, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File \"/tmp/ipykernel_8875/1979937421.py\", line 26, in <module>
    likelihood = pm.MvNormal(\"likelihood\", mu=0, cov=cov[group==0], observed=obs[group==0])

HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.",
	"stack": "---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
    968 try:
    969     outputs = (
--> 970         self.vm()
    971         if output_subset is None
    972         else self.vm(output_subset=output_subset)
    973     )
    974 except Exception:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
    516 @is_thunk_type
    517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 518     r = p(n, [x[0] for x in i], o)
    519     for o in node.outputs:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/subtensor.py:2678, in AdvancedSubtensor.perform(self, node, inputs, out_)
   2677 (out,) = out_
-> 2678 check_advanced_indexing_dimensions(inputs[0], inputs[1:])
   2679 rval = inputs[0].__getitem__(tuple(inputs[1:]))

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/subtensor.py:2580, in check_advanced_indexing_dimensions(input, idx_list)
   2579     if index.shape[i] != input.shape[dim_seen + i]:
-> 2580         raise IndexError(
   2581             \"boolean index did not match indexed array \"
   2582             f\"along dimension {int(dim_seen + i)}; dimension is \"
   2583             f\"{int(input.shape[dim_seen + i])} but \"
   2584             f\"corresponding boolean dimension is {index.shape[i]}\"
   2585         )
   2586 dim_seen += index.ndim

IndexError: boolean index did not match indexed array along dimension 0; dimension is 2 but corresponding boolean dimension is 100

During handling of the above exception, another exception occurred:

IndexError                                Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 7 line 2
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=25'>26</a>     likelihood = pm.MvNormal(\"likelihood\", mu=0, cov=cov[group==0], observed=obs[group==0])
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=27'>28</a> with m:
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=28'>29</a>     idata_prior = pm.sample_prior_predictive()
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=29'>30</a>     # idata = pm.sample(tune=1000, draws=1000, nuts_sampler=\"numpyro\")

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/forward.py:425, in sample_prior_predictive(samples, model, var_names, random_seed, return_inferencedata, idata_kwargs, compile_kwargs)
    423 # All model variables have a name, but mypy does not know this
    424 _log.info(f\"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}\")  # type: ignore
--> 425 values = zip(*(sampler_fn() for i in range(samples)))
    427 data = {k: np.stack(v) for k, v in zip(names, values)}
    428 if data is None:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/forward.py:425, in <genexpr>(.0)
    423 # All model variables have a name, but mypy does not know this
    424 _log.info(f\"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}\")  # type: ignore
--> 425 values = zip(*(sampler_fn() for i in range(samples)))
    427 data = {k: np.stack(v) for k, v in zip(names, values)}
    428 if data is None:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:983, in Function.__call__(self, *args, **kwargs)
    981     if hasattr(self.vm, \"thunks\"):
    982         thunk = self.vm.thunks[self.vm.position_of_error]
--> 983     raise_with_op(
    984         self.maker.fgraph,
    985         node=self.vm.nodes[self.vm.position_of_error],
    986         thunk=thunk,
    987         storage_map=getattr(self.vm, \"storage_map\", None),
    988     )
    989 else:
    990     # old-style linkers raise their own exceptions
    991     raise

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    526     warnings.warn(
    527         f\"{exc_type} error does not allow us to add an extra error message\"
    528     )
    529     # Some exception need extra parameter in inputs. So forget the
    530     # extra long error message in that case.
--> 531 raise exc_value.with_traceback(exc_trace)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
    967 t0_fn = time.perf_counter()
    968 try:
    969     outputs = (
--> 970         self.vm()
    971         if output_subset is None
    972         else self.vm(output_subset=output_subset)
    973     )
    974 except Exception:
    975     restore_defaults()

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
    516 @is_thunk_type
    517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 518     r = p(n, [x[0] for x in i], o)
    519     for o in node.outputs:
    520         compute_map[o][0] = True

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/subtensor.py:2678, in AdvancedSubtensor.perform(self, node, inputs, out_)
   2676 def perform(self, node, inputs, out_):
   2677     (out,) = out_
-> 2678     check_advanced_indexing_dimensions(inputs[0], inputs[1:])
   2679     rval = inputs[0].__getitem__(tuple(inputs[1:]))
   2680     # When there are no arrays, we are not actually doing advanced
   2681     # indexing, so __getitem__ will not return a copy.
   2682     # Since no view_map is set, we need to copy the returned value

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/subtensor.py:2580, in check_advanced_indexing_dimensions(input, idx_list)
   2578     for i in range(index.ndim):
   2579         if index.shape[i] != input.shape[dim_seen + i]:
-> 2580             raise IndexError(
   2581                 \"boolean index did not match indexed array \"
   2582                 f\"along dimension {int(dim_seen + i)}; dimension is \"
   2583                 f\"{int(input.shape[dim_seen + i])} but \"
   2584                 f\"corresponding boolean dimension is {index.shape[i]}\"
   2585             )
   2586     dim_seen += index.ndim
   2587 else:

IndexError: boolean index did not match indexed array along dimension 0; dimension is 2 but corresponding boolean dimension is 100
Apply node that caused the error: AdvancedSubtensor(cov, [ True Fal ... lse  True])
Toposort index: 22
Inputs types: [TensorType(float64, shape=(2, None, 7)), TensorType(bool, shape=(100,))]
Inputs shapes: [(2, 7, 7), (100,)]
Inputs strides: [(392, 56, 8), (1,)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[multivariate_normal_rv{1, (1, 2), floatX, True}(RandomGeneratorSharedVariable(<Generator(PCG64) at 0x7FEA9C0FDC40>), [53], 11, [[0 0 0 0 ... 0 0 0 0]], AdvancedSubtensor.0)]]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/ipykernel/zmqshell.py\", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3048, in run_cell
    result = self._run_cell(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3103, in _run_cell
    result = runner(coro)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/async_helpers.py\", line 129, in _pseudo_sync_runner
    coro.send(None)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3308, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3490, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/IPython/core/interactiveshell.py\", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File \"/tmp/ipykernel_8875/1979937421.py\", line 26, in <module>
    likelihood = pm.MvNormal(\"likelihood\", mu=0, cov=cov[group==0], observed=obs[group==0])

HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node."
}

Batch MvNormal should work. You can call .eval() on your intermediate variables to see if they are coming out with the shape you expect them to

Here is a test we have for batch params, maybe that helps? https://github.com/pymc-devs/pymc/blob/2e05854c44edbd080085258dd879d9d6769bf683/tests/distributions/test_multivariate.py#L338

I actually have a few groups… am working with a tricky dataset atm :thinking:, so I might look something like this:

with pm.Model(coords=coords) as m:

    ...

    corr_packed = pm.LKJCorr("x", n=K, eta=1, transform=tr)
    sigma = pm.HalfCauchy("sigma", 1, dims=("group1", "group2", ..., "groupJ", "K"))
    triu_idx = pt.triu_indices(K, k=1)
    corr_upper = pt.set_subtensor(pt.zeros((K, K))[triu_idx], corr_packed)
    corr = pm.Deterministic("corr", pt.eye(K) + corr_upper + corr_upper.T, dims=("K", "K_I"))
    
    sigma_diag = pt.stack([pt.eye(n_predictors) * sigma[i] for i in range(K)])
    cov = pm.Deterministic("cov", 
                           pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] \
                               for i in range(K)]), 
                           dims=("group1", "group2", ..., "groupJ", "K", "K_I"))

    ...

running likelihood.eval() works, and it gives the (N x n_predictors) shape I’m after.

Strange when its within the model environment it doesn’t work.

I’ll have a look at the link for test for batch params sometime tomorrow and provide an update on how it goes.

What do you mean?

Within the model environment, sampling doesn’t work and I get the error described here:

Lkjcorr returning error - #9 by mcao

But outside of the model environment (not in with pm.Models()), I can call likelihood.eval() and it works just fine. Calling .eval() is only meant to be used outside of the model environment right? Or can it also be used within the model environment?

.eval() is just for debugging and can be used also inside a model / with model variables.

If you are only getting error with model variables I suspect you did something wrong with the dims?

I’ve just tried setting the dimensions explicitly using shapes to make it easier to follow. But I can’t see there being anything wrong with the dims/shapes. Does it look like there’s a misspecified dimension somewhere?

obs.shape = (100, 7)
group.shape = (100,)
corr_packed.eval().shape = (21,)
sigma.eval().shape = (2, 7)
corr_upper.eval().shape = (7, 7)
corr.eval().shape = (7, 7)
sigma_diag.eval().shape = (2, 7, 7)
cov.eval().shape = (2, 7, 7)
cov[group].eval().shape = (100, 7, 7)
chol.eval().shape = (2, 7, 7)
chol[group].eval().shape = (100, 7, 7)
likelihood.eval().shape = (100, 7)
from pymc.distributions.transforms import Interval

class MultivariateIntervalTransform(Interval):
    name = "interval"
        
    def log_jac_det(self, *args):
        return super().log_jac_det(*args).sum(-1)
    
tr = MultivariateIntervalTransform(-1.0, 1.0)

# n_groups = 2
# n_predictors = 7
# N = 100

with pm.Model() as m:
    
    corr_packed = pm.LKJCorr("x", n=7, eta=1, transform=tr) 
    sigma = pm.HalfCauchy("sigma", 1, shape=(2, 7))         
    triu_idx = pt.triu_indices(7, k=1)
    corr_upper = pt.set_subtensor(pt.zeros((7, 7))[triu_idx], corr_packed)
    corr = pt.eye(7) + corr_upper + corr_upper.T
    sigma_diag = pt.stack([pt.eye(7) * sigma[i] for i in range(2)])
    cov = pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] for i in range(2)])
    chol = pt.stack([pt.linalg.cholesky(cov[i]) for i in range(2)])
    likelihood = pm.MvNormal("likelihood", mu=0, cov=cov[group], observed=obs, shape=(100, 7))
    
with m:
    idata = pm.sample(tune=1000, draws=1000, nuts_sampler="numpyro")
{
	"name": "ImportError",
	"message": "/home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 73
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.",
	"stack": "---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1230 location = dlimport_workdir(self.dirname)
-> 1231 module = lnk.compile_cmodule(location)
   1232 name = module.__file__

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_

During handling of the above exception, another exception occurred:

ImportError                               Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 10 line 2
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=24'>25</a>     likelihood = pm.MvNormal(\"likelihood\", mu=0, cov=cov[group], observed=obs, shape=(100, 7))
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=26'>27</a> with m:
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=27'>28</a>     idata = pm.sample(tune=1000, draws=1000, nuts_sampler=\"numpyro\")

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:689, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    686         auto_nuts_init = False
    688 initial_points = None
--> 689 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    691 if nuts_sampler != \"pymc\":
    692     if not isinstance(step, NUTS):

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:239, in assign_step_methods(model, step, methods, step_kwargs)
    231         selected = max(
    232             methods_list,
    233             key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence(  # type: ignore
    234                 var, has_gradient
    235             ),
    236         )
    237         selected_steps.setdefault(selected, []).append(var)
--> 239 return instantiate_steppers(model, steps, selected_steps, step_kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:140, in instantiate_steppers(model, steps, selected_steps, step_kwargs)
    138         args = step_kwargs.get(name, {})
    139         used_keys.add(name)
--> 140         step = step_class(vars=vars, model=model, **args)
    141         steps.append(step)
    143 unused_args = set(step_kwargs).difference(used_keys)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/nuts.py:180, in NUTS.__init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    122 def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs):
    123     r\"\"\"Set up the No-U-Turn sampler.
    124 
    125     Parameters
   (...)
    178     `pm.sample` to the desired number of tuning steps.
    179     \"\"\"
--> 180     super().__init__(vars, **kwargs)
    182     self.max_treedepth = max_treedepth
    183     self.early_max_treedepth = early_max_treedepth

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/base_hmc.py:109, in BaseHMC.__init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **pytensor_kwargs)
    107 else:
    108     vars = get_value_vars_from_user_vars(vars, self._model)
--> 109 super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs)
    111 self.adapt_step_size = adapt_step_size
    112 self.Emax = Emax

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/arraystep.py:164, in GradientSharedStep.__init__(self, vars, model, blocked, dtype, logp_dlogp_func, **pytensor_kwargs)
    161 model = modelcontext(model)
    163 if logp_dlogp_func is None:
--> 164     func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs)
    165 else:
    166     func = logp_dlogp_func

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:618, in Model.logp_dlogp_function(self, grad_vars, tempered, **kwargs)
    612 ip = self.initial_point(0)
    613 extra_vars_and_values = {
    614     var: ip[var.name]
    615     for var in self.value_vars
    616     if var in input_vars and var not in grad_vars
    617 }
--> 618 return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:350, in ValueGradFunction.__init__(self, costs, grad_vars, extra_vars_and_values, dtype, casting, compute_grads, **kwargs)
    346     outputs = [cost]
    348 inputs = grad_vars
--> 350 self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/pytensorf.py:991, in compile_pymc(inputs, outputs, random_seed, mode, **kwargs)
    989 opt_qry = mode.provided_optimizer.including(\"random_make_inplace\", check_parameter_opt)
    990 mode = Mode(linker=mode.linker, optimizer=opt_qry)
--> 991 pytensor_function = pytensor.function(
    992     inputs,
    993     outputs,
    994     updates={**rng_updates, **kwargs.pop(\"updates\", {})},
    995     mode=mode,
    996     **kwargs,
    997 )
    998 return pytensor_function

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/__init__.py:315, in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    309     fn = orig_function(
    310         inputs, outputs, mode=mode, accept_inplace=accept_inplace, name=name
    311     )
    312 else:
    313     # note: pfunc will also call orig_function -- orig_function is
    314     #      a choke point that all compilation must pass through
--> 315     fn = pfunc(
    316         params=inputs,
    317         outputs=outputs,
    318         mode=mode,
    319         updates=updates,
    320         givens=givens,
    321         no_default_updates=no_default_updates,
    322         accept_inplace=accept_inplace,
    323         name=name,
    324         rebuild_strict=rebuild_strict,
    325         allow_input_downcast=allow_input_downcast,
    326         on_unused_input=on_unused_input,
    327         profile=profile,
    328         output_keys=output_keys,
    329     )
    330 return fn

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/pfunc.py:469, in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys, fgraph)
    455     profile = ProfileStats(message=profile)
    457 inputs, cloned_outputs = construct_pfunc_ins_and_outs(
    458     params,
    459     outputs,
   (...)
    466     fgraph=fgraph,
    467 )
--> 469 return orig_function(
    470     inputs,
    471     cloned_outputs,
    472     mode,
    473     accept_inplace=accept_inplace,
    474     name=name,
    475     profile=profile,
    476     on_unused_input=on_unused_input,
    477     output_keys=output_keys,
    478     fgraph=fgraph,
    479 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1762, in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys, fgraph)
   1750     m = Maker(
   1751         inputs,
   1752         outputs,
   (...)
   1759         fgraph=fgraph,
   1760     )
   1761     with config.change_flags(compute_test_value=\"off\"):
-> 1762         fn = m.create(defaults)
   1763 finally:
   1764     if profile and fn:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1654, in FunctionMaker.create(self, input_storage, storage_map)
   1651 start_import_time = pytensor.link.c.cmodule.import_time
   1653 with config.change_flags(traceback__limit=config.traceback__compile_limit):
-> 1654     _fn, _i, _o = self.linker.make_thunk(
   1655         input_storage=input_storage_lists, storage_map=storage_map
   1656     )
   1658 end_linker = time.perf_counter()
   1660 linker_time = end_linker - start_linker

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/basic.py:245, in LocalLinker.make_thunk(self, input_storage, output_storage, storage_map, **kwargs)
    238 def make_thunk(
    239     self,
    240     input_storage: Optional[\"InputStorageType\"] = None,
   (...)
    243     **kwargs,
    244 ) -> tuple[\"BasicThunkType\", \"InputStorageType\", \"OutputStorageType\"]:
--> 245     return self.make_all(
    246         input_storage=input_storage,
    247         output_storage=output_storage,
    248         storage_map=storage_map,
    249     )[:3]

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1244, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1242             thunks[-1].lazy = False
   1243     except Exception:
-> 1244         raise_with_op(fgraph, node)
   1246 t1 = time.perf_counter()
   1248 if self.profile:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    526     warnings.warn(
    527         f\"{exc_type} error does not allow us to add an extra error message\"
    528     )
    529     # Some exception need extra parameter in inputs. So forget the
    530     # extra long error message in that case.
--> 531 raise exc_value.with_traceback(exc_trace)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1230 thunk_start = time.perf_counter()
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start
   1238 if not hasattr(thunks[-1], \"lazy\"):
   1239     # We don't want all ops maker to think about lazy Ops.
   1240     # So if they didn't specify that its lazy or not, it isn't.
   1241     # If this member isn't present, it will crash later.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    115 self.prepare_node(
    116     node, storage_map=storage_map, compute_map=compute_map, impl=\"c\"
    117 )
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.
    122     if impl == \"c\":

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     82         print(f\"Disabling C code for {self} due to unsupported float16\")
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs
     89 @is_cthunk_wrapper_type
     90 def rval():

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1174 \"\"\"Compile this linker's `self.fgraph` and return a function that performs the computations.
   1175 
   1176 The return values can be used as follows:
   (...)
   1206 
   1207 \"\"\"
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
   1214 res.nodes = self.node_order

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1127 input_storage = tuple(input_storage)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1651     if cache is None:
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans
   1656 # List of indices that should be ignored when passing the arguments
   1657 # (basically, everything that the previous call to uniq eliminated)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1229 try:
   1230     location = dlimport_workdir(self.dirname)
-> 1231     module = lnk.compile_cmodule(location)
   1232     name = module.__file__
   1233     assert name.startswith(location)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1550 try:
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:
   1562     e.args += (str(self.fgraph),)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2650     pass
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    330 with warnings.catch_warnings():
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()
    334 import_time += t1 - t0

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 73
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node."
}

You might need to clear pytensor cache, with pytensor-cache purge (in your terminal, with the PyMC environment activated). Installing the latest version of PyTensor should avoid the problem from resurfacing

Were you pip installing pymc/pytensor by any chance?

Yeah I need to use pip in my work environment because installing packages via conda is extremely slow