LKJCorr returning error

I’ve just tried setting the dimensions explicitly using shapes to make the model easier to follow, but I can’t see anything wrong with the dims/shapes. Does it look like there’s a misspecified dimension somewhere?

obs.shape = (100, 7)
group.shape = (100,)
corr_packed.eval().shape = (21,)
sigma.eval().shape = (2, 7)
corr_upper.eval().shape = (7, 7)
corr.eval().shape = (7, 7)
sigma_diag.eval().shape = (2, 7, 7)
cov.eval().shape = (2, 7, 7)
cov[group].eval().shape = (100, 7, 7)
chol.eval().shape = (2, 7, 7)
chol[group].eval().shape = (100, 7, 7)
likelihood.eval().shape = (100, 7)
from pymc.distributions.transforms import Interval

class MultivariateIntervalTransform(Interval):
    """Interval transform whose log-Jacobian determinant is reduced over the last axis.

    Behaves like the parent ``Interval`` transform except that the value
    returned by ``log_jac_det`` is summed along axis ``-1``.
    """

    name = "interval"

    def log_jac_det(self, *args):
        """Return the parent's ``log_jac_det`` result summed over the last axis.

        All positional arguments are forwarded unchanged to the parent
        implementation.
        """
        elementwise_log_det = super().log_jac_det(*args)
        return elementwise_log_det.sum(-1)
    
# Transform instance built with the arguments (-1.0, 1.0) -- presumably the
# lower/upper bounds of the interval; confirm against the Interval signature.
tr = MultivariateIntervalTransform(-1.0, 1.0)

# Sizes that appear as literals throughout the model below:
# n_groups = 2
# n_predictors = 7
# N = 100
# NOTE: `pm`, `pt`, `group` and `obs` are not defined in this snippet; they
# must already exist in the session (the shapes printed above report
# obs as (100, 7) and group as (100,)).

with pm.Model() as m:
    
    # Correlation prior shared by both groups; evaluates to a flat vector of
    # length 21 (see printed shapes), i.e. one entry per strict-upper-triangle
    # cell of a 7x7 matrix.
    corr_packed = pm.LKJCorr("x", n=7, eta=1, transform=tr) 
    # One scale per (group, predictor) pair.
    sigma = pm.HalfCauchy("sigma", 1, shape=(2, 7))         
    # Indices of the strict upper triangle (k=1 excludes the diagonal).
    triu_idx = pt.triu_indices(7, k=1)
    # Scatter the packed values into the upper triangle of a 7x7 zero matrix.
    corr_upper = pt.set_subtensor(pt.zeros((7, 7))[triu_idx], corr_packed)
    # Mirror the upper triangle and put ones on the diagonal.
    corr = pt.eye(7) + corr_upper + corr_upper.T
    # Per-group matrix eye(7) * sigma[i]: the identity with its columns scaled
    # by that group's sigmas, stacked to shape (2, 7, 7).
    sigma_diag = pt.stack([pt.eye(7) * sigma[i] for i in range(2)])
    # Per-group covariance: sigma_diag[i] @ corr @ sigma_diag[i].
    cov = pt.stack([sigma_diag[i] @ corr @ sigma_diag[i] for i in range(2)])
    # Per-group Cholesky factor of the covariance.
    # NOTE(review): `chol` is not passed to the likelihood below, which uses
    # `cov` instead; within this snippet it is only a diagnostic value.
    chol = pt.stack([pt.linalg.cholesky(cov[i]) for i in range(2)])
    # Observed likelihood: cov[group] selects one (7, 7) covariance per row of
    # `obs`, giving a (100, 7, 7) batch (see printed shapes).
    likelihood = pm.MvNormal("likelihood", mu=0, cov=cov[group], observed=obs, shape=(100, 7))
    
# Draw posterior samples, requesting the numpyro NUTS sampler.
with m:
    idata = pm.sample(tune=1000, draws=1000, nuts_sampler="numpyro")
{
	"name": "ImportError",
	"message": "/home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 73
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.",
	"stack": "---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1230 location = dlimport_workdir(self.dirname)
-> 1231 module = lnk.compile_cmodule(location)
   1232 name = module.__file__

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_

During handling of the above exception, another exception occurred:

ImportError                               Traceback (most recent call last)
/home/cao/projects/pymc_experiments/test.ipynb Cell 10 line 2
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=24'>25</a>     likelihood = pm.MvNormal(\"likelihood\", mu=0, cov=cov[group], observed=obs, shape=(100, 7))
     <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=26'>27</a> with m:
---> <a href='vscode-notebook-cell://wsl%2Bubuntu-20.04/home/cao/projects/pymc_experiments/test.ipynb#Y101sdnNjb2RlLXJlbW90ZQ%3D%3D?line=27'>28</a>     idata = pm.sample(tune=1000, draws=1000, nuts_sampler=\"numpyro\")

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:689, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    686         auto_nuts_init = False
    688 initial_points = None
--> 689 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    691 if nuts_sampler != \"pymc\":
    692     if not isinstance(step, NUTS):

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:239, in assign_step_methods(model, step, methods, step_kwargs)
    231         selected = max(
    232             methods_list,
    233             key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence(  # type: ignore
    234                 var, has_gradient
    235             ),
    236         )
    237         selected_steps.setdefault(selected, []).append(var)
--> 239 return instantiate_steppers(model, steps, selected_steps, step_kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/sampling/mcmc.py:140, in instantiate_steppers(model, steps, selected_steps, step_kwargs)
    138         args = step_kwargs.get(name, {})
    139         used_keys.add(name)
--> 140         step = step_class(vars=vars, model=model, **args)
    141         steps.append(step)
    143 unused_args = set(step_kwargs).difference(used_keys)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/nuts.py:180, in NUTS.__init__(self, vars, max_treedepth, early_max_treedepth, **kwargs)
    122 def __init__(self, vars=None, max_treedepth=10, early_max_treedepth=8, **kwargs):
    123     r\"\"\"Set up the No-U-Turn sampler.
    124 
    125     Parameters
   (...)
    178     `pm.sample` to the desired number of tuning steps.
    179     \"\"\"
--> 180     super().__init__(vars, **kwargs)
    182     self.max_treedepth = max_treedepth
    183     self.early_max_treedepth = early_max_treedepth

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/hmc/base_hmc.py:109, in BaseHMC.__init__(self, vars, scaling, step_scale, is_cov, model, blocked, potential, dtype, Emax, target_accept, gamma, k, t0, adapt_step_size, step_rand, **pytensor_kwargs)
    107 else:
    108     vars = get_value_vars_from_user_vars(vars, self._model)
--> 109 super().__init__(vars, blocked=blocked, model=self._model, dtype=dtype, **pytensor_kwargs)
    111 self.adapt_step_size = adapt_step_size
    112 self.Emax = Emax

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/step_methods/arraystep.py:164, in GradientSharedStep.__init__(self, vars, model, blocked, dtype, logp_dlogp_func, **pytensor_kwargs)
    161 model = modelcontext(model)
    163 if logp_dlogp_func is None:
--> 164     func = model.logp_dlogp_function(vars, dtype=dtype, **pytensor_kwargs)
    165 else:
    166     func = logp_dlogp_func

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:618, in Model.logp_dlogp_function(self, grad_vars, tempered, **kwargs)
    612 ip = self.initial_point(0)
    613 extra_vars_and_values = {
    614     var: ip[var.name]
    615     for var in self.value_vars
    616     if var in input_vars and var not in grad_vars
    617 }
--> 618 return ValueGradFunction(costs, grad_vars, extra_vars_and_values, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/model/core.py:350, in ValueGradFunction.__init__(self, costs, grad_vars, extra_vars_and_values, dtype, casting, compute_grads, **kwargs)
    346     outputs = [cost]
    348 inputs = grad_vars
--> 350 self._pytensor_function = compile_pymc(inputs, outputs, givens=givens, **kwargs)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pymc/pytensorf.py:991, in compile_pymc(inputs, outputs, random_seed, mode, **kwargs)
    989 opt_qry = mode.provided_optimizer.including(\"random_make_inplace\", check_parameter_opt)
    990 mode = Mode(linker=mode.linker, optimizer=opt_qry)
--> 991 pytensor_function = pytensor.function(
    992     inputs,
    993     outputs,
    994     updates={**rng_updates, **kwargs.pop(\"updates\", {})},
    995     mode=mode,
    996     **kwargs,
    997 )
    998 return pytensor_function

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/__init__.py:315, in function(inputs, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input)
    309     fn = orig_function(
    310         inputs, outputs, mode=mode, accept_inplace=accept_inplace, name=name
    311     )
    312 else:
    313     # note: pfunc will also call orig_function -- orig_function is
    314     #      a choke point that all compilation must pass through
--> 315     fn = pfunc(
    316         params=inputs,
    317         outputs=outputs,
    318         mode=mode,
    319         updates=updates,
    320         givens=givens,
    321         no_default_updates=no_default_updates,
    322         accept_inplace=accept_inplace,
    323         name=name,
    324         rebuild_strict=rebuild_strict,
    325         allow_input_downcast=allow_input_downcast,
    326         on_unused_input=on_unused_input,
    327         profile=profile,
    328         output_keys=output_keys,
    329     )
    330 return fn

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/pfunc.py:469, in pfunc(params, outputs, mode, updates, givens, no_default_updates, accept_inplace, name, rebuild_strict, allow_input_downcast, profile, on_unused_input, output_keys, fgraph)
    455     profile = ProfileStats(message=profile)
    457 inputs, cloned_outputs = construct_pfunc_ins_and_outs(
    458     params,
    459     outputs,
   (...)
    466     fgraph=fgraph,
    467 )
--> 469 return orig_function(
    470     inputs,
    471     cloned_outputs,
    472     mode,
    473     accept_inplace=accept_inplace,
    474     name=name,
    475     profile=profile,
    476     on_unused_input=on_unused_input,
    477     output_keys=output_keys,
    478     fgraph=fgraph,
    479 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1762, in orig_function(inputs, outputs, mode, accept_inplace, name, profile, on_unused_input, output_keys, fgraph)
   1750     m = Maker(
   1751         inputs,
   1752         outputs,
   (...)
   1759         fgraph=fgraph,
   1760     )
   1761     with config.change_flags(compute_test_value=\"off\"):
-> 1762         fn = m.create(defaults)
   1763 finally:
   1764     if profile and fn:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/compile/function/types.py:1654, in FunctionMaker.create(self, input_storage, storage_map)
   1651 start_import_time = pytensor.link.c.cmodule.import_time
   1653 with config.change_flags(traceback__limit=config.traceback__compile_limit):
-> 1654     _fn, _i, _o = self.linker.make_thunk(
   1655         input_storage=input_storage_lists, storage_map=storage_map
   1656     )
   1658 end_linker = time.perf_counter()
   1660 linker_time = end_linker - start_linker

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/basic.py:245, in LocalLinker.make_thunk(self, input_storage, output_storage, storage_map, **kwargs)
    238 def make_thunk(
    239     self,
    240     input_storage: Optional[\"InputStorageType\"] = None,
   (...)
    243     **kwargs,
    244 ) -> tuple[\"BasicThunkType\", \"InputStorageType\", \"OutputStorageType\"]:
--> 245     return self.make_all(
    246         input_storage=input_storage,
    247         output_storage=output_storage,
    248         storage_map=storage_map,
    249     )[:3]

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1244, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1242             thunks[-1].lazy = False
   1243     except Exception:
-> 1244         raise_with_op(fgraph, node)
   1246 t1 = time.perf_counter()
   1248 if self.profile:

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
    526     warnings.warn(
    527         f\"{exc_type} error does not allow us to add an extra error message\"
    528     )
    529     # Some exception need extra parameter in inputs. So forget the
    530     # extra long error message in that case.
--> 531 raise exc_value.with_traceback(exc_trace)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/vm.py:1235, in VMLinker.make_all(self, profiler, input_storage, output_storage, storage_map)
   1230 thunk_start = time.perf_counter()
   1231 # no-recycling is done at each VM.__call__ So there is
   1232 # no need to cause duplicate c code by passing
   1233 # no_recycling here.
   1234 thunks.append(
-> 1235     node.op.make_thunk(node, storage_map, compute_map, [], impl=impl)
   1236 )
   1237 linker_make_thunk_time[node] = time.perf_counter() - thunk_start
   1238 if not hasattr(thunks[-1], \"lazy\"):
   1239     # We don't want all ops maker to think about lazy Ops.
   1240     # So if they didn't specify that its lazy or not, it isn't.
   1241     # If this member isn't present, it will crash later.

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:119, in COp.make_thunk(self, node, storage_map, compute_map, no_recycling, impl)
    115 self.prepare_node(
    116     node, storage_map=storage_map, compute_map=compute_map, impl=\"c\"
    117 )
    118 try:
--> 119     return self.make_c_thunk(node, storage_map, compute_map, no_recycling)
    120 except (NotImplementedError, MethodNotDefined):
    121     # We requested the c code, so don't catch the error.
    122     if impl == \"c\":

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/op.py:84, in COp.make_c_thunk(self, node, storage_map, compute_map, no_recycling)
     82         print(f\"Disabling C code for {self} due to unsupported float16\")
     83         raise NotImplementedError(\"float16\")
---> 84 outputs = cl.make_thunk(
     85     input_storage=node_input_storage, output_storage=node_output_storage
     86 )
     87 thunk, node_input_filters, node_output_filters = outputs
     89 @is_cthunk_wrapper_type
     90 def rval():

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1209, in CLinker.make_thunk(self, input_storage, output_storage, storage_map, cache, **kwargs)
   1174 \"\"\"Compile this linker's `self.fgraph` and return a function that performs the computations.
   1175 
   1176 The return values can be used as follows:
   (...)
   1206 
   1207 \"\"\"
   1208 init_tasks, tasks = self.get_init_tasks()
-> 1209 cthunk, module, in_storage, out_storage, error_storage = self.__compile__(
   1210     input_storage, output_storage, storage_map, cache
   1211 )
   1213 res = _CThunk(cthunk, init_tasks, tasks, error_storage, module)
   1214 res.nodes = self.node_order

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1129, in CLinker.__compile__(self, input_storage, output_storage, storage_map, cache)
   1127 input_storage = tuple(input_storage)
   1128 output_storage = tuple(output_storage)
-> 1129 thunk, module = self.cthunk_factory(
   1130     error_storage,
   1131     input_storage,
   1132     output_storage,
   1133     storage_map,
   1134     cache,
   1135 )
   1136 return (
   1137     thunk,
   1138     module,
   (...)
   1147     error_storage,
   1148 )

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1653, in CLinker.cthunk_factory(self, error_storage, in_storage, out_storage, storage_map, cache)
   1651     if cache is None:
   1652         cache = get_module_cache()
-> 1653     module = cache.module_from_key(key=key, lnk=self)
   1655 vars = self.inputs + self.outputs + self.orphans
   1656 # List of indices that should be ignored when passing the arguments
   1657 # (basically, everything that the previous call to uniq eliminated)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:1231, in ModuleCache.module_from_key(self, key, lnk)
   1229 try:
   1230     location = dlimport_workdir(self.dirname)
-> 1231     module = lnk.compile_cmodule(location)
   1232     name = module.__file__
   1233     assert name.startswith(location)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/basic.py:1552, in CLinker.compile_cmodule(self, location)
   1550 try:
   1551     _logger.debug(f\"LOCATION {location}\")
-> 1552     module = c_compiler.compile_str(
   1553         module_name=mod.code_hash,
   1554         src_code=src_code,
   1555         location=location,
   1556         include_dirs=self.header_dirs(),
   1557         lib_dirs=self.lib_dirs(),
   1558         libs=libs,
   1559         preargs=preargs,
   1560     )
   1561 except Exception as e:
   1562     e.args += (str(self.fgraph),)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:2652, in GCC_compiler.compile_str(module_name, src_code, location, include_dirs, lib_dirs, libs, preargs, py_module, hide_symbols)
   2650     pass
   2651 assert os.path.isfile(lib_filename)
-> 2652 return dlimport(lib_filename)

File ~/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/link/c/cmodule.py:332, in dlimport(fullpath, suffix)
    330 with warnings.catch_warnings():
    331     warnings.filterwarnings(\"ignore\", message=\"numpy.ndarray size changed\")
--> 332     rval = __import__(module_name, {}, {}, [module_name])
    333 t1 = time.perf_counter()
    334 import_time += t1 - t0

ImportError: /home/cao/.pytensor/compiledir_Linux-5.15-microsoft-standard-WSL2-x86_64-with-glibc2.31-x86_64-3.11.5-64/tmpdcml6aoa/m8c7f7341b546dd88fd9d5c257900e34b310c0b1b35fe7d5c475a6f74bd303f07.so: undefined symbol: dgemm_
Apply node that caused the error: BatchedDot(ExpandDims{axis=2}.0, ExpandDims{axis=1}.0)
Toposort index: 73
Inputs types: [TensorType(float64, shape=(100, 7, 1)), TensorType(float64, shape=(100, 1, 7))]

Backtrace when the node is created (use PyTensor flag traceback__limit=N to make it longer):
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1037, in <listcomp>
    output_grads = [access_grad_cache(var) for var in node.outputs]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1362, in access_grad_cache
    term = access_term_cache(node)[idx]
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/gradient.py\", line 1192, in access_term_cache
    input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 265, in L_op
    rval = self._bgrad(inputs, outs, ograds)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/tensor/blockwise.py\", line 248, in _bgrad
    igrads = vectorize_graph(
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 307, in vectorize_graph
    vect_node = vectorize_node(node, *vect_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/site-packages/pytensor/graph/replace.py\", line 221, in vectorize_node
    return _vectorize_node(op, node, *batched_inputs)
  File \"/home/cao/miniconda3/envs/pymc/lib/python3.11/functools.py\", line 909, in wrapper
    return dispatch(args[0].__class__)(*args, **kw)

HINT: Use a linker other than the C linker to print the inputs' shapes and strides.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node."
}