I am observing a quite large memory footprint (> 5GiB) for the following simple hierarchical linear regression code.

```
import pytensor
pytensor.config.gcc__cxxflags += " -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"
import numpy as np
import pandas as pd
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
def generate_data(
    num_groups: int = 5,
    num_samples_per_group: int = 20,
    seed: int = 42,
) -> pd.DataFrame:
    """Simulate a hierarchical linear-regression dataset.

    Each group gets its own intercept and slope drawn around the
    population-level values, then ``num_samples_per_group`` (x, y)
    pairs are generated with Gaussian observation noise.

    Args:
        num_groups: Number of groups to simulate.
        num_samples_per_group: Observations generated per group.
        seed: Seed for the random generator. Defaults to 42 — the value
            previously hard-coded — so existing callers see identical data.

    Returns:
        DataFrame with columns ``group`` (int group index), ``y``
        (response) and ``x`` (covariate), one row per observation.
    """
    rng = np.random.default_rng(seed=seed)
    # Population-level ("true") parameters of the simulation.
    alpha = 2.0   # mean intercept
    beta = -0.5   # mean slope
    omega = 0.1   # sd of group-level intercepts
    tau = 0.1     # sd of group-level slopes
    sigma = 1.0   # observation-noise sd
    result: dict = {"group": [], "y": [], "x": []}
    for i in range(num_groups):
        # Group-level parameters drawn around the population values.
        alpha_g = rng.normal(loc=alpha, scale=omega)
        beta_g = rng.normal(loc=beta, scale=tau)
        x = rng.normal(size=num_samples_per_group)
        eps = rng.normal(size=num_samples_per_group)
        y = alpha_g + beta_g * x + sigma * eps
        result["group"] += [i] * num_samples_per_group
        result["y"] += y.tolist()
        result["x"] += x.tolist()
    return pd.DataFrame(result)
def make_model(frame: pd.DataFrame) -> pm.Model:
    """Build the hierarchical (partially pooled) linear-regression model.

    Args:
        frame: Data with columns ``group``, ``x`` and ``y`` as produced
            by ``generate_data``.

    Returns:
        An (unsampled) ``pm.Model`` with one intercept/slope per group.
    """
    coords = {"group": frame["group"].unique()}
    with pm.Model(coords=coords) as model:
        # Data containers: g/x are resizable, y is fixed.
        g = pm.MutableData("g", frame["group"])
        x = pm.MutableData("x", frame["x"])
        y = pm.ConstantData("y", frame["y"])
        # Population-level priors.
        alpha = pm.Normal("alpha")
        beta = pm.Normal("beta")
        omega = pm.HalfNormal("omega")
        sigma = pm.HalfNormal("sigma")
        tau = pm.HalfNormal("tau")
        # Group-level effects, non-centered parameterization.
        epsilon_alpha_g = pm.ZeroSumNormal("epsilon_alpha_g", dims="group")
        alpha_g = pm.Deterministic("alpha_g", alpha + omega * epsilon_alpha_g)
        epsilon_beta_g = pm.ZeroSumNormal("epsilon_beta_g", dims="group")
        beta_g = pm.Deterministic("beta_g", beta + tau * epsilon_beta_g)
        # Per-observation linear predictor via group indexing.
        mu_ = pm.Deterministic("mu_", alpha_g[g] + beta_g[g] * x)
        # Likelihood; shape tied to mu_ so resizing the data containers works.
        pm.Normal("y_obs", mu=mu_, sigma=sigma, observed=y, shape=mu_.shape)
    return model
if __name__ == "__main__":
    # NOTE(review): 5000 groups means thousands of per-group variables in the
    # resulting InferenceData — presumably the source of the large RAM use.
    frame = generate_data(num_groups=5000)
    model = make_model(frame)
    with model:
        trace = pm.sample()
    summary = az.summary(
        trace,
        var_names=["alpha", "beta", "omega", "sigma", "tau"],
    ).loc[:, ["mean", "hdi_3%", "hdi_97%", "ess_bulk", "ess_tail", "r_hat"]]
    print(summary)
```

I assume this is due to the large number of groups, which results in a large number of variables in the `arviz.InferenceData` object returned by the `pm.sample`

call. Would it be possible to use a "memory mapping" so that the "trace" is written directly to disk instead of claiming a lot of RAM when sampling from the model?