You could fit this toy model very easily with Bambi as follows (no data-processing needed)
import arviz as az
import bambi as bmb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
toy_data = pd.DataFrame(
{
"region":["West"] * 25 + ["East"] * 10,
"state": ["CA"] * 15 + ["NV"] * 10 + ["NY"] * 10,
"county":["Los Angeles"] * 10 + ["San Diego"] * 3 + ["Orange"] * 2 + ["Clark"] * 5 + ["Washoe"] * 5 + ["Kings"] * 5 + ["Queens"] * 5,
"y": (
list(2 + 0.1 * np.random.randn(10))
+ list(2.5 + 0.1 * np.random.randn(3))
+ list(3 + 0.1 * np.random.randn(2))
+ list(5 + 0.1 * np.random.randn(5))
+ list(5.5 + 0.1 * np.random.randn(5))
+ list(8 + 0.1 * np.random.randn(5))
+ list(8.5 + 0.1 * np.random.randn(5))
)
}
)
intercept_prior = bmb.Prior("Normal", mu=0, sigma=1)
group_specific_sd = bmb.Prior("HalfNormal", sigma=1)
group_specific_prior = bmb.Prior("Normal", mu=0, sigma=group_specific_sd)
sigma_prior = bmb.Prior("HalfNormal", sigma=0.5)
priors = {
"Intercept": intercept_prior,
"group_specific": group_specific_prior,
"sigma": sigma_prior
}
# (1 | state/county) means you want a group-specific intercept for each sate
# and each county within each state.
model_bmb = bmb.Model("y ~ (1|state/county)", toy_data, priors=priors)
idata = model_bmb.fit(target_accept=0.9)
az.plot_trace(idata, backend_kwargs={"tight_layout":True})
fig = plt.gcf()
fig.set_facecolor("w")
fig.savefig("posterior.png", dpi=300)
Note there are some divergences, but that also happens with the PyMC model. I think that’s because we’re using a hierarchical component for the state when there are only 3 states, and the sample sizes for some counties are very low (2 in one case for example)
EDIT Since county names are unique within states, you can also write "y ~ (1|state) + (1|county)".
