I am not very fluent in PyMC directly (I work mostly in Bambi), but after iterating with ChatGPT and correcting a couple of things, the following seems to work (the trace and posterior predictive check both look good too):
n = 5  # number of groups
samples = 10000  # number of simulated observations
# Data generation function
def data_faker(n, samples, mu_a=3, sig_a=5, sig_b=1):
    """Simulate observations with a global baseline plus a group-specific offset.

    Parameters
    ----------
    n : int
        Number of groups.
    samples : int
        Number of observations to draw.
    mu_a, sig_a : float
        Mean and standard deviation of the global baseline component.
    sig_b : float
        Standard deviation of the per-group offset around its group mean.

    Returns
    -------
    xdata : np.ndarray, shape (samples,)
        Simulated continuous outcome.
    bdata : np.ndarray, shape (samples,)
        Integer group label in [0, n) for each observation.
    """
    xdata = np.random.normal(mu_a, sig_a, samples)
    bdata = np.random.randint(low=0, high=n, size=samples)
    # Group means centered on zero, e.g. n=5 -> [-2, -1, 0, 1, 2]
    mu = np.arange(0, n) - (n - 1) / 2
    # Draw each observation's offset directly from its own group's mean.
    # (The original looped over groups, drawing n*samples normals and
    # discarding all but the matching group's values via np.where.)
    xdata += np.random.normal(mu[bdata], sig_b)
    return xdata, bdata
# Generate the synthetic data used to fit the model below
xdata, bdata = data_faker(n, samples)
# Named coordinates so PyMC dims line up with the group labels
coords = {"group": np.arange(n)} # [0, 1, 2, 3, 4]
# NOTE(review): the pasted version lost all indentation inside the `with`
# block, so it would not parse; the structure is restored here.
with pm.Model(coords=coords) as model:
    # Data containers — mutable, so the model can later be re-run on new data
    bdata_shared = pm.Data('bdata_shared', bdata)  # group index per observation
    x_obs_data = pm.Data('x_obs_data', xdata)      # observed outcome per observation
    # Prior for overall mean effect
    a_i = pm.Normal('a_i', mu=3, sigma=5)
    # Prior for group-specific weights; softplus maps to (0, inf) and the
    # division normalizes the weights to sum to 1
    b_ij = pm.Normal('b_ij', mu=0, sigma=1, dims='group', initval=np.zeros(n))
    b_softplus = pt.softplus(b_ij)
    b_ij_binary = pm.Deterministic('b_ij_binary', b_softplus / pt.sum(b_softplus))
    # Prior for group-specific effects
    c = pm.Normal('c', mu=0, sigma=5, dims='group')
    # Expected value per data point: index the group-level vectors by each
    # observation's group label
    b_ij_binary_i = b_ij_binary[bdata_shared]
    c_i = c[bdata_shared]
    x_i = a_i + b_ij_binary_i * c_i
    # Likelihood
    sigma_obs = pm.HalfNormal('sigma_obs', sigma=1)
    x_obs = pm.Normal('x_obs', mu=x_i, sigma=sigma_obs, observed=x_obs_data)
    # Sample from the posterior
    idata = pm.sample()

