Sure, this should run for you with the previously shared csv. My only issue right now is that each group will have different length observed values for the Bernoulli and the Gamma since the Bernoulli has all records and the Gamma has only the “successful” records. So when I use sample_posterior_predictive I get different length arrays for y_bernoulli and y_gamma and I’m not sure how to handle that to get to an HDI for each group. I think I could just use the values from the posterior trace with numpy or scipy but I’d prefer to use sample_posterior_predictive if possible.
df = pl.read_csv(
"sample_data.csv",
schema_overrides={"group": pl.String, "record": pl.String}
)
gamma_obs = df.filter(pl.col("observed_value") > 0).select("observed_value").to_numpy().squeeze()
bernoulli_obs = df.select("observed_value").to_numpy().squeeze()
def factorize(dataset, col_name):
dataset = dataset.with_columns(pl.col(col_name).cast(pl.Categorical).to_physical().alias("idx"))
categories = dataset.select([col_name, "idx"]).unique().sort("idx").select(col_name).to_numpy().squeeze()
idx = dataset.select("idx").to_numpy().squeeze()
return idx, categories
group_idx, groups = factorize(df, "group")
record_idx, records = factorize(df, "record")
nonzero_group_idx, non_zero_groups = factorize(df.filter(pl.col("observed_value") > 0), "group")
nonzero_record_idx, nonzero_records = factorize(df.filter(pl.col("observed_value") > 0), "record")
coords = {
"groups": groups,
"records": records,
"non_zero_groups": non_zero_groups,
"non_zero_records": nonzero_records,
}
with pm.Model(coords=coords) as model:
# HYPERPRIORS ON ALPHA
a_alpha = pm.Gamma("a_alpha", alpha=26.4, beta=3.25)
a_beta = pm.Gamma("a_beta", alpha=26.4, beta=3.25)
# ALPHA
alpha = pm.Gamma("alpha", alpha=a_alpha, beta=a_beta, dims="groups")
# HYPERPRIORS ON BETA
b_alpha = pm.Gamma("b_alpha", alpha=26.4, beta=3.25)
b_beta = pm.Gamma("b_beta", alpha=26.4, beta=3.25)
# BETA
beta = pm.Gamma("beta", alpha=b_alpha, beta=b_beta, dims="groups")
# HYPERPRIORS ON PSI
p_alpha = pm.Gamma("p_alpha", alpha=2.78, beta=7.37)
p_beta = pm.Gamma("p_beta", alpha=22.2, beta=24)
# PSI
psi = pm.Beta("psi", alpha=p_alpha, beta=p_beta, dims="groups")
# MU CALC
mu = pm.Deterministic("mu", alpha / beta, dims="groups")
y_bernoulli = pm.Bernoulli(
"y_bernoulli",
p=psi[group_idx],
observed=bernoulli_obs,
dims="records"
)
y_gamma = pm.Gamma(
"y_gamma",
alpha=alpha[nonzero_group_idx],
beta=beta[nonzero_group_idx],
observed=gamma_obs,
dims="non_zero_records"
)
pm.model_to_graphviz(model)