Sampling options with many observations and a HurdleGamma likelihood

With the help of ChatGPT, I was able to write the model(s). I have a fixed-effects (observed_value ~ 1 + group) and a hierarchical model (observed_value ~ 1 + (1 | group). The fixed effects model samples in ~11.8 seconds with nutpie, and the hierarchical model samples in ~16.7 seconds with nutpie (for the 2500 rows of data you sent). Numpyro_JAX fails to sample, the the native PYMC sampler is way slower. I got no divergences in either model.

I would share the full plot trace but it takes like 10 min to render. Hopefully, this helps get you where you want to go.

The fixed effects model:

import arviz as az
import pandas as pd
import polars as pl
import pymc as pm
import pytensor.tensor as pt
from scipy.stats import gamma

data_csv = "hurdlegamma_sample.csv"

df = (pl
     .read_csv(data_csv, schema_overrides={'group':pl.String})
     .drop('record')
     .with_columns(group = pl.concat_str(pl.lit('group'), pl.col.group.str.zfill(2), separator="_"))
     .sort(by='group')
     .to_pandas()
     )

# Encode categorical variable as integer index
df["group_code"] = pd.Categorical(df["group"]).codes
group_names = pd.Categorical(df["group"]).categories

# Coordinates for the model
coords = {"group": group_names, "obs_id": df.index}

with pm.Model(coords=coords) as model:
   # Data containers linked to df
   group_idx = pm.Data("group_idx", df["group_code"].values, dims="obs_id")
   y_obs = pm.Data("y_obs", df["observed_value"].values, dims="obs_id")

   # Priors for logit(p) in the hurdle component
   intercept_logit = pm.Normal("intercept_logit", mu=0, sigma=2)
   beta_logit = pm.Normal("beta_logit", mu=0, sigma=2, dims="group")

   # Priors for log(mu) in the gamma component
   intercept_gamma = pm.Normal("intercept_gamma", mu=0, sigma=2)
   beta_gamma = pm.Normal("beta_gamma", mu=0, sigma=2, dims="group")

   # Shared shape parameter for gamma
   alpha = pm.HalfNormal("alpha", sigma=2)

   # Compute probabilities and means
   logit_p = intercept_logit + beta_logit[group_idx]
   p = pm.Deterministic("p", pm.math.sigmoid(logit_p), dims="obs_id")

   log_mu = intercept_gamma + beta_gamma[group_idx]
   mu = pm.Deterministic("mu", pm.math.exp(log_mu), dims="obs_id")

   # Define log-likelihood for hurdle-gamma
   def hurdle_gamma_logp(value, p_, mu_, alpha_):
       is_zero = pt.eq(value, 0)
       beta_ = alpha_ / mu_
       gamma_rv = pm.Gamma.dist(alpha=alpha_, beta=beta_)
       gamma_logp = pm.logp(gamma_rv, value)
       return pt.switch(is_zero, pt.log1p(-p_), pt.log(p_) + gamma_logp)


   # Custom likelihood using positional args (required when only logp is defined)
   HurdleGamma = pm.CustomDist(
       "HurdleGamma",
       p, mu, alpha,
       observed=y_obs,
       logp=hurdle_gamma_logp,
       dims="obs_id"
   )

   # Sample from the posterior
   idata_fixed_effects = pm.sample(nuts_sampler='nutpie')

The hierarchical model:

import arviz as az
import pandas as pd
import polars as pl
import pymc as pm
import pytensor.tensor as pt
from scipy.stats import gamma

data_csv = "hurdlegamma_sample.csv"

df = (pl
      .read_csv(data_csv, schema_overrides={'group':pl.String})
      .drop('record')
      .with_columns(group = pl.concat_str(pl.lit('group'), pl.col.group.str.zfill(2), separator="_"))
      .sort(by='group')
      .to_pandas()
      )

# Encode categorical variable as integer index
df["group_code"] = pd.Categorical(df["group"]).codes
group_names = pd.Categorical(df["group"]).categories

# Coordinates for the model
coords = {"group": group_names, "obs_id": df.index}
coords = {"group": group_names, "obs_id": df.index}

with pm.Model(coords=coords) as model:
    # Data containers
    group_idx = pm.Data("group_idx", df["group_code"].values, dims="obs_id")
    y_obs = pm.Data("y_obs", df["observed_value"].values, dims="obs_id")

    # Global intercepts
    intercept_logit = pm.Normal("intercept_logit", mu=0, sigma=2)
    intercept_gamma = pm.Normal("intercept_gamma", mu=0, sigma=2)

    # Random effects for group (logit link)
    sigma_logit_group = pm.HalfNormal("sigma_logit_group", sigma=1)
    re_logit_group = pm.Normal("re_logit_group", mu=0, sigma=sigma_logit_group, dims="group")

    # Random effects for group (gamma link)
    sigma_gamma_group = pm.HalfNormal("sigma_gamma_group", sigma=1)
    re_gamma_group = pm.Normal("re_gamma_group", mu=0, sigma=sigma_gamma_group, dims="group")

    # Gamma shape (shared)
    alpha = pm.HalfNormal("alpha", sigma=2)

    # Linear predictors with random intercepts
    logit_p = intercept_logit + re_logit_group[group_idx]
    p = pm.Deterministic("p", pm.math.sigmoid(logit_p), dims="obs_id")

    log_mu = intercept_gamma + re_gamma_group[group_idx]
    mu = pm.Deterministic("mu", pm.math.exp(log_mu), dims="obs_id")

    # Custom hurdle-gamma log-likelihood
    def hurdle_gamma_logp(value, p_, mu_, alpha_):
        is_zero = pt.eq(value, 0)
        beta_ = alpha_ / mu_
        gamma_rv = pm.Gamma.dist(alpha=alpha_, beta=beta_)
        gamma_logp = pm.logp(gamma_rv, value)
        return pt.switch(is_zero, pt.log1p(-p_), pt.log(p_) + gamma_logp)

    # Use positional arguments only (required)
    HurdleGamma = pm.CustomDist(
        "HurdleGamma",
        p, mu, alpha,
        observed=y_obs,
        logp=hurdle_gamma_logp,
        dims="obs_id"
    )

    # Sample from the posterior
    idata_hierarchical = pm.sample(nuts_sampler='nutpie')