In trying to resolve this issue myself, I found this forum discussion very helpful. It helped me see a couple problems with my original best guess. However, I still haven’t gotten it to work. Below is my current best guess. The error I get now comes from prior predictive sampling. The Error message is: IndexError: shape mismatch: indexing arrays could not be broadcast together with (X,) (Y,)
. In this this case X corresponds to the length of the uncensored data and Y corresponds to the length of all observations (censored and uncensored).
idx = pd.Categorical(group_values, categories=unique_groups).codes
coords = {
"intervals": intervals,
"group": unique_groups,
"group_flat": group_values,
"preds": [
"sentiment",
"intention",
"Male",
"Low",
"Medium",
"Finance",
"Health",
"Law",
"Public/Government",
"Sales/Marketing",
],
}
X = retention_df[
[
"sentiment",
"intention",
"Male",
"Low",
"Medium",
"Finance",
"Health",
"Law",
"Public/Government",
"Sales/Marketing",
]
].copy()
y = retention_df["month"].values
cens = retention_df.left.values == 0.0
def weibull_lccdf(x, alpha, beta):
"""Log complementary cdf of Weibull distribution."""
return -((x / beta) ** alpha)
with pm.Model(coords=coords, check_bounds=False) as aft_model:
X_data = pm.MutableData("X_data_obs", X)
beta = pm.Normal("beta", 0.0, 1, dims="preds")
mu = pm.Normal("mu", 0, 1, dims="groups")
s = pm.HalfNormal("s", 5.0, dims="groups")
eta = pm.Deterministic("eta", pm.math.dot(beta, X_data.T))
reg = pm.Deterministic("reg", pt.exp(-(mu[None, :] + eta[:, None]) / s))
y_obs = pm.Weibull("y_obs", beta=reg[~cens, idx], alpha=s[idx], observed=y[~cens], dims="groups_flat")
y_cens = pm.Potential("y_cens", weibull_lccdf(y[cens], alpha=s[idx], beta=reg[cens, idx]), dims="groups_flat")