I’ve been having trouble porting just about all of my hierarchical survival analysis models over to pymc v4 from pymc3. They seem to be much more sensitive to initialization and tend to fail due to floating point errors. I didnt have this problem at all in pymc3.

Even using different model structures similar to this example fail for me despite working in pymc3

Here’s some reproducible code that works for pymc3 but not pymc v4. Any ideas?

```
use_pymc3 = False
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
import scipy.special as sp
if use_pymc3:
import pymc3 as pm
else:
import pymc as pm
import arviz as az
SEED = 99
print( pm.__version__)
# simulating fake data
np.random.seed(SEED)
N_groups = 100
N = 2500
params = dict(
log_lambd_mu = np.log(65),
log_lambd_sig = 0.4,
log_k_mu = np.log(1.65),
log_k_sig = 0.2,
)
params["log_lambd"] = np.random.normal(
params["log_lambd_mu"], params["log_lambd_sig"], size=N_groups)
params["log_k"] = np.random.normal(
params["log_k_mu"], params["log_k_sig"], size=N_groups)
# which groups each observation belongs to"
group_idxs = np.random.choice(range(N_groups),size=N)
# simulate event time data
if use_pymc3:
# for pymc3
y_true = pm.Weibull.dist(np.exp(params["log_k"][group_idxs]),
np.exp(params["log_lambd"][group_idxs])).random()
else:
# for pymc v4
y_true = pm.Weibull.dist(np.exp(params["log_k"][group_idxs]),
np.exp(params["log_lambd"][group_idxs])).eval()
# randomly censor the dataset to mimic survival analysis
cens_time = np.random.lognormal(4, 0.75, size=N).astype(int) #np.random.uniform(0, 250, size=N)
data = (
pd.DataFrame({
"group":group_idxs,
"time": y_true})
# adjust the dataset to censor observations
## indicates if an event hasnt occurred yet (cens=1)
.assign(cens = lambda d: np.where(d.time <= cens_time, 0, 1) )
## indicates the latest time observed for each record
.assign(time = lambda d: np.where(d.cens==1, cens_time, d.time) )
)
print( data.sample(5) )
# model helper functions
def hierarchical_normal(name, dims, μ=0., nc=True):
if nc:
Δ = pm.Normal('Δ_{}'.format(name), 0., 1., dims=dims)
σ = pm.Exponential('σ_{}'.format(name), 5.)
return pm.Deterministic(name, μ + Δ * σ)
else:
mu = pm.Normal("μ_{}".format(name), μ, 1)
sigma = pm.Exponential("σ_{}".format(name), 5.)
return pm.Normal(name, mu, sigma, dims=dims)
def weibull_cens_logp(k, lambd, E, T):
LL_observed = (pm.math.log(k) - pm.math.log(lambd)
+ (k-1) * ( pm.math.log(T) - pm.math.log(lambd) )
- (T/lambd)**k)
# CDF of Weibull: 1 - exp(-(t / lambda)^k)
# SF (survival fxn) = 1-CDF
LL_censored = -(T/lambd)**k
# If event observed, used observed log likelihood,
# otherwise use censored log likelihood
logprob = E * LL_observed + (1 - E) * LL_censored
return logprob
# data for model to reference
g_ = data.group.values
COORDS = {"group":range(N_groups)}
T = data.time.values
E = np.where(data.cens==1, 0, 1)
cens_ = np.where(data.cens==1, data.time, np.inf)
# model
with pm.Model(coords=COORDS) as weibull:
mu_log_k = pm.Normal("mu_log_k", 0.5, 0.25)
mu_log_lambd = pm.Normal("mu_log_lambd", 4.15, 0.25)
log_k = hierarchical_normal("log_k", μ=mu_log_k, dims="group", nc=True)
log_lambd = hierarchical_normal("log_lambd", μ=mu_log_lambd, dims="group", nc=True)
k = pm.Deterministic("k", pm.math.exp(log_k), dims="group")
lambd = pm.Deterministic("lambd", pm.math.exp(log_lambd), dims="group")
y_latent = pm.Weibull.dist(k[g_], lambd[g_])
if use_pymc3:
obs = pm.DensityDist(
name='obs',
logp=weibull_cens_logp,
random=y_latent.random,
observed={
"k":k[g_],
"lambd":lambd[g_],
"E":E,
"T":T}
)
idata = pm.sample(
init="advi+adapt_diag",
return_inferencedata=True,
# this is needed for custom log likelihood functions
idata_kwargs={"density_dist_obs": False})
else:
obs = pm.Censored("obs", y_latent,
lower=None,
upper=cens_,
observed=T)
idata = pm.sample(init="advi+adapt_diag")
```