Hello. I want to evaluate the log-probability of a PyMC model, because my research involves implementing my own version of ADVI. Below is the initial setup code (df is a pandas DataFrame of observed data and drivers is the list of its 15 indicator columns, both defined earlier in my script).
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
from jax.flatten_util import ravel_pytree
from pymc.sampling.jax import get_jaxified_logp
import numpy as np
coords = {
"obs": list(range(len(df))),
"indicators": drivers,
"indicators_1": ["se_acad_p1", "se_acad_p2", "se_acad_p3"],
"indicators_2": ["se_social_p1", "se_social_p2", "se_social_p3"],
"indicators_3": ["sup_friends_p1", "sup_friends_p2", "sup_friends_p3"],
"indicators_4": ["sup_parents_p1", "sup_parents_p2", "sup_parents_p3"],
"indicators_5": ["ls_p1", "ls_p2", "ls_p3"],
"latent": ["SUP_F", "SUP_P"],
"latent1": ["SUP_F", "SUP_P"],
"latent_regression": ["SUP_F->SE_ACAD", "SUP_P->SE_ACAD", "SUP_F->SE_SOC", "SUP_P->SE_SOC"],
"regression": ["SE_ACAD", "SE_SOCIAL", "SUP_F", "SUP_P"],
}
obs_idx = list(range(len(df)))
with pm.Model(coords=coords) as model:
    # Observation-noise scales and factor loadings; the first loading of each
    # indicator block is fixed to 1 for identification.
    Psi = pm.InverseGamma("Psi", 5, 10, dims="indicators")
    lambdas_ = pm.Normal("lambdas_1", 1, 1, dims="indicators_1")
    lambdas_1 = pm.Deterministic("lambdas1", pt.set_subtensor(lambdas_[0], 1), dims="indicators_1")
    lambdas_ = pm.Normal("lambdas_2", 1, 1, dims="indicators_2")
    lambdas_2 = pm.Deterministic("lambdas2", pt.set_subtensor(lambdas_[0], 1), dims="indicators_2")
    lambdas_ = pm.Normal("lambdas_3", 1, 1, dims="indicators_3")
    lambdas_3 = pm.Deterministic("lambdas3", pt.set_subtensor(lambdas_[0], 1), dims="indicators_3")
    lambdas_ = pm.Normal("lambdas_4", 1, 1, dims="indicators_4")
    lambdas_4 = pm.Deterministic("lambdas4", pt.set_subtensor(lambdas_[0], 1), dims="indicators_4")
    lambdas_ = pm.Normal("lambdas_5", 1, 1, dims="indicators_5")
    lambdas_5 = pm.Deterministic("lambdas5", pt.set_subtensor(lambdas_[0], 1), dims="indicators_5")
    tau = pm.Normal("tau", 3, 0.5, dims="indicators")
    kappa = 0
    # Latent factors SUP_F and SUP_P with an LKJ prior on their covariance
    sd_dist = pm.Exponential.dist(1.0, shape=2)
    chol, _, _ = pm.LKJCholeskyCov("chol_cov", n=2, eta=2, sd_dist=sd_dist, compute_corr=True)
    cov = pm.Deterministic("cov", chol.dot(chol.T), dims=("latent", "latent1"))
    ksi = pm.MvNormal("ksi", kappa, chol=chol, dims=("obs", "latent"))

    # Regression Components
    beta_r = pm.Normal("beta_r", 0, 0.1, dims="latent_regression")
    beta_r2 = pm.Normal("beta_r2", 0, 0.1, dims="regression")
    sd_dist1 = pm.Exponential.dist(1.0, shape=2)
    resid_chol, _, _ = pm.LKJCholeskyCov("resid_chol", n=2, eta=3, sd_dist=sd_dist1, compute_corr=True)
    _ = pm.Deterministic("resid_cov", resid_chol.dot(resid_chol.T))
    sigmas_resid = pm.MvNormal("sigmas_resid", kappa, chol=resid_chol)
    # SE_ACAD ~ SUP_FRIENDS + SUP_PARENTS
    regression_se_acad = pm.Normal(
        "regr_se_acad",
        beta_r[0] * ksi[obs_idx, 0] + beta_r[1] * ksi[obs_idx, 1],
        sigmas_resid[0],
    )
    # SE_SOCIAL ~ SUP_FRIENDS + SUP_PARENTS
    regression_se_social = pm.Normal(
        "regr_se_social",
        beta_r[2] * ksi[obs_idx, 0] + beta_r[3] * ksi[obs_idx, 1],
        sigmas_resid[1],
    )
    # LS ~ SE_ACAD + SE_SOCIAL + SUP_FRIEND + SUP_PARENTS
    regression = pm.Normal(
        "regr",
        beta_r2[0] * regression_se_acad
        + beta_r2[1] * regression_se_social
        + beta_r2[2] * ksi[obs_idx, 0]
        + beta_r2[3] * ksi[obs_idx, 1],
        1,
    )
    # Measurement model: expected value of each of the 15 indicators
    m0 = tau[0] + regression_se_acad * lambdas_1[0]
    m1 = tau[1] + regression_se_acad * lambdas_1[1]
    m2 = tau[2] + regression_se_acad * lambdas_1[2]
    m3 = tau[3] + regression_se_social * lambdas_2[0]
    m4 = tau[4] + regression_se_social * lambdas_2[1]
    m5 = tau[5] + regression_se_social * lambdas_2[2]
    m6 = tau[6] + ksi[obs_idx, 0] * lambdas_3[0]
    m7 = tau[7] + ksi[obs_idx, 0] * lambdas_3[1]
    m8 = tau[8] + ksi[obs_idx, 0] * lambdas_3[2]
    m9 = tau[9] + ksi[obs_idx, 1] * lambdas_4[0]
    m10 = tau[10] + ksi[obs_idx, 1] * lambdas_4[1]
    m11 = tau[11] + ksi[obs_idx, 1] * lambdas_4[2]
    m12 = tau[12] + regression * lambdas_5[0]
    m13 = tau[13] + regression * lambdas_5[1]
    m14 = tau[14] + regression * lambdas_5[2]
    mu = pm.Deterministic(
        "mu",
        pm.math.stack([m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14]).T,
    )
    _ = pm.Normal("likelihood", mu, Psi, observed=df[drivers].values)
Below is the preparation for drawing samples from the variational distribution in the unconstrained space. For the variational mean I use the model's initial point, and I set the log standard deviations to 0, so each sample is formed as mean + exp(log_sd) * z. I also draw 100 standard-normal z's for the reparameterization trick.
np.random.seed(2)
# variational mean: the flattened initial point (unconstrained value variables)
init_means, fun = ravel_pytree(model.initial_point())
# variational log standard deviations, set to 0
init_log_vars = np.zeros(init_means.shape[0])
# standard-normal draws for the reparameterization trick
zs = np.random.randn(100, init_means.shape[0])
# jaxified model log-probability (the joint log density, not just the likelihood)
logp_fn_jax = get_jaxified_logp(model)
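As a quick check of the flattening (a minimal sketch; the exact transformed names, e.g. Psi_log__, depend on the default transforms), unflattening the mean recovers the initial-point dict keyed by the value-variable names. Note that ravel_pytree flattens the dict in sorted-key order, which need not match the order of model.value_vars, hence the reordering by name in the loop below.

# Round-trip: unflatten the flattened mean back into the initial-point dict
recovered = fun(init_means)
for name, value in recovered.items():
    print(name, np.shape(value))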
I then ran the following code to evaluate the log-probability at each sample:
# evaluate the log-probability at each sample
for i in range(zs.shape[0]):
    sample = zs[i] * np.exp(init_log_vars) + init_means
    param_dict = fun(sample)
    as_list = [param_dict[x.name] for x in model.value_vars]
    print(logp_fn_jax(as_list))
Some of the results are:
nan
nan
nan
nan
nan
-29890.78782366809
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
-81320.72316508116
nan
nan
nan
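To narrow this down, I could evaluate the per-variable log-probabilities at one of the failing samples (the first z gives nan here), along these lines; a rough sketch, assuming model.point_logps accepts a point dict keyed by the value-variable names:

# Per-variable log-probabilities at one failing sample, to see which term is nan
failing = fun(zs[0] * np.exp(init_log_vars) + init_means)
failing = {name: np.asarray(value) for name, value in failing.items()}
print(model.point_logps(failing))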
What could be causing the NaN values? Depending on z, the log-probability is sometimes computed successfully and sometimes not. Do you have any guesses about the possible cause?