Hello. I want to calculate the log-likelihood of a PyMC model. For my research I am implementing my own ADVI, so I need to evaluate the model's log-density at samples drawn from a variational distribution. Below is the initial setup code.
import pandas as pd
import pymc as pm
import pytensor.tensor as pt
from jax.flatten_util import ravel_pytree
from pymc.sampling.jax import get_jaxified_logp
import numpy as np
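# NOTE: `df` and `drivers` are not defined in this post. A hypothetical stand-in,
# assuming the 15 indicator columns listed in the coords below:
drivers = [
    "se_acad_p1", "se_acad_p2", "se_acad_p3",
    "se_social_p1", "se_social_p2", "se_social_p3",
    "sup_friends_p1", "sup_friends_p2", "sup_friends_p3",
    "sup_parents_p1", "sup_parents_p2", "sup_parents_p3",
    "ls_p1", "ls_p2", "ls_p3",
]
df = pd.read_csv("sem_data.csv")  # hypothetical path; any DataFrame with these columns works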
coords = {
"obs": list(range(len(df))),
"indicators": drivers,
"indicators_1": ["se_acad_p1", "se_acad_p2", "se_acad_p3"],
"indicators_2": ["se_social_p1", "se_social_p2", "se_social_p3"],
"indicators_3": ["sup_friends_p1", "sup_friends_p2", "sup_friends_p3"],
"indicators_4": ["sup_parents_p1", "sup_parents_p2", "sup_parents_p3"],
"indicators_5": ["ls_p1", "ls_p2", "ls_p3"],
"latent": ["SUP_F", "SUP_P"],
"latent1": ["SUP_F", "SUP_P"],
"latent_regression": ["SUP_F->SE_ACAD", "SUP_P->SE_ACAD", "SUP_F->SE_SOC", "SUP_P->SE_SOC"],
"regression": ["SE_ACAD", "SE_SOCIAL", "SUP_F", "SUP_P"],
}
obs_idx = list(range(len(df)))
with pm.Model(coords=coords) as model:
    Psi = pm.InverseGamma("Psi", 5, 10, dims="indicators")
    lambdas_ = pm.Normal(
        "lambdas_1", 1, 1, dims=("indicators_1")
    )
    lambdas_1 = pm.Deterministic(
        "lambdas1", pt.set_subtensor(lambdas_[0], 1), dims=("indicators_1")
    )
    lambdas_ = pm.Normal(
        "lambdas_2", 1, 1, dims=("indicators_2")
    )
    lambdas_2 = pm.Deterministic(
        "lambdas2", pt.set_subtensor(lambdas_[0], 1), dims=("indicators_2")
    )
    lambdas_ = pm.Normal(
        "lambdas_3", 1, 1, dims=("indicators_3")
    )
    lambdas_3 = pm.Deterministic(
        "lambdas3", pt.set_subtensor(lambdas_[0], 1), dims=("indicators_3")
    )
    lambdas_ = pm.Normal(
        "lambdas_4", 1, 1, dims=("indicators_4")
    )
    lambdas_4 = pm.Deterministic(
        "lambdas4", pt.set_subtensor(lambdas_[0], 1), dims=("indicators_4")
    )
    lambdas_ = pm.Normal(
        "lambdas_5", 1, 1, dims=("indicators_5")
    )
    lambdas_5 = pm.Deterministic(
        "lambdas5", pt.set_subtensor(lambdas_[0], 1), dims=("indicators_5")
    )
    tau = pm.Normal("tau", 3, 0.5, dims="indicators")
    kappa = 0
    sd_dist = pm.Exponential.dist(1.0, shape=2)
    chol, _, _ = pm.LKJCholeskyCov(
        "chol_cov", n=2, eta=2, sd_dist=sd_dist, compute_corr=True
    )
    cov = pm.Deterministic("cov", chol.dot(chol.T), dims=("latent", "latent1"))
    ksi = pm.MvNormal("ksi", kappa, chol=chol, dims=("obs", "latent"))
    # Regression Components
    beta_r = pm.Normal("beta_r", 0, 0.1, dims="latent_regression")
    beta_r2 = pm.Normal("beta_r2", 0, 0.1, dims="regression")
    sd_dist1 = pm.Exponential.dist(1.0, shape=2)
    resid_chol, _, _ = pm.LKJCholeskyCov(
        "resid_chol", n=2, eta=3, sd_dist=sd_dist1, compute_corr=True
    )
    _ = pm.Deterministic("resid_cov", resid_chol.dot(resid_chol.T))
    sigmas_resid = pm.MvNormal("sigmas_resid", kappa, chol=resid_chol)
    # SE_ACAD ~ SUP_FRIENDS + SUP_PARENTS
    regression_se_acad = pm.Normal(
        "regr_se_acad",
        beta_r[0] * ksi[obs_idx, 0] + beta_r[1] * ksi[obs_idx, 1],
        sigmas_resid[0],
    )
    # SE_SOCIAL ~ SUP_FRIENDS + SUP_PARENTS
    regression_se_social = pm.Normal(
        "regr_se_social",
        beta_r[2] * ksi[obs_idx, 0] + beta_r[3] * ksi[obs_idx, 1],
        sigmas_resid[1],
    )
    # LS ~ SE_ACAD + SE_SOCIAL + SUP_FRIEND + SUP_PARENTS
    regression = pm.Normal(
        "regr",
        beta_r2[0] * regression_se_acad
        + beta_r2[1] * regression_se_social
        + beta_r2[2] * ksi[obs_idx, 0]
        + beta_r2[3] * ksi[obs_idx, 1],
        1,
    )
    m0 = tau[0] + regression_se_acad * lambdas_1[0]
    m1 = tau[1] + regression_se_acad * lambdas_1[1]
    m2 = tau[2] + regression_se_acad * lambdas_1[2]
    m3 = tau[3] + regression_se_social * lambdas_2[0]
    m4 = tau[4] + regression_se_social * lambdas_2[1]
    m5 = tau[5] + regression_se_social * lambdas_2[2]
    m6 = tau[6] + ksi[obs_idx, 0] * lambdas_3[0]
    m7 = tau[7] + ksi[obs_idx, 0] * lambdas_3[1]
    m8 = tau[8] + ksi[obs_idx, 0] * lambdas_3[2]
    m9 = tau[9] + ksi[obs_idx, 1] * lambdas_4[0]
    m10 = tau[10] + ksi[obs_idx, 1] * lambdas_4[1]
    m11 = tau[11] + ksi[obs_idx, 1] * lambdas_4[2]
    m12 = tau[12] + regression * lambdas_5[0]
    m13 = tau[13] + regression * lambdas_5[1]
    m14 = tau[14] + regression * lambdas_5[2]
    mu = pm.Deterministic(
        "mu", pm.math.stack([m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14]).T
    )
    _ = pm.Normal("likelihood", mu, Psi, observed=df[drivers].values)
Below is the preparation for obtaining samples from the variational distribution in the unconstrained domain. For the variational mean I used PyMC's initial point, and I set the variational log standard deviations (log_sd) to 0. I also prepared 100 draws from a standard normal distribution for use in the reparameterization trick.
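Concretely, each sample is formed as theta = mu + exp(log_sd) * z with z ~ N(0, I), where mu and log_sd are the flattened variational parameters over the model's unconstrained value variables.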
np.random.seed(2)
# variational mean
init_means, fun = ravel_pytree(model.initial_point())
# variational log sigma
init_log_vars = np.zeros(init_means.shape[0])
# for reparametrization trick
zs = np.random.randn(100, init_means.shape[0])
# log-likelihood function from pymc model
logp_fn_jax = get_jaxified_logp(model)
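As a reference point, one can first evaluate the jaxified log-density at the unperturbed initial point, which should be finite (a minimal sanity-check sketch, not part of my run below):
# Sanity check: evaluate the log-density at the initial point itself.
init_dict = fun(init_means)
init_list = [init_dict[v.name] for v in model.value_vars]
print(logp_fn_jax(init_list))  # should be a finite number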
Then I ran the following code to evaluate the log-likelihood at each sample:
# evaluation at sample
for i in range(zs.shape[0]):
    sample = zs[i] * np.exp(init_log_vars) + init_means
    param_dict = fun(sample)
    as_list = [param_dict[x.name] for x in model.value_vars]
    print(logp_fn_jax(as_list))
Some of the results are:
nan
nan
nan
nan
nan
-29890.78782366809
nan
nan
nan
nan
nan
nan
nan
nan
nan
nan
-81320.72316508116
nan
nan
nan
What could be the reason for the NaN values? Depending on the draw z, the log-likelihood is sometimes computed successfully and sometimes not. Do you have any guesses about the possible cause?