Hi all,
I try to use the CustomDist.
In the following program, when the number of data(n) is set to 5, no error occurs, but when the number of data is increased, an error occurs.
Can you tell me why the error occurs when the number of data increases?
I would be thankful if you could help me with this question.
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import arviz as az
from copy import copy
from scipy import stats
import pymc as pm
import pytensor.tensor as pt
# Initialize random number generator
RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)
az.style.use("arviz-darkgrid")
lam = -0.1
intercept = 1
slope = np.array([1, 1])
n, d = 5, 2
#n, d = 1000, 2
X = np.abs(np.random.randn(n,d))
mu = np.dot(X, slope) + intercept
d = pm.CustomDist.dist(np.exp(mu), lam, random=_generalized_poisson_random, logp=_generalized_poisson_logp, dtype="int64")
y = pm.draw(d)
print(y.mean())
print(y.std())
from pymc.distributions.dist_math import check_parameters
def _generalized_poisson_logp(value, theta, lam):
theta_lam_value = theta + lam * value
log_prob = pt.log(theta) + pt.log(pt.pow(theta_lam_value, value - 1)) - theta_lam_value - pt.gammaln(value)
log_prob = pt.switch(pt.le(theta_lam_value, 0), -np.inf, log_prob)
return check_parameters(log_prob, value >= 0, theta > 0, pt.abs(lam) <= 1, -theta / 4 <= lam)
def _generalized_poisson_random(theta, lam, rng=None, size=None):
if size is not None:
assert size == theta.shape
else:
size = theta.shape
#lam = lam[0]
omega = np.exp(-lam)
X = np.full(size, 0)
S = np.exp(-theta)
P = np.copy(S)
for i in range(size[0]):
U = np.random.uniform()
while U > S[i]:
X[i] += 1
C = theta[i] - lam + lam * X[i]
P[i] = omega * C * (1 + lam / C) ** (X[i] - 1) * P[i] / X[i]
S[i] += P[i]
return X
def _generalized_poisson_moment(rv, size, theta, lam):
return pt.ones(size)
with pm.Model() as generalized_poisson_model:
intercept = pm.Normal("intercept", mu=1.0, sigma=1.0, initval=1)
slope = pm.Normal("slope", mu=1.0, sigma=1.0, shape=(X.shape[1]), initval=np.ones((X.shape[1])))
lam = pm.TruncatedNormal("lam", mu=0, sigma=0.1, lower=-1, upper=1, initval=0)
theta = pm.Deterministic("mu", pt.exp(intercept + pt.dot(X, slope)))
obs = pm.CustomDist("obs",
theta,
lam,
random=_generalized_poisson_random,
logp=_generalized_poisson_logp,
moment=_generalized_poisson_moment,
ndim_supp=0,
ndims_params=[0, 0],
dtype="int64",
observed=y)
idata = pm.sample()
az.summary(idata)
n = 5
24.6
9.971960689854328
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [intercept, slope, lam]
Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 2 seconds.
mean sd hdi_3% hdi_97% mcse_mean mcse_sd ess_bulk ess_tail r_hat
intercept 0.836 0.515 -0.097 1.805 0.016 0.011 1100.0 1507.0 1.0
slope[0] 0.909 0.257 0.457 1.409 0.007 0.005 1286.0 1387.0 1.0
slope[1] 1.043 0.227 0.620 1.463 0.006 0.004 1424.0 1812.0 1.0
lam -0.006 0.095 -0.205 0.155 0.002 0.002 2675.0 2425.0 1.0
mu[0] 8.087 2.318 3.970 12.406 0.066 0.047 1175.0 1591.0 1.0
mu[1] 22.224 5.009 13.100 31.342 0.085 0.061 3463.0 2834.0 1.0
mu[2] 31.376 4.537 22.930 39.970 0.083 0.058 3003.0 2646.0 1.0
mu[3] 28.835 3.767 21.805 35.911 0.069 0.049 3001.0 2899.0 1.0
mu[4] 33.110 4.721 24.395 42.015 0.088 0.062 2858.0 2917.0 1.0
n= 1000
17.708
24.574554644998145
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Output exceeds the size limit. Open the full output data in a text editor
---------------------------------------------------------------------------
SamplingError Traceback (most recent call last)
Cell In[652], line 87
74 theta = pm.Deterministic("mu", pt.exp(intercept + pt.dot(X, slope)))
76 obs = pm.CustomDist("obs",
77 theta,
78 lam,
(...)
84 dtype="int64",
85 observed=y)
---> 87 idata = pm.sample()
88 az.summary(idata)
File ~/Desktop/programming/05_PyMC/.venv/lib/python3.9/site-packages/pymc/sampling/mcmc.py:708, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
706 ip: Dict[str, np.ndarray]
707 for ip in initial_points:
--> 708 model.check_start_vals(ip)
709 _check_start_shape(model, ip)
711 # Create trace backends for each chain
File ~/Desktop/programming/05_PyMC/.venv/lib/python3.9/site-packages/pymc/model.py:1784, in Model.check_start_vals(self, start)
1781 initial_eval = self.point_logps(point=elem)
1783 if not all(np.isfinite(v) for v in initial_eval.values()):
-> 1784 raise SamplingError(
1785 "Initial evaluation of model at starting point failed!\n"
...
{'intercept': array(1.1131772), 'slope': array([1.22276622, 1.64913618]), 'lam_interval__': array(-0.7628426)}
Logp initial evaluation results:
{'intercept': -0.93, 'slope': -2.07, 'lam': -6.07, 'obs': nan}
You can call `model.debug()` for more details.