Performance & MLE issues with CustomDist

Hi, I’m new to PyMC.
I managed to define logp and random for a CustomDist whose variable t is supported on [0, 12], with parameter t_\mathrm{SFR}\in[4,12], for later posterior inference; its shape is shown below.

[figure: shape of the pdf]
With NumericalInversePolynomial from scipy.stats.sampling, my code is as follows:

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
import pytensor.tensor as pt
from scipy.special import expi, erf
from scipy.integrate import quad
from scipy.stats.sampling import NumericalInversePolynomial
print(f"Running on PyMC v{pm.__version__}")
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")
#------------------------------------------------------------------------
def log_p(t, t_sfr):
    # unnormalized log-density of t on the support [0, 12]
    num = (0.159409 * pt.exp(-1.13802 * pt.abs(-1.47073 + t)) * t
           - (0.690966 * (1 - pt.exp(0.0211761 * t))) / (3.80478 + t)
           + 0.156989 * pt.exp(-0.264495 * t) * pt.erf(0.690966 * t))
    return pt.switch(
        pt.bitwise_and(pt.ge(t, 0.), pt.le(t, 12.)),
        -(12 - t) / t_sfr + pt.log(num),  # - np.log(deno(t_sfr))
        -np.inf,
    )
#--------------------------------------------------------------------------
# helper functions for the closed-form normalization constant
def int_x_to_12(x, t_sfr):
    return (-0.690966 * np.exp(-15.8048 / t_sfr)
            * (expi(15.8048 / t_sfr) - expi((x + 3.80478) / t_sfr))
            + 0.690966 * np.exp(-0.0805702 - 15.8048 / t_sfr)
            * (expi(15.8048 * (0.0211761 + 1 / t_sfr))
               - expi((x + 3.80478) * (0.0211761 + 1 / t_sfr))))

def deno(t_sfr):
    # normalization constant of the pdf over [0, 12]
    return (int_x_to_12(0, t_sfr)
            + t_sfr * (0.0248331
                       + 0.615691 * np.exp((0.523633 - 12.277 * t_sfr) / t_sfr**2)
                       * (erf(0.191395 - 0.723625 / t_sfr)
                          - erf(8.48299 - 0.723625 / t_sfr))) / (3.78079 - t_sfr)
            + (np.exp(-10.5293 / t_sfr)
               * (-0.181027
                  + np.exp(10.5293 / t_sfr) * (9.23477e-6 - 0.000011279 * t_sfr)
                  + 0.329099 * t_sfr) * t_sfr) / (0.878715 - t_sfr)**2
            + (np.exp(-22.5293 / t_sfr) * t_sfr
               * (np.exp(12. / t_sfr) * (0.181027 + 0.0829266 * t_sfr)
                  + 0.0230844 * np.exp(10.5293 / t_sfr) * t_sfr)) / (0.878715 + t_sfr)**2)
# define class p for NumericalInversePolynomial
class p:
    def __init__(self, t_sfr):
        self.t_sfr = t_sfr

    def pdf(self, t):
        '''
        analytical approx. and is not normalized
        '''
        sfr = np.exp(-(12 - t) / self.t_sfr)
        num = (0.159409 * np.exp(-1.13802 * np.abs(-1.47073 + t)) * t
               - (0.690966 * (1 - np.exp(0.0211761 * t))) / (3.80478 + t)
               + 0.156989 * np.exp(-0.264495 * t) * erf(0.690966 * t))
        return sfr * num  # / deno(self.t_sfr)
#    def logpdf(self, t):
#        return -(12 - t)/self.t_sfr + np.log(0.159409 * np.exp(-1.13802 * np.abs(-1.47073 + t)) * t - (0.690966 * (1 - np.exp(0.0211761 * t)))/(3.80478 + t) + 0.156989 * np.exp(-0.264495 * t) * erf(0.690966 * t)) #- np.log(deno(self.t_sfr))
#-----------------------------------------------------------------------------
def random(t_sfr: float, rng=None, size=None) -> np.ndarray | float:
    # draw via numerical inversion of the (unnormalized) pdf
    dist = p(t_sfr)
    p_generator = NumericalInversePolynomial(dist, center=1.47073, domain=(0, 12), random_state=rng)
    return p_generator.rvs(size=size)
#------------------------------------------------------------------------
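As a quick sanity check that deno really is the integral of the unnormalized pdf over [0, 12] (using the quad import above; the two numbers should agree if deno is correct):

t_sfr_check = 8.
numeric, _ = quad(p(t_sfr_check).pdf, 0., 12.)  # numerical integral of the unnormalized pdf
print(numeric, deno(t_sfr_check))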

Now I have two problems:

1. Prior sampling performance

It took 1m 30.33s to draw 1,000 prior samples:

with pm.Model() as m:
    t_sfr = 8.
    t = pm.CustomDist('t', t_sfr, logp=log_p, random=random)
    idata = pm.sample_prior_predictive(1000)

But if I call NumericalInversePolynomial directly, it's super fast (78 ms):

dist = p(8.)
urng = np.random.default_rng()
rng = NumericalInversePolynomial(dist, center=1.47073, domain=(0, 12), random_state=urng)
obs = rng.rvs(100000)

I wonder why the PyMC sampling is so much slower, and whether there is a better way to use NumericalInversePolynomial with PyMC.

2. Testing with MLE

With this CustomDist, I wanted to test the inference using MLE, which (as I understand it) is Bayesian inference with an uninformative prior:

dist = p(8.)  # true t_sfr = 8.
urng = np.random.default_rng()
rng = NumericalInversePolynomial(dist, center=1.47073, domain=(0, 12), random_state=urng)
obs = rng.rvs(100000)

with pm.Model() as m:
    t_sfr = pm.Uniform('t_sfr', lower=4, upper=12)
    t = pm.CustomDist('t', t_sfr, logp=log_p, random=random, observed=obs)
    idata = pm.sample()

Then I got:

[figure: posterior of t_\mathrm{SFR} piled up against the upper bound]

which is quite strange: the draws are almost all 11.999, but I thought they would be around 8.

I also tried a flat prior, pm.Flat('t_sfr'), but it still doesn't work (the sampler warns: Chain 0 reached the maximum tree depth. Increase max_treedepth, increase target_accept or reparameterize.), and I don't know how to improve my model…

BTW, I verified this “MLE” method separately with pm.Normal, using pm.Flat priors for both mu and sigma, and both gave plausible results:

obs = np.random.randn(1000) * 8 + 8  # true mu = 8, sigma = 8
with pm.Model() as m:
    mu = pm.Flat('mu')
    sigma = pm.Flat('sigma')
    t = pm.Normal('t', mu=mu, sigma=sigma, observed=obs)
    idata = pm.sample()

Thanks in advance for any help!

I suspect the slowdown in the prior predictive is that every call re-runs the NumericalInversePolynomial setup, whereas in your direct use you define it once and take all the draws from it.
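If you want to keep NumericalInversePolynomial, a sketch of that idea (with a hypothetical get_generator helper, and assuming the generator can safely be reused across draws) could look like:

from functools import lru_cache

@lru_cache(maxsize=None)
def get_generator(t_sfr):
    # the expensive setup runs only once per distinct t_sfr value
    return NumericalInversePolynomial(p(t_sfr), center=1.47073, domain=(0, 12))

def random(t_sfr, rng=None, size=None):
    # PINV is an inversion method, so push uniforms from PyMC's rng through its ppf
    return get_generator(float(t_sfr)).ppf(rng.uniform(size=size))

Note the cache only pays off when t_sfr repeats across calls (as in your prior predictive with fixed t_sfr = 8.); during pm.sample only the logp is evaluated, so random isn't the bottleneck there.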

For the MLE: have you actually confirmed your logp is correct? You can test it by calling model.compile_logp()(test_point), where test_point is something that looks like model.initial_point().
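For example, a minimal version of that check (reusing the model m from your MLE attempt) might be:

test_point = m.initial_point()  # dict mapping (transformed) variable names to start values
logp_fn = m.compile_logp()
print(logp_fn(test_point))      # nan or -inf here signals a broken logp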

Great advice!

1.

Instead of using NumericalInversePolynomial, I think I can directly fit a functional form for t = \mathrm{CDF}^{-1}(u;\,t_\mathrm{SFR}) with u\sim\mathrm{Uniform}(0,1) to use in random — which I believe is the same thing NumericalInversePolynomial does internally — so that NumericalInversePolynomial is not called on every draw (implemented in my next post below).

2.

I will try this.


1. Using the metalog distribution and the fitting tool from metalogsworkbook, I can fit \mathrm{CDF}^{-1}(u;\,t_\mathrm{SFR}). Now the prior sampling is much faster and the first problem is solved.

def logpdf(t, t_sfr):
    # piecewise fit of the log of the unnormalized density on three
    # sub-intervals of [0, 12]; the last lambda is the default branch
    log_num = np.piecewise(
        t,
        [(t <= 12.) & (t > 10**0.23),
         (t <= 10**0.23) & (t > 10**(-0.1)),
         (t <= 10**(-0.1)) & (t >= 0.)],
        [lambda t: 4.5860 * np.log10(t)**3 - 9.0617 * np.log10(t)**2 + 1.8664 * np.log10(t) - 1.0666,
         lambda t: -35.7414 * np.log10(t)**3 + 2.2133 * np.log10(t)**2 + 3.8230 * np.log10(t) - 1.6314,
         lambda t: np.log(0.178 * t),
         lambda t: -np.inf],
    )
    # fitted log-normalization constant as a function of t_sfr on [4, 12]
    log_norm = np.piecewise(
        t_sfr,
        [(t_sfr >= 4) & (t_sfr <= 12)],
        [lambda t_sfr: np.log(-3.75557
                              + 1 / (0.221348 + 0.0538988 * np.exp(-0.117341 * t_sfr))
                              - 0.191343 * np.exp(-1.00455 * t_sfr) * np.sin(6.15789 - t_sfr)),
         lambda t_sfr: np.inf],
    )
    return log_num - (12 - t) / t_sfr - log_norm
#--------------------------------------------------------------------------------------------
import pandas as pd  # needed for read_csv below

# metalog coefficients a_i, each fitted as a polynomial in t_sfr
coefficients_of_ai = pd.read_csv('fit_for_p(t_and_tsfr)/coeffofai.csv', header=None).to_numpy()
ai = [np.poly1d(coefficients_of_ai[i]) for i in range(8)]
def inv_cdf(u, t_sfr):
    # metalog quantile function in M, mapped to (0, 12) by a logistic transform
    A = u - 0.5
    B = np.log(u / (1 - u))
    M = (ai[0](t_sfr) + ai[1](t_sfr) * B + ai[2](t_sfr) * A * B + ai[3](t_sfr) * A
         + ai[4](t_sfr) * A**2 + ai[5](t_sfr) * A**2 * B
         + ai[6](t_sfr) * A**3 + ai[7](t_sfr) * A**3 * B)
    return (12. * np.exp(M)) / (1 + np.exp(M))

def random(t_sfr: float, rng=None, size=None) -> np.ndarray | float:
    # inverse transform sampling: push uniforms through the fitted inverse CDF
    return inv_cdf(rng.uniform(size=size), t_sfr)
#---------------------------------------------------------------
with pm.Model() as m:
    t_sfr = 8.
    t = pm.CustomDist('t', t_sfr, logp=logpdf, random=random)
    idata = pm.sample_prior_predictive(100000)
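As a quick check of the fits (assuming p and deno from my first post are still in scope), I can spot-check the fitted logpdf against the exact log-density and compare the prior draws with the normalized pdf:

# spot-check the fitted logpdf against the exact log-density
grid = np.array([0.5, 2.0, 6.0, 11.0])
print(logpdf(grid, 8.) - np.log(p(8.).pdf(grid) / deno(8.)))  # ~0 if the fits are good

# compare prior draws against the normalized pdf
ts = idata.prior['t'].values.ravel()
plt.hist(ts, bins=100, density=True, label='prior draws')
tt = np.linspace(0.01, 12, 500)
plt.plot(tt, p(8.).pdf(tt) / deno(8.), label='normalized pdf')
plt.xlabel('t')
plt.legend()
plt.show()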

2. It turns out that I didn't normalize the pdf properly: the normalization constant depends on t_\mathrm{SFR} and I just ignored it. That also explains the pile-up at the boundary: for every t < 12, the unnormalized log-likelihood -(12-t)/t_\mathrm{SFR} + \log(\dots) increases monotonically with t_\mathrm{SFR}, so without the normalization term the posterior is pushed toward the upper bound of 12. The logpdf above now subtracts the fitted log-normalization (the second np.piecewise term).

Thanks again for your help!