Noncentral chi-squared distribution

Hi,

I’m trying to implement a noncentral chi-squared distribution to use. I’ve essentially copied the implementation of Ive from the Iv implementation in Theano 1.0, and I’ve tried to base my distribution off of what I see in the source for continuous.py. However, I get an error: `Could not convert <class 'numpy.float64'> (value=nan) to float64`, and I can’t figure out why (full traceback below).

Code:

import numpy as np
import pymc3 as pm

import theano
from theano.gradient import grad_not_implemented
from theano.scalar.basic import (UnaryScalarOp, BinaryScalarOp,
        exp, upgrade_to_float,
        upgrade_to_float64,
        float_types)
from theano.scalar.basic import (upgrade_to_float_no_complex,
        complex_types, discrete_types,
        upcast)
import theano.tensor as T
from theano import function
from theano import as_op


# Flag recording whether scipy.special/scipy.stats imported successfully;
# Ive.impl below falls back to the base-class implementation when they did not.
imported_scipy_special = False
try:
    import scipy.special
    import scipy.stats
    imported_scipy_special = True
# Importing scipy.special may raise ValueError.
# See http://projects.scipy.org/scipy/ticket/1739
except (ImportError, ValueError):
    pass

class Ive(BinaryScalarOp):
    """
    Exponentially scaled modified Bessel function of the first kind of order v (real).

    ive(v, x) = iv(v, x) * exp(-abs(x))
    """
    nfunc_spec = ('scipy.special.ive', 2, 1)

    @staticmethod
    def st_impl(v, x):
        # Delegate numeric evaluation to scipy.
        return scipy.special.ive(v, x)

    def impl(self, v, x):
        if imported_scipy_special:
            return self.st_impl(v, x)
        else:
            # FIX: propagate the base-class result instead of discarding it.
            # The base impl raises NotImplementedError, so without `return`
            # this branch silently yielded None and masked the error.
            return super(Ive, self).impl(v, x)

    def grad(self, inputs, grads):
        # FIX: the original op defined no gradient, so any gradient-based
        # sampler (e.g. NUTS) fails on graphs containing it.  For x >= 0
        # (true for the sqrt(x)*sqrt(nc) argument used in this thread):
        #   d/dx ive(v, x) = (ive(v-1, x) + ive(v+1, x)) / 2 - ive(v, x)
        # No closed form is implemented for the derivative w.r.t. the order v.
        v, x = inputs
        gz, = grads
        return [grad_not_implemented(self, 0, v),
                gz * ((ive(v - 1, x) + ive(v + 1, x)) / 2. - ive(v, x))]
ive = Ive(upgrade_to_float, name='ive')


class Ncx2(pm.Continuous):
    """Noncentral chi-squared distribution.

    logp mirrors scipy's ``_ncx2_logpdf`` term-for-term, expressed with the
    exponentially scaled Bessel function ``ive`` for numerical stability.
    """

    def __init__(self, df, nc, *args, **kwargs):
        super(Ncx2, self).__init__(*args, **kwargs)
        # Degrees of freedom and noncentrality, promoted to theano tensors.
        self.df = df = T.as_tensor_variable(df)
        self.nc = nc = T.as_tensor_variable(nc)
        self.mean = df + nc

    def logp(self, x):
        # log pdf = (df/4 - 1/2)*log(x/nc) - (sqrt(x) - sqrt(nc))**2 / 2
        #           + log(ive(df/2 - 1, sqrt(x)*sqrt(nc)) / 2)
        order = self.df / 2.0 - 1.0
        sqrt_x = T.sqrt(x)
        sqrt_nc = T.sqrt(self.nc)
        log_ratio_term = (order / 2.0) * T.log(x / self.nc)
        quad_term = 0.5 * (sqrt_x - sqrt_nc) ** 2
        bessel_term = T.log(ive(order, sqrt_x * sqrt_nc) / 2.0)
        return log_ratio_term - quad_term + bessel_term

# Build the hierarchical model and sample.  The nan test-value error quoted
# below is raised while constructing `lo`'s logp graph, before sampling starts.
model = pm.Model()
with model:
    pr = pm.Categorical('pr', p=(0.7,0.2,0.1))
    cl = pm.Beta('cl', 3, 1)
    g1 = pm.Poisson('g1', cl*pr)
    g2 = pm.Poisson('g2', g1*10)
    tr = pm.Bernoulli('tr', pm.invlogit((g1 - 5)/2.5))
    # xlo equals g2 when tr == 0 and g2/3 when tr == 1.
    xlo = pm.Deterministic('xlo',g2*(1-(1-1/3)*tr))
    #lo = pm.DensityDist('lo', lambda x: _ncx2logpdf(x,1.0,xlo), testval=2.0)
    # NOTE(review): if xlo's test value is 0 (e.g. g2 initialized at 0),
    # nc == 0 makes log(x/nc) in Ncx2.logp non-finite — presumably the source
    # of the nan test value reported below.  TODO confirm.
    lo = Ncx2('lo', df=1.0, nc=xlo, testval=1.0)
    trace = pm.sample()

Error:

Traceback (most recent call last):
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/scalar/basic.py", line 338, in filter
    self.dtype)
TypeError: Value cannot accurately be converted to dtype (float64) and allow_downcast is not True

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "pymc3_test.py", line 114, in <module>
    lo = Ncx2('lo', df=1.0, nc=xlo, testval=1.0)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/distributions/distribution.py", line 39, in __new__
    return model.Var(name, dist, data, total_size)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/model.py", line 515, in Var
    total_size=total_size, model=self)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/model.py", line 869, in __init__
    self.logp_elemwiset = distribution.logp(self)
  File "pymc3_test.py", line 95, in logp
    res2 = res + T.log(ive(df2, xs*ns) / 2.0)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/scalar/basic.py", line 754, in __truediv__
    return div_proxy(self, other)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/scalar/basic.py", line 1850, in div_proxy
    return f(x, y)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/gof/op.py", line 625, in __call__
    storage_map[ins] = [self._get_test_value(ins)]
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/gof/op.py", line 562, in _get_test_value
    ret = v.type.filter(v.tag.test_value)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/theano/scalar/basic.py", line 341, in filter
    type(data), data, self.dtype), e)
TypeError: For compute_test_value, one input test value does not have the requested type.

Backtrace when that variable is created:

  File "pymc3_test.py", line 114, in <module>
    lo = Ncx2('lo', df=1.0, nc=xlo, testval=1.0)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/distributions/distribution.py", line 39, in __new__
    return model.Var(name, dist, data, total_size)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/model.py", line 515, in Var
    total_size=total_size, model=self)
  File "/Users/keith/.venv/temp_env/lib/python3.6/site-packages/pymc3/model.py", line 869, in __init__
    self.logp_elemwiset = distribution.logp(self)
  File "pymc3_test.py", line 95, in logp
    res2 = res + T.log(ive(df2, xs*ns) / 2.0)

The error when converting the test value to that variable type:
Could not convert <class 'numpy.float64'> (value=nan) to float64
Value cannot accurately be converted to dtype (float64) and allow_downcast is not True

Sorry, didn’t have a chance to look at your code, but really quickly, if this is in a model, maybe you could possibly use

with pm.Model() as model:
    lamda = 3
    k = 2
    J = pm.Poisson("J", lamda)
    nc_chi2 = pm.ChiSquared("nc_chi2", nu=k+2*J)

as per Wikipedia, in the section called “Related distributions”.

Thanks for this response! I think that this should work (although I’m still having trouble with my model). I am still curious, if anyone knows, what’s wrong with what I initially tried though. I’m very new to Theano and PyMC3

Not sure where it went wrong, but I think you are really close; the following code seems to sample fine now.

import scipy.special
import scipy.stats
from theano.scalar.basic import (BinaryScalarOp, upgrade_to_float)
from theano.gradient import grad_not_implemented

class Iv(BinaryScalarOp):
    """
    Modified Bessel function of the first kind of order v (real).
    """
    # (numpy-function spec: implemented by scipy.special.iv, 2 inputs, 1 output)
    nfunc_spec = ('scipy.special.iv', 2, 1)

    @staticmethod
    def st_impl(v, x):
        # Numeric evaluation delegated to scipy.
        return scipy.special.iv(v, x)

    def impl(self, v, x):
        return self.st_impl(v, x)

    def grad(self, inputs, grads):
        # Recurrence: d/dx iv(v, x) = (iv(v-1, x) + iv(v+1, x)) / 2.
        # No closed form implemented for the derivative w.r.t. the order v.
        v, x = inputs
        gz, = grads
        d_order = grad_not_implemented(self, 0, v)
        d_arg = gz * (iv(v - 1, x) + iv(v + 1, x)) / 2.
        return [d_order, d_arg]

iv = Iv(upgrade_to_float, name='iv')

class Ncx2(pm.Continuous):
    """Noncentral chi-squared distribution; logp follows scipy's _ncx2_logpdf."""

    def __init__(self, df, nc, *args, **kwargs):
        super(Ncx2, self).__init__(*args, **kwargs)
        self.df = df = tt.as_tensor_variable(df)
        self.nc = nc = tt.as_tensor_variable(nc)
        # Code sets the mean to df + nc (the noncentral chi-squared mean).
        self.mean = df + nc

    def logp(self, x):
        # Bessel order for the density is df/2 - 1.
        v = self.df / 2.0 - 1.0
        root_x, root_nc = tt.sqrt(x), tt.sqrt(self.nc)
        base = (v / 2.0) * tt.log(x / self.nc) - 0.5 * (root_x - root_nc) ** 2
        return base + tt.log(iv(v, root_x * root_nc) / 2.0)

# NOTE(review): the hyperparameters (p_pr, a_cl, b_cl, g_g1, k_g2, m_tr,
# s_tr, r_lo) are not defined in this snippet — presumably set earlier in the
# answerer's session; supply them before running.
with pm.Model() as model:
    PR = pm.Categorical('PR', p=p_pr, testval=2)
    CL = pm.Beta('CL', alpha=a_cl, beta=b_cl)
    G1 = pm.Poisson('G1', mu=CL*g_g1[PR])
    G2 = pm.Poisson('G2', mu=G1*k_g2)
    TR = pm.Bernoulli('TR', p=pm.math.invlogit((G1-m_tr)/s_tr))
    # x_LO scales G2 down by the factor r_lo when TR == 1.
    x_LO = pm.Deterministic('x_LO', G2*(1-(1-r_lo)*TR))
    # testval=10. — presumably chosen to keep the initial logp finite,
    # avoiding the nan test-value error above; TODO confirm.
    LO = Ncx2('LO', df=1.0, nc=x_LO, testval=10.)
    trace = pm.sample(1000)
pm.traceplot(trace);

However, I think the logp is not correct (comparing against the result from the “Hybrid Bayesian Network getting stuck in PyMC3” example, for instance). You should check your implementation again.