Model with conditional parameters causing mass matrix to contain zeros on diagonal

I have a model that attempts to estimate parameters of a curve that has a linear component followed by an exponential curve. That is, for time-t < t_2 the curve is linear, and for t \geq t_2 the curve is exponential. The conditional equation for the curve is

\begin{align} H(t_n) = \begin{cases} \frac{H_{ult}}{2} \big( 1- \exp(-B \: t_n^C) \big) & \text{if } t < t_2 \\ \frac{H_{ult}}{2} \big( 1- \exp(-B \: t_n^C) \big) + \frac{H_{ult}}{2} \frac{t_n-t_2}{t_n-t_2+D} & \text{otherwise} \end{cases} \end{align}

I’m attempting to estimate all five parameters B, C, D, t_2, H_{ult}. As you can see, for times-t<t_2 parameter D does not exist and i believe this is why i am getting the β€œMass matrix contains zeros on the diagonal” error message. Below is the full output:

WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [sigma, Hult, t2, D, C, B]
 40%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹                                                                                           | 403/1000 [00:12<00:17, 33.28it/s]
Traceback (most recent call last):
  File "Heat.py", line 47, in <module>
    trace = pm.sample()
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 469, in sample
    trace = _sample_many(**sample_args)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 515, in _sample_many
    step=step, random_seed=random_seed[i], **kwargs)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 559, in _sample
    for it, strace in enumerate(sampling):
  File "/home/nick/.local/lib/python3.6/site-packages/tqdm/_tqdm.py", line 931, in __iter__
    for obj in iterable:
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 655, in _iter_sample
    point, states = step.step(point)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
    apoint, stats = self.astep(array)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 115, in astep
    self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 201, in raise_ok
    raise ValueError('\n'.join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal. 
The derivative of RV `t2`.ravel()[0] is zero.
The derivative of RV `D`.ravel()[0] is zero.

The code used to obtain this error is:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt
from theano import shared, scan
from theano.ifelse import ifelse

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
H0 = tt.as_tensor_variable(npy.asarray(0., dtype='float64'))

def HeatCurve(B, C, D, t2, Hult, tn):
    Hn = ifelse(tt.lt(tn,t2),
        0.5*Hult*(1-tt.exp(-B*tn**C)),
        0.5*Hult*(1-tt.exp(-B*tn**C))+0.5*Hult*(tn-t2)/(tn-t2+D))
    return Hn

def CalHeat(tn, Hn_, B, C, D, t2, Hult):
    Hn = HeatCurve(B, C, D, t2, Hult, tn)
    return Hn

""" Build model """
with pm.Model() as TempModel:
    BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
    B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
    C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
    D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
    t2 = pm.Normal('t2', mu=3.5, sd=0.35, testval=3.5)
    Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

    (Hn, _) = scan(
        fn = CalHeat,
        sequences = shared(tSeq),
        outputs_info = H0,
        non_sequences = [B, C, D, t2, Hult])

    Hn_trim = Hn[rows]
    Hm = pm.Deterministic('Hm', Hn_trim)
    sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
    H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

    trace = pm.sample()

When i do a gradient check with
dHdB, dHdC, dHdD, dHdt2, dHdHult = tt.grad(Hn, [B, C, D, t2, Hult])
the dHdD and dHdt2 are indeed zero for t<t_2 as expected, therefore the error seems to make sense. Does anyone have any suggestions for how i might avoid this error?

You can try a break point model (ideally with smooth switch point), for example see: Switch point Metropolis tuning

That sounds like a promising suggestion but i don’t think i am quite there yet - still getting a similar error. Possibly my implementation is off? See below:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt
from theano import shared, scan
from theano.ifelse import ifelse

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
H0 = tt.as_tensor_variable(npy.asarray(0., dtype='float64'))

def logistic(L, x0, t, k=200):
    return L/(1+tt.exp(-k*(t-x0)))

def HeatCurve(B, C, D, t2, Hult, tn):
    switch = logistic(1., t2, tn)
    Hn = 0.5*Hult*(1-tt.exp(-B*tn**C))+switch*0.5*Hult*(tn-t2)/(tn-t2+D)
    return Hn

def CalHeat(tn, Hn_, B, C, D, t2, Hult):
    Hn = HeatCurve(B, C, D, t2, Hult, tn)
    return Hn

""" Build model """
with pm.Model() as TempModel:
    BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
    B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
    C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
    D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
    t2 = pm.Normal('t2', mu=3.5, sd=0.35, testval=3.5)
    Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

    (Hn, _) = scan(
        fn = CalHeat,
        sequences = shared(tSeq),
        outputs_info = H0,
        non_sequences = [B, C, D, t2, Hult])

    Hn_trim = Hn[rows]
    Hm = pm.Deterministic('Hm', Hn_trim)
    sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
    H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

    trace = pm.sample()

Full output below

WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Sequential sampling (2 chains in 1 job)
NUTS: [sigma, Hult, t2, D, C, B]
 41%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ                                                                                           | 406/1000 [00:08<00:11, 50.15it/s]
Traceback (most recent call last):
  File "Heat.py", line 56, in <module>
    trace = pm.sample()
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 469, in sample
    trace = _sample_many(**sample_args)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 515, in _sample_many
    step=step, random_seed=random_seed[i], **kwargs)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 559, in _sample
    for it, strace in enumerate(sampling):
  File "/home/nick/.local/lib/python3.6/site-packages/tqdm/_tqdm.py", line 931, in __iter__
    for obj in iterable:
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 655, in _iter_sample
    point, states = step.step(point)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
    apoint, stats = self.astep(array)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 115, in astep
    self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
  File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 201, in raise_ok
    raise ValueError('\n'.join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal. 
The derivative of RV `t2`.ravel()[0] is zero.
The derivative of RV `D`.ravel()[0] is zero.

What do you think?
Cheers.

I guess i don’t need scan for this task:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)

def logistic(L, x0, t=tSeq, k=500):
    return L/(1+tt.exp(-k*(t-x0)))

""" Build model """
with pm.Model() as TempModel:
    BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
    B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
    C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
    D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
    switchpoint = pm.Beta('switchpoint', alpha=1, beta=1, testval=0.5)
    t2 = tend*switchpoint
    Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

    Hn1 = 0.5*Hult*(1.-tt.exp(-1.*B*tSeq**C))
    Hn2 = 0.5*Hult*(tSeq-t2)/(tSeq-t2+D)
    Hn = Hn1 + logistic(Hn2, t2)

    Hn_trim = Hn[rows]
    Hm = pm.Deterministic('Hm', Hn_trim)
    Hm_printed = tt.printing.Print('Hm_printed')(Hm)
    sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
    H_obs = pm.Normal('H_obs', mu=Hm_printed, sd=sigma, observed=Hd)
    trace = pm.sample()

Still getting the same error though unfortunately.

** Correction on original post: both components of the curve are non-linear **

It seems the disparity between my parameters B and H_{ult} was the second issue. For my synthetic data, I had B=1.127e-2 and H_{ult}=3.38e+5. If I change the units for H_{ult} to reduce the magnitude by three orders, I no longer get the β€œmass matrix contains zero on diagonal” error and the sampling process runs to the end. I think there is still room for improvement because the disparity is till quite high but at least it runs now. There is, however, divergences but i guess that should be a question for another thread. Here is the code that runs for anyone that is interested:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt

npy.random.seed(12345)

rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
Nd = len(rows)

def logistic_(L, x0, t=tSeq, k=500):
    return L/(1+npy.exp(-k*(t-x0)))

""" Synthesise Data """
B_=1.172e-2; C_=1.6; D_=6.2; t2_=3.5; Hult_=3.38e2; err_sd=0.025
H_true = 0.5*Hult_*(1.-npy.exp(-B_*tSeq**C_))+logistic_(0.5, t2_)*(tSeq-t2_)/(tSeq-t2_+D_)
e = npy.random.randn(Nd)*err_sd
Hd = H_true[rows] * (npy.ones(Nd) + e)

def logistic(L, x0, t=tSeq, k=500):
    return L/(1+tt.exp(-k*(t-x0)))

""" Build model """
with pm.Model() as TempModel:
    B = pm.Normal('B', mu=B_, sd=B_, testval=B_)
    C = pm.Normal('C', mu=C_, sd=C_, testval=C_)
    D = pm.Normal('D', mu=D_, sd=D_, testval=D_)
    switch = pm.Beta('switch', alpha=1, beta=1, testval=0.1)
    t2 = switch*tend
    Hult = pm.Normal('Hult', mu=Hult_, sd=Hult_, testval=Hult_)

    factor = logistic(0.5, t2)
    Hn = 0.5*(Hult)*(1.-tt.exp(-(B)*tSeq**C)) + factor*(Hult)*(tSeq-t2)/(tSeq-t2+D)

    Hn_trim = Hn[rows]
    Hm = pm.Deterministic('Hm', Hn_trim)
    sigma = pm.HalfNormal('sigma', sd=1, shape=len(rows))
    H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

    trace = pm.sample(1000, tune=1000, cores=2, chains=2)
1 Like