 # Model with conditional parameters causing mass matrix to contain zeros on diagonal

I have a model that attempts to estimate parameters of a curve that has a linear component followed by an exponential curve. That is, for time-t < t_2 the curve is linear, and for t \geq t_2 the curve is exponential. The conditional equation for the curve is

\begin{align} H(t_n) = \begin{cases} \frac{H_{ult}}{2} \big( 1- \exp(-B \: t_n^C) \big) & \text{if } t < t_2 \\ \frac{H_{ult}}{2} \big( 1- \exp(-B \: t_n^C) \big) + \frac{H_{ult}}{2} \frac{t_n-t_2}{t_n-t_2+D} & \text{otherwise} \end{cases} \end{align}

I’m attempting to estimate all five parameters B, C, D, t_2, H_{ult}. As you can see, for times-t<t_2 parameter D does not exist and i believe this is why i am getting the “Mass matrix contains zeros on the diagonal” error message. Below is the full output:

WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Auto-assigning NUTS sampler...
Sequential sampling (2 chains in 1 job)
NUTS: [sigma, Hult, t2, D, C, B]
40%|█████████████████████████████████████████████████████████████▋                                                                                           | 403/1000 [00:12<00:17, 33.28it/s]
Traceback (most recent call last):
File "Heat.py", line 47, in <module>
trace = pm.sample()
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 469, in sample
trace = _sample_many(**sample_args)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 515, in _sample_many
step=step, random_seed=random_seed[i], **kwargs)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 559, in _sample
for it, strace in enumerate(sampling):
File "/home/nick/.local/lib/python3.6/site-packages/tqdm/_tqdm.py", line 931, in __iter__
for obj in iterable:
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 655, in _iter_sample
point, states = step.step(point)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
apoint, stats = self.astep(array)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 115, in astep
self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 201, in raise_ok
raise ValueError('\n'.join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV t2.ravel() is zero.
The derivative of RV D.ravel() is zero.


The code used to obtain this error is:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt
from theano import shared, scan
from theano.ifelse import ifelse

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
H0 = tt.as_tensor_variable(npy.asarray(0., dtype='float64'))

def HeatCurve(B, C, D, t2, Hult, tn):
Hn = ifelse(tt.lt(tn,t2),
0.5*Hult*(1-tt.exp(-B*tn**C)),
0.5*Hult*(1-tt.exp(-B*tn**C))+0.5*Hult*(tn-t2)/(tn-t2+D))
return Hn

def CalHeat(tn, Hn_, B, C, D, t2, Hult):
Hn = HeatCurve(B, C, D, t2, Hult, tn)
return Hn

""" Build model """
with pm.Model() as TempModel:
BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
t2 = pm.Normal('t2', mu=3.5, sd=0.35, testval=3.5)
Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

(Hn, _) = scan(
fn = CalHeat,
sequences = shared(tSeq),
outputs_info = H0,
non_sequences = [B, C, D, t2, Hult])

Hn_trim = Hn[rows]
Hm = pm.Deterministic('Hm', Hn_trim)
sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

trace = pm.sample()


When i do a gradient check with
dHdB, dHdC, dHdD, dHdt2, dHdHult = tt.grad(Hn, [B, C, D, t2, Hult])
the dHdD and dHdt2 are indeed zero for t<t_2 as expected, therefore the error seems to make sense. Does anyone have any suggestions for how i might avoid this error?

You can try a break point model (ideally with smooth switch point), for example see: Switch point Metropolis tuning

That sounds like a promising suggestion but i don’t think i am quite there yet - still getting a similar error. Possibly my implementation is off? See below:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt
from theano import shared, scan
from theano.ifelse import ifelse

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
H0 = tt.as_tensor_variable(npy.asarray(0., dtype='float64'))

def logistic(L, x0, t, k=200):
return L/(1+tt.exp(-k*(t-x0)))

def HeatCurve(B, C, D, t2, Hult, tn):
switch = logistic(1., t2, tn)
Hn = 0.5*Hult*(1-tt.exp(-B*tn**C))+switch*0.5*Hult*(tn-t2)/(tn-t2+D)
return Hn

def CalHeat(tn, Hn_, B, C, D, t2, Hult):
Hn = HeatCurve(B, C, D, t2, Hult, tn)
return Hn

""" Build model """
with pm.Model() as TempModel:
BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
t2 = pm.Normal('t2', mu=3.5, sd=0.35, testval=3.5)
Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

(Hn, _) = scan(
fn = CalHeat,
sequences = shared(tSeq),
outputs_info = H0,
non_sequences = [B, C, D, t2, Hult])

Hn_trim = Hn[rows]
Hm = pm.Deterministic('Hm', Hn_trim)
sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

trace = pm.sample()


Full output below

WARNING (theano.tensor.blas): Using NumPy C-API based implementation for BLAS functions.
Auto-assigning NUTS sampler...
Sequential sampling (2 chains in 1 job)
NUTS: [sigma, Hult, t2, D, C, B]
41%|██████████████████████████████████████████████████████████████                                                                                           | 406/1000 [00:08<00:11, 50.15it/s]
Traceback (most recent call last):
File "Heat.py", line 56, in <module>
trace = pm.sample()
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 469, in sample
trace = _sample_many(**sample_args)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 515, in _sample_many
step=step, random_seed=random_seed[i], **kwargs)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 559, in _sample
for it, strace in enumerate(sampling):
File "/home/nick/.local/lib/python3.6/site-packages/tqdm/_tqdm.py", line 931, in __iter__
for obj in iterable:
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/sampling.py", line 655, in _iter_sample
point, states = step.step(point)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
apoint, stats = self.astep(array)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 115, in astep
self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
File "/home/nick/.local/lib/python3.6/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 201, in raise_ok
raise ValueError('\n'.join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV t2.ravel() is zero.
The derivative of RV D.ravel() is zero.


What do you think?
Cheers.

I guess i don’t need scan for this task:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt

Hd = npy.array([629.52234607, 18969.57367046, 238960.91467992, 328497.85275376])
rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)

def logistic(L, x0, t=tSeq, k=500):
return L/(1+tt.exp(-k*(t-x0)))

""" Build model """
with pm.Model() as TempModel:
BoundedNormal = pm.Bound(pm.Normal, lower=1e-6, upper=1e6)
B = BoundedNormal('B', mu=0.01172, sd=0.001172, testval=0.01172)
C = BoundedNormal('C', mu=1.6, sd=0.16, testval=1.6)
D = pm.Normal('D', mu=6.2, sd=0.62, testval=6.2)
switchpoint = pm.Beta('switchpoint', alpha=1, beta=1, testval=0.5)
t2 = tend*switchpoint
Hult = BoundedNormal('Hult', mu=338e3, sd=338e2, testval=338e3)

Hn1 = 0.5*Hult*(1.-tt.exp(-1.*B*tSeq**C))
Hn2 = 0.5*Hult*(tSeq-t2)/(tSeq-t2+D)
Hn = Hn1 + logistic(Hn2, t2)

Hn_trim = Hn[rows]
Hm = pm.Deterministic('Hm', Hn_trim)
Hm_printed = tt.printing.Print('Hm_printed')(Hm)
sigma = pm.HalfNormal('sigma', sd=1)#, shape=len(rows))
H_obs = pm.Normal('H_obs', mu=Hm_printed, sd=sigma, observed=Hd)
trace = pm.sample()


Still getting the same error though unfortunately.

** Correction on original post: both components of the curve are non-linear **

It seems the disparity between my parameters B and H_{ult} was the second issue. For my synthetic data, I had B=1.127e-2 and H_{ult}=3.38e+5. If I change the units for H_{ult} to reduce the magnitude by three orders, I no longer get the “mass matrix contains zero on diagonal” error and the sampling process runs to the end. I think there is still room for improvement because the disparity is till quite high but at least it runs now. There is, however, divergences but i guess that should be a question for another thread. Here is the code that runs for anyone that is interested:

import numpy as npy
import pymc3 as pm
from theano import tensor as tt

npy.random.seed(12345)

rows = npy.array([2, 9, 37, 81])
tend = 21
dt = 0.25
tSeq = npy.arange(0, tend+dt, dt)
Nd = len(rows)

def logistic_(L, x0, t=tSeq, k=500):
return L/(1+npy.exp(-k*(t-x0)))

""" Synthesise Data """
B_=1.172e-2; C_=1.6; D_=6.2; t2_=3.5; Hult_=3.38e2; err_sd=0.025
H_true = 0.5*Hult_*(1.-npy.exp(-B_*tSeq**C_))+logistic_(0.5, t2_)*(tSeq-t2_)/(tSeq-t2_+D_)
e = npy.random.randn(Nd)*err_sd
Hd = H_true[rows] * (npy.ones(Nd) + e)

def logistic(L, x0, t=tSeq, k=500):
return L/(1+tt.exp(-k*(t-x0)))

""" Build model """
with pm.Model() as TempModel:
B = pm.Normal('B', mu=B_, sd=B_, testval=B_)
C = pm.Normal('C', mu=C_, sd=C_, testval=C_)
D = pm.Normal('D', mu=D_, sd=D_, testval=D_)
switch = pm.Beta('switch', alpha=1, beta=1, testval=0.1)
t2 = switch*tend
Hult = pm.Normal('Hult', mu=Hult_, sd=Hult_, testval=Hult_)

factor = logistic(0.5, t2)
Hn = 0.5*(Hult)*(1.-tt.exp(-(B)*tSeq**C)) + factor*(Hult)*(tSeq-t2)/(tSeq-t2+D)

Hn_trim = Hn[rows]
Hm = pm.Deterministic('Hm', Hn_trim)
sigma = pm.HalfNormal('sigma', sd=1, shape=len(rows))
H_obs = pm.Normal('H_obs', mu=Hm, sd=sigma, observed=Hd)

trace = pm.sample(1000, tune=1000, cores=2, chains=2)
1 Like