# Chain failed due to mass matrix going to zero in matrix normal distribution

I am trying to run multivariate linear regression using matrix normal distribution. I am facing the problem of a mass matrix contains zero on the diagonal. In addition, there are other warnings when I run the code, I am not sure is it because of the code or theano version. Code takes a lot of time given I am running on very small data. Below is the code-

``````%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
cm = cmap.inferno

import numpy as np
import scipy as sp
import theano
import theano.tensor as tt
import theano.tensor.nlinalg
import sys
sys.path.insert(0, "../../..")
import pymc3 as pm

from sklearn import preprocessing
# simulated data
np.random.seed(20090425)
n = 30
p = 2
p_dash = 2
q= 3
y = np.vstack((np.random.normal(0.1, 1, (20,p)),np.random.normal(0.5, 2, (10,p))))
X = np.vstack((np.random.normal(1, 1, (20,p_dash)),np.random.normal(2, 2, (10,p_dash))))
#X = np.hstack((X,np.ones(n)))

B = np.random.normal(0.2,2,size=(p_dash,p))
y = np.matmul(X,B)
y = preprocessing.scale(y)

from pymc3 import (
NUTS,
Deterministic,
HalfCauchy,
Model,
MvNormal,
find_MAP,
sample,
summary,
traceplot,
)
import theano.tensor as T

true_scale = 3
from theano.tensor import _shared

with pm.Model() as model:

mu = np.zeros(n)

tau_sq = pm.TruncatedNormal('tau_sq', mu=0, sigma=30, lower=0)

# beta distribution
# Setup right cholesky matrix
sd_dist_beta = pm.HalfCauchy.dist(beta=10, shape=p)
colchol_packed_beta = pm.LKJCholeskyCov('colcholpacked_beta', n=p, eta=2, sd_dist=sd_dist_beta)
colchol_beta = pm.expand_packed_triangular(p, colchol_packed_beta)

# Setup left covariance matrix
rowchol_packed_beta = pm.LKJCholeskyCov('rowcholpacked_beta', n=p_dash, eta=2, sd_dist=sd_dist_beta)
rowchol_beta = pm.expand_packed_triangular(p_dash, colchol_packed_beta)

mu_beta = pm.MvNormal('mu_beta',mu=np.zeros(p),cov= np.eye(p)*tau_sq, shape=(p_dash,p))
beta = pm.MatrixNormal("beta", mu=mu_beta, colchol=colchol_beta, rowchol=rowchol_beta, shape=(p_dash,p))

sd_dist_y = pm.HalfCauchy.dist(beta=7, shape=p)
colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=p, eta=2,sd_dist=sd_dist_y)
colchol = pm.expand_packed_triangular(p, colchol_packed)
y_obs = pm.MatrixNormal("y_obs", mu=radon_est, colchol=colchol, rowcov=np.eye(n),observed=y,shape=(n,p))

with model:
trace = pm.sample(1000, tune=2000)
``````

Below is the error-

``````/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py:468: FutureWarning: In an upcoming release, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
FutureWarning,
Auto-assigning NUTS sampler...
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x7f318b51a7d0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/scalar/basic.py", line 358, in filter
" (%s) and allow_downcast is not True" % self.dtype
TypeError: Value cannot accurately be converted to dtype (float64) and allow_downcast is not True

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/opt.py", line 249, in apply
sub_prof = optimizer.optimize(fgraph)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/opt.py", line 89, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/tensor/opt.py", line 7774, in apply
new_outputs = self.optimizer(node)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/tensor/opt.py", line 7596, in local_fuse
tmp.tag.test_value = tv.flatten()
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/utils.py", line 277, in __setattr__
obj = self.attr_filter(obj)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/scalar/basic.py", line 364, in filter
e,
TypeError: ("Could not convert <class 'numpy.float64'> (value=nan) to float64", TypeError('Value cannot accurately be converted to dtype (float64) and allow_downcast is not True'))

Multiprocess sampling (4 chains in 4 jobs)
NUTS: [colcholpacked, beta, mu_beta, rowcholpacked_beta, colcholpacked_beta, tau_sq]

24.47% [2936/12000 09:57<30:45 Sampling 4 chains, 0 divergences]
---------------------------------------------------------------------------
RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 138, in run
self._start_loop()
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 192, in _start_loop
point, stats = self._compute_point()
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 217, in _compute_point
point, stats = self._step_method.step(self._point)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py", line 273, in step
apoint, stats = self.astep(array)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 148, in astep
self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 272, in raise_ok
raise ValueError("\n".join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV `beta`.ravel() is zero.
"""

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in run()
137             self._point = self._make_numpy_refs()
--> 138             self._start_loop()
139         except KeyboardInterrupt:

~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in _start_loop()
191                 try:
--> 192                     point, stats = self._compute_point()
193                 except SamplingError as e:

~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in _compute_point()
216         if self._step_method.generates_stats:
--> 217             point, stats = self._step_method.step(self._point)
218         else:

~/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py in step()
272         if self.generates_stats:
--> 273             apoint, stats = self.astep(array)
274             point = self._logp_dlogp_func.array_to_full_dict(apoint)

~/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py in astep()
147             ]
--> 148             self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
149             message_energy = (

271                 )
--> 272             raise ValueError("\n".join(errmsg))
273

ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV `beta`.ravel() is zero.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
<ipython-input-4-0e31cfaf8a73> in <module>
83
84 with model:
---> 85     trace = pm.sample(1000, tune=2000)

~/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
555         _print_step_hierarchy(step)
556         try:
--> 557             trace = _mp_sample(**sample_args, **parallel_args)
558         except pickle.PickleError:
559             _log.warning("Could not pickle model, sampling singlethreaded.")

~/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, pickle_backend, **kwargs)
1474         try:
1475             with sampler:
-> 1476                 for draw in sampler:
1477                     trace = traces[draw.chain - chain]
1478                     if trace.supports_sampler_stats and draw.stats is not None:

~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in __iter__(self)
478
479         while self._active:
481             proc, is_last, draw, tuning, stats, warns = draw
482             self._total_draws += 1

~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
358             else:
359                 error = RuntimeError("Chain %s failed." % proc.chain)
--> 360             raise error from old_error
361         elif msg == "writing_done":
Which version of PyMC3 / Theano are you using? Could you use `watermark` to provide the version numbers?