I am trying to run multivariate linear regression using matrix normal distribution. I am facing the problem of a mass matrix contains zero on the diagonal. In addition, there are other warnings when I run the code, I am not sure is it because of the code or theano version. Code takes a lot of time given I am running on very small data. Below is the code-
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cmap
cm = cmap.inferno
import numpy as np
import scipy as sp
import theano
import theano.tensor as tt
import theano.tensor.nlinalg
import sys
sys.path.insert(0, "../../..")
import pymc3 as pm
from sklearn import preprocessing
# simulated data
np.random.seed(20090425)
n = 30
p = 2
p_dash = 2
q= 3
y = np.vstack((np.random.normal(0.1, 1, (20,p)),np.random.normal(0.5, 2, (10,p))))
X = np.vstack((np.random.normal(1, 1, (20,p_dash)),np.random.normal(2, 2, (10,p_dash))))
#X = np.hstack((X,np.ones(n)))
B = np.random.normal(0.2,2,size=(p_dash,p))
y = np.matmul(X,B)
y = preprocessing.scale(y)
from pymc3 import (
NUTS,
Deterministic,
HalfCauchy,
Model,
MvNormal,
find_MAP,
sample,
summary,
traceplot,
)
import theano.tensor as T
true_scale = 3
from theano.tensor import _shared
with pm.Model() as model:
mu = np.zeros(n)
tau_sq = pm.TruncatedNormal('tau_sq', mu=0, sigma=30, lower=0)
# beta distribution
# Setup right cholesky matrix
sd_dist_beta = pm.HalfCauchy.dist(beta=10, shape=p)
colchol_packed_beta = pm.LKJCholeskyCov('colcholpacked_beta', n=p, eta=2, sd_dist=sd_dist_beta)
colchol_beta = pm.expand_packed_triangular(p, colchol_packed_beta)
# Setup left covariance matrix
rowchol_packed_beta = pm.LKJCholeskyCov('rowcholpacked_beta', n=p_dash, eta=2, sd_dist=sd_dist_beta)
rowchol_beta = pm.expand_packed_triangular(p_dash, colchol_packed_beta)
mu_beta = pm.MvNormal('mu_beta',mu=np.zeros(p),cov= np.eye(p)*tau_sq, shape=(p_dash,p))
beta = pm.MatrixNormal("beta", mu=mu_beta, colchol=colchol_beta, rowchol=rowchol_beta, shape=(p_dash,p))
sd_dist_y = pm.HalfCauchy.dist(beta=7, shape=p)
colchol_packed = pm.LKJCholeskyCov('colcholpacked', n=p, eta=2,sd_dist=sd_dist_y)
colchol = pm.expand_packed_triangular(p, colchol_packed)
radon_est = pm.math.dot(X,beta)
y_obs = pm.MatrixNormal("y_obs", mu=radon_est, colchol=colchol, rowcov=np.eye(n),observed=y,shape=(n,p))
with model:
trace = pm.sample(1000, tune=2000)
Below is the error-
/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py:468: FutureWarning: In an upcoming release, pm.sample will return an `arviz.InferenceData` object instead of a `MultiTrace` by default. You can pass return_inferencedata=True or return_inferencedata=False to be safe and silence this warning.
FutureWarning,
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
ERROR (theano.gof.opt): SeqOptimizer apply <theano.tensor.opt.FusionOptimizer object at 0x7f318b51a7d0>
ERROR (theano.gof.opt): Traceback:
ERROR (theano.gof.opt): Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/scalar/basic.py", line 358, in filter
" (%s) and allow_downcast is not True" % self.dtype
TypeError: Value cannot accurately be converted to dtype (float64) and allow_downcast is not True
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/opt.py", line 249, in apply
sub_prof = optimizer.optimize(fgraph)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/opt.py", line 89, in optimize
ret = self.apply(fgraph, *args, **kwargs)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/tensor/opt.py", line 7774, in apply
new_outputs = self.optimizer(node)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/tensor/opt.py", line 7596, in local_fuse
tmp.tag.test_value = tv.flatten()[0]
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/gof/utils.py", line 277, in __setattr__
obj = self.attr_filter(obj)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/theano/scalar/basic.py", line 364, in filter
e,
TypeError: ("Could not convert <class 'numpy.float64'> (value=nan) to float64", TypeError('Value cannot accurately be converted to dtype (float64) and allow_downcast is not True'))
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [colcholpacked, beta, mu_beta, rowcholpacked_beta, colcholpacked_beta, tau_sq]
24.47% [2936/12000 09:57<30:45 Sampling 4 chains, 0 divergences]
---------------------------------------------------------------------------
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 138, in run
self._start_loop()
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 192, in _start_loop
point, stats = self._compute_point()
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py", line 217, in _compute_point
point, stats = self._step_method.step(self._point)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py", line 273, in step
apoint, stats = self.astep(array)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 148, in astep
self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
File "/home/dushyant/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/quadpotential.py", line 272, in raise_ok
raise ValueError("\n".join(errmsg))
ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV `beta`.ravel()[3] is zero.
"""
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in run()
137 self._point = self._make_numpy_refs()
--> 138 self._start_loop()
139 except KeyboardInterrupt:
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in _start_loop()
191 try:
--> 192 point, stats = self._compute_point()
193 except SamplingError as e:
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in _compute_point()
216 if self._step_method.generates_stats:
--> 217 point, stats = self._step_method.step(self._point)
218 else:
~/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/arraystep.py in step()
272 if self.generates_stats:
--> 273 apoint, stats = self.astep(array)
274 point = self._logp_dlogp_func.array_to_full_dict(apoint)
~/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/base_hmc.py in astep()
147 ]
--> 148 self.potential.raise_ok(self._logp_dlogp_func._ordering.vmap)
149 message_energy = (
~/anaconda3/lib/python3.7/site-packages/pymc3/step_methods/hmc/quadpotential.py in raise_ok()
271 )
--> 272 raise ValueError("\n".join(errmsg))
273
ValueError: Mass matrix contains zeros on the diagonal.
The derivative of RV `beta`.ravel()[3] is zero.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
<ipython-input-4-0e31cfaf8a73> in <module>
83
84 with model:
---> 85 trace = pm.sample(1000, tune=2000)
~/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
555 _print_step_hierarchy(step)
556 try:
--> 557 trace = _mp_sample(**sample_args, **parallel_args)
558 except pickle.PickleError:
559 _log.warning("Could not pickle model, sampling singlethreaded.")
~/anaconda3/lib/python3.7/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, pickle_backend, **kwargs)
1474 try:
1475 with sampler:
-> 1476 for draw in sampler:
1477 trace = traces[draw.chain - chain]
1478 if trace.supports_sampler_stats and draw.stats is not None:
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in __iter__(self)
478
479 while self._active:
--> 480 draw = ProcessAdapter.recv_draw(self._active)
481 proc, is_last, draw, tuning, stats, warns = draw
482 self._total_draws += 1
~/anaconda3/lib/python3.7/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
358 else:
359 error = RuntimeError("Chain %s failed." % proc.chain)
--> 360 raise error from old_error
361 elif msg[0] == "writing_done":
362 proc._readable = True
I think I have correctly given prior and everything, but I am not sure what is wrong here. Also, is matrix normal a good prior on beta when performing Bayesian multivariate regression? I feel like I have overcomplicated things.