I’ve done some searching on this forum but cannot figure out how to address this issue. Some of the examples I found seem to be specific to pymc3
and/or require use of Numpy masked arrays, which it seems pymc
no longer supports.
I’m using the Multivariate Normal distribution to model the correlations between two observed variables. However, some predictor values have missing data. This missing data is rare (i.e., only one missing value out of every 20 measurements), but does exist. In some cases, the missing data is truly missing. In other cases, it’s right-censored (i.e., it was below the detection limit of the system). For now, I am simply trying to get the following example to run without worrying about the issue of missing vs. censored observations.
Running the code below gives me ValueError: array must not contain infs or NaNs
(full traceback at the bottom). This code runs just fine if I don’t have NaN
values. I’d appreciate suggestions on how to rewrite this code to get it to properly run even in the presence of missing values.
import numpy as np
import pymc as pm
rng = np.random.RandomState(1)
N = 1000
mu_actual = np.array([1.0, -2.0])
sigmas_actual = np.array([0.7, 1.5])
Rho_actual = np.matrix([[1.0, -0.4], [-0.4, 1.0]])
Sigma_actual = np.diag(sigmas_actual) * Rho_actual * np.diag(sigmas_actual)
x = rng.multivariate_normal(mu_actual, Sigma_actual, size=N)
measure_actual = np.array([0.5, 0.75])
y = measure_actual * x + rng.normal(size=x.shape)
# Add missing value into the predictor variable after we have calculted our "toy" outcome variable
x[0, 1] = np.nan
coords = {"axis": ["y", "z"], "axis_bis": ["y", "z"], "obs_id": np.arange(N)}
with pm.Model(coords=coords) as model:
#b_i = pm.Normal('b_intercept', mu=0, sigma=1, shape=2)
b_measure = pm.Normal('b_measure', mu=0, sigma=1, shape=2)
#mu = b_i + b_measure * x
mu = b_measure * x
chol, corr, stds = pm.LKJCholeskyCov(
"chol", n=2, eta=2.0, sd_dist=pm.Exponential.dist(1.0, shape=2)
)
cov = pm.Deterministic("cov", chol.dot(chol.T), dims=("axis", "axis_bis"))
obs = pm.MvNormal("obs", mu, chol=chol, observed=y, dims=("obs_id", "axis"))
trace = pm.sample(nuts_sampler='numpyro')
Error traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
File /usr/local/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
968 try:
969 outputs = (
--> 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
File /usr/local/lib/python3.11/site-packages/pytensor/graph/op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
File /usr/local/lib/python3.11/site-packages/pytensor/tensor/slinalg.py:293, in SolveTriangular.perform(self, node, inputs, outputs)
292 A, b = inputs
--> 293 outputs[0][0] = scipy.linalg.solve_triangular(
294 A,
295 b,
296 lower=self.lower,
297 trans=self.trans,
298 unit_diagonal=self.unit_diagonal,
299 check_finite=self.check_finite,
300 )
File /usr/local/lib/python3.11/site-packages/scipy/linalg/_basic.py:335, in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, check_finite)
334 a1 = _asarray_validated(a, check_finite=check_finite)
--> 335 b1 = _asarray_validated(b, check_finite=check_finite)
336 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
File /usr/local/lib/python3.11/site-packages/scipy/_lib/_util.py:240, in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
239 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 240 a = toarray(a)
241 if not objects_ok:
File /usr/local/lib/python3.11/site-packages/numpy/lib/function_base.py:630, in asarray_chkfinite(a, dtype, order)
629 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 630 raise ValueError(
631 "array must not contain infs or NaNs")
632 return a
ValueError: array must not contain infs or NaNs
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[2], line 34
31 cov = pm.Deterministic("cov", chol.dot(chol.T), dims=("axis", "axis_bis"))
32 obs = pm.MvNormal("obs", mu, chol=chol, observed=y, dims=("obs_id", "axis"))
---> 34 trace = pm.sample()
File /usr/local/lib/python3.11/site-packages/pymc/sampling/mcmc.py:718, in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
716 [kwargs.setdefault(k, v) for k, v in nuts_kwargs.items()]
717 _log.info("Auto-assigning NUTS sampler...")
--> 718 initial_points, step = init_nuts(
719 init=init,
720 chains=chains,
721 n_init=n_init,
722 model=model,
723 random_seed=random_seed_list,
724 progressbar=progressbar,
725 jitter_max_retries=jitter_max_retries,
726 tune=tune,
727 initvals=initvals,
728 **kwargs,
729 )
731 if initial_points is None:
732 # Time to draw/evaluate numeric start points for each chain.
733 ipfns = make_initial_point_fns_per_chain(
734 model=model,
735 overrides=initvals,
736 jitter_rvs=set(),
737 chains=chains,
738 )
File /usr/local/lib/python3.11/site-packages/pymc/sampling/mcmc.py:1363, in init_nuts(init, chains, n_init, model, random_seed, progressbar, jitter_max_retries, tune, initvals, **kwargs)
1356 _log.info(f"Initializing NUTS using {init}...")
1358 cb = [
1359 pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="absolute"),
1360 pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"),
1361 ]
-> 1363 initial_points = _init_jitter(
1364 model,
1365 initvals,
1366 seeds=random_seed_list,
1367 jitter="jitter" in init,
1368 jitter_max_retries=jitter_max_retries,
1369 )
1371 apoints = [DictToArrayBijection.map(point) for point in initial_points]
1372 apoints_data = [apoint.data for apoint in apoints]
File /usr/local/lib/python3.11/site-packages/pymc/sampling/mcmc.py:1257, in _init_jitter(model, initvals, seeds, jitter, jitter_max_retries)
1255 if i < jitter_max_retries:
1256 try:
-> 1257 model.check_start_vals(point)
1258 except SamplingError:
1259 # Retry with a new seed
1260 seed = rng.randint(2**30, dtype=np.int64)
File /usr/local/lib/python3.11/site-packages/pymc/model/core.py:1657, in Model.check_start_vals(self, start)
1651 valid_keys = ", ".join(value_names_set)
1652 raise KeyError(
1653 "Some start parameters do not appear in the model!\n"
1654 f"Valid keys are: {valid_keys}, but {extra_keys} was supplied"
1655 )
-> 1657 initial_eval = self.point_logps(point=elem)
1659 if not all(np.isfinite(v) for v in initial_eval.values()):
1660 raise SamplingError(
1661 "Initial evaluation of model at starting point failed!\n"
1662 f"Starting values:\n{elem}\n\n"
1663 f"Logp initial evaluation results:\n{initial_eval}\n"
1664 "You can call `model.debug()` for more details."
1665 )
File /usr/local/lib/python3.11/site-packages/pymc/model/core.py:1692, in Model.point_logps(self, point, round_vals)
1686 factors = self.basic_RVs + self.potentials
1687 factor_logps_fn = [pt.sum(factor) for factor in self.logp(factors, sum=False)]
1688 return {
1689 factor.name: np.round(np.asarray(factor_logp), round_vals)
1690 for factor, factor_logp in zip(
1691 factors,
-> 1692 self.compile_fn(factor_logps_fn)(point),
1693 )
1694 }
File /usr/local/lib/python3.11/site-packages/pymc/pytensorf.py:600, in PointFunc.__call__(self, state)
599 def __call__(self, state):
--> 600 return self.f(**state)
File /usr/local/lib/python3.11/site-packages/pytensor/compile/function/types.py:983, in Function.__call__(self, *args, **kwargs)
981 if hasattr(self.vm, "thunks"):
982 thunk = self.vm.thunks[self.vm.position_of_error]
--> 983 raise_with_op(
984 self.maker.fgraph,
985 node=self.vm.nodes[self.vm.position_of_error],
986 thunk=thunk,
987 storage_map=getattr(self.vm, "storage_map", None),
988 )
989 else:
990 # old-style linkers raise their own exceptions
991 raise
File /usr/local/lib/python3.11/site-packages/pytensor/link/utils.py:531, in raise_with_op(fgraph, node, thunk, exc_info, storage_map)
526 warnings.warn(
527 f"{exc_type} error does not allow us to add an extra error message"
528 )
529 # Some exception need extra parameter in inputs. So forget the
530 # extra long error message in that case.
--> 531 raise exc_value.with_traceback(exc_trace)
File /usr/local/lib/python3.11/site-packages/pytensor/compile/function/types.py:970, in Function.__call__(self, *args, **kwargs)
967 t0_fn = time.perf_counter()
968 try:
969 outputs = (
--> 970 self.vm()
971 if output_subset is None
972 else self.vm(output_subset=output_subset)
973 )
974 except Exception:
975 restore_defaults()
File /usr/local/lib/python3.11/site-packages/pytensor/graph/op.py:518, in Op.make_py_thunk.<locals>.rval(p, i, o, n)
516 @is_thunk_type
517 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 518 r = p(n, [x[0] for x in i], o)
519 for o in node.outputs:
520 compute_map[o][0] = True
File /usr/local/lib/python3.11/site-packages/pytensor/tensor/slinalg.py:293, in SolveTriangular.perform(self, node, inputs, outputs)
291 def perform(self, node, inputs, outputs):
292 A, b = inputs
--> 293 outputs[0][0] = scipy.linalg.solve_triangular(
294 A,
295 b,
296 lower=self.lower,
297 trans=self.trans,
298 unit_diagonal=self.unit_diagonal,
299 check_finite=self.check_finite,
300 )
File /usr/local/lib/python3.11/site-packages/scipy/linalg/_basic.py:335, in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, check_finite)
267 """
268 Solve the equation `a x = b` for `x`, assuming a is a triangular matrix.
269
(...)
331
332 """
334 a1 = _asarray_validated(a, check_finite=check_finite)
--> 335 b1 = _asarray_validated(b, check_finite=check_finite)
336 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
337 raise ValueError('expected square matrix')
File /usr/local/lib/python3.11/site-packages/scipy/_lib/_util.py:240, in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
238 raise ValueError('masked arrays are not supported')
239 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 240 a = toarray(a)
241 if not objects_ok:
242 if a.dtype is np.dtype('O'):
File /usr/local/lib/python3.11/site-packages/numpy/lib/function_base.py:630, in asarray_chkfinite(a, dtype, order)
628 a = asarray(a, dtype=dtype, order=order)
629 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 630 raise ValueError(
631 "array must not contain infs or NaNs")
632 return a
ValueError: array must not contain infs or NaNs
Apply node that caused the error: SolveTriangular{trans=0, unit_diagonal=False, lower=True, check_finite=True, b_ndim=2}(Switch.0, Composite{(i2 - (i0 * i1))}.0)
Toposort index: 34
Inputs types: [TensorType(float64, shape=(None, None)), TensorType(float64, shape=(2, 1000))]
Inputs shapes: [(2, 2), (2, 1000)]
Inputs strides: [(16, 8), (8000, 8)]
Inputs values: [array([[ 0.96788863, 0. ],
[-0.29766344, 2.12449851]]), 'not shown']
Outputs clients: [[Transpose{axes=[1, 0]}(SolveTriangular{trans=0, unit_diagonal=False, lower=True, check_finite=True, b_ndim=2}.0)]]
HINT: Re-running with most PyTensor optimizations disabled could provide a back-trace showing when this node was created. This can be done by setting the PyTensor flag 'optimizer=fast_compile'. If that does not work, PyTensor optimizations can be disabled with 'optimizer=None'.
HINT: Use the PyTensor flag `exception_verbosity=high` for a debug print-out and storage map footprint of this Apply node.