Hi,
In this case, I have four kernels: one for the long-term trend, one for the short-term trend, one for the seasonal behavior, and one for noise. I need to regularize the effect of the short-term trend, otherwise it overfits pretty quickly and the predictions go through the roof. The length-scale parameters have prior distributions defined according to the article by Michael Betancourt, but he doesn't say much about the amplitudes.
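To make that concrete: the length scales get inverse-gamma (or gamma) priors following the article, and the "regularization" of the short-term component is just a small-scale half-normal on its amplitude, shrinking it towards zero. A standalone sketch, using the same values as in the full model further down:
import pymc3 as pm

with pm.Model():
    # Inverse-gamma prior on the long-term length scale, keeping mass away
    # from both zero and implausibly large values.
    l_t = pm.InverseGamma('l_t', 4, 220)
    # Small-scale half-normal on the short-term amplitude shrinks that
    # component towards zero; this is the regularizing effect I am after.
    η_ts = pm.HalfNormal('η_ts', 0.15)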
When trying to regularize the hyperparameters of the GP, I often get the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/compile/function/types.py in __call__(self, *args, **kwargs)
972 outputs = (
--> 973 self.fn()
974 if output_subset is None
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gof/op.py in rval(p, i, o, n)
912 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 913 r = p(n, [x[0] for x in i], o)
914 for o in node.outputs:
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/tensor/slinalg.py in perform(self, node, inputs, output_storage)
251 elif self.A_structure == "upper_triangular":
--> 252 rval = scipy.linalg.solve_triangular(A, b, lower=False)
253 else:
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/linalg/basic.py in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, debug, check_finite)
333 a1 = _asarray_validated(a, check_finite=check_finite)
--> 334 b1 = _asarray_validated(b, check_finite=check_finite)
335 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
271 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 272 a = toarray(a)
273 if not objects_ok:
~/anaconda3/envs/phd/lib/python3.8/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
484 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 485 raise ValueError(
486 "array must not contain infs or NaNs")
ValueError: array must not contain infs or NaNs
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-139-25fc0a8509e1> in <module>
40 mu=tt.exp(f),
41 observed=groups['train']['data'].reshape(groups['train']['s'], groups['train']['n']).T[:,i])
---> 42 mp = pm.find_MAP(maxeval=5000, progressbar = False)
43
44 pred_samples_fit = pm.sample_posterior_predictive([mp],
~/anaconda3/envs/phd/lib/python3.8/site-packages/pymc3/tuning/starting.py in find_MAP(start, vars, method, return_raw, include_transformed, progressbar, maxeval, model, *args, **kwargs)
163
164 try:
--> 165 opt_result = minimize(
166 cost_func, x0, method=method, jac=compute_gradient, *args, **kwargs
167 )
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/_minimize.py in minimize(fun, x0, args, method, jac, hess, hessp, bounds, constraints, tol, callback, options)
615 **options)
616 elif meth == 'l-bfgs-b':
--> 617 return _minimize_lbfgsb(fun, x0, args, jac, bounds,
618 callback=callback, **options)
619 elif meth == 'tnc':
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/lbfgsb.py in _minimize_lbfgsb(fun, x0, args, jac, bounds, disp, maxcor, ftol, gtol, eps, maxfun, maxiter, iprint, callback, maxls, finite_diff_rel_step, **unknown_options)
358 # until the completion of the current minimization iteration.
359 # Overwrite f and g:
--> 360 f, g = func_and_grad(x)
361 elif task_str.startswith(b'NEW_X'):
362 # new iteration
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in fun_and_grad(self, x)
198 if not np.array_equal(x, self.x):
199 self._update_x_impl(x)
--> 200 self._update_fun()
201 self._update_grad()
202 return self.f, self.g
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in _update_fun(self)
164 def _update_fun(self):
165 if not self.f_updated:
--> 166 self._update_fun_impl()
167 self.f_updated = True
168
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in update_fun()
71
72 def update_fun():
---> 73 self.f = fun_wrapped(self.x)
74
75 self._update_fun_impl = update_fun
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/_differentiable_functions.py in fun_wrapped(x)
68 def fun_wrapped(x):
69 self.nfev += 1
---> 70 return fun(x, *args)
71
72 def update_fun():
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/optimize.py in __call__(self, x, *args)
72 def __call__(self, x, *args):
73 """ returns the the function value """
---> 74 self._compute_if_needed(x, *args)
75 return self._value
76
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/optimize/optimize.py in _compute_if_needed(self, x, *args)
66 if not np.all(x == self.x) or self._value is None or self.jac is None:
67 self.x = np.asarray(x).copy()
---> 68 fg = self.fun(x, *args)
69 self.jac = fg[1]
70 self._value = fg[0]
~/anaconda3/envs/phd/lib/python3.8/site-packages/pymc3/tuning/starting.py in __call__(self, x)
227 value = -1.0 * nan_to_high(neg_value)
228 if self.use_gradient:
--> 229 neg_grad = self.dlogp_func(pm.floatX(x))
230 if np.all(np.isfinite(neg_grad)):
231 self.previous_x = x
~/anaconda3/envs/phd/lib/python3.8/site-packages/pymc3/blocking.py in __call__(self, x)
270
271 def __call__(self, x):
--> 272 return self.fa(self.fb(x))
~/anaconda3/envs/phd/lib/python3.8/site-packages/pymc3/model.py in __call__(self, state)
1537
1538 def __call__(self, state):
-> 1539 return self.f(**state)
1540
1541
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/compile/function/types.py in __call__(self, *args, **kwargs)
984 if hasattr(self.fn, "thunks"):
985 thunk = self.fn.thunks[self.fn.position_of_error]
--> 986 gof.link.raise_with_op(
987 node=self.fn.nodes[self.fn.position_of_error],
988 thunk=thunk,
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
341 # Some exception need extra parameter in inputs. So forget the
342 # extra long error message in that case.
--> 343 raise exc_value.with_traceback(exc_trace)
344
345
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/compile/function/types.py in __call__(self, *args, **kwargs)
971 try:
972 outputs = (
--> 973 self.fn()
974 if output_subset is None
975 else self.fn(output_subset=output_subset)
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gof/op.py in rval(p, i, o, n)
911 # default arguments are stored in the closure of `rval`
912 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 913 r = p(n, [x[0] for x in i], o)
914 for o in node.outputs:
915 compute_map[o][0] = True
~/anaconda3/envs/phd/lib/python3.8/site-packages/theano/tensor/slinalg.py in perform(self, node, inputs, output_storage)
250 rval = scipy.linalg.solve_triangular(A, b, lower=True)
251 elif self.A_structure == "upper_triangular":
--> 252 rval = scipy.linalg.solve_triangular(A, b, lower=False)
253 else:
254 rval = scipy.linalg.solve(A, b)
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/linalg/basic.py in solve_triangular(a, b, trans, lower, unit_diagonal, overwrite_b, debug, check_finite)
332
333 a1 = _asarray_validated(a, check_finite=check_finite)
--> 334 b1 = _asarray_validated(b, check_finite=check_finite)
335 if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
336 raise ValueError('expected square matrix')
~/anaconda3/envs/phd/lib/python3.8/site-packages/scipy/_lib/_util.py in _asarray_validated(a, check_finite, sparse_ok, objects_ok, mask_ok, as_inexact)
270 raise ValueError('masked arrays are not supported')
271 toarray = np.asarray_chkfinite if check_finite else np.asarray
--> 272 a = toarray(a)
273 if not objects_ok:
274 if a.dtype is np.dtype('O'):
~/anaconda3/envs/phd/lib/python3.8/site-packages/numpy/lib/function_base.py in asarray_chkfinite(a, dtype, order)
483 a = asarray(a, dtype=dtype, order=order)
484 if a.dtype.char in typecodes['AllFloat'] and not np.isfinite(a).all():
--> 485 raise ValueError(
486 "array must not contain infs or NaNs")
487 return a
ValueError: array must not contain infs or NaNs
Apply node that caused the error: Solve{A_structure='upper_triangular', lower=False, overwrite_A=False, overwrite_b=False}(InplaceDimShuffle{1,0}.0, Elemwise{Composite{((i0 * i1) - i2)}}[(0, 0)].0)
Toposort index: 72
Inputs types: [TensorType(float64, matrix), TensorType(float64, matrix)]
Inputs shapes: [(220, 220), (220, 220)]
Inputs strides: [(1760, 8), (1760, 8)]
Inputs values: ['not shown', 'not shown']
Outputs clients: [[InplaceDimShuffle{1,0}(Solve{A_structure='upper_triangular', lower=False, overwrite_A=False, overwrite_b=False}.0)]]
Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1413, in access_grad_cache
term = access_term_cache(node)[idx]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1064, in access_term_cache
output_grads = [access_grad_cache(var) for var in node.outputs]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1064, in <listcomp>
output_grads = [access_grad_cache(var) for var in node.outputs]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1413, in access_grad_cache
term = access_term_cache(node)[idx]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1064, in access_term_cache
output_grads = [access_grad_cache(var) for var in node.outputs]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1064, in <listcomp>
output_grads = [access_grad_cache(var) for var in node.outputs]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1413, in access_grad_cache
term = access_term_cache(node)[idx]
File "/home/mach1ne/anaconda3/envs/phd/lib/python3.8/site-packages/theano/gradient.py", line 1223, in access_term_cache
input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
Here you can find some toy data:
y = np.asarray([936., 140., 118., 202., 122., 96., 124., 85., 8., 79., 82.,
282., 455., 219., 103., 124., 132., 191., 99., 78., 108., 113.,
140., 145., 574., 54., 52., 343., 66., 169., 174., 88., 121.,
140., 77., 251., 445., 220., 108., 343., 78., 54., 78., 80.,
68., 268., 185., 128., 248., 286., 85., 134., 84., 76., 67.,
72., 57., 158., 199., 238., 592., 175., 216., 298., 74., 68.,
148., 49., 82., 150., 125., 161., 579., 231., 79., 308., 164.,
68., 130., 104., 46., 93., 105., 116., 539., 100., 114., 129.,
50., 94., 56., 128., 93., 168., 92., 128., 792., 195., 89.,
246., 53., 147., 157., 86., 82., 213., 84., 145., 827., 208.,
114., 175., 62., 65., 118., 74., 92., 203., 138., 163., 715.,
166., 166., 133., 88., 65., 57., 123., 132., 121., 149., 171.,
726., 220., 71., 173., 24., 67., 127., 90., 82., 159., 113.,
187., 510., 131., 101., 189., 80., 82., 115., 99., 71., 120.,
121., 122., 615., 106., 88., 156., 91., 76., 140., 64., 84.,
147., 88., 165., 639., 204., 93., 103., 96., 82., 111., 119.,
109., 220., 94., 138., 514., 141., 91., 303., 103., 163., 116.,
77., 122., 87., 104., 205., 658., 94., 128., 126., 39., 75.,
71., 34., 66., 175., 89., 155., 831., 95., 210., 130., 31.,
95., 59., 95., 111., 136., 146., 184., 510., 101., 210., 138.])
Model:
import numpy as np
import pymc3 as pm
import theano.tensor as tt

X = np.arange(220).reshape(-1, 1)

with pm.Model() as model:
    # length scales: long-term trend, short-term trend, periodic component
    l_t = pm.InverseGamma('l_t', 4, 220)
    l_ts = pm.Gamma('l_ts', 5, 1)
    l_p = pm.InverseGamma('l_p', 4, 220)
    # amplitudes
    η_trend = pm.HalfNormal('η_trend', 1)
    η_per = pm.HalfNormal('η_per', 1.5)
    η_ts = pm.HalfNormal('η_ts', 0.15)
    # white-noise level
    σ = pm.HalfNormal("σ", sigma=0.05)
    # constant mean
    a = pm.Normal('a', 0.0, 5)

    # cov function for the GP
    cov = (η_trend**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=l_t)
           + η_ts**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=l_ts)
           + η_per**2 * pm.gp.cov.Periodic(1, period=12, ls=l_p)
           + pm.gp.cov.WhiteNoise(σ))

    gp = pm.gp.Latent(mean_func=pm.gp.mean.Constant(a), cov_func=cov)
    f = gp.prior('f', X=X, reparameterize=True)

    y_pred = pm.Poisson('y_pred', mu=tt.exp(f), observed=y)

    # prior_checks = pm.sample_prior_predictive(200)
    mp = pm.find_MAP(maxeval=5000, progressbar=True)
I understand that this happens because I am using prior distributions with a lot of mass near zero. Is there another way to achieve this regularizing effect? What exactly is happening that triggers the error, and is there a way to debug it a little deeper?
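Would something along these lines be a sensible first step for digging deeper? This is only a sketch of what I have in mind (the jitter value is an arbitrary guess on my part):
# Log-probability of every free variable at the starting point; a -inf or
# nan here would point at the term that is already broken before the
# optimizer takes a single step.
print(model.check_test_point())

# The failing Solve op was created inside the gradient of the GP, so the
# covariance matrix may be losing positive-definiteness for some
# hyperparameter values; a slightly larger fixed white-noise / jitter term
# might stabilize the triangular solves, e.g.
#   cov = (... same kernels as above ...) + pm.gp.cov.WhiteNoise(1e-4)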
Thanks,
Luis