I’m getting the following error in my code:
TypeError: For compute_test_value, one input test value does not have the requested type.
The error when converting the test value to that variable type:
Wrong number of dimensions: expected 0, got 1 with shape (38994,).
Any ideas on how to get around this? Any feedback on the approach would also be appreciated! I'm borrowing a lot of the code here from Ritchie Vink's blog.
# Trend
def det_dot(a, b):
    """
    The Theano dot product and the NUTS sampler don't work well with large matrices,
    so the dot product is computed via broadcasting and a sum instead.
    :param a: (np matrix)
    :param b: (theano vector)
    """
    return (a * b[None, :]).sum(axis=-1)
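As a quick sanity check (the array names below are just for illustration), det_dot agrees with an ordinary matrix-vector product on plain NumPy arrays:

import numpy as np
A_check = np.random.rand(100, 25)   # stand-in for the changepoint matrix
v_check = np.random.rand(25)        # stand-in for the delta vector
assert np.allclose(det_dot(A_check, v_check), A_check.dot(v_check))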
def trend_model(m, t, n_changepoints=25, changepoints_prior_scale=0.05,
                growth_prior_scale=5, changepoint_range=0.8):
    """
    The piecewise linear trend with changepoint implementation in PyMC3.
    :param m: (pm.Model)
    :param t: (np.array) MinMax scaled time.
    :param n_changepoints: (int) The number of changepoints to model.
    :param changepoints_prior_scale: (flt or None) The scale of the Laplace prior on the delta vector.
                                     If None, a hierarchical prior is set.
    :param growth_prior_scale: (flt) The standard deviation of the prior on the growth.
    :param changepoint_range: (flt) Proportion of history in which trend changepoints will be estimated.
    :return g, A, s: (tt.vector, np.array, tt.vector)
    """
    s = np.linspace(0, changepoint_range * np.max(t), n_changepoints + 1)[1:]
    # * 1 casts the boolean to integers
    A = (t[:, None] > s) * 1
    with m:
        # initial growth
        k = pm.Normal('k', 0, growth_prior_scale)
        if changepoints_prior_scale is None:
            changepoints_prior_scale = pm.Exponential('tau', 1.5)
        # rate of change
        delta = pm.Laplace('delta', 0, changepoints_prior_scale, shape=n_changepoints)
        # offset
        m = pm.Normal('m', 0, 5)
        gamma = -s * delta
        g = (k + det_dot(A, delta)) * t + (m + det_dot(A, gamma))
    return g, A, s
def det_trend(k, m, delta, t, s, A):
    return (k + np.dot(A, delta)) * t + (m + np.dot(A, (-s * delta)))
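det_trend is just the NumPy counterpart of the trend inside the model; a minimal sketch of how it could be used to rebuild the fitted trend from posterior means (assuming trace, s, A and df1 from the model further down):

k_mean = trace['k'].mean()
m_mean = trace['m'].mean()
delta_mean = trace['delta'].mean(axis=0)
trend_fit = det_trend(k_mean, m_mean, delta_mean, df1['t'].values, s, A)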
# Seasonality
def fourier_series(t, p=365.25, n=10):
    # 2 pi n / p
    x = 2 * np.pi * np.arange(1, n + 1) / p
    # 2 pi n / p * t
    x = x * t[:, None]
    x = np.concatenate((np.cos(x), np.sin(x)), axis=1)
    return x
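A quick shape check (the names below are just for illustration): for n harmonics the design matrix has 2 * n columns, cosines first and then sines.

import numpy as np
t_check = np.linspace(0, 1, 50)
X_check = fourier_series(t_check, p=1.0, n=3)
print(X_check.shape)  # (50, 6): 3 cosine columns followed by 3 sine columns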
def seasonality_model(m, df, period='yearly', seasonality_prior_scale=10):
    if period == 'yearly':
        n = 10
        # rescale the period, as t is also scaled
        p = 365.25 / (df['ds'].max() - df['ds'].min()).days
    elif period == 'weekly':
        n = 3
        # rescale the period, as t is also scaled
        p = 7 / (df['ds'].max() - df['ds'].min()).days
    else:  # daily
        n = 4
        # rescale the period, as t is also scaled
        p = 24 / (df['ds'].max() - df['ds'].min()).days
    x = fourier_series(df['t'], p, n)
    with m:
        beta = pm.Normal(f'beta_{period}', mu=0, sd=seasonality_prior_scale, shape=2 * n)
    return x, beta
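To make the rescaling concrete: the data built below spans about 812 days, so in scaled time the yearly period becomes roughly 365.25 / 812 ≈ 0.45 and the weekly period 7 / 812 ≈ 0.0086.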
import numpy as np
import pandas as pd
import scipy.stats as stats
import pymc3 as pm
def sim_data():
    ds = pd.date_range('2018-06-01 00:00:00', '2020-08-21 08:30:00', freq='1800S')
    # additive seasonal effects by month, day of week and hour of day
    monthly = dict(zip(range(12), [1.2, 0.4, 0.9, 1.25, 0.75, -0.5, 0.4, 0.1, -1.5, -1, -0.5, 1.25]))
    daily = dict(zip(range(7), [0.7, 1, 0.85, 0.1, -0.5, -1.35, -1]))
    hourly = dict(zip(range(24), [-2, -3.35, -4, -4.3, -5, -4.75, -4, -3.15, -1.3, 0.35, 2.2, 3.4, 3.65, 3.8, 3.9, 3.85, 3.2, 2.2, 1.6, 1.45, 1.55, 1.65, 0.75, -0.4]))
    df = pd.DataFrame({'ds': ds, 'month': ds.month - 1, 'day': ds.dayofweek, 'hour': ds.hour})
    seasonality = (df.month.map(monthly) + df.day.map(daily) + df.hour.map(hourly))
    lam = 10
    scale = 1 / 6
    # negative binomial counts whose level shifts with the seasonal effect
    df['y'] = stats.nbinom(lam + seasonality, scale).rvs(len(seasonality))
    return df
df1 = sim_data()
df1['t'] = (df1['ds'] - df1['ds'].min()) / (df1['ds'].max() - df1['ds'].min())
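For context, len(df1) is 38994 half-hourly points (which matches the shape reported in the error above) and the scaled time runs from 0 to 1:

print(len(df1))                         # 38994
print(df1['t'].min(), df1['t'].max())   # 0.0 1.0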
with pm.Model() as m2:
    a = pm.Normal('a', 5, 1.5)
    b = pm.Beta('b', 16, 84)
    # piecewise linear trend with a hierarchical prior on the changepoint deltas
    g, A, s = trend_model(m2, df1['t'], changepoints_prior_scale=None)
    # Fourier design matrices and coefficients for each seasonal component
    x_monthly, beta_monthly = seasonality_model(m2, df1, 'yearly')
    x_daily, beta_daily = seasonality_model(m2, df1, 'weekly')
    x_hourly, beta_hourly = seasonality_model(m2, df1, 'daily')
    y = pm.Deterministic('y', g + det_dot(x_monthly, beta_monthly) + det_dot(x_daily, beta_daily) + det_dot(x_hourly, beta_hourly))
    sigma = pm.HalfCauchy('sigma', 0.5)
    seasonality = pm.Normal('seasonality', mu=y, sd=sigma)
    lam = pm.Gamma('lam', a, b)
    obs = pm.Poisson('obs', lam + seasonality, observed=df1.y)
    trace = pm.sample(1000)