# Vectorized autoregressive model

Hey,

i currently try to reproduce the hierarchical model from Chapados, Nicolas. “Effective Bayesian modeling of groups of related count time series.” arXiv preprint arXiv:1405.3738 (2014)..

In the paper they show an approximate inference method but also state, that they where able to get the posteriors by using Stan. I wanted to achieve the same with PyMC3. Right now I have the following model.

with pm.Model() as ar_model_hier:
# Global Hyperpriors
alpha = pm.Uniform("alpha", lower=0.001, upper=0.1)
kappa_tau = pm.Uniform("kappa_tau", lower=5, upper=10)
beta_tau = pm.Uniform("beta_tau", lower=2, upper=25)
mu_mu = pm.Normal("mu_mu", mu=0, sd=2**2)
tau_mu = pm.Uniform("tau_mu", lower=1, upper=10)
phi_plus = pm.Uniform("phi_plus", lower=1, upper=600)
phi_minus = pm.Uniform("phi_minus", lower=1, upper=50)

# AR part of the model
mu_l = pm.Normal("mu_l", mu_mu, 1. / tau_mu, shape=Y_hat.shape)
phi_l = pm.Beta("phi_l", phi_plus + phi_minus, phi_minus, shape=Y_hat.shape)
c_l = mu_l * (1 - phi_l)
tau_l = pm.Gamma("tau_l", kappa_tau, beta_tau, shape=Y_hat.shape)

# Binomial part of the model
alpha_l = pm.Exponential("alpha_l", alpha, shape=Y_hat.shape)

for i in range(Y_hat.shape):
y_hat = Y_hat[:, i]
eta_l = pm.AR("eta_%d" % i, rho=[c_l[i], phi_l[i]], tau=tau_l[i], constant=True, shape=len(y_hat))
y_t_l = pm.NegativeBinomial("y_t_%d" % i, tt.exp(eta_l), alpha_l[i], observed=y_hat)


Where Y_hat is a matrix, with observed counts in rows, and different series in columns. The above model works great as long as I just have ~ 10 columns. The datasets from the paper actually have > 1000 column/series. So currently I also tried to get rid of the for loop, for the autoregressive model, but was not successful so far.

## Are there any examples for a vectorized autoregressive model?

Cheers.
Joachim

Unfortunately the current implementation of pm.AR does not allow multidimensional rho:

A bit of rewrite of the pm.AR the model seems to run:

import theano.tensor as tt
from theano import scan
from pymc3.distributions.distribution import Continuous
from pymc3.distributions.continuous import get_tau_sd, Normal, Flat

class AR2d(Continuous):
R"""
Autoregressive process with p lags.
.. math::
x_t = \rho_0 + \rho_1 x_{t-1} + \ldots + \rho_p x_{t-p} + \epsilon_t,
\epsilon_t \sim N(0,\sigma^2)
The innovation can be parameterized either in terms of precision
or standard deviation. The link between the two parametrizations is
given by
.. math::
\tau = \dfrac{1}{\sigma^2}
Parameters
----------
rho : tensor
Vector of autoregressive coefficients.
sd : float
Standard deviation of innovation (sd > 0). (only required if tau is not specified)
tau : float
Precision of innovation (tau > 0). (only required if sd is not specified)
constant: bool (optional, default = False)
Whether to include a constant.
init : distribution
distribution for initial values (Defaults to Flat())
"""

def __init__(self, rho, sd=None, tau=None,
constant=False, init=Flat.dist(),
*args, **kwargs):

super(AR2d, self).__init__(*args, **kwargs)
tau, sd = get_tau_sd(tau=tau, sd=sd)
self.sd = tt.as_tensor_variable(sd)
self.tau = tt.as_tensor_variable(tau)

self.mean = tt.as_tensor_variable(0.)

rho = tt.as_tensor_variable(rho, ndim=2)
if constant:
self.p = rho.shape - 1
else:
self.p = rho.shape

self.constant = constant
self.rho = rho
self.init = init

def logp(self, value):
if self.constant:
results, _ = scan(lambda l_, obs, p, rho: rho[l_ - 1] * obs[p - l_:-l_],
outputs_info=None, sequences=[tt.arange(1, self.p + 1)],
non_sequences=[value, self.p, self.rho[1:]])
x = tt.sum(results, axis=0)
eps = value[self.p:] - self.rho - x
else:
results, _ = scan(lambda l_, obs, p, rho: rho[l_ - 1] * obs[p - l_:-l_],
outputs_info=None, sequences=[tt.arange(1, self.p + 1)],
non_sequences=[value, self.p, self.rho])
x = tt.sum(results, axis=0)
eps = value[self.p:] - x

innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
init_like = self.init.logp(value[:self.p])

return tt.sum(innov_like) + tt.sum(init_like)


You can have a look at this small notebook as a demonstration:

However, the implementation doesnt seems to improve the speed a lot, as you can see from the notebook.

Actually, the scan part seems unnecessary, try this:

import theano.tensor as tt
from theano import scan
from pymc3.distributions.distribution import Continuous
from pymc3.distributions.continuous import get_tau_sd, Normal, Flat

class AR(Continuous):
R"""
Autoregressive process with p lags.
.. math::
x_t = \rho_0 + \rho_1 x_{t-1} + \ldots + \rho_p x_{t-p} + \epsilon_t,
\epsilon_t \sim N(0,\sigma^2)
The innovation can be parameterized either in terms of precision
or standard deviation. The link between the two parametrizations is
given by
.. math::
\tau = \dfrac{1}{\sigma^2}
Parameters
----------
rho : tensor
Vector of autoregressive coefficients.
sd : float
Standard deviation of innovation (sd > 0). (only required if tau is not specified)
tau : float
Precision of innovation (tau > 0). (only required if sd is not specified)
constant: bool (optional, default = False)
Whether to include a constant.
init : distribution
distribution for initial values (Defaults to Flat())
"""

def __init__(self, rho, sd=None, tau=None,
constant=False, init=Flat.dist(),
*args, **kwargs):

super(AR, self).__init__(*args, **kwargs)
tau, sd = get_tau_sd(tau=tau, sd=sd)
self.sd = tt.as_tensor_variable(sd)
self.tau = tt.as_tensor_variable(tau)

self.mean = tt.as_tensor_variable(0.)

if isinstance(rho, list):
p = len(rho)
else:
try:
p = rho.shape.tag.test_value
except AttributeError:
p = rho.shape

if constant:
self.p = p - 1
else:
self.p = p

self.constant = constant
self.rho = rho = tt.as_tensor_variable(rho)
self.init = init

def logp(self, value):
if self.constant:
x = tt.add(*[self.rho[i+1]*value[self.p-(i+1):-(i+1)] for i in range(self.p)])
eps = value[self.p:] - self.rho - x
else:
x = tt.add(*[self.rho[i]*value[self.p-(i+1):-(i+1)] for i in range(self.p)])
eps = value[self.p:] - x

innov_like = Normal.dist(mu=0.0, tau=self.tau).logp(eps)
init_like = self.init.logp(value[:self.p])

return tt.sum(innov_like) + tt.sum(init_like)


FYI: I submitted a PR so this would be easier to implement: https://github.com/pymc-devs/pymc3/pull/3070

Very, cool. Tried both version. Both work fine. I don’t see a huge performance difference between the version with, or without scan. Thank you very much!

1 Like

I wanted to link a relevant post to this conversation:

It looks like the goal here was to fit AR models for multiple series simultaneously but not necessarily to let each series’ estimate inform the others. I’m hoping there’s a way to to estimate AR models that load on each others’ lagged values. Any suggestions would be appreciated. Thank you.

1 Like