# Implementing a manual AR1 model

Hi,

I am new to `pymc` and I am trying to understand how to build time series models. I am starting with an AR1 model where I am trying to replicate what the already available `AR` distribution in `pymc` gives us. This is to help me understand how to use `scan` to iterate through a time series and how I am supposed to structure the building blocks.

I had a look online at some examples and at the source code for the `AR` distribution but I can’t make it work. I am constantly fighting with the dimensions where the observed variable does not have the same shape as the output of my `scan` function. I would appreciate if someone could fix my code below or point me in the right direction.

Below there are 3 models:

1. The built-in AR Model
2. An ARMA model I found in the examples
3. My manual AR1 model which is not working
``````import os
import sys

import numpy as np
import arviz as az
import pymc as pm
import pytensor
import pytensor.tensor as pt

import matplotlib.pyplot as plt
%matplotlib inline

RANDOM_SEED = 123456
np.random.seed(RANDOM_SEED)
rng = np.random.default_rng(RANDOM_SEED)
sample_size = 100  # - small so it samples fast

def simulate_ar(intercept, coef1, coef2, noise=0.3, *, warmup=10, steps=200):
# We sample some extra warmup steps, to let the AR process stabilize
draws = np.zeros(warmup + steps)
# Initialize first draws at intercept
draws[:2] = intercept
for step in range(2, warmup + steps):
draws[step] = (
intercept
+ coef1 * draws[step - 1]
+ coef2 * draws[step - 2]
+ np.random.normal(0, noise)
)
return draws[warmup:]

# True parameters of the AR process
ar1_data = simulate_ar(10, -0.9, 0, steps = 200 * 2)

fig, ax = plt.subplots(figsize=(10, 3))
ax.set_title("Generated Autoregressive Timeseries", fontsize=15)
ax.plot(ar1_data)

# - https://www.pymc.io/projects/examples/en/latest/time_series/AR.html
# - https://www.pymc.io/projects/examples/en/latest/time_series/Forecasting_with_structural_timeseries.html

with pm.Model() as ar1:

y = pm.MutableData("y", ar1_data, dims="obs_id")
rho = pm.Normal("rho", mu=0.0, sigma=1.0, shape=2)
sigma = pm.HalfNormal("sigma", 5)
init = pm.Normal.dist(0, 10)

ar = pm.AR(
"ar",
rho=rho,
sigma = sigma,
constant=True,
init_dist = init,
steps = y.shape - rho.shape + 1
)

outcome = pm.Normal("likelihood", mu=ar, sigma=sigma, observed=y, dims="obs_id")
idata_ar = pm.sample_prior_predictive()
idata_ar = pm.sample(sample_size, tune=2000, random_seed=RANDOM_SEED)
idata_ar.extend(pm.sample_posterior_predictive(idata_ar))

# https://github.com/pymc-devs/pymc-examples/blob/main/examples/time_series/arma_example.py

with pm.Model() as arma_model:
sigma = pm.HalfNormal("sigma", 5.0)
theta = pm.Normal("theta", 0.0, sigma=1.0)
phi = pm.Normal("phi", 0.0, sigma=2.0)
mu = pm.Normal("mu", 0.0, sigma=10.0)

err0 = y - (mu + phi * mu)

def calc_next(last_y, this_y, err, mu, phi, theta):
nu_t = mu + phi * last_y + theta * err
return this_y - nu_t

err, _ = pytensor.scan(
fn=calc_next,
sequences=dict(input=y, taps=[-1, 0]),
outputs_info=[err0],
non_sequences=[mu, phi, theta],
)

pm.Potential("like", pm.logp(pm.Normal.dist(0, sigma=sigma), err))
example = pm.sample(sample_size, tune = 100, random_seed = RANDOM_SEED)

# --- this one does not work
# --- trying to replicate the original implementation without all the complexity
# https://github.com/pymc-devs/pymc/blob/main/pymc/distributions/timeseries.py

with pm.Model() as ar_manual:

# assumes 95% of prob mass is between -2 and 2
rho = pm.Normal("rho", mu=0.0, sigma=1.0, shape=2)

# precision of the innovation term
sigma = pm.HalfNormal("sigma", 5)

y = pm.MutableData("y", ar1_data, dims="obs_id")
init = pm.Normal.dist(0, 10, size = 1)

def calc_next(last_y, rho):
nu_t = rho + rho * last_y
return nu_t

upd, _ = pytensor.scan(
fn=calc_next,
outputs_info=dict(initial = init.type(), taps = [-1]),
non_sequences=rho,
n_steps = y.shape - rho.shape + 1,
strict = True
)

# --- Need to concat the initial value but the shapes don't match
# upd = pt.concatenate([init, upd], axis = -1)

ar = pm.Normal("ar", mu = upd, sigma = sigma)
outcome = pm.Normal("likelihood", mu=ar, sigma=sigma, observed=y, dims="obs_id")

idata_ar = pm.sample_prior_predictive()
idata_ar = pm.sample(sample_size, tune=2000, random_seed=RANDOM_SEED)
idata_ar.extend(pm.sample_posterior_predictive(idata_ar))
``````

Thanks

1 Like

In the newer versions of pymc you can define such models generatively with CustomDist:

1 Like

The concatenate fails because you need to add a dummy batch index to `init`. `pt.concatenate([init[None], upd], axis=0])` should work.

What you’re implementing is quite close to the OLS formulation of AR, which would look something like this:

``````with pm.Model(coords={'params':['intercept', 'L1.data']}) as ar1:
y = pm.MutableData("y", ar1_data[:-1], dims="obs_id")
X = pm.MutableData('X', np.c_[np.ones_like(ar1_data[1:]), ar1_data[1:]], dims=[None, 'params'])
rhos = pm.Normal('rhos', dims=['params'])

mu = pm.Deterministic('mu', X @ rhos)
sigma = pm.Exponential('sigma', 1)
y_hat = pm.Normal('y_hat', mu=mu, sigma=sigma, observed=y)
idata = pm.sample()
``````

The problem with trying to do this via a scan is that, without any innovations inside the scan, it’s just going to converge to the steady state (assuming values of rho that induce stationarity). Here’s a demonstration:

``````n_lags = 1
coords = {'params': ['intercept'] + [f'L{i+1}' for i in range(n_lags)]}
with pm.Model(coords=coords) as ar1:
y = pm.MutableData("y", ar1_data, dims="obs_id")
rhos = pm.Normal('rhos', sigma=[1, 0.25], dims=['params'])
init = pm.Normal('y_init', 0, 10)

def ar_step(mu_t, rho):
mu_tp1 = rho + rho * mu_t
return mu_tp1

mu, _ = pytensor.scan(
ar_step,
outputs_info=[init],
non_sequences=rhos,
n_steps = y.shape - n_lags,
strict = True
)

mu = pm.Deterministic('mu', pt.concatenate([init[None], mu], axis=0))

# Analytic steady-state for AR(1) with non-zero constant
sigma = pm.Exponential('sigma', 1)
y_hat = pm.Normal('y_hat', mu=mu, sigma=sigma, observed=y)
prior_idata = pm.sample_prior_predictive()
``````

You can inspect the prior draws for `mu` and `mu_steady` to check I’m not lying to you:

``````prior = az.extract(prior_idata, 'prior')
idx = np.random.choice(500)
sample = prior.isel(sample=idx)

fig, ax = plt.subplots(figsize=(14,4))
ax.plot(sample.mu)
``````

This is why you need to follow the code @ricardoV94 linked when you do an AR model “by hand” with scan. You need to put a prior over the entire AR trajectory, which is not the same thing as running a deterministic AR simulation, then using that as the mean of an i.i.d normal process. What would be equivalent would be to scan over the data, and essentially make a sequence of 1-step forecasts, and pass that to normal:

``````n_lags = 1
coords = {'params': ['intercept'] + [f'L{i+1}' for i in range(n_lags)]}
with pm.Model(coords=coords) as ar1:
y = pm.MutableData("y", ar1_data, dims="obs_id")
rhos = pm.Normal('rhos', sigma=[1, 0.25], dims=['params'])
init = pm.Normal('y_init', 0, 10)

def ar_step(mu_t, rho):
mu_tp1 = rho + rho * mu_t
return mu_tp1

mu, _ = pytensor.scan(
ar_step,
sequences=dict(input=y, taps=[-1]),
non_sequences=rhos,
n_steps = y.shape - n_lags,
strict = True
)

mu = pm.Deterministic('mu', pt.concatenate([init[None], mu], axis=0))
The moral of the story is that pymc-examples is desperately in need of a “how to write a recursive model” notebook. Help wanted 