Hi,
I am new to pymc
and I am trying to understand how to build time series models. I am starting with an AR1 model where I am trying to replicate what the already available AR
distribution in pymc
gives us. This is to help me understand how to use scan
to iterate through a time series and how I am supposed to structure the building blocks.
I had a look online at some examples and at the source code for the AR
distribution but I can’t make it work. I am constantly fighting with the dimensions where the observed variable does not have the same shape as the output of my scan
function. I would appreciate if someone could fix my code below or point me in the right direction.
Below there are 3 models:
- The built-in AR Model
- An ARMA model I found in the examples
- My manual AR1 model which is not working
import os
import sys
import numpy as np
import arviz as az
import pymc as pm
import pytensor
import pytensor.tensor as pt
import matplotlib.pyplot as plt
%matplotlib inline
RANDOM_SEED = 123456
np.random.seed(RANDOM_SEED)
rng = np.random.default_rng(RANDOM_SEED)
sample_size = 100 # - small so it samples fast
def simulate_ar(intercept, coef1, coef2, noise=0.3, *, warmup=10, steps=200):
# We sample some extra warmup steps, to let the AR process stabilize
draws = np.zeros(warmup + steps)
# Initialize first draws at intercept
draws[:2] = intercept
for step in range(2, warmup + steps):
draws[step] = (
intercept
+ coef1 * draws[step - 1]
+ coef2 * draws[step - 2]
+ np.random.normal(0, noise)
)
# Discard the warmup draws
return draws[warmup:]
# True parameters of the AR process
ar1_data = simulate_ar(10, -0.9, 0, steps = 200 * 2)
fig, ax = plt.subplots(figsize=(10, 3))
ax.set_title("Generated Autoregressive Timeseries", fontsize=15)
ax.plot(ar1_data)
# Using links as reference
# - https://www.pymc.io/projects/examples/en/latest/time_series/AR.html
# - https://www.pymc.io/projects/examples/en/latest/time_series/Forecasting_with_structural_timeseries.html
with pm.Model() as ar1:
y = pm.MutableData("y", ar1_data, dims="obs_id")
rho = pm.Normal("rho", mu=0.0, sigma=1.0, shape=2)
sigma = pm.HalfNormal("sigma", 5)
init = pm.Normal.dist(0, 10)
ar = pm.AR(
"ar",
rho=rho,
sigma = sigma,
constant=True,
init_dist = init,
steps = y.shape[0] - rho.shape[0] + 1
)
outcome = pm.Normal("likelihood", mu=ar, sigma=sigma, observed=y, dims="obs_id")
idata_ar = pm.sample_prior_predictive()
idata_ar = pm.sample(sample_size, tune=2000, random_seed=RANDOM_SEED)
idata_ar.extend(pm.sample_posterior_predictive(idata_ar))
# Using link as reference
# https://github.com/pymc-devs/pymc-examples/blob/main/examples/time_series/arma_example.py
with pm.Model() as arma_model:
sigma = pm.HalfNormal("sigma", 5.0)
theta = pm.Normal("theta", 0.0, sigma=1.0)
phi = pm.Normal("phi", 0.0, sigma=2.0)
mu = pm.Normal("mu", 0.0, sigma=10.0)
err0 = y[0] - (mu + phi * mu)
def calc_next(last_y, this_y, err, mu, phi, theta):
nu_t = mu + phi * last_y + theta * err
return this_y - nu_t
err, _ = pytensor.scan(
fn=calc_next,
sequences=dict(input=y, taps=[-1, 0]),
outputs_info=[err0],
non_sequences=[mu, phi, theta],
)
pm.Potential("like", pm.logp(pm.Normal.dist(0, sigma=sigma), err))
example = pm.sample(sample_size, tune = 100, random_seed = RANDOM_SEED)
# --- this one does not work
# --- trying to replicate the original implementation without all the complexity
# https://github.com/pymc-devs/pymc/blob/main/pymc/distributions/timeseries.py
with pm.Model() as ar_manual:
# assumes 95% of prob mass is between -2 and 2
rho = pm.Normal("rho", mu=0.0, sigma=1.0, shape=2)
# precision of the innovation term
sigma = pm.HalfNormal("sigma", 5)
y = pm.MutableData("y", ar1_data, dims="obs_id")
init = pm.Normal.dist(0, 10, size = 1)
def calc_next(last_y, rho):
nu_t = rho[0] + rho[1] * last_y
return nu_t
upd, _ = pytensor.scan(
fn=calc_next,
outputs_info=dict(initial = init.type(), taps = [-1]),
non_sequences=rho,
n_steps = y.shape[0] - rho.shape[0] + 1,
strict = True
)
# --- Need to concat the initial value but the shapes don't match
# upd = pt.concatenate([init, upd], axis = -1)
ar = pm.Normal("ar", mu = upd, sigma = sigma)
outcome = pm.Normal("likelihood", mu=ar, sigma=sigma, observed=y, dims="obs_id")
idata_ar = pm.sample_prior_predictive()
idata_ar = pm.sample(sample_size, tune=2000, random_seed=RANDOM_SEED)
idata_ar.extend(pm.sample_posterior_predictive(idata_ar))
Thanks