What's the best way to implement an autoregressive moving average of a time series with a variable lag and window size?

Quick update on this - I’ve successfully written a model that I think should compute a lagged moving average. Here’s my code:

import numpy as np
import pymc as pm
import pytensor as pt
from pytensor.compile.sharedvalue import SharedVariable
from pytensor.tensor import conv


def moving_average(
    t: SharedVariable,
    width: pm.Discrete,
    lag: pm.Discrete,
) -> pt.tensor.TensorLike:
    """Compute the lagged, centered moving average of the signal t.

    Parameters
    ----------
    t : 1-d shared time-series tensor.
    width : discrete RV — the averaging-window length in samples.
    lag : discrete RV — how far to shift the averaged signal.

    Returns
    -------
    A tensor the same length as t.  Positions where the window only
    partially overlaps the data, and positions emptied by the lag,
    hold np.nan.
    """
    # conv2d wants 4-d (batch, channel, row, col) inputs, so view the
    # 1-d kernel and signal as 1 x 1 x 1 x N "images".
    kernel_4d = (pm.math.ones((width,), dtype=float) / width).reshape((1, 1, 1, -1))
    signal_4d = t.reshape((1, 1, 1, -1))

    # 'valid' convolution yields n - width + 1 samples; centre them in a
    # same-length nan-filled output.
    n = t.shape[0]
    valid = conv.conv2d(signal_4d, kernel_4d, border_mode='valid').flatten()

    result = pm.math.full_like(t, np.nan, dtype=float)
    # BUGFIX 1: set_subtensor is functional — it RETURNS the updated
    # tensor instead of modifying `result` in place, so the result must
    # be re-bound.  The original discarded it, leaving `result` all-nan
    # (and hence mu all-nan, which drives the Poisson logp to -inf).
    # BUGFIX 2: the end of the slice is derived from the valid-conv
    # length (n - width + 1) rather than `-width//2`, so even window
    # widths no longer cause an off-by-one shape mismatch.
    start = width // 2
    result = pt.tensor.set_subtensor(
        result[start:start + (n - width + 1)],
        valid,
    )
    return lagged(result, lag)

def lagged(data: SharedVariable, lag: pm.Discrete) -> pt.tensor.TensorLike:
    """Shift `data` back by `lag` samples, keeping the original shape.

    The trailing `lag` positions, for which no shifted data exists, are
    filled with np.nan.

    Parameters
    ----------
    data : 1-d tensor to shift.
    lag : discrete RV — number of samples to shift by (>= 0).
    """
    result = pm.math.full_like(data, np.nan, dtype=float)
    # BUGFIX 1: set_subtensor is functional — it returns a NEW tensor
    # with the slice replaced; the original code discarded the return
    # value, which left `result` entirely nan.
    # BUGFIX 2: an explicit end index replaces `result[:-lag]`, so that
    # lag == 0 acts as the identity instead of selecting an empty slice
    # (data[0:] would then mismatch the empty target's shape).
    result = pt.tensor.set_subtensor(
        result[:data.shape[0] - lag],
        data[lag:],
    )
    return result


def my_model(t_data, c_data, c0, a, b, g, w0, l0, w1, l1, l2):
    """Expected count: a baseline c0 plus two lagged moving averages of
    t_data, minus a lagged feedback term in c_data."""
    first_average = a * moving_average(t_data, w0, l0)
    second_average = b * moving_average(t_data, w1, l1)
    feedback = g * lagged(c_data, l2)
    return c0 + first_average + second_average - feedback

def main():
    """Build the PyMC model and draw posterior samples."""
    data = get_data()

    with pm.Model() as model:
        # Observed series, registered as mutable shared data.
        t_data = pm.MutableData("t_data", data["t"])
        c_data = pm.MutableData("c_data", data["c"])

        # Uninformative uniform prior for the initial number c.
        c0 = pm.DiscreteUniform("c0", lower=0, upper=1000)

        # Positive coefficients for the two averaged terms and the
        # feedback term.
        alpha = pm.HalfNormal("alpha", sigma=10)
        beta = pm.HalfNormal("beta", sigma=10)
        gamma = pm.HalfNormal("gamma", sigma=10)

        # Window widths (samples) for the two moving averages.
        width_0 = pm.DiscreteUniform("width_0", 1, 200)
        width_1 = pm.DiscreteUniform("width_1", 1, 200)

        # Lags (samples) for the two averaged terms and the feedback.
        lag_0 = pm.DiscreteUniform("lag_0", lower=180, upper=545)
        lag_1 = pm.DiscreteUniform("lag_1", lower=550, upper=910)
        lag_2 = pm.DiscreteUniform("lag_2", lower=915, upper=1275)

        expected = my_model(
            t_data,
            c_data,
            c0,
            alpha,
            beta,
            gamma,
            width_0,
            lag_0,
            width_1,
            lag_1,
            lag_2,
        )
        c_mu = pm.Deterministic("c_mu", expected)

        c_likelihood = pm.Poisson("c_likelihood", mu=c_mu, observed=c_data)
        idata = pm.sample(discard_tuned_samples=False)

Turns out that I didn’t need to use pymc.AR or anything like that. Here, my model of my observed c_data is a Poisson-distributed RV with a mu that depends on the moving average of two intervals some number of days in the past. The trickiest part so far was rewriting numpy-like array operations into valid pytensor operations. Even though I now have a valid model, I’m still having trouble getting the sampler to produce valid results:

pymc.exceptions.SamplingError: Initial evaluation of model at starting point failed!
Starting values:
{'c0': array(500), 'alpha_log__': array(2.30258509), 'beta_log__': array(2.30258509), 'gamma_log__': array(2.30258509), 'width_0': array(100), 'width_1': array(100), 'lag_0': array(362), 'lag_1': array(730), 'lag_2': array(1095)}

Logp initial evaluation results:
{'c0': -6.91, 'alpha': -0.73, 'beta': -0.73, 'gamma': -0.73, 'width_0': -5.3, 'width_1': -5.3, 'lag_0': -5.9, 'lag_1': -5.89, 'lag_2': -5.89, 'c_likelihood': -inf}
You can call `model.debug()` for more details.

I suspect the sampler is having trouble with the fact that I only have certain sections of valid time-series data, and I impute np.nan for all the dates where I don’t have any observations. Imputing nan keeps the moving-average computation simple, but any nan that survives into c_mu makes the Poisson log-likelihood evaluate to -inf — which would explain the `c_likelihood: -inf` in the initial evaluation output above.