Issue with Local Linear State Space Model

Hello everyone,
I’m very new to PyMC and was trying to put together a local linear trend model:
L(t) = L(t-1) + d(t) + e_L(t)
d(t) = d(t-1) + e_d(t)

where L is the level (the observed value), d is the slope, and e_L and e_d are their respective error terms. I removed e_L(t) for testing purposes.
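For reference, here is a quick NumPy simulation of the generative process I have in mind (just a sanity check of the equations above, with e_L dropped; not the model I’m fitting):

import numpy as np

rng = np.random.default_rng(0)
n = 100
slope_sigma = 0.1

level = np.empty(n)
slope = np.empty(n)
level[0], slope[0] = 0.0, 2.0
for t in range(1, n):
    slope[t] = slope[t - 1] + rng.normal(0.0, slope_sigma)  # d(t) = d(t-1) + e_d(t)
    level[t] = level[t - 1] + slope[t]                      # L(t) = L(t-1) + d(t), e_L removed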

I thought I had a working model, but the slope’s posterior is nonsensical; the slope innovations should also be essentially 0 after the 3rd point.

The data is a simple line with a slope of 2.

# %% Import libraries
%load_ext autoreload
%autoreload 2

import numpy as np
import pymc as pm
import matplotlib.pyplot as plt
import pytensor as pt


# Set the seed for reproducibility
np.random.seed(0)

# Define time series length
n = 100

# # Define change points
# change_points = [30, 60]

# # Define slopes for each segment
# slopes = [2, -1, -3]

# Generate time series

observed_time_series = np.arange(n)*2

n_timesteps = len(observed_time_series)

with pm.Model() as model:

    # Define priors on the standard deviations of the random walks
    level_sigma = pm.HalfNormal('level_sigma', sigma=0.01)
    slope_sigma = pm.HalfNormal('slope_sigma', sigma=0.1)

    # Define priors on the initial level and slope
    initial_level = pm.Normal('initial_level', mu=0., sigma=0.001)
    initial_slope = pm.Normal('initial_slope', mu=0., sigma=1.)
    # Define the random walks for the level and slope
    #level_innovation = pm.Normal('level_innovation', mu=0., sigma=level_sigma, shape=n_timesteps )
    slope_innovation = pm.Normal('slope_innovation', mu=0., sigma=slope_sigma, shape=n_timesteps )

    # Define one step of the recursion. Note that `scan` passes the
    # sequences *before* the recurrent outputs, so the innovation must
    # come first in the argument list.
    def update_level_and_slope(slope_innovation, level, slope):
        new_level = level + slope  # + level_innovation
        new_slope = slope + slope_innovation
        return new_level, new_slope

    # Use `scan` to iteratively compute the level and slope
    outputs, _ = pt.scan(
        fn=update_level_and_slope,
        sequences=[slope_innovation],
        outputs_info=[initial_level, initial_slope],
    )
    level, slope = outputs
    # Store the values of 'level' and 'slope' as deterministic variables
    level = pm.Deterministic('level', level)
    slope = pm.Deterministic('slope', slope)

    # Define the likelihood
    likelihood_sigma = pm.HalfNormal('likelihood_sigma', sigma=1.)
    likelihood = pm.Normal('likelihood', mu=level, sigma=likelihood_sigma, observed=observed_time_series)

    # Sample from the posterior
    #trace = pm.sample(1000, tune=500, target_accept=0.9)
# %%
import pymc.sampling_jax

with model:
    _trace = pymc.sampling_jax.sample_numpyro_nuts(1000, tune=500, target_accept=0.9)


def show_param_trace_sample(trace, params=()):
    fig, axes = plt.subplots(len(params), 1, figsize=(10, 10))
    if len(params) == 1:
        axes = [axes]
    for ax, param in zip(axes, params):
        # Flatten the (chain, draw, time) posterior into (chain * draw, time)
        array = trace.posterior[param]
        flattened = array.values.reshape(-1, array.shape[-1])
        # Plot a handful of randomly chosen posterior trajectories
        sample_ints = np.random.choice(flattened.shape[0], 10)
        for s in flattened[sample_ints]:
            ax.plot(s, color='blue', alpha=0.2)
        ax.set(title='Sampled ' + param + ' over time', xlabel='Time', ylabel=param)

    plt.tight_layout()
    plt.show()
#%%
show_param_trace_sample(_trace, params=['slope', 'level', 'slope_innovation'])
# %%

I think there are two things:

  1. The level and slope components are random variables by virtue of their autoregressive structure. As a result, you need to write the scan so PyMC knows this. Some examples are here and here, with some small discussion of the finer points here. (See the sketch after this list for one scan-free way to write the model.)
  2. MCMC struggles with data that is completely noiseless. For comparison, you could try to estimate that straight line as a linear regression – you’ll get loads of divergences. If that’s really your data, I’d remove the innovations and just try to estimate the initial states.
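To expand on point 1 with a minimal sketch (assuming I’ve read your model correctly; this reuses the names from your code): because both recursions are pure accumulations, you can skip scan entirely and write the slope as a cumulative sum of its innovations and the level as a cumulative sum of the slopes. The innovations then enter the graph as ordinary Normal priors that PyMC can sample directly, rather than as hidden scan states.

import numpy as np
import pymc as pm
import pytensor.tensor as ptt

observed_time_series = np.arange(100) * 2.0
n_timesteps = len(observed_time_series)

with pm.Model() as model:
    slope_sigma = pm.HalfNormal('slope_sigma', sigma=0.1)
    # Loosened from your sigma=0.001: a very tight prior on the initial
    # level can fight the data and become another source of divergences.
    initial_level = pm.Normal('initial_level', mu=0., sigma=1.)
    initial_slope = pm.Normal('initial_slope', mu=0., sigma=1.)

    # d(t) = d(0) + running sum of slope innovations
    slope_innovation = pm.Normal('slope_innovation', mu=0., sigma=slope_sigma,
                                 shape=n_timesteps)
    slope = pm.Deterministic('slope', initial_slope + ptt.cumsum(slope_innovation))

    # L(t) = L(0) + running sum of slopes (e_L dropped, as in your model)
    level = pm.Deterministic('level', initial_level + ptt.cumsum(slope))

    likelihood_sigma = pm.HalfNormal('likelihood_sigma', sigma=1.)
    pm.Normal('likelihood', mu=level, sigma=likelihood_sigma,
              observed=observed_time_series)

Note that point 2 still applies: on perfectly noiseless data this will likely still diverge, so consider adding a little observation noise to your test series.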