Modelling a timeseries of cumulative maximum

Update: I kind of gave up on Arviz and used Altair to plot the posterior predictive distribution:

# Generate predictive posterior
with model:
  post_pred = pm.sample_posterior_predictive(trace, var_names=["Ys"])

# Preprocess predictive posterior
post_pred_obss = post_pred['Ys']
lower_bound = np.quantile(post_pred_obss, q=0.05, axis=0)
median = np.quantile(post_pred_obss, q=0.5, axis=0)
upper_bound = np.quantile(post_pred_obss, q=0.95, axis=0)

# Encode as pandas DataFrame
import pandas as pd
data_df = pd.DataFrame({'t': ts, 'obs' : obss})
hdi_df = pd.DataFrame({
    't': ts,
    'lower': lower_bound,
    'median' : median,
    'upper': upper_bound,
    })

# Plot observations and predictive posterior with Altair
import altair as alt
data_chart = alt.Chart(data_df)\
.mark_line(
    # size=10, 
    color='purple',
    )\
.encode(
    x=alt.X('t:Q', title='Timestep'),
    y=alt.Y('obs:Q', title='Cumulative maximum'),
)

# https://stackoverflow.com/questions/60649486/line-chart-with-custom-confidence-interval-in-altair
line_chart = alt.Chart(hdi_df).mark_line().encode(
    x='t:Q',
    y='median:Q'
)

band_chart = alt.Chart(hdi_df).mark_area(
    opacity=0.5
).encode(
    x='t:Q',
    y='lower:Q',
    y2='upper:Q'
)

(data_chart + line_chart + band_chart)\
.properties(
  width=800,
  height=400
)\
.configure_axis(
  labelFontSize=20,
  titleFontSize=30
  )

I am a bit nervous because I expected the uncertainty to grow wider with time, but we see it is a band of constant width. Is my intuition off or is this symptom of a problem?

For 2 I am still stuck. I tried cheating my way out of the problem by defining a chain of variables with the conditional marginal likelihood we previously derived. But I don’t seem to be able to instruct PyMC to feed to the variable the last variable of the joint distribution we defined :thinking:

#@title PyMC model

# Sample parameters
N = 100
M = 5
mu = 10.0
sigma = 1.0

import numpy as np
import pymc3 as pm

# Random sampling function
def random(point=None, size=None, N=N):

  # Retrive parameters
  mu = point['mu']
  sigma = point['sigma']

  # Generate sample
  obss = []
  obs = -np.math.inf
  for t in range(N):
    aux = mu + sigma*np.random.randn()
    obs = np.max([obs, aux])
    obss.append(obs)
  obss = np.array(obss)

  return obss

# Generate fake observations
ts = np.arange(N) 
obss = random({'mu': mu, 'sigma':sigma})

### Separate the data where there were jumps in the running max
### and the data where the running max stayed constant
jump_mask = np.insert(np.diff(obss) > 0,0,True) 
jump_data = obss[jump_mask] # Slice out the data where a new record is set
flat_data = obss[~jump_mask] # Slice the data where the record is maintained

# Logarithmic likelihood of the joint distribution of data
def logp(jump_data, flat_data, mu, sigma):
  x_dist = pm.Normal.dist(mu=mu, sigma=sigma)
  
  # Add likelihood contribution from the jump data
  log_likelihood = pm.math.sum(x_dist.logp(jump_data))

  # Add likelihood contribution from the flat data
  log_likelihood += pm.math.sum(x_dist.logcdf(flat_data))
  
  return log_likelihood

def next_logp(o_n, o_n_minus_1, mu, sigma):
  x_dist = pm.Normal.dist(mu=mu, sigma=sigma)
  log_likelihood = \
    pm.math.switch(
        pm.math.eq(o_n, o_n_minus_1), 
        x_dist.logp(o_n), 
        x_dist.logcdf(o_n)
        )
  return log_likelihood

# PyMC3 model definition
with pm.Model() as model:
  mu = pm.Uniform('mu', 0., 20.)
  sigma = pm.Uniform('sigma', 0.5, 1.5)
  Ys = pm.DensityDist('Ys', logp, 
                      shape = (N,),
                      random=random,
                      observed = {'jump_data':jump_data, 
                                  'flat_data':flat_data, 
                                  'mu': mu, 
                                  'sigma': sigma}
                      )
  
  previous = Ys[-1]  # This does not work
  for i in range(M):
    Zi = pm.DensityDist(f'Z{i}', next_logp,
                        observed = {
                            'previous' : previous,
                            'mu': mu,
                            'sigma': sigma,
                        })
    previous = Zi
  
  # Sample 
  trace = pm.sample(1000, chains=2, tune=1000,
                    return_inferencedata=True, 
                    idata_kwargs={"density_dist_obs": False})
  pm.plot_trace(trace, ['mu', 'sigma'])

Eg with the snippet above I get the error TypeError: 'MultiObservedRV' object is not subscriptable. How can I access one dimension of a joint distribution?