Update: I more or less gave up on ArviZ and used Altair to plot the posterior predictive distribution instead:
# Generate the posterior predictive distribution: one simulated 'Ys' series
# per posterior draw in `trace`, sampled inside the model context.
with model:
    post_pred = pm.sample_posterior_predictive(trace, var_names=["Ys"])
# Preprocess predictive posterior: 5% / 50% / 95% quantiles across draws
# (axis=0 reduces over posterior samples, leaving one value per timestep).
post_pred_obss = post_pred['Ys']
lower_bound, median, upper_bound = np.quantile(
    post_pred_obss, q=[0.05, 0.5, 0.95], axis=0
)
# Encode as pandas DataFrames: raw observations, and the quantile band
import pandas as pd

data_df = pd.DataFrame({'t': ts, 'obs': obss})
hdi_df = pd.DataFrame(
    {'t': ts, 'lower': lower_bound, 'median': median, 'upper': upper_bound}
)
# Plot observations and predictive posterior with Altair
import altair as alt

# Observed cumulative-maximum series, drawn as a purple line.
data_chart = (
    alt.Chart(data_df)
    .mark_line(color='purple')
    .encode(
        x=alt.X('t:Q', title='Timestep'),
        y=alt.Y('obs:Q', title='Cumulative maximum'),
    )
)
# Posterior predictive median line.
# https://stackoverflow.com/questions/60649486/line-chart-with-custom-confidence-interval-in-altair
line_chart = (
    alt.Chart(hdi_df)
    .mark_line()
    .encode(x='t:Q', y='median:Q')
)
# Shaded 5%-95% quantile band between `lower` and `upper`.
band_chart = (
    alt.Chart(hdi_df)
    .mark_area(opacity=0.5)
    .encode(x='t:Q', y='lower:Q', y2='upper:Q')
)
# Layer the three charts; left as a bare expression so the combined chart
# renders as the notebook cell output.
(
    (data_chart + line_chart + band_chart)
    .properties(width=800, height=400)
    .configure_axis(labelFontSize=20, titleFontSize=30)
)
I am a bit nervous because I expected the uncertainty to grow wider with time, but instead we see a band of constant width. Is my intuition off, or is this a symptom of a problem?
For question 2 I am still stuck. I tried to work around the problem by defining a chain of variables using the conditional marginal likelihood we previously derived, but I cannot figure out how to instruct PyMC to feed the last variable of the joint distribution we defined into the next variable in the chain.
#@title PyMC model
# Sample parameters
N = 100       # length of the running-maximum series
M = 5         # number of chained future variables Z0..Z{M-1}
mu = 10.0     # true Normal mean used to generate the fake data
sigma = 1.0   # true Normal std used to generate the fake data
import numpy as np
import pymc3 as pm
# Random sampling function
def random(point=None, size=None, N=100):
    """Draw one realization of the running maximum of N iid Normal draws.

    Parameters
    ----------
    point : dict
        Must contain 'mu' and 'sigma', the Normal location and scale.
    size :
        Ignored; kept so the signature matches PyMC3's `random=` callback.
    N : int
        Number of timesteps (default 100, matching the module-level N).

    Returns
    -------
    np.ndarray of shape (N,)
        The cumulative maximum o_t = max(x_1, ..., x_t).
    """
    # Retrieve parameters
    mu = point['mu']
    sigma = point['sigma']
    # Draw all N Normal samples at once, then take the running maximum.
    # (Replaces a Python loop seeded with np.math.inf -- np.math was
    # deprecated and removed in NumPy 2.0.)
    draws = mu + sigma * np.random.randn(N)
    return np.maximum.accumulate(draws)
# Generate fake observations
ts = np.arange(N)
obss = random({'mu': mu, 'sigma': sigma})

# Separate the timesteps where the running max jumped (a new record was set)
# from those where it stayed constant (the record was maintained).
# The first timestep is always a "jump" since the max starts at -inf.
jump_mask = np.concatenate(([True], np.diff(obss) > 0))
jump_data = obss[jump_mask]   # values where a new record is set
flat_data = obss[~jump_mask]  # values where the record is maintained
# Log-likelihood of the joint distribution of the running-maximum data
def logp(jump_data, flat_data, mu, sigma):
    """Joint log-likelihood of a running-maximum series under Normal(mu, sigma).

    A jump means the underlying draw equals the new record, so it contributes
    the Normal log-density; a flat step means the draw fell at or below the
    current record, so it contributes the Normal log-CDF.
    """
    x_dist = pm.Normal.dist(mu=mu, sigma=sigma)
    jump_term = pm.math.sum(x_dist.logp(jump_data))
    flat_term = pm.math.sum(x_dist.logcdf(flat_data))
    return jump_term + flat_term
def next_logp(o_n, o_n_minus_1, mu, sigma):
    """Conditional log-likelihood of the next running-maximum value.

    BUG FIX: the switch branches were inverted. When o_n == o_n_minus_1 the
    record was maintained, i.e. the underlying draw fell at or below the
    current maximum, so the contribution is the Normal log-CDF. When o_n
    differs (a new record), the draw equals o_n, so the contribution is the
    Normal log-density. This now mirrors `logp`, where jump_data -> logp
    and flat_data -> logcdf.
    """
    x_dist = pm.Normal.dist(mu=mu, sigma=sigma)
    log_likelihood = \
        pm.math.switch(
            pm.math.eq(o_n, o_n_minus_1),
            x_dist.logcdf(o_n),  # flat step: draw <= current max
            x_dist.logp(o_n)     # jump: draw equals the new record
        )
    return log_likelihood
# PyMC3 model definition
with pm.Model() as model:
    # Priors: uniform boxes that contain the true values (mu=10.0, sigma=1.0)
    mu = pm.Uniform('mu', 0., 20.)
    sigma = pm.Uniform('sigma', 0.5, 1.5)
    # Custom joint likelihood over the whole running-maximum series.
    # Passing a dict as `observed` makes DensityDist build a MultiObservedRV.
    Ys = pm.DensityDist('Ys', logp,
    shape = (N,),
    random=random,
    observed = {'jump_data':jump_data,
    'flat_data':flat_data,
    'mu': mu,
    'sigma': sigma}
    )
    # NOTE(review): a MultiObservedRV is not subscriptable, which is the
    # direct cause of the "TypeError: 'MultiObservedRV' object is not
    # subscriptable" below. Since Ys is observed, presumably the last
    # observed value obss[-1] is what is wanted here -- TODO confirm.
    previous = Ys[-1] # This does not work
    # Chain M one-step-ahead variables, each conditioned on the previous one.
    for i in range(M):
        # NOTE(review): the `observed` keys must match next_logp's parameter
        # names (o_n, o_n_minus_1, mu, sigma); 'previous' matches none of
        # them and nothing supplies o_n -- verify against the DensityDist docs.
        Zi = pm.DensityDist(f'Z{i}', next_logp,
        observed = {
        'previous' : previous,
        'mu': mu,
        'sigma': sigma,
        })
        previous = Zi
    # Sample
    trace = pm.sample(1000, chains=2, tune=1000,
    return_inferencedata=True,
    idata_kwargs={"density_dist_obs": False})
# Inspect the posterior traces of the two parameters.
pm.plot_trace(trace, ['mu', 'sigma'])
E.g., with the snippet above I get the error TypeError: 'MultiObservedRV' object is not subscriptable. How can I access a single dimension of a joint distribution?
