Is this half-Cauchy model correct?

The best practice for out of sample prediction is to:

  1. Use pm.MutableData objects to hold your data
  2. Include size=X.shape[0] in the likelihood term
  3. Use pm.set_data together with pm.sample_posterior_predictive to get out-of-sample predictions.

Here is how it looks with your model:

with pm.Model() as model:
    # Mutable containers so the inputs can be swapped out later with pm.set_data
    X_pt = pm.MutableData('X', X_train)
    y_pt = pm.MutableData('y', y_train)

    # Weakly-informative priors on the regression intercept and slope
    α = pm.Normal("α", mu=0, sigma=1)
    β = pm.Normal("β", mu=0, sigma=1)

    # Half-Cauchy keeps the noise scale strictly positive
    sigma = pm.HalfCauchy("sigma", beta=1)

    # Linear predictor
    mu = α + β * X_pt

    # size=X_pt.shape[0] is very important here — without it the likelihood's
    # shape stays tied to the training data and prediction will error out later
    obs = pm.Normal('obs', mu=mu, sigma=sigma, observed=y_pt, size=X_pt.shape[0])
    
with model:
    # Draw 1000 posterior samples after 1000 tuning iterations
    trace = pm.sample(1000, tune=1000)

    # In-sample prediction: posterior predictive draws at the training inputs,
    # appended to `trace` in its "posterior_predictive" group
    trace = pm.sample_posterior_predictive(trace, extend_inferencedata=True)
    
    # Out-of-sample prediction: swap the test inputs into the mutable 'X' container
    pm.set_data({'X':X_test})
    # predictions=True makes a new group in the InferenceData object, so your in-sample predictions (saved in "posterior_predictive") won't be overwritten.
    trace = pm.sample_posterior_predictive(trace, extend_inferencedata=True, predictions=True)

Plotting the results:

fig, ax = plt.subplots()

# Draw the posterior-predictive mean plus an HDI band for each group:
# in-sample ("posterior_predictive" against X_train) and
# out-of-sample ("predictions" against X_test)
groups = ['posterior_predictive', 'predictions']
inputs = [X_train, X_test]
for group, X_data in zip(groups, inputs):
    interval = az.hdi(trace[group])
    draws = az.extract(trace, group)
    ax.plot(X_data, draws.obs.mean(dim=['sample']), label=group)
    ax.fill_between(X_data, interval.obs.sel(hdi='lower'), interval.obs.sel(hdi='higher'), alpha=0.25)

# Overlay the (scaled) observed data as a dashed red reference curve
ax.plot(X, y_data_scaled, color='tab:red', ls='--')
ax.legend()
plt.show()

1 Like