import pymc as pm
import pandas as pd
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
print(f"Runing on PyMC v{pm.__version__}")
df_penguins=pd.read_csv('penguins.csv').dropna(how = 'any').reset_index(drop=True)
flipper_length_obs = df_penguins.loc[df_penguins['species']=='Adelie',"flipper_length_mm"].values
mass_obs=df_penguins.loc[df_penguins['species']=='Adelie','body_mass_g'].values
with pm.Model() as model_adelie_flipper_regression:
data = pm.MutableData("data", flipper_length_obs)
β_0 = pm.Normal("β_0", 0, 4000)
β_1 = pm.Normal("β_1", 0, 4000)
σ = pm.HalfNormal("σ", 2000)
μ = pm.Deterministic("μ", β_0 + β_1 * data)
y = pm.Normal("y", mu=μ, sigma=σ, observed=mass_obs)
idata_r = pm.sample(chains=2)
idata_r.extend(pm.sample_posterior_predictive(idata_r))
ppc=idata_r.posterior_predictive['y'].values.flatten()
fig, ax = plt.subplots()
z=az.summary(idata_r,var_names=['β_0','β_1','σ'])
alpha_m=z['mean'][0]
beta_m =z['mean'][1]
flipper_length = np.linspace(flipper_length_obs.min(), flipper_length_obs.max(), 100)
flipper_length_mean = alpha_m + beta_m * flipper_length
ax.plot(flipper_length, flipper_length_mean, c='C4',
label=f'y = {alpha_m:.2f} + {beta_m:.2f} * x')
ax.scatter(flipper_length_obs, mass_obs,marker='.')
az.plot_hdi(flipper_length_obs, idata_r.posterior['μ'], hdi_prob=0.94, color='lightblue', ax=ax)
ax.set_xlabel('Flipper Length')
ax.set_ylabel('Mass')
This last bit of code is not working for me:I am trying to obtain the posterior predictive mass values ‘y’ for a single flipper_length=190. It was originally written in pymc3 and I suspect I am porting it incorrectly to pymc v4.4.0. or not understanding it correctly.
# Change the underlying value to the mean observed flipper length to 190 and get predicted mass 'y' distribution.
with model_adelie_flipper_regression:
pm.set_data({"data": [190]})
ppcs_at_190 = pm.sample_posterior_predictive(idata_r.posterior, var_names=["y", "μ"])
fig, ax = plt.subplots()
az.plot_dist(ppcs_at_190["y"], label="Posterior Predictive of \nIndividual Penguin Mass", ax=ax)
az.plot_dist(ppcs_at_190["μ"],label="Posterior Predictive of μ", color="C4", ax=ax) .
It would be great if anybody could clue me in to also adding az.hdi() lines to the above graph for the predicted mass values ‘y’ for eaclh flipper length ‘x’. Thanking you, Declan