I’m following this tutorial that parallel’s McElreath’s Statistical Rethinking.
When I get to cell 43 in the notebook, I get the following error:
ValueError: x and y must have same first dimension, but have shapes (4,) and (504,)
Here’s the full code
import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymc as pm
d = pd.read_csv("Data/chimpanzees.csv", sep=";")
treatment = (d.prosoc_left + 2 * d.condition).values
Ntreatments = len(np.unique(treatment))
actor = (d.actor - 1).astype(int).values
Nactor = len(np.unique(actor))
block = (d.block - 1).astype(int).values
Nblock = len(np.unique(block))
with pm.Model() as m_13_4:
a_bar = pm.Normal("a_bar", 0.0, 1.5)
sigma_a = pm.Exponential("sigma_a", 1.0)
sigma_g = pm.Exponential("sigma_g", 1.0)
a = pm.Normal("a", a_bar, sigma_a, shape=Nactor)
g = pm.Normal("g", 0.0, sigma_g, shape=Nblock)
b = pm.Normal("b", 0.0, 0.5, shape=Ntreatments)
actor_ = pm.Data("actor", actor)
block_ = pm.Data("block", block)
treatment_ = pm.Data("treatment", treatment)
#data = pm.Data("data", d, mutable=True)
p = pm.Deterministic("p", pm.math.invlogit(a[actor_] + g[block_] + b[treatment_]))
pulled_left = pm.Binomial("pulled_left", 1, p, observed=d.pulled_left.values)
trace_13_4 = pm.sample(tune=3000, target_accept=0.99, random_seed=RANDOM_SEED)
chimp = 2
new_data = dict(actor=np.repeat(chimp - 1, 4), block=np.repeat(0, 4), treatment=np.arange(4))
with m_13_4:
post_pred = pm.sample_posterior_predictive(
trace_13_4, var_names=["p"], random_seed=RANDOM_SEED
)["posterior_predictive"]
post_pred["p"].mean(["chain", "draw"]).round(2), az.hdi(post_pred)["p"].round(2)
def chimp_pp_plot(hpd_data, mean_data, title):
_, ax = plt.subplots(1, 1, figsize=(5, 5))
az.plot_hdi(range(4), hpd_data)
ax.plot(range(4), mean_data)
ax.set_ylim(0, 1.1)
ax.set_xlabel("treatment")
ax.set_ylabel("proportion pulled left")
ax.set_xticks(range(4), ("R/N", "L/N", "R/P", "L/P"))
plt.title(title)
chimp_pp_plot(
hpd_data=np.array(post_pred["p"]).T,
mean_data=post_pred["p"].mean(["chain", "draw"]).round(2),
title=f"Posterior predictions for chimp #{chimp}",
)
I understand that it’s a dimension problem, but not sure how to fix as post_pred is a dataset type.