For ease of reference, here is a copy-and-pasteable version of the snippet we are trying to figure out:
import pymc as pm
import pytensor.tensor as pt
import pandas as pd
import numpy as np
# Load the primate trait data and the pairwise phylogenetic distance matrix.
df = pd.read_csv(
    'https://raw.githubusercontent.com/dustinstansbury/statistical-rethinking-2023/main/data/Primates301.csv',
    delimiter=';',
)
distance_matrix = pd.read_csv(
    'https://raw.githubusercontent.com/dustinstansbury/statistical-rethinking-2023/main/data/Primates301_distance_matrix.csv',
    delimiter=';',
)
# Column labels are read in as strings; cast to int so they align with
# df's integer index labels when we subset below.
distance_matrix.columns = distance_matrix.columns.map(int)

# Keep only species with an observed brain size (the model's outcome).
# group_size / body may still contain NaNs — those are imputed in the model.
df.dropna(subset='brain', inplace=True)

# Subset the distance matrix to the retained species, rows and columns in
# the same order as df.
distance_matrix = distance_matrix.loc[df.index, :].loc[:, df.index].copy()

# Normalize distances to [0, 1] by the single GLOBAL maximum.
# BUG FIX: `distance_matrix / distance_matrix.max()` divides each column by
# its own per-column maximum (DataFrame / Series aligns on columns), which
# breaks the symmetry of the distance matrix and would make the covariance
# kernel built from it invalid.
D_mat = distance_matrix / distance_matrix.to_numpy().max()
def log_standardize(x):
    """Log-transform *x*, then center and scale to unit standard deviation."""
    logged = np.log(x)
    centered = logged - logged.mean()
    return centered / logged.std()
# One coordinate per primate species, used to label vector-valued RVs.
coords = {'primate': df['name'].values}

with pm.Model(coords=coords) as naive_imputation_model:
    # Log-standardized predictors and outcome. G_obs and M_obs may contain
    # NaNs; PyMC automatically imputes missing entries of an observed
    # (univariate) distribution, which is the "naive imputation" here.
    G_obs = log_standardize(df.group_size.values)
    M_obs = log_standardize(df.body.values)
    B_obs = log_standardize(df.brain.values)

    # Priors
    alpha = pm.Normal("alpha", 0, 1)
    beta_G = pm.Normal("beta_G", 0, 0.5)
    beta_M = pm.Normal("beta_M", 0, 0.5)

    # Phylogenetic distance covariance prior, L1-kernel function
    eta_squared = pm.TruncatedNormal("eta_squared", 1, .25, lower=.001)
    rho = pm.TruncatedNormal("rho", 3, .25, lower=.001)
    # NOTE(review): D_mat is a pandas DataFrame here — presumably pytensor
    # coerces it via np.asarray; confirm, or pass D_mat.values explicitly.
    K = pm.Deterministic('K', eta_squared * pt.exp(-rho * D_mat))

    # Naive imputation for G, M: independent standard-normal priors fill in
    # the missing entries (ignores correlation structure among species).
    G = pm.Normal("G", 0, 1, observed=G_obs, dims='primate')
    M = pm.Normal("M", 0, 1, observed=M_obs, dims='primate')

    # Likelihood for B: linear in G and M, with phylogenetic covariance K.
    # CONSISTENCY FIX: B is indexed by the same species axis as G and M,
    # so it carries dims='primate' too.
    mu = alpha + beta_G * G + beta_M * M
    pm.MvNormal("B", mu=mu, cov=K, observed=B_obs, dims='primate')

    naive_imputation_inference = pm.sample()