Turns out there’s a nice example at https://docs.pymc.io/notebooks/hierarchical_partial_pooling.html, which works OK for my synthetic data!
For anyone else trying this, here’s a self-contained example:
%matplotlib inline
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import theano.tensor as tt
#Creating a synthetic dataset:
data = list()
probs = [0.9, 0.1]
for j in range(100):
    observations = list()
    #all globes must have at least one measurement:
    observations.append(np.random.choice([0,1], p=probs))
    #now take several more measurements:
    while np.random.uniform(0,1) > 0.1:
        observations.append(np.random.choice([0,1], p=probs))
    data.append(observations)
#parse for input to pymc3:
num_measurements = [len(i) for i in data]
num_waters = [sum(i) for i in data]
N = len(data)
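#quick sanity check: with p=[0.9, 0.1], np.random.choice([0,1], p=probs) returns
#a 1 ("water") with probability 0.1, so the pooled water fraction should be near 0.1:
print(N, 'globes,', sum(num_measurements), 'measurements in total')
print('pooled water fraction:', sum(num_waters) / sum(num_measurements))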
#pymc3 gear:
with pm.Model() as baseball_model:
    #group parameters:
    phi = pm.Uniform('phi', lower=0.0, upper=1.0)
    kappa_log = pm.Exponential('kappa_log', lam=1.5)
    kappa = pm.Deterministic('kappa', tt.exp(kappa_log))
    #instance parameters:
    thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
    #likelihood:
    y = pm.Binomial('y', n=num_measurements, p=thetas, observed=num_waters)

with baseball_model:
    trace = pm.sample(draws=500, tune=500, chains=2,
                      target_accept=0.9)

pm.traceplot(trace, var_names=['phi', 'kappa']);
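To sanity-check the fit, it helps to compare each globe's posterior mean theta against its raw water fraction; the hierarchical prior should pull the globes with only a handful of measurements toward the group-level mean. A quick sketch using the trace from above:

#posterior mean of theta for each globe:
posterior_means = trace['thetas'].mean(axis=0)
#raw (unpooled) water fraction for each globe:
raw_fractions = np.array(num_waters) / np.array(num_measurements)
plt.scatter(raw_fractions, posterior_means)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('raw water fraction')
plt.ylabel('posterior mean theta');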
Now that I’ve got the hang of it, I think this alternative parameterization, in terms of a probability and its odds, might be more intuitive:
#pymc3 gear:
with pm.Model() as baseball_model:
    #group parameter:
    phi = pm.Normal('phi', mu=0, sigma=1.0)
    #probability (despite the name, 'logit' is the inverse-logit of -phi, i.e. a probability in (0,1)):
    logit = pm.Deterministic('logit', 1 / (1 + tt.exp(phi)))
    #odds against water, (1-p)/p:
    odds = pm.Deterministic('odds', (1 - logit) / logit)
    #instance parameters:
    thetas = pm.Beta('thetas', alpha=1, beta=odds, shape=N)
    #likelihood:
    y = pm.Binomial('y', n=num_measurements, p=thetas, observed=num_waters)

with baseball_model:
    trace = pm.sample(draws=2000, tune=2000, chains=2,
                      target_accept=0.99)

pm.traceplot(trace, var_names=['phi', 'logit', 'odds']);
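A side note on why this works: a Beta(1, b) distribution has mean 1/(1+b), so with beta = (1-p)/p each theta gets a prior mean of exactly p, and phi still controls the group-level water probability through 1/(1 + exp(phi)). After sampling, pm.summary is a quick way to confirm the posterior for 'logit' lands near the true water probability of 0.1:

pm.summary(trace, var_names=['phi', 'logit', 'odds'])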