Expanding the common globe tossing example to multiple globes

Turns out there’s a nice example at https://docs.pymc.io/notebooks/hierarchical_partial_pooling.html , which works OK for my synthetic data!

For anyone else trying this, here’s a self-contained example:

%matplotlib inline
import pymc3 as pm
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import theano.tensor as tt

#Creating a synthetic dataset:
data = list()
probs = [0.9, 0.1]
for j in range(100):
    observations = list()
    #all globes must have at least one measurement:
    observations.append(np.random.choice([0,1], p=probs))
    #now take several more measurements:
    while np.random.uniform(0,1)>0.1:
        observations.append(np.random.choice([0,1], p=probs))
    data.append(observations)
    
    
#parse for input to pymc3:
num_measurements = [len(i) for i in data]
num_waters = [sum(i) for i in data]
N = len(data)



#pymc3 gear:
with pm.Model() as baseball_model:
    #group parameters:
    phi = pm.Uniform('phi', lower=0.0, upper=1.0)
    
    ##
    kappa_log = pm.Exponential('kappa_log', lam=1.5)
    kappa = pm.Deterministic('kappa', tt.exp(kappa_log))
    
    #instance parameters:
    thetas = pm.Beta('thetas', alpha=phi*kappa, beta=(1.0-phi)*kappa, shape=N)
    
    #likelihood:
    y = pm.Binomial('y', n=num_measurements, p=thetas, observed=num_waters)

with baseball_model:
    trace = pm.sample(draws=500, tune=500, chains=2,
                      target_accept=0.9)
    
    
pm.traceplot(trace, var_names=['phi', 'kappa']);

Now that I’ve got the hang of it, I happen to think this model using odds and probability might be more intuitive:

#pymc3 gear:
with pm.Model() as baseball_model:
    #group parameter:
    phi = pm.Normal('phi', mu=0, sigma=1.0)
    #probability:
    logit = pm.Deterministic('logit', 1 / (1 + np.exp(phi)))
    odds = pm.Deterministic('odds', (1-logit)/logit)
        
    #instance parameters:
    thetas = pm.Beta('thetas', alpha=1, beta=odds, shape=N)
    
    #likelihood:
    y = pm.Binomial('y', n=num_measurements, p=thetas, observed=num_waters)

with baseball_model:
    trace = pm.sample(draws=2000, tune=2000, chains=2,
                      target_accept=0.99)
    
    
pm.traceplot(trace, var_names=['phi', 'logit', 'odds']);
3 Likes