Here is the code. Please let me know if something is wrong or could be done more efficiently:
import sys
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt
from scipy.stats import bernoulli
p_true = 0.60 # hidden success rate
n = 100 # sample size
# synthetic dataset
X = bernoulli.rvs(p_true, size=n)
print(X)
print( "*** BOOTSTRAPPING ***" )
model = pm.Model()
with model:
p = pm.Beta('p', alpha=2, beta=2)
y_obs = pm.Binomial('y_obs', p=p, n=n, observed=np.sum(X))
step = pm.Metropolis()
trace = pm.sample(10000, step=step, progressbar=False)
interval = pm.hpd(trace, 0.05)
interval_width = interval['p'][1] - interval['p'][0]
print("Interval width: %f" % interval_width)
p_hat = np.mean(trace["p"])
print("Estimated success rate: %f" % p_hat)
#mu_p = p_hat
#sd_p = np.std(trace["p"])
#pm.traceplot(trace)
#pm.plot_posterior(trace)
#plt.show()
#sys.exit(0)
print("\n\nIterations")
s = 100
sample_sizes = list()
avg_interval_widths = list()
while s < 1000:
s = s + 50
sample_sizes.append(s)
interval_widths = list()
for i in range(5):
# generate data
x = bernoulli.rvs(p_hat, size=s)
with pm.Model():
p = pm.Beta('p', alpha=2, beta=2)
y_obs = pm.Binomial('y_obs', p=p, n=s, observed=np.sum(x))
step = pm.Metropolis()
trace = pm.sample(10000, step=step, progressbar=False)
interval = pm.hpd(trace, 0.05)
interval_width = interval['p'][1] - interval['p'][0]
interval_widths.append(interval_width)
print("\tInterval width: %f" % interval_width)
p_hat = np.mean(trace["p"])
print("\tEstimated success rate: %f" % p_hat)
print()
avg_interval_width = sum(interval_widths) / len(interval_widths)
avg_interval_widths.append(avg_interval_width)
print("\n*** SAMPLE SIZE: %d ***" % s)
print("Average interval width: %f" % avg_interval_width)
fig = plt.figure()
plt.plot( np.asarray(sample_sizes), np.asarray(avg_interval_widths) )
plt.show()