Hello
I’m still quite new to pymc; thanks for the workshop and Q&A last week.
I have a question about parameters for modeling weird latent distributions. I posted last week that I was trying to evaluate how close pymc/pymc3 gets to the Bayesian-optimal solution, and I’ve now fixed the code (thanks so much).
I’ve now got a more specific question about the limits of pymc/pymc3. It seems that for simpler latent distributions (e.g. a mixture of two Betas) it can do quite well:
So here I’m using a simple biased coin-toss paradigm. On the left I’m creating the latent over the coin bias as the sum of two Beta distributions. I then draw a single bias, in this case a coin with a 0.76 probability of heads. For clarity, pymc does not learn the latent; we fix it as ground truth.
On the right I simulate an episode where the coin is tossed 10 times (red and blue vertical lines). Then I show the pymc3-predicted bias as a function of the number of coin tosses vs. the analytical (closed-form) solution. As you can see, pymc is very close to the closed-form solution. This is great, and before using pymc I was not able to show this!
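For reference, the analytical curve is just a grid-based Bayes update of the same prior. This is a minimal sketch of that calculation (the helper name analytical_posterior_mean and the hardcoded two-Beta prior are only for illustration, not part of the pymc code):

# minimal sketch of the closed-form reference: discretized Bayes update of the
# prior over the coin bias after observing some tosses
import numpy as np
import scipy.stats

x = np.linspace(0, 1, 100)                                 # grid of possible biases
prior = scipy.stats.beta.pdf(x, 5, 1.2) + scipy.stats.beta.pdf(x, 15, 50)
prior /= prior.sum()                                       # normalize on the grid

def analytical_posterior_mean(prior, x, tosses):
    heads, n = np.sum(tosses), len(tosses)
    likelihood = x ** heads * (1 - x) ** (n - heads)       # Bernoulli likelihood
    posterior = prior * likelihood
    posterior /= posterior.sum()
    return np.sum(posterior * x)                           # posterior mean = predicted bias

print(analytical_posterior_mean(prior, x, [1, 1, 1, 0, 1]))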
However, if I increase the complexity of the latent distribution, pymc3 seems to really struggle, especially early on (i.e. with only a few observations).
This is my code for generating the plot.
Question: am I using an insufficient number of samples or tuning steps? Or is this kind of funky distribution too challenging for pymc3? (Note: I tried using pymc, but it seemed much slower; it couldn’t even output the first step of the prediction after waiting for about 10 minutes.)
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import pymc3 as pm

# no. of samples and tuning steps for pm.sample
n_samples = 20000
n_tuning = 10000
n_obs = 10
n_cores = 24
# Step 3: Define parameters of the Beta distributions that make up the latent
two_betas = False
if two_betas:
    # OPTION #1: mixture of two Beta distributions
    # (other components I tried: [1, 1], [0.1, 0.1])
    coin_toss_params_list = [
        [5, 1.2],
        [15, 50],
    ]
else:
    # OPTION #2: mixture of eleven Beta distributions tiling the unit interval
    coin_toss_params_list = [[1100 - i, i + 100] for i in range(0, 1100, 100)]
print("# of latents: ", len(coin_toss_params_list))

# evaluate the latent (sum of the component Beta pdfs) on a grid
x = np.linspace(0, 1, 100)
y = np.sum([scipy.stats.beta.pdf(x, a, b) for a, b in coin_toss_params_list], axis=0)
# normalize y so that it sums to 1 on the grid
y /= y.sum()
########################################
# manually create examples (hardcoded tosses and ground-truth bias)
if two_betas:
    observed_data_all = np.array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1])
    observed_bias = 0.76767677
else:
    observed_data_all = np.array([1, 0, 0, 0, 0, 0, 0, 0, 1, 0])
    observed_bias = 0.16161616
# (alternative: autogenerate an example from the latent, e.g.
#  observed_bias, observed_data_all = generate_pymc3_example(y, x, n_obs=10))

# plot the latent pdf and the ground-truth bias
plt.figure(figsize=(5, 4), facecolor='white')
plt.plot(x, y)
plt.axvline(observed_bias, color='red', linestyle='--', label=str(observed_bias))
plt.legend(loc='best')
plt.savefig('/home/cat/beta_distribution.svg')
plt.show()

print("observed_data_all: ", observed_data_all)
print("observed_data_all mean: ", np.sum(observed_data_all) / n_obs)
#########################################
traces = []
for k in range(0, observed_data_all.shape[0] + 1, 1):
    observed_data = observed_data_all[:k]
    print('')
    print('*******************************************')
    print("observed_data: ", observed_data)

    # Step 4: Define the prior over the coin bias as a mixture of the Beta components
    with pm.Model() as model:
        w = pm.Dirichlet('w', a=np.ones(len(coin_toss_params_list)))
        prior = pm.Mixture('prior',
                           w=w,
                           comp_dists=[pm.Beta.dist(alpha=a, beta=b)
                                       for a, b in coin_toss_params_list])

        # Step 5: Define the likelihood of the observed tosses given the bias
        likelihood = pm.Bernoulli('likelihood',
                                  p=prior,
                                  observed=observed_data)

        # Steps 6 and 7: Run the inference algorithm (a single pm.sample call)
        trace = pm.sample(n_samples,
                          tune=n_tuning,
                          cores=n_cores,
                          chains=1)

    traces.append(trace)
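For completeness, the part that turns the traces into the plotted curve is roughly this (a sketch; I just take the posterior mean of 'prior' from each trace as the pymc3-predicted bias):

# sketch of the plotting step: posterior mean of the bias after each observed toss
pymc_pred = [np.mean(t['prior']) for t in traces]

plt.figure(figsize=(5, 4), facecolor='white')
plt.plot(range(len(pymc_pred)), pymc_pred, 'o-', label='pymc3 posterior mean')
plt.axhline(observed_bias, color='red', linestyle='--', label='ground-truth bias')
plt.xlabel('# of coin tosses observed')
plt.ylabel('predicted bias')
plt.legend(loc='best')
plt.show()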