Being a devout Bayesian, I (like most everyone else here) have heard of Kruschke’s BEST model, the more rigorous Bayesian alternative to the basic t-test we all know (original paper here). Wanting to experiment more with hypothesis testing on my own, I found PyMC’s implementation of it here - https://docs.pymc.io/notebooks/BEST.html - and started playing around.
To make a long story short, using a pure pm.Normal() to describe the data - instead of pm.StudentT() as in the official implementation above - usually produced better estimates for both
- differences between the 2 group means, and
- differences between the 2 group std’s.
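Concretely, the only structural change between the two versions is the likelihood on the observed data; condensed, it looks roughly like this (y1_mu / y1_std are the group-level parameters and group1_data stands in for one group’s observed array - the full script is at the bottom):

y1 = pm.StudentT('y1', nu = ν, mu = y1_mu, lam = y1_std**-2, observed = group1_data)  ## official BEST likelihood
y1 = pm.Normal('y1', mu = y1_mu, sd = y1_std, observed = group1_data)                 ## my Normal version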
Wanting to confirm this, I then simulated a lot of Normally- and T-distributed data with different A) standard deviations and B) sample sizes, to see how well, as a relative % of error, each model recovered the true group mean and std differences.
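To be explicit, "relative % of error" here is the absolute relative deviation of each posterior-mean estimate from the corresponding sample value - i.e. what the code at the bottom computes as (with estimated_diff standing for the posterior mean of the relevant difference node):

model_accuracy = abs((estimated_diff - true_diff) / true_diff)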
These are my results (it’s a big image, so I needed imgur). The green bars show the % error of my Normal model’s estimates, while the blue bars show the errors of the official implementation’s estimates. Each quadrant corresponds to
- the type of data fed into the models, and
- what was being estimated (mean or std differences between the groups).
Looking at the results, why does it seem like the Normal distribution produces overall better results for both Normally- and T-distributed data? I would greatly appreciate anyone’s thoughts on this!
Lastly, here is my code (which I’m pretty sure has no errors):
import numpy as np
from scipy.stats import t
import pymc3 as pm
set_mean = 50
n_sample_size_to_loop_over = [10,30,60,120,180]
scales_to_loop_over = [5,10,20,30,50,80,120,240,480]
## Final dictionary to hold results in the end
dict_to_hold_results = {}
for n_samples in n_sample_size_to_loop_over:
    for scales in scales_to_loop_over:
        ## Creating Normal data
        y_normal_1 = np.array( [int(i) for i in np.random.normal(loc = set_mean, scale = scales,
                                                                  size = n_samples)] )
        y_normal_2 = np.array( [int(i) for i in np.random.normal(loc = set_mean + 5, scale = scales + 3,
                                                                  size = n_samples)] )
        ## Creating t-distributed data.
        y_t_1 = np.array( [int(i) for i in t.rvs(df = 5, loc = set_mean, scale = scales,
                                                 size = n_samples)] )
        y_t_2 = np.array( [int(i) for i in t.rvs(df = 20, loc = set_mean + 5, scale = scales + 3,
                                                 size = n_samples)] )
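        ## Note: the int() casts truncate every draw to an integer, so both the "Normal"
        ## and "T" samples are analysed as discretised data.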
        ## Creating true values that the models are attempting to estimate.
        true_normal_mean_diffs = y_normal_1.mean() - y_normal_2.mean()
        true_normal_std_diffs = y_normal_1.std() - y_normal_2.std()
        true_T_mean_diffs = y_t_1.mean() - y_t_2.mean()
        true_T_std_diffs = y_t_1.std() - y_t_2.std()
        ## Creating statistics to help inform priors
        y_normal_mean = np.hstack((y_normal_1, y_normal_2)).mean()
        y_normal_std = np.hstack((y_normal_1, y_normal_2)).std()
        y_t_mean = np.hstack((y_t_1, y_t_2)).mean()
        y_t_std = np.hstack((y_t_1, y_t_2)).std()
        ## Wrapping everything up for use by the models
        true_values = {'Normal_data': [true_normal_mean_diffs, true_normal_std_diffs],
                       'T_data': [true_T_mean_diffs, true_T_std_diffs]}
        aggregate_stats = {'Normal_data': [y_normal_mean, y_normal_std],
                           'T_data': [y_t_mean, y_t_std]}
        data = {'Normal_data': [y_normal_1, y_normal_2], 'T_data': [y_t_1, y_t_2]}
        models_results = {'Normal_data': {'T_model': {}, 'Normal_model': {}},
                          'T_data': {'T_model': {}, 'Normal_model': {}}}
        ## Starting the modelling
        for i, data_type in enumerate(data):
            data_mean = aggregate_stats[data_type][0]
            data_std = aggregate_stats[data_type][1]
            ## The actual true values, against which the estimates will be compared at the end
            true_mean_diff = true_values[data_type][0]
            true_std_diff = true_values[data_type][1]
            individual_results = [ [[],[]],
                                   [[],[]] ]
            ## Each estimate is done 3 times and then averaged, to get a more accurate
            ## picture of what the model is trying to estimate.
            for j in range(3):
                ## Online BEST model implementation, using T-distribution
                with pm.Model() as best_online_model:
                    std_prior = pm.Uniform("std_prior", lower = 1, upper = 10)
                    y1_mu = pm.Normal('y1_mu', mu = data_mean, sd = std_prior)
                    y2_mu = pm.Normal('y2_mu', mu = data_mean, sd = std_prior)
                    y1_std = pm.Uniform('y1_std', lower = data_std/1000, upper = data_std*1000)
                    y2_std = pm.Uniform('y2_std', lower = data_std/1000, upper = data_std*1000)
                    ν = pm.Exponential('ν_minus_one', 1/29.) + 1
                    λ1 = y1_std**-2
                    λ2 = y2_std**-2
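                    ## ν_minus_one ~ Exponential with mean 29, shifted by +1 so the degrees
                    ## of freedom stay above 1; λ1/λ2 are the precisions (1/std**2) that
                    ## StudentT's lam argument expects.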
                    y1 = pm.StudentT('y1', nu = ν, mu = y1_mu, lam = λ1, observed = data[data_type][0])
                    y2 = pm.StudentT('y2', nu = ν, mu = y2_mu, lam = λ2, observed = data[data_type][1])
                    ## Trying to do the magic here
                    diff_of_means = pm.Deterministic('estimated difference of means', y1_mu - y2_mu)
                    diff_of_stds = pm.Deterministic('estimated difference of stds', y1_std - y2_std)
                    best_online_trace = pm.sample(draws=2000, tune=2000, target_accept=0.9, cores=4)
                q = pm.summary(best_online_trace)
                model_mean_accuracy = abs((q.loc['estimated difference of means', 'mean'] - true_mean_diff) / true_mean_diff)
                model_std_accuracy = abs((q.loc['estimated difference of stds', 'mean'] - true_std_diff) / true_std_diff)
                individual_results[0][0].append(model_mean_accuracy)
                individual_results[0][1].append(model_std_accuracy)
                ## My model, Normally distributed.
                with pm.Model() as best_my_own_ver_model:
                    std_prior = pm.Uniform("std_prior", lower = 1, upper = 10)
                    y1_mu = pm.Normal("y1_mu", mu = data_mean, sd = std_prior)
                    y2_mu = pm.Normal("y2_mu", mu = data_mean, sd = std_prior)
                    y1_std = pm.Uniform("y1_std", lower = 1/1000, upper = 1*1000) ## These could also be too
                    y2_std = pm.Uniform("y2_std", lower = 1/1000, upper = 1*1000) ## dispersed, like std_prior.
                    y1 = pm.Normal("y1", mu = y1_mu, sd = y1_std, observed = data[data_type][0])
                    y2 = pm.Normal("y2", mu = y2_mu, sd = y2_std, observed = data[data_type][1])
                    ## Trying to do the magic here
                    diff_of_means = pm.Deterministic('estimated difference of means', y1_mu - y2_mu)
                    diff_of_stds = pm.Deterministic('estimated difference of stds', y1_std - y2_std)
                    best_my_ver_trace = pm.sample(draws=3000, tune=3000, target_accept=0.9, cores=4)
                w = pm.summary(best_my_ver_trace)
                model_mean_accuracy = abs((w.loc['estimated difference of means', 'mean'] - true_mean_diff) / true_mean_diff)
                model_std_accuracy = abs((w.loc['estimated difference of stds', 'mean'] - true_std_diff) / true_std_diff)
                individual_results[1][0].append(model_mean_accuracy)
                individual_results[1][1].append(model_std_accuracy)
            ## Averaging results to reduce error in estimates
            models_results[data_type]['T_model']["% model_mean_accuracy"] = np.mean(individual_results[0][0])
            models_results[data_type]['T_model']["% model_std_accuracy"] = np.mean(individual_results[0][1])
            models_results[data_type]['Normal_model']["% model_mean_accuracy"] = np.mean(individual_results[1][0])
            models_results[data_type]['Normal_model']["% model_std_accuracy"] = np.mean(individual_results[1][1])
        dict_to_hold_results["n_size: {}, scale: {}".format(n_samples, scales)] = models_results
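For reference, one cell of the results grid can then be read out like this (illustrative key):

print(dict_to_hold_results['n_size: 10, scale: 5']['Normal_data']['T_model'])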