Hi all,
I’m doing a Bayesian ANOVA and would like to move from normal distributions in my likelihood to mixture ones that better fit the data. Here’s my original code:
with pm.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
# Define priors
sigma = pm.HalfCauchy('sigma', beta=10)
# Get the groups from the dummy vars
mu = theDataFrame.loc[theDataFrame['group1'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group1'] == 1, varOfInterest].std()
group1 = pm.Normal('group1', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group2'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group2'] == 1, varOfInterest].std()
group2 = pm.Normal('group2', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group3'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group3'] == 1, varOfInterest].std()
group3 = pm.Normal('group3', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group4'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group4'] == 1, varOfInterest].std()
group4 = pm.Normal('group4', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group5'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group5'] == 1, varOfInterest].std()
group5 = pm.Normal('group5', mu=mu, sd=sd)
# Define likelihood
likelihood = pm.Normal(varOfInterest,
mu = group1 * theDataFrame['group1']
+ group2 * theDataFrame['group2']
+ group3 * theDataFrame['group3']
+ group4 * theDataFrame['group4']
+ group5 * theDataFrame['group5'],
sd=sigma,
observed=originalData[varOfInterest])
trace = pm.sample(draws=1000, tune=1000, chains=2, target_accept=0.95, cores=2, return_inferencedata=True)
This produces posterior distributions like this:
However, my data are not normally distributed (as you might see in the posteriors) so I’d like to use a distribution something like this:
Which is produced with this mixture:
w = pm.Dirichlet('w', a=np.array([0.3, 1]))
dist1 = pm.HalfNormal.dist(sigma=0.8)
dist2 = pm.Normal.dist(mu=10, sigma=1.5)
customDist = pm.Mixture('customDist', w=w, comp_dists = [dist1, dist2])
The problem is that I’m not figuring out how to combine these properly. Here’s what I have, but it generates an error.
with pm.Model() as model: # model specifications in PyMC3 are wrapped in a with-statement
# Define priors
sigma = pm.HalfCauchy('sigma', beta=10)
# Get the groups from the dummy vars
mu = theDataFrame.loc[theDataFrame['group1'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group1'] == 1, varOfInterest].std()
group1 = pm.Normal('group1', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group2'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group2'] == 1, varOfInterest].std()
group2 = pm.Normal('group2', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group3'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group3'] == 1, varOfInterest].std()
group3 = pm.Normal('group3', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group4'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group4'] == 1, varOfInterest].std()
group4 = pm.Normal('group4', mu=mu, sd=sd)
mu = theDataFrame.loc[theDataFrame['group5'] == 1, varOfInterest].mean()
sd = theDataFrame.loc[theDataFrame['group5'] == 1, varOfInterest].std()
group5 = pm.Normal('group5', mu=mu, sd=sd)
# Define likelihood
w = pm.Dirichlet('w', a=np.array([0.3, 1]))
dist1 = pm.HalfNormal.dist(sigma=0.8)
dist2 = pm.Normal.dist(mu = group1 * theDataFrame['group1']
+ group2 * theDataFrame['group2']
+ group3 * theDataFrame['group3']
+ group4 * theDataFrame['group4']
+ group5 * theDataFrame['group5'],
sigma=sigma)
likelihood = pm.Mixture(varOfInterest,
w=w,
comp_dists=[dist1, dist2],
observed=originalData[varOfInterest])
trace = pm.sample(draws=1000, tune=1000, chains=2, target_accept=0.95, cores=2, return_inferencedata=True)
Can someone set me straight here on how to integrate these two approaches? Thanks in advance!