I am pretty new to Gaussian Mixture Modeling. I am following a few tutorials and all of them take a lot of time to fit.
The first one is the most intuitive one
import pymc as pm
import numpy as np
import matplotlib.pyplot as plt
## Generate data with K components.
# Means, standard deviations, proportions
mus = [ 0, 6,-5]
sigmas = [ 1, 1.5, 3]
ps = [.2, .5,.3]
# Total amount of data
N = 1000
# Stack data into a single array
data = np.hstack([np.random.normal(mus[0], sigmas[0], int(ps[0]*N)),
np.random.normal(mus[1], sigmas[1], int(ps[1]*N)),
np.random.normal(mus[2], sigmas[2], int(ps[2]*N))])
## Build model
gmm = pm.Model()
# Specify number of groups
K = 3
with gmm:
# Prior over z
p = pm.Dirichlet('p', a=np.array([1.]*K))
# z is the component that the data point is being sampled from.
# Since we have N data points, z should be a vector with N elements.
z = pm.Categorical('z', p=p, shape=N)
# Prior over the component means and standard deviations
mu = pm.Normal('mu', mu=0., sigma=10., shape=K)
sigma = pm.HalfCauchy('sigma', beta=1., shape=K)
# Specify the likelihood
Y_obs = pm.Normal('Y_obs', mu=mu[z], sigma=sigma[z], observed=data)
trace = pm.sample()
and the second one is from the Mixture Docs of PyMC,
n_components = 3
with pm.Model() as gauss_mix:
mu = pm.Normal("mu", mu=data.mean(),
sigma=10,
shape=n_components,
transform=pm.distributions.transforms.univariate_ordered,
initval=[1, 2, 3],
)
sigma = pm.HalfNormal("sigma", sigma=10, shape=n_components)
weights = pm.Dirichlet("w", np.ones(n_components))
y_ = pm.NormalMixture("y_", w=weights, mu=mu, sigma=sigma, observed=data)
trace = pm.sample()
What is going on here? I was reading about reparametrizations, but only in the case of Multivariate Mixture Models. Why is taking so long?