Seems to run fine for me:
NumberOfFeatures = 5
NumberOfClusters = 3
y = np.random.randn(1000, NumberOfFeatures)
with pm.Model() as Model:
# Create a covariance matrix for each potential cluster which relates all features of our data
Lower = tt.stack([pm.LKJCholeskyCov('Sigma_{}'.format(k), n=NumberOfFeatures, eta=2.,
sd_dist=pm.HalfNormal.dist(sd=1.)) for k in range(NumberOfClusters)])
Chol = tt.stack([pm.expand_packed_triangular(
NumberOfFeatures, Lower[k]) for k in range(NumberOfClusters)])
# The center of each cluster
Mus = tt.stack([pm.Uniform('Mu_{}'.format(k), lower=0., upper=1.,
shape=NumberOfFeatures) for k in range(NumberOfClusters)])
# Create the multivariate normal distribution for each cluster
MultivariateNormals = [pm.MvNormal.dist(
Mus[k], chol=Chol[k], shape=NumberOfFeatures) for k in range(NumberOfClusters)]
# Create the weights for each cluster which measures how much impact they have
Weights = pm.Dirichlet('w', np.ones(NumberOfClusters) / NumberOfClusters)
Prob = pm.Mixture("prob", w=Weights, comp_dists=MultivariateNormals,
observed=y)