Input dimension mismatch in hierarchical mixture model

I’m fairly new to PyMC3, and I’m trying to put together a hierarchical model where I try to infer the evolution of the Dirichlet concentration parameters controlling a changing mixture of two distributions. When I try to specify the model the way that seems intuitive to me, I get a `ValueError: Input dimension mis-match.` I don’t understand where I need to specify shapes in order to get this to work. I share below first my data generation, and then my attempt at the model.

``````import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import pandas as pd

def linear_function(x, c0, c1):
return c0 + c1 * (x - 0.5)

def generate_data(N=1000):
"""Generate data where mixture concentration varies
"""
x = np.random.random(N)

c0_0 = 0
c1_0 = 2

c0_1 = 0
c1_1 = -2

log_alpha0 = linear_function(x, c0_0, c1_0)
log_alpha1 = linear_function(x, c0_1, c1_1)

alpha0 = np.exp(log_alpha0)
alpha1 = np.exp(log_alpha1)

data = np.zeros(N)

for i in range(N):
weights = st.dirichlet(alpha=[alpha0[i], alpha1[i]]).rvs().squeeze()

u = np.random.random()

if u < weights[0]:
data[i] = st.beta(a=3, b=1).rvs()
else:
data[i] = np.random.normal(3, 1)

return pd.DataFrame({"x": x, "y": data})

data = generate_data()
data.plot.scatter("x", "y")
``````

``````import pymc3 as pm
from theano import tensor as tt
import theano

with pm.Model() as model:

x = pm.Data('x', data['x'])
y = pm.Data('y', data['y'])

# linear hyperparams
c0_0 = pm.Uniform('c0_0', -10, 10)
c1_0 = pm.Uniform('c1_0', -10, 10)

c0_1 = pm.Uniform('c0_1', -10, 10)
c1_1 = pm.Uniform('c1_1', -10, 10)

log_alpha0 = pm.Deterministic('log_alpha0', linear_function(x, c0_0, c1_0))
alpha0 = pm.Deterministic('alpha0', pm.math.exp(log_alpha0))

log_alpha1 = pm.Deterministic('log_alpha1', linear_function(x, c0_1, c1_1))
alpha1 = pm.Deterministic('alpha1', pm.math.exp(log_alpha1))

alpha = pm.Deterministic('alpha', tt.stack([alpha0, alpha1]))

w = pm.Dirichlet('w', a=alpha)

distributions = [pm.Beta.dist(alpha=3, beta=1), pm.Normal.dist(mu=3, sigma=1)]

like = pm.Mixture('like', w=w, comp_dists=distributions, observed=y)
``````

This gives the following:

``````---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-e309e2b11551> in <module>
30     distributions = [pm.Beta.dist(alpha=3, beta=1), pm.Normal.dist(mu=3, sigma=1)]
31
---> 32     like = pm.Mixture('like', w=w, comp_dists=distributions, observed=y, shape=1000)
33
34 #     # Trying suggestion from https://discourse.pymc.io/t/problem-with-fitting-multivariate-mixture-of-gaussians/846/2

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/pymc3/distributions/distribution.py in __new__(cls, name, *args, **kwargs)
117         # Some distributions do not accept shape=None
118         if has_shape or shape is not None:
--> 119             dist = cls.dist(*args, **kwargs, shape=shape)
120         else:
121             dist = cls.dist(*args, **kwargs)

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/pymc3/distributions/distribution.py in dist(cls, *args, **kwargs)
128     def dist(cls, *args, **kwargs):
129         dist = object.__new__(cls)
--> 130         dist.__init__(*args, **kwargs)
131         return dist
132

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/pymc3/distributions/mixture.py in __init__(self, w, comp_dists, *args, **kwargs)
155
156             try:
--> 157                 self.mean = (w * self._comp_means()).sum(axis=-1)
158
159                 if "mean" not in defaults:

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/theano/tensor/var.py in __mul__(self, other)
126         # and the return value in that case
127         try:
--> 128             return theano.tensor.mul(self, other)
129         except (NotImplementedError, TypeError):
130             return NotImplemented

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/theano/graph/op.py in __call__(self, *inputs, **kwargs)
251
252         if config.compute_test_value != "off":
--> 253             compute_test_value(node)
254
255         if self.default_output is not None:

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/theano/graph/op.py in compute_test_value(node)
128     thunk.outputs = [storage_map[v] for v in node.outputs]
129
--> 130     required = thunk()
131     assert not required  # We provided all inputs
132

~/miniconda3/envs/grey-matter/lib/python3.7/site-packages/theano/graph/op.py in rval()
604
605         def rval():
--> 606             thunk()
607             for o in node.outputs:
608                 compute_map[o][0] = True