Marginalized mixture won't begin sampling / throws AssertionError

Hi all,

I am encountering an error that I can't identify while trying to use a Gaussian mixture model. I switched to a marginalized mixture to make compilation easier, but it essentially won't get off the ground at all. I'm fairly certain the model is well specified: if I feed the data to scikit-learn's GaussianMixture, it immediately recovers the two peaks visible on the hist/KDE plot of the ~160,000 observations (a sanity check along the lines of the sketch below). I can't publicly share this specific data, but I can describe shapes, and the graphviz output indicates that everything looks kosher on that front.
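A minimal version of that scikit-learn sanity check, assuming obs_ev_means is the 1-D NumPy array of observations:

from sklearn.mixture import GaussianMixture

# Fit a two-component mixture as a sanity check; sklearn expects a 2-D array
gm = GaussianMixture(n_components=2, random_state=0).fit(obs_ev_means.reshape(-1, 1))
print(gm.means_.ravel(), gm.weights_)  # recovers two clearly separated means

Here is the PyMC snippet: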

import numpy as np
import pymc as pm

with pm.Model(coords={"cluster": np.arange(2), "batters": mean_in_batters}) as Basic_Mixture_Model:
    mu = pm.Normal(
        "mu",
        mu=80,
        sigma=10,
        initval=[70, 90],
        transform=pm.distributions.transforms.ordered,
        dims="cluster",
    )
    weights = pm.Dirichlet("w", np.ones(2), dims="cluster")
    tau = pm.Gamma("tau", 1.0, 1.0, dims="cluster")
    y = pm.NormalMixture("x", w=weights, mu=mu, tau=tau, observed=obs_ev_means, dims="batters")

    GM_trace = pm.sample_prior_predictive(1000)
    GM_trace.extend(pm.sample(chains=2, cores=4, random_seed=0))
    pm.sample_posterior_predictive(GM_trace, extend_inferencedata=True)

If I just call sample_prior_predictive, it spits out essentially no separation, giving me two identical means. When I call extend, or try creating the trace directly with pm.sample, I get the following AssertionError:

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[72], line 14
     11 tau = pm.Gamma("tau", 1.0, 1.0, dims="cluster")
     12 y = pm.NormalMixture("x", w=weights, mu=mu, tau = tau, observed=obs_ev_means, dims='batters')
---> 14 GM_trace = pm.sample(draws=1000,tune=1000, chains=2)

File ~\anaconda3\Lib\site-packages\pymc\sampling\mcmc.py:714, in sample(draws, tune, chains, cores, random_seed, progressbar, progressbar_theme, step, var_names, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, blas_cores, model, **kwargs)
    711         auto_nuts_init = False
    713 initial_points = None
--> 714 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
    716 if nuts_sampler != "pymc":
    717     if not isinstance(step, NUTS):

File ~\anaconda3\Lib\site-packages\pymc\sampling\mcmc.py:223, in assign_step_methods(model, step, methods, step_kwargs)
    221 if has_gradient:
    222     try:
--> 223         tg.grad(model_logp, var)  # type: ignore
    224     except (NotImplementedError, tg.NullTypeGradError):
    225         has_gradient = False

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:607, in grad(cost, wrt, consider_constant, disconnected_inputs, add_names, known_grads, return_disconnected, null_gradients)
    604     if hasattr(g.type, "dtype"):
    605         assert g.type.dtype in pytensor.tensor.type.float_dtypes
--> 607 _rval: Sequence[Variable] = _populate_grad_dict(
    608     var_to_app_to_idx, grad_dict, _wrt, cost_name
    609 )
    611 rval: MutableSequence[Variable | None] = list(_rval)
    613 for i in range(len(_rval)):

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1402, in _populate_grad_dict(var_to_app_to_idx, grad_dict, wrt, cost_name)
   1399     # end if cache miss
   1400     return grad_dict[var]
-> 1402 rval = [access_grad_cache(elem) for elem in wrt]
   1404 return rval

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1357, in _populate_grad_dict.<locals>.access_grad_cache(var)
   1355 for node in node_to_idx:
   1356     for idx in node_to_idx[node]:
-> 1357         term = access_term_cache(node)[idx]
   1359         if not isinstance(term, Variable):
   1360             raise TypeError(
   1361                 f"{node.op}.grad returned {type(term)}, expected"
   1362                 " Variable instance."
   1363             )

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1032, in _populate_grad_dict.<locals>.access_term_cache(node)
   1029 if node not in term_dict:
   1030     inputs = node.inputs
-> 1032     output_grads = [access_grad_cache(var) for var in node.outputs]
   1034     # list of bools indicating if each output is connected to the cost
   1035     outputs_connected = [
   1036         not isinstance(g.type, DisconnectedType) for g in output_grads
   1037     ]

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1357, in _populate_grad_dict.<locals>.access_grad_cache(var)
   1355 for node in node_to_idx:
   1356     for idx in node_to_idx[node]:
-> 1357         term = access_term_cache(node)[idx]
   1359         if not isinstance(term, Variable):
   1360             raise TypeError(
   1361                 f"{node.op}.grad returned {type(term)}, expected"
   1362                 " Variable instance."
   1363             )

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1032, in _populate_grad_dict.<locals>.access_term_cache(node)
   1029 if node not in term_dict:
   1030     inputs = node.inputs
-> 1032     output_grads = [access_grad_cache(var) for var in node.outputs]
   1034     # list of bools indicating if each output is connected to the cost
   1035     outputs_connected = [
   1036         not isinstance(g.type, DisconnectedType) for g in output_grads
   1037     ]

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1357, in _populate_grad_dict.<locals>.access_grad_cache(var)
   1355 for node in node_to_idx:
   1356     for idx in node_to_idx[node]:
-> 1357         term = access_term_cache(node)[idx]
   1359         if not isinstance(term, Variable):
   1360             raise TypeError(
   1361                 f"{node.op}.grad returned {type(term)}, expected"
   1362                 " Variable instance."
   1363             )

File ~\anaconda3\Lib\site-packages\pytensor\gradient.py:1187, in _populate_grad_dict.<locals>.access_term_cache(node)
   1179         if o_shape != g_shape:
   1180             raise ValueError(
   1181                 "Got a gradient of shape "
   1182                 + str(o_shape)
   1183                 + " on an output of shape "
   1184                 + str(g_shape)
   1185             )
-> 1187 input_grads = node.op.L_op(inputs, node.outputs, new_output_grads)
   1189 if input_grads is None:
   1190     raise TypeError(
   1191         f"{node.op}.grad returned NoneType, expected iterable."
   1192     )

File ~\anaconda3\Lib\site-packages\pytensor\graph\op.py:398, in Op.L_op(self, inputs, outputs, output_grads)
    371 def L_op(
    372     self,
    373     inputs: Sequence[Variable],
    374     outputs: Sequence[Variable],
    375     output_grads: Sequence[Variable],
    376 ) -> list[Variable]:
    377     r"""Construct a graph for the L-operator.
    378 
    379     The L-operator computes a row vector times the Jacobian.
   (...)
    396 
    397     """
--> 398     return self.grad(inputs, output_grads)

File ~\anaconda3\Lib\site-packages\pytensor\tensor\subtensor.py:1897, in IncSubtensor.grad(self, inputs, grads)
   1895         gx = g_output
   1896     gy = Subtensor(idx_list=self.idx_list)(g_output, *idx_list)
-> 1897     gy = _sum_grad_over_bcasted_dims(y, gy)
   1899 return [gx, gy] + [DisconnectedType()()] * len(idx_list)

File ~\anaconda3\Lib\site-packages\pytensor\tensor\subtensor.py:1933, in _sum_grad_over_bcasted_dims(x, gx)
   1931 x_dim_added = gx.ndim - x.ndim
   1932 x_broad = (True,) * x_dim_added + x.broadcastable
-> 1933 assert sum(gx.broadcastable) <= sum(x_broad)
   1934 axis_to_sum = []
   1935 for i in range(gx.ndim):

AssertionError: 

I followed the examples very straightforwardly, so I'm not sure exactly what's going wrong. Thanks in advance for your help.

What version of PyMC are you using? Try updating to the most recent version if you're not on it yet (you can check with something like the snippet below).
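For example:

import pymc as pm

print(pm.__version__)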

5.16.1

In case it's any indication, when I call az.summary on the trace produced by pm.sample_prior_predictive, it also gives me the following warning:

arviz - WARNING - Shape validation failed: input_shape: (1, 1000), minimum_shape: (chains=2, draws=4)
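If I understand the warning correctly, it's because az.summary defaults to the posterior group, while the prior samples have shape (1 chain, 1000 draws). Something along these lines (assuming az.summary's group argument is the right knob) summarizes the prior group directly:

import arviz as az

# Summarize the prior group rather than the default posterior group
az.summary(GM_trace, group="prior")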

I may be onto something: when I import pymc I get the following warning:

WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.

I'm looking at a thread on this now. I did a fresh install of Anaconda recently, but apparently I did not get BLAS.
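For what it's worth, here is how I'm checking what BLAS is visible (assuming the config flag I'm reading is the relevant one):

import numpy as np
import pytensor

np.show_config()  # shows which BLAS NumPy was built against
print(repr(pytensor.config.blas__ldflags))  # empty string -> PyTensor falls back to the NumPy C-API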

That means your installation is suboptimal, but it doesn't explain the issue. Can you provide a reproducible snippet with fake data?

I'm really starting to think the NumPy C-API warning may be relevant, even if it doesn't explain why the model can't get off the ground here. I just ran a fairly straightforward logistic regression on ~65k observations with an intercept and three predictors, and sampling took over 20 minutes. I have read several threads about fixing it, but none of the fixes seem to be working; would providing my conda list output be helpful in this instance?

Using a mixture of two Gaussians I get the same AssertionError. With three Gaussians it works fine.

Can you share some reproducible code?

import numpy as np
import pymc as pm

RANDOM_SEED = 8927
rng = np.random.default_rng(RANDOM_SEED)

N = 1000

W = np.array([0.5, 0.5])

μ_orig = np.array([0.5, 2.5])
σ_orig = np.array([0.45, 0.5])

# Assign each observation to a component, then draw from that component
component = rng.choice(μ_orig.size, size=N, p=W)
data = rng.normal(μ_orig[component], σ_orig[component], size=N)

with pm.Model(coords={"cluster": np.arange(len(W)), "data_id": np.arange(N)}) as model_multi:
    w = pm.Dirichlet("w", np.ones_like(W))

    mu = pm.Normal(
        "mu",
        np.zeros_like(W),
        1.0,
        dims="cluster",
        transform=pm.distributions.transforms.ordered,
        initval=[1, 2],
    )
    tau = pm.Gamma("tau", 1.0, 1.0, dims="cluster")

    data_obs = pm.NormalMixture("data_obs", w, mu, tau=tau, observed=data, dims="data_id")
    trace = pm.sample(5000, n_init=10000, tune=1000, return_inferencedata=True)

The transform=pm.distributions.transforms.ordered is not playing nicely with dims="cluster". Either of these seems to sample just fine:

    mu = pm.Normal(
        "mu",
        np.zeros_like(W),
        1.0,
        #dims="cluster",
        shape=2,
        transform=pm.distributions.transforms.ordered,
        initval=[1, 2],
    )

or

    mu = pm.Normal(
        "mu",
        np.zeros_like(W),
        1.0,
        dims="cluster",
        #transform=pm.distributions.transforms.ordered,
        initval=[1, 2],
    )

Any clues about why, @ricardoV94?

Plus, as mentioned by @ulfjoh1523, there's something funny going on when n_components == 2 that is different from what happens when n_components == 3; a three-component variant of the snippet above is sketched below.
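For reference, this is the kind of three-component variant I mean (a sketch reusing the imports and rng from the snippet above); with the same priors it samples without the AssertionError:

W3 = np.array([0.3, 0.4, 0.3])
μ3 = np.array([0.5, 2.5, 4.5])
σ3 = np.array([0.45, 0.5, 0.4])

component3 = rng.choice(μ3.size, size=N, p=W3)
data3 = rng.normal(μ3[component3], σ3[component3], size=N)

with pm.Model(coords={"cluster": np.arange(3), "data_id": np.arange(N)}) as model_three:
    w = pm.Dirichlet("w", np.ones_like(W3))
    mu = pm.Normal(
        "mu",
        np.zeros_like(W3),
        1.0,
        dims="cluster",
        transform=pm.distributions.transforms.ordered,
        initval=[1, 2, 3],
    )
    tau = pm.Gamma("tau", 1.0, 1.0, dims="cluster")
    data_obs = pm.NormalMixture("data_obs", w, mu, tau=tau, observed=data3, dims="data_id")
    trace3 = pm.sample(chains=2)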

I can reproduce. The PyTensor gradient is failing because it doesn't know that the shape of the raw mu is (2,). You can pass shape=(2,) alongside dims to avoid the error for now:

    mu = pm.Normal(
        "mu",
        np.zeros_like(W),
        1.0,
        dims="cluster",
        transform=pm.distributions.transforms.ordered,
        initval=[1, 2],
        shape=(2,)  # <- Note shape
    )

This is unrelated to the original topic, so any follow-up should be in a different thread. My bad, it's exactly the same issue.

The bug should be fixed by Fix bug from too strict type check in `_sum_grad_over_bcasted_dims` by ricardoV94 · Pull Request #1036 · pymc-devs/pytensor · GitHub.

Thanks @Legstrong and @ulfjoh1523 for reporting the bug and @cluhmann for debugging! Apologies for the trouble. As I mentioned, you can work around it for now by setting shape=(2,) alongside dims.


Thank you all for your help. This may also have been the issue underlying some of the other problems I posted about before. I appreciate the community support!