AttributeError: Can't pickle local object 'Mixture._comp_dist_random_wrapper.<locals>.wrapped_random'

I’m new to PyMC3 and am still struggling to make sense of the API. That said, my issue appears to be a dependency issue with multiprocessing.

I’m attempting to make a mixture model, specifying each distribution separately and then passing them as a list to the comp_dists kwarg of Mixture(). See the following example (reproduced from the second example here: https://docs.pymc.io/api/distributions/mixture.html)

Running the following:

def poisson_mixtures(lams, sizes):
    """Simulate a two-group Poisson mixture.

    Draws ``sizes[0]`` samples at rate ``lams[0]`` and ``sizes[1]`` samples at
    rate ``lams[1]`` from NumPy's global random state, and returns them joined
    end to end (first group, then second). Only the first two entries of each
    argument are used.
    """
    # Draw group 0 fully before touching group 1 so the random stream is
    # consumed in a fixed order.
    draws = [np.random.poisson(lam=lams[i], size=sizes[i]) for i in range(2)]
    return np.concatenate(draws)

# Simulated observations: 1000 draws at rate 14 followed by 250 draws at rate 30.
mixture = poisson_mixtures(lams=[14, 30], sizes=[1000, 250])

with pm.Model() as model1:
    # One Exponential(lam=1) prior per Poisson rate.
    lam1 = pm.Exponential('lam1', lam=1)
    lam2 = pm.Exponential('lam2', lam=1)

    # Component distributions built via .dist() (no variable name is given).
    pois1 = pm.Poisson.dist(mu=lam1)
    pois2 = pm.Poisson.dist(mu=lam2)

    # Dirichlet prior over the two mixture weights.
    w = pm.Dirichlet('w', a=np.array([1, 1]))
    
    # The components are passed as a Python list; this is the form for which
    # the "Can't pickle local object" error is reported in this thread.
    lh = pm.Mixture('lh', w=w, comp_dists = [pois1, pois2], observed=mixture)
    
    # NOTE(review): per the traceback, pm.sample goes through _mp_sample here,
    # and the failure happens while the worker process object is pickled.
    step = pm.Metropolis()
    trace1 = pm.sample(1000, step=step)

Which returns the following error:

AttributeError                            Traceback (most recent call last)
<ipython-input-33-4c8684396dbc> in <module>
     20 
     21     step = pm.Metropolis()
---> 22     trace1 = pm.sample(1000, step=step)

c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
    467         _print_step_hierarchy(step)
    468         try:
--> 469             trace = _mp_sample(**sample_args)
    470         except pickle.PickleError:
    471             _log.warning("Could not pickle model, sampling singlethreaded.")

c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, **kwargs)
   1052 
   1053     sampler = ps.ParallelSampler(
-> 1054         draws, tune, chains, cores, random_seed, start, step, chain, progressbar
   1055     )
   1056     try:

c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, chains, cores, seeds, start_points, step_method, start_chain_num, progressbar)
    357                 draws, tune, step_method, chain + start_chain_num, seed, start
    358             )
--> 359             for chain, seed, start in zip(range(chains), seeds, start_points)
    360         ]
    361 

c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in <listcomp>(.0)
    357                 draws, tune, step_method, chain + start_chain_num, seed, start
    358             )
--> 359             for chain, seed, start in zip(range(chains), seeds, start_points)
    360         ]
    361 

c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
    240         # We fork right away, so that the main process can start tqdm threads
    241         try:
--> 242             self._process.start()
    243         except IOError as e:
    244             # Something may have gone wrong during the fork / spawn

c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\process.py in start(self)
    110                'daemonic processes are not allowed to have children'
    111         _cleanup()
--> 112         self._popen = self._Popen(self)
    113         self._sentinel = self._popen.sentinel
    114         # Avoid a refcycle if the target function holds an indirect

c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\context.py in _Popen(process_obj)
    221     @staticmethod
    222     def _Popen(process_obj):
--> 223         return _default_context.get_context().Process._Popen(process_obj)
    224 
    225 class DefaultContext(BaseContext):

c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\context.py in _Popen(process_obj)
    320         def _Popen(process_obj):
    321             from .popen_spawn_win32 import Popen
--> 322             return Popen(process_obj)
    323 
    324     class SpawnContext(BaseContext):

c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
     87             try:
     88                 reduction.dump(prep_data, to_child)
---> 89                 reduction.dump(process_obj, to_child)
     90             finally:
     91                 set_spawning_popen(None)

c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
     58 def dump(obj, file, protocol=None):
     59     '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60     ForkingPickler(file, protocol).dump(obj)
     61 
     62 #

AttributeError: Can't pickle local object 'Mixture._comp_dist_random_wrapper.<locals>.wrapped_random'

If I instead use a multidimensional pymc3 distribution like this:

# Alternative: one rate prior of shape (2,) feeding a single Poisson
# distribution of shape (2,), instead of two separate distributions.
lam = pm.Exponential("lam", lam=1, shape=(2,))
components = pm.Poisson.dist(mu=lam, shape=(2,)) 

Specifying the mixtures like this:

# Here comp_dists is the single distribution object `components`, not a list.
lh = pm.Mixture('lh', w=w, comp_dists = components, observed=mixture)

It does not produce the same pickle related error. Is this a known issue (I couldn’t find evidence of it online) and if so, is there a workaround that still allows me to independently specify different distributions for the mixture? Thanks for any help.

Could you try removing step = pm.Metropolis() and just call trace = pm.sample(1000)?

Yes. Doing so produces the exact same error.

OK, it could be one of those Windows-related errors.

could you please raise a bug on Github?

Setting the number of cores equal to 1 solved the issue. Specifically:

# cores=1 is the workaround reported in this thread for the pickling error.
trace1 = pm.sample(1000, cores=1)

I’ll raise a bug on Github.

1 Like

I’m on a Mac, but I’m seeing the same error when trying to pickle a model that contains a mixture where one of the components of the mixture is itself a mixture. Here’s a MWE to reproduce what I’m seeing:

import numpy as np
import pymc3 as pm
import theano.tensor as tt


class MixHelper:
    """Callable log-density of a two-component mixture.

    Holds the two component distributions (``pv1``, ``pv2``) and the weight
    vector ``w``. Calling the instance with a point returns the log-sum-exp
    of each component's summed ``logp`` plus the log of its weight.
    """

    def __init__(self, pv1, pv2, w):
        self.pv1, self.pv2, self.w = pv1, pv2, w

    def __call__(self, pt):
        components = (self.pv1, self.pv2)
        weighted = [
            dist.logp(pt).sum() + tt.log(self.w[idx])
            for idx, dist in enumerate(components)
        ]
        return pm.logsumexp(weighted)


def get_model():
    """Build the example model with a mixture nested inside a mixture.

    The variable ``v`` is a ``DensityDist`` whose log-density mixes a Normal
    (``pv1``) with a ``pm.Mixture`` of three Normals (``pv2``), using
    Dirichlet-distributed weights ``w``. Returns the ``pm.Model``.
    """
    with pm.Model() as model:
        # Outer component 1: a single 3-dimensional Normal.
        pv1 = pm.Normal.dist(np.zeros(3), 10., shape=3)

        # Outer component 2: three Normals with random means, combined with
        # fixed weights through pm.Mixture.dist.
        pv2dists = [
            pm.Normal.dist(np.random.random(size=3), 5., shape=3)
            for _ in range(3)
        ]
        inner_weights = np.array([0.6, 0.3, 0.1])
        pv2 = pm.Mixture.dist(w=inner_weights, comp_dists=pv2dists, shape=3)

        w = pm.Dirichlet('w', a=np.ones(2))
        v = pm.DensityDist('v', MixHelper(pv1, pv2, w), shape=3)

    return model


def main():
    """Round-trip the example model through pickle using ``test.pkl``."""
    import pickle

    target = 'test.pkl'
    built = get_model()

    # Serialise the model to disk ...
    with open(target, 'wb') as sink:
        pickle.dump(built, sink)

    # ... and read it straight back.
    with open(target, 'rb') as source:
        built = pickle.load(source)


if __name__ == '__main__':
    main()

Here’s the error I get from running this on macOS 10.15.5, Python v3.7, pymc3 v3.9.2:

Traceback (most recent call last):
File "simple_test.py", line 61, in <module>
    main()
File "simple_test.py", line 47, in main
    pickle.dump(model, f)
AttributeError: Can't pickle local object 'Mixture._comp_dist_random_wrapper.<locals>.wrapped_random'

Ah, I see now — this happens because of the way Mixture._comp_dist_random_wrapper is implemented, which prevents defining a model within a function or method. So one option for me is to move the model definition out of the function. An alternate fix for my use case was to get rid of pm.Mixture.dist() and use my custom mixture class instead:

import numpy as np
import pymc3 as pm
import theano.tensor as tt


class MixHelper:
    """Callable log-density of a ``K``-component mixture.

    ``ps`` is the indexable collection of component distributions and ``w``
    the matching weights. Calling the instance with a point returns the
    log-sum-exp, over the first ``K`` components, of each component's summed
    ``logp`` plus the log of its weight.
    """

    def __init__(self, K, w, ps):
        self.K, self.ps, self.w = K, ps, w

    def __call__(self, pt):
        terms = [
            self.ps[idx].logp(pt).sum() + tt.log(self.w[idx])
            for idx in range(self.K)
        ]
        return pm.logsumexp(terms)


def get_model():
    """Build the example model using only ``MixHelper`` for the mixtures.

    Both the inner three-Normal mixture (``pv2``) and the outer two-way
    mixture (``v``) are ``DensityDist`` objects driven by ``MixHelper``;
    ``pm.Mixture`` is not used. Returns the ``pm.Model``.
    """
    with pm.Model() as model:
        # Outer component 1: a single 3-dimensional Normal.
        pv1 = pm.Normal.dist(np.zeros(3), 10., shape=3)

        # Outer component 2: three Normals with random means and fixed weights.
        pv2dists = [
            pm.Normal.dist(np.random.random(size=3), 5., shape=3)
            for _ in range(3)
        ]
        inner_logp = MixHelper(K=3, w=np.array([0.6, 0.3, 0.1]), ps=pv2dists)
        pv2 = pm.DensityDist.dist(inner_logp, shape=3)

        w = pm.Dirichlet('w', a=np.ones(2))
        outer_logp = MixHelper(K=2, w=w, ps=[pv1, pv2])
        v = pm.DensityDist('v', outer_logp, shape=3)

    return model


def main():
    """Round-trip the example model through pickle using ``test.pkl``."""
    import pickle

    target = 'test.pkl'
    built = get_model()

    # Serialise the model to disk ...
    with open(target, 'wb') as sink:
        pickle.dump(built, sink)

    # ... and read it straight back.
    with open(target, 'rb') as source:
        built = pickle.load(source)


if __name__ == '__main__':
    main()

I’m having the same issue. I’m not seeing anything on GitHub, did you create an issue that I’m not finding?

I also haven’t figured out a work-around besides cores=1, which is really unfortunate. I’d define a multi-dimensional distribution like in your example, but the mixture models I’m trying to create are mixtures of different distributions.