I’m new to pyMC3 and am still struggling to make sense of the API, with that said, my issue appears to be a dependency issue with multiprocessing
I’m attempting to make a mixture model, specifying each distribution separately and then assigning them to comp_dists
kwarg of Mixture()
as a list. See the following example (reproduced from the second example here: https://docs.pymc.io/api/distributions/mixture.html)
Running the following:
def poisson_mixtures(lams, sizes):
grp1 = np.random.poisson(lam=lams[0], size=sizes[0])
grp2 = np.random.poisson(lam=lams[1], size=sizes[1])
mixture = np.concatenate([grp1, grp2])
return mixture
mixture = poisson_mixtures(lams=[14, 30], sizes=[1000, 250])
with pm.Model() as model1:
lam1 = pm.Exponential('lam1', lam=1)
lam2 = pm.Exponential('lam2', lam=1)
pois1 = pm.Poisson.dist(mu=lam1)
pois2 = pm.Poisson.dist(mu=lam2)
w = pm.Dirichlet('w', a=np.array([1, 1]))
lh = pm.Mixture('lh', w=w, comp_dists = [pois1, pois2], observed=mixture)
step = pm.Metropolis()
trace1 = pm.sample(1000, step=step)
Which returns the following error:
AttributeError Traceback (most recent call last)
<ipython-input-33-4c8684396dbc> in <module>
20
21 step = pm.Metropolis()
---> 22 trace1 = pm.sample(1000, step=step)
c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
467 _print_step_hierarchy(step)
468 try:
--> 469 trace = _mp_sample(**sample_args)
470 except pickle.PickleError:
471 _log.warning("Could not pickle model, sampling singlethreaded.")
c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, **kwargs)
1052
1053 sampler = ps.ParallelSampler(
-> 1054 draws, tune, chains, cores, random_seed, start, step, chain, progressbar
1055 )
1056 try:
c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, chains, cores, seeds, start_points, step_method, start_chain_num, progressbar)
357 draws, tune, step_method, chain + start_chain_num, seed, start
358 )
--> 359 for chain, seed, start in zip(range(chains), seeds, start_points)
360 ]
361
c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in <listcomp>(.0)
357 draws, tune, step_method, chain + start_chain_num, seed, start
358 )
--> 359 for chain, seed, start in zip(range(chains), seeds, start_points)
360 ]
361
c:\users\jstanley\miniconda3\envs\dd\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
240 # We fork right away, so that the main process can start tqdm threads
241 try:
--> 242 self._process.start()
243 except IOError as e:
244 # Something may have gone wrong during the fork / spawn
c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\context.py in _Popen(process_obj)
221 @staticmethod
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\context.py in _Popen(process_obj)
320 def _Popen(process_obj):
321 from .popen_spawn_win32 import Popen
--> 322 return Popen(process_obj)
323
324 class SpawnContext(BaseContext):
c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
87 try:
88 reduction.dump(prep_data, to_child)
---> 89 reduction.dump(process_obj, to_child)
90 finally:
91 set_spawning_popen(None)
c:\users\jstanley\miniconda3\envs\dd\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
58 def dump(obj, file, protocol=None):
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
62 #
AttributeError: Can't pickle local object 'Mixture._comp_dist_random_wrapper.<locals>.wrapped_random'
If I instead use a multidimensional pymc3 distribution like this:
lam = pm.Exponential("lam", lam=1, shape=(2,))
components = pm.Poisson.dist(mu=lam, shape=(2,))
Specifying the mixtures like this:
lh = pm.Mixture('lh', w=w, comp_dists = components, observed=mixture)
It does not produce the same pickle related error. Is this a known issue (I couldn’t find evidence of it online) and if so, is there a workaround that still allows me to independently specify different distributions for the mixture? Thanks for any help.