Hi PyMC3 team.
I am currently trying to model the distribution for a dataset of historical minimal temperature.
The code is:
data = [9.8, 11.1, 6.8, 13.6, 11.4, 10.5, 9.2, 10. , 9.9, 10.5, 10.8,
8.7, 8.7, 12.2, 10.1, 4.2, 10.3, 11.7, 6.8, 11.7, 11.4, 11.9,
11.3, 8.5, 8.3, 12. , 11.4, 10.2, 12.5]
I choose the mielke distribution, because seems to be the best fit among many.
The proble here is to code it with pm.DensityDist()
def mielke(x, k, s, loc, scale):
return k*x**(k-1)/(1+xs)(1+k/s) #Mielke
def mielke_rng(loc, scale, k, s, point=None, size=None):
# draw a numerical value for the parameters
loc_, scale_, k_, s_ = pm.distributions.draw_values([loc, scale, k, s], point=point)
size = 1 if size is None else size
return pm.distributions.generate_samples(stats.norm.rvs, loc=loc_, scale=scale_, k=k_, s=s_, size=size, broadcast_shape=(size,)) # I am not sure of that!!!
with pm.Model() as M1:
loc = pm.Normal(‘loc’, mu=-7, sd=5)
scale = pm.Normal(‘scale’, mu= 19, sd=5)
k = pm.Normal(‘k’, mu=9, sd=5)
s = pm.Normal(‘s’, mu=40, sd=5)
y = pm.DensityDist('y',logp=mielke,observed={'loc':loc, 'scale':scale, 's':s, 'k':k, 'x': data}, random=mielke_rng)
with M1:
trace = pm.sample(draws=1000, tune= 1000, return_inferencedata=True, idata_kwargs={“density_dist_obs”: False}, chains=4)
First Problem: if I choose “chains=4” I get this error:
Auto-assigning NUTS sampler…
INFO:pymc3:Auto-assigning NUTS sampler…
Initializing NUTS using jitter+adapt_diag…
INFO:pymc3:Initializing NUTS using jitter+adapt_diag…
Multiprocess sampling (4 chains in 4 jobs)
INFO:pymc3:Multiprocess sampling (4 chains in 4 jobs)
NUTS: [s, k, scale, loc]
INFO:pymc3:NUTS: [s, k, scale, loc]
0.00% [0/8000 00:00<00:00 Sampling 4 chains, 0 divergences]
RemoteTraceback Traceback (most recent call last)
RemoteTraceback:
“”"
Traceback (most recent call last):
File “C:\Users\eid0107418\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py”, line 116, in _unpickle_step_method
self._step_method = pickle.loads(self._step_method)
AttributeError: Can’t get attribute ‘mielke_rng’ on <module ‘main’ (built-in)>
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File “C:\Users\eid0107418\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py”, line 135, in run
self._unpickle_step_method()
File “C:\Users\eid0107418\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py”, line 118, in _unpickle_step_method
raise ValueError(unpickle_error)
ValueError: The model could not be unpickled. This is required for sampling with more than one core and multiprocessing context spawn or forkserver.
“”"
The above exception was the direct cause of the following exception:
ValueError Traceback (most recent call last)
File ~\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py:135, in run()
132 try:
133 # We do not create this in init, as pickling this
134 # would destroy the shared memory.
→ 135 self._unpickle_step_method()
136 self._point = self._make_numpy_refs()
File ~\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py:118, in _unpickle_step_method()
117 except Exception:
→ 118 raise ValueError(unpickle_error)
119 elif self._pickle_backend == “dill”:
ValueError: The model could not be unpickled. This is required for sampling with more than one core and multiprocessing context spawn or forkserver.
The above exception was the direct cause of the following exception:
RuntimeError Traceback (most recent call last)
Input In [247], in <cell line: 1>()
1 with M1:
----> 2 trace = pm.sample(draws=1000, tune= 1000, return_inferencedata=True, idata_kwargs={“density_dist_obs”: False}, chains=4)
File ~\Anaconda3-2022\lib\site-packages\pymc3\sampling.py:559, in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, pickle_backend, **kwargs)
557 _print_step_hierarchy(step)
558 try:
→ 559 trace = _mp_sample(**sample_args, **parallel_args)
560 except pickle.PickleError:
561 _log.warning(“Could not pickle model, sampling singlethreaded.”)
File ~\Anaconda3-2022\lib\site-packages\pymc3\sampling.py:1477, in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, callback, discard_tuned_samples, mp_ctx, pickle_backend, **kwargs)
1475 try:
1476 with sampler:
→ 1477 for draw in sampler:
1478 trace = traces[draw.chain - chain]
1479 if trace.supports_sampler_stats and draw.stats is not None:
File ~\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py:479, in ParallelSampler.iter(self)
476 self._progress.update(self._total_draws)
478 while self._active:
→ 479 draw = ProcessAdapter.recv_draw(self._active)
480 proc, is_last, draw, tuning, stats, warns = draw
481 self._total_draws += 1
File ~\Anaconda3-2022\lib\site-packages\pymc3\parallel_sampling.py:359, in ProcessAdapter.recv_draw(processes, timeout)
357 else:
358 error = RuntimeError(“Chain %s failed.” % proc.chain)
→ 359 raise error from old_error
360 elif msg[0] == “writing_done”:
361 proc._readable = True
RuntimeError: Chain 0 failed.
If I put “chains=1” no problem at all
Actually the problem with “chains=4” rises only when I try to define the “random method” for the DensityDist.
If I do not define the “random method” everithing smooth (at list apparently).
Moving on, the second problem:
I try to get the sample_posterior_predictive, but I get lots of errors:
with M1:
y_pred = pm.sample_posterior_predictive(trace, samples = 200, model=M1)
0.00% [0/200 00:00<00:00]
TypeError Traceback (most recent call last)
Input In [249], in <cell line: 1>()
1 with M1:
----> 2 y_pred = pm.sample_posterior_predictive(trace, samples = 200, model=M1)
File ~\Anaconda3-2022\lib\site-packages\pymc3\sampling.py:1733, in sample_posterior_predictive(trace, samples, model, var_names, size, keep_size, random_seed, progressbar)
1728 # there’s only a single chain, but the index might hit it multiple times if
1729 # the number of indices is greater than the length of the trace.
1730 else:
1731 param = trace[idx % len_trace]
→ 1733 values = draw_values(vars, point=param, size=size)
1734 for k, v in zip(vars_, values):
1735 ppc_trace_t.insert(k.name, v, idx)
File ~\Anaconda3-2022\lib\site-packages\pymc3\distributions\distribution.py:791, in draw_values(params, point, size)
787 temp_givens = [givens[k] for k in givens if k in children]
788 try:
789 # This may fail for autotransformed RVs, which don’t
790 # have the random method
→ 791 value = draw_value(next, point=point, givens=temp_givens, size=size)
792 givens[next_.name] = (next_, value)
793 drawn[(next_, size)] = value
File ~\Anaconda3-2022\lib\site-packages\pymc3\distributions\distribution.py:990, in _draw_value(param, point, givens, size)
988 return dist_tmp.random(point=point, size=size)
989 else:
→ 990 return param.distribution.random(point=point, size=size)
991 else:
992 if givens:
File ~\Anaconda3-2022\lib\site-packages\pymc3\distributions\distribution.py:577, in DensityDist.random(self, point, size, **kwargs)
575 size = to_tuple(size)
576 with _DrawValuesContextBlocker():
→ 577 test_draw = generate_samples(
578 self.rand,
579 size=None,
580 not_broadcast_kwargs=not_broadcast_kwargs,
581 )
582 test_shape = test_draw.shape
583 if self.shape[: len(size)] == size:
File ~\Anaconda3-2022\lib\site-packages\pymc3\distributions\distribution.py:1116, in generate_samples(generator, *args, **kwargs)
1100 raise TypeError(
1101 “”"Attempted to generate values with incompatible shapes:
1102 size: {size}
(…)
1113 )
1114 )
1115 if dist_bcast_shape[: len(size_tup)] == size_tup:
→ 1116 samples = generator(size=dist_bcast_shape, *args, **kwargs)
1117 else:
1118 samples = generator(size=size_tup + dist_bcast_shape, *args, **kwargs)
TypeError: mielke_rng() missing 4 required positional arguments: ‘loc’, ‘scale’, ‘k’, and ‘s’
This is due to the fact that I haven’t coded the “random method” correctly: actually I did not find a tutorial to code the DensityDistr and often could be of use to customize likelihood, because the ones available suffice not.
And yes, I am quite new in the use of PyMC3.
Any idea what I am doing wrong?
Thank you for your help.