Hi, yes that’s why I use the Dirichlet to model my data: each sample is a distribution over K items but I do not know much about the underlying generative process. So I thought that using a Dirichlet would be appropriate and want to look at changing concentration parameters depending on subsets of the whole sample.
Initializing the model seems to work; I don’t get any error and can call both the gamma
and theta
variables. But when I try to run trace = pm.sample(draws=1000)
(of course within the context manager), my notebook server crashes with the error message:
Server Connection Error
A connection to the Jupyter server could not be established. JupyterLab will continue trying to reconnect. Check your network connection or Jupyter server configuration.
I am sure that calling sample
is the cause for this but don’t know what I’d have to change.
When I reproduced the code in the ipython CLI, I got the following error message:
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [gamma]
Sampling 4 chains, 0 divergences: 0%| | 0/6000 [00:00<?, ?draws/s]
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 105, in spa
wn_main
exitcode = _main(fd)
File "C:\ProgramData\Anaconda3\lib\multiprocessing\spawn.py", line 115, in _ma
in
self = reduction.pickle.load(from_parent)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\compile\function_modul
e.py", line 1082, in _constructor_Function
f = maker.create(input_storage, trustme=True)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\compile\function_modul
e.py", line 1715, in create
input_storage=input_storage_lists, storage_map=storage_map)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\link.py", line 699
, in make_thunk
storage_map=storage_map)[:3]
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\vm.py", line 1091,
in make_all
impl=impl))
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\op.py", line 955,
in make_thunk
no_recycling)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\op.py", line 858,
in make_c_thunk
output_storage=node_output_storage)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cc.py", line 1217,
in make_thunk
keep_lock=keep_lock)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cc.py", line 1157,
in __compile__
keep_lock=keep_lock)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cc.py", line 1623,
in cthunk_factory
module = get_module_cache().module_from_key(
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cc.py", line 48, i
n get_module_cache
return cmodule.get_module_cache(config.compiledir, init_args=init_args)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line
1587, in get_module_cache
_module_cache = ModuleCache(dirname, **init_args)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line
703, in __init__
self.refresh()
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\cmodule.py", line
826, in refresh
key_data = pickle.load(f)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\tensor\elemwise.py", l
ine 412, in __setstate__
super(Elemwise, self).__setstate__(d)
File "C:\ProgramData\Anaconda3\lib\site-packages\theano\gof\op.py", line 1160,
in __setstate__
self.__dict__.update(d)
KeyboardInterrupt
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\sampling.py in _mp_sample(draws
, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, mode
l, **kwargs)
1058 with sampler:
-> 1059 for draw in sampler:
1060 trace = traces[draw.chain - chain]
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __iter_
_(self)
393 while self._active:
--> 394 draw = ProcessAdapter.recv_draw(self._active)
395 proc, is_last, draw, tuning, stats, warns = draw
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\parallel_sampling.py in recv_dr
aw(processes, timeout)
283 pipes = [proc._msg_pipe for proc in processes]
--> 284 ready = multiprocessing.connection.wait(pipes)
285 if not ready:
C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py in wait(object_list,
timeout)
858
--> 859 ready_handles = _exhaustive_wait(waithandle_to_obj.keys(), t
imeout)
860 finally:
C:\ProgramData\Anaconda3\lib\multiprocessing\connection.py in _exhaustive_wait(h
andles, timeout)
790 while L:
--> 791 res = _winapi.WaitForMultipleObjects(L, False, timeout)
792 if res == WAIT_TIMEOUT:
KeyboardInterrupt:
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-14-1a746dc8612f> in <module>
2 gamma = pm.Gamma("gamma", alpha=1., beta=1., shape=K)
3 theta = pm.Dirichlet("theta", a=gamma, shape=K, observed=x_)
----> 4 trace = pm.sample(draws=1000)
5
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\sampling.py in sample(draws, st
ep, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, mod
el, random_seed, discard_tuned_samples, compute_convergence_checks, **kwargs)
467 _print_step_hierarchy(step)
468 try:
--> 469 trace = _mp_sample(**sample_args)
470 except pickle.PickleError:
471 _log.warning("Could not pickle model, sampling singlethreade
d.")
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\sampling.py in _mp_sample(draws
, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, mode
l, **kwargs)
1078 return MultiTrace(traces)
1079 except KeyboardInterrupt:
-> 1080 traces, length = _choose_chains(traces, tune)
1081 return MultiTrace(traces)[:length]
1082 finally:
C:\ProgramData\Anaconda3\lib\site-packages\pymc3\sampling.py in _choose_chains(t
races, tune)
1094 lengths = [max(0, len(trace) - tune) for trace in traces]
1095 if not sum(lengths):
-> 1096 raise ValueError("Not enough samples to build a trace.")
1097
1098 idxs = np.argsort(lengths)[::-1]
ValueError: Not enough samples to build a trace.
The sample that I passed to observed
has shape=(12625,12)
and I would assume that the sample size is not really the problem. To test that PyMC3 does not interpret x
as a single sample of shape (12625,12)
, I passed shape=x.shape
to the initialization of theta
but got the same error.