My PyMC3 model fails to run pm.sample(...)
when K = 10, but for smaller values of K (like 3), it runs fine. Does this throw an error because my model is too big for PyMC3 to handle?
As you can see, I am trying to do Bayesian time series analysis. The model is Markov chain but is also bayesian. This model is a basic one and I intend to do this for very large amount of data
Question 1. Is there anyway so I can avoid the error Max. recursion depth exceeded?
and get this running?
I previously got the same error a different but similar model: where i was doing theta[i] = pm.Normal(...)
inside an ‘i’ loop (also for other vectorised priors lambda0, lambda1 etc…) which I guess is inefficient. Later, I got rid of the error by vectorising theta and other priors using “shape” and this got rid of the error in my previous model.
Question 2. Is there any way I can vectorise the distributions in my code even more?
Question 3. Why is this error being caused? Is it because its too large for PyMC3 to handle?
Question 4. Is there any other better tool (Python or otherwise but preferably in Python) that is faster for Bayesian time series analysis? I read somewhere that stan/PyStan is faster than PyMC3 at least for Bayesian time series analysis. Is this true? Is there anything faster than stan/PyStan? How does edwardlib compare to PyMC3 and PyStan for the same purpose?
By the way: I need to do this on really large amount of data: ~ 200000 observations would be okay. Running on 10M records would be wonderful: But, I do not know if PyMC (or any other package for that matter) can fit on such a large dataset even if one uses Variational Inference instead of MCMC. Can someone please confirm this?
trace = None
summary_df = None
I = 1
K = 10
MAXSKILLS = 4
with pm.Model() as hotDINA:
# Priors: bk, ak, learn_k, ones, ss_k, g_k
theta = pm.Normal('theta', mu=0.0, sd=1.0, shape=(I, 1))
lambda0 = pm.Normal('lambda0', mu=0.0, sd=1.0, shape=(K, 1)) #bk
lambda1 = pm.Uniform('lambda1', 0.0, 2.5, shape=(K, 1)) #ak
learn = pm.Beta('learn', alpha=1, beta=1, shape=(K, 1))
ones = pm.Bernoulli('known', p=1, shape=(K, 1))
ss = pm.Uniform('ss', 0.5, 1.0, shape=(K, 1))
g = pm.Uniform('g', 0, 0.5, shape=(K, 1))
for i in range(I):
# t = 0
for k in range(K):
prob[i][0][k] = pm.math.invlogit((1.7) * lambda1[k,0] * (theta[i,0] - lambda0[k,0]))
alpha_name = 'alpha[' + str(i) + ',0,' + str(k) + ']'
alpha[i][0][k] = pm.Bernoulli(alpha_name, prob[i][0][k])
for s in range(MAXSKILLS):
idx = int(idxY[i][0][s] - 1)
if idx >= K: continue
py[i][0][idx] = pow(ss[idx,0], alpha[i][0][idx]) * pow(g[idx,0], (1-alpha[i][0][idx]))
# t = 1,2...T[i]-1
for t in tqdm(range(1, T[i])):
for k in range(K):
alpha[i][t][k] = pm.math.switch(alpha[i][t-1][k], ones[k,0], learn[k,0])
for s in range(MAXSKILLS):
idx = int(idxY[i][t][s] - 1)
if idx >= K: continue
py[i][t][idx] = pow(ss[idx,0], alpha[i][t][idx]) * pow(g[idx,0], (1-alpha[i][t][idx]))
start = time.time()
for t in tqdm(range(T[i])):
for k in range(K):
if py[i][t][k] != 0:
Y[i][t][k] = pm.Bernoulli(f'y_{i}_{t}_{k}', p=py[i][t][k], observed=[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0])
end = time.time()
res = (end - start)/T[i]
print("TIME w/o mini batch: ", res)
print("DONE")
trace = pm.sample(1000, tune=500)
pm.save_trace(trace=trace, directory=".pymc_1.trace", overwrite=True)
pm.traceplot(trace, ['theta', 'lambda0', 'lambda1'])
summary_df = pm.stats.summary(trace)
Traceback:
RecursionError Traceback (most recent call last)
<ipython-input-36-c58d2f4da611> in <module>
48
49 print("DONE")
---> 50 trace = pm.sample(1000, tune=500)
51 pm.save_trace(trace=trace, directory=".pymc_1.trace", overwrite=True)
52 pm.traceplot(trace, ['theta', 'lambda0', 'lambda1'])
D:\anaconda3\lib\site-packages\pymc3\sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, return_inferencedata, idata_kwargs, **kwargs)
520 _print_step_hierarchy(step)
521 try:
--> 522 trace = _mp_sample(**sample_args)
523 except pickle.PickleError:
524 _log.warning("Could not pickle model, sampling singlethreaded.")
D:\anaconda3\lib\site-packages\pymc3\sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, callback, **kwargs)
1410
1411 sampler = ps.ParallelSampler(
-> 1412 draws, tune, chains, cores, random_seed, start, step, chain, progressbar
1413 )
1414 try:
D:\anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, chains, cores, seeds, start_points, step_method, start_chain_num, progressbar)
372 draws, tune, step_method, chain + start_chain_num, seed, start
373 )
--> 374 for chain, seed, start in zip(range(chains), seeds, start_points)
375 ]
376
D:\anaconda3\lib\site-packages\pymc3\parallel_sampling.py in <listcomp>(.0)
372 draws, tune, step_method, chain + start_chain_num, seed, start
373 )
--> 374 for chain, seed, start in zip(range(chains), seeds, start_points)
375 ]
376
D:\anaconda3\lib\site-packages\pymc3\parallel_sampling.py in __init__(self, draws, tune, step_method, chain, seed, start)
257 )
258 try:
--> 259 self._process.start()
260 except IOError as e:
261 # Something may have gone wrong during the fork / spawn
D:\anaconda3\lib\multiprocessing\process.py in start(self)
110 'daemonic processes are not allowed to have children'
111 _cleanup()
--> 112 self._popen = self._Popen(self)
113 self._sentinel = self._popen.sentinel
114 # Avoid a refcycle if the target function holds an indirect
D:\anaconda3\lib\multiprocessing\context.py in _Popen(process_obj)
221 @staticmethod
222 def _Popen(process_obj):
--> 223 return _default_context.get_context().Process._Popen(process_obj)
224
225 class DefaultContext(BaseContext):
D:\anaconda3\lib\multiprocessing\context.py in _Popen(process_obj)
320 def _Popen(process_obj):
321 from .popen_spawn_win32 import Popen
--> 322 return Popen(process_obj)
323
324 class SpawnContext(BaseContext):
D:\anaconda3\lib\multiprocessing\popen_spawn_win32.py in __init__(self, process_obj)
87 try:
88 reduction.dump(prep_data, to_child)
---> 89 reduction.dump(process_obj, to_child)
90 finally:
91 set_spawning_popen(None)
D:\anaconda3\lib\multiprocessing\reduction.py in dump(obj, file, protocol)
58 def dump(obj, file, protocol=None):
59 '''Replacement for pickle.dump() using ForkingPickler.'''
---> 60 ForkingPickler(file, protocol).dump(obj)
61
62 #
RecursionError: maximum recursion depth exceeded
Thanks in advance!