Dataset size dependent EOFError

Hello

I’m running the following model:

import numpy as np
import pymc as pm
import pytensor.tensor as tt

# simulate some data
np.random.seed(42)  # fixed seed so the simulated dataset is reproducible

n = 10_000  # number of individuals
group_size = n // 2  # individuals are split evenly between the two groups

t = 12  # number of time intervals

# per-interval event probabilities for group 1 and 2
p1 = 0.01
p2 = 0.05


def simulate_group(n, p, num_intervals=None):
    """Simulate 0/1 event indicators for one group of individuals.

    Parameters
    ----------
    n : int
        Number of individuals in the group.
    p : float
        Per-interval event probability (0 <= p <= 1).
    num_intervals : int, optional
        Number of time intervals per individual. Defaults to the
        module-level ``t`` so existing two-argument calls keep working.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, num_intervals)`` with entries in {0, 1}.
    """
    if num_intervals is None:
        # Backward-compatible fallback to the script-level interval count.
        num_intervals = t
    return np.random.choice((0, 1), size=(n, num_intervals), p=(1 - p, p))


# Stack the two simulated cohorts into a single (n, t) observation matrix.
observed = np.concatenate(
    (simulate_group(group_size, p1), simulate_group(group_size, p2)),
    axis=0,
)

# Per-individual group indicator: 0.0 for the first cohort, 1.0 for the second.
group = np.repeat(np.array([0.0, 1.0]), group_size)


with pm.Model(coords=dict(intervals=range(t))) as model:
    # Per-interval baseline rate, with hierarchical Gamma priors on its
    # mean and spread.
    lam0 = pm.Gamma(
        "lam0",
        mu=pm.Gamma("lam0_mu", mu=0.5, sigma=0.5),
        sigma=pm.Gamma("lam0_sigma", mu=0.5, sigma=0.1),
        dims="intervals",
    )
    # Log-linear group effect: group-1 rates are scaled by exp(beta).
    beta = pm.Normal("beta", mu=0, sigma=1)
    # (n, t) rate matrix: outer product of per-individual multiplier and
    # the per-interval baseline.
    lam = tt.outer(tt.exp(beta * group), lam0)
    pm.Poisson("obs", lam, observed=observed)
    # Keep the sampling result instead of discarding it; previously the
    # returned InferenceData was thrown away. NOTE(review): if the
    # multiprocessing EOFError reappears for large n, pm.sample(cores=1)
    # is a known workaround (single-process sampling avoids the pipe).
    idata = pm.sample()

Which generates the following error:

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [lam0_mu, lam0_sigma, lam0, beta]
Traceback (most recent call last):-----------------------------------| 0.00% [0/8000 00:00<? Sampling 4 chains, 0 divergences]
  File "<path>/test.py", line 38, in <module>
    pm.sample()
  File "<path>/lib/python3.10/site-packages/pymc/sampling/mcmc.py", line 666, in sample
    _mp_sample(**sample_args, **parallel_args)
  File "<path>/lib/python3.10/site-packages/pymc/sampling/mcmc.py", line 1055, in _mp_sample
    for draw in sampler:
  File "<path>/lib/python3.10/site-packages/pymc/sampling/parallel.py", line 448, in __iter__
    draw = ProcessAdapter.recv_draw(self._active)
  File "<path>/lib/python3.10/site-packages/pymc/sampling/parallel.py", line 320, in recv_draw
    msg = ready[0].recv()
  File "<path>/.pyenv/versions/3.10.4/lib/python3.10/multiprocessing/connection.py", line 255, in recv
    buf = self._recv_bytes()
  File "<path>/.pyenv/versions/3.10.4/lib/python3.10/multiprocessing/connection.py", line 419, in _recv_bytes
    buf = self._recv(4)
  File "<path>/.pyenv/versions/3.10.4/lib/python3.10/multiprocessing/connection.py", line 388, in _recv
    raise EOFError
EOFError

Setting n to something lower (e.g. 1000) resolves the error, and the model samples as expected.

Is there a way to run this with larger n?

Thanks
David

I’m using pymc 5.1.1

Can you try with the last version of PyMC?

Same behaviour on pymc 5.3.0.

I'm getting the same issue with a dataset at work on pymc 5.3.1 (M1 Mac).

I start getting the error consistently when the dataset size is N=2235, but it consistently works for N=2234. Unfortunately I can't share reproducible code for that one, but here's the model block:

with pm.Model(coords=coords) as m_sku:
    
    # NOTE(review): `coords`, `i`, `h`, `X`, and `data` are defined outside
    # this snippet — presumably product/hour index arrays and a size design
    # matrix; confirm their shapes against the full notebook.
    alpha = pm.Normal("alpha", 0, 1, dims='product')
    hour_effect = pm.Normal("beta",0,1,dims='hour')
    size_effect = pm.Normal("Bsize", 0, 0.5, dims='size')
    # Log-link Poisson rate: per-product intercept + hour effect + size term.
    lambd = pm.Deterministic('lambd', pm.math.exp( alpha[i] + hour_effect[h] + (size_effect @ X.T)  ) ) 
    
    # Rentals are right-censored at availability (demand capped by stock).
    dist = pm.Poisson.dist(mu=lambd)
    obs = pm.Censored("obs", dist, lower=None, upper=data.available.values, observed=data.rentals)
    idata2 = pm.sample(idata_kwargs={"log_likelihood": True})

I then realized that my design matrix was unidentifiable and dropped a dummy column; after that, the max dataset size I could go up to without an EOFError was N=2457 (even if I randomly subsampled the dataset, it seems to be a consistent limit).

Error:

---------------------------------------------------------------------------
EOFError                                  Traceback (most recent call last)
/var/folders/p4/f1r08vbs6lqbl816qzghkwtr0000gp/T/ipykernel_35693/2207676018.py in <cell line: 19>()
     27     cens = np.where(data.available <= data.rentals, data.available, np.inf)
     28     obs = pm.Censored("obs", dist, lower=0, upper=cens, observed=data.rentals)
---> 29     idata2 = pm.sample(idata_kwargs={"log_likelihood": True})

~/.pyenv/versions/3.9.7/envs/local_env/lib/python3.9/site-packages/pymc/sampling/mcmc.py in sample(draws, tune, chains, cores, random_seed, progressbar, step, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, model, **kwargs)
    675         _print_step_hierarchy(step)
    676         try:
--> 677             _mp_sample(**sample_args, **parallel_args)
    678         except pickle.PickleError:
    679             _log.warning("Could not pickle model, sampling singlethreaded.")

~/.pyenv/versions/3.9.7/envs/local_env/lib/python3.9/site-packages/pymc/sampling/mcmc.py in _mp_sample(draws, tune, step, chains, cores, random_seed, start, progressbar, traces, model, callback, mp_ctx, **kwargs)
   1064         try:
   1065             with sampler:
-> 1066                 for draw in sampler:
   1067                     strace = traces[draw.chain]
   1068                     strace.record(draw.point, draw.stats)

~/.pyenv/versions/3.9.7/envs/local_env/lib/python3.9/site-packages/pymc/sampling/parallel.py in __iter__(self)
    446 
    447         while self._active:
--> 448             draw = ProcessAdapter.recv_draw(self._active)
    449             proc, is_last, draw, tuning, stats = draw
    450             self._total_draws += 1

~/.pyenv/versions/3.9.7/envs/local_env/lib/python3.9/site-packages/pymc/sampling/parallel.py in recv_draw(processes, timeout)
    318         idxs = {id(proc._msg_pipe): proc for proc in processes}
    319         proc = idxs[id(ready[0])]
--> 320         msg = ready[0].recv()
    321 
    322         if msg[0] == "error":

~/.pyenv/versions/3.9.7/lib/python3.9/multiprocessing/connection.py in recv(self)
    253         self._check_closed()
    254         self._check_readable()
--> 255         buf = self._recv_bytes()
    256         return _ForkingPickler.loads(buf.getbuffer())
    257 

~/.pyenv/versions/3.9.7/lib/python3.9/multiprocessing/connection.py in _recv_bytes(self, maxsize)
    417 
    418     def _recv_bytes(self, maxsize=None):
--> 419         buf = self._recv(4)
    420         size, = struct.unpack("!i", buf.getvalue())
    421         if size == -1:

~/.pyenv/versions/3.9.7/lib/python3.9/multiprocessing/connection.py in _recv(self, size, read)
    386             if n == 0:
    387                 if remaining == size:
--> 388                     raise EOFError
    389                 else:
    390                     raise OSError("got end of file during message")

EOFError: 

edit: setting pm.sample(..., cores=1) seems to at least work for me for now @davipatti