Very parallel MCMC sampling

It seems like most of the speedup just comes from one-shot sampling and pre-allocation of memory. The same trick can be used in pymc3 by adding a dimension and "cheating": assigning the same observations to every slot of that dimension:

import numpy as np
import pymc3 as pm

mu_true = 3.0
sd_true = 0.4

observed = np.random.normal(size=(10,))*sd_true + mu_true

# 150 effective chains packed into 3 real chains
CHEAT_CHAINS = 50
with pm.Model() as mod:
    mu = pm.Normal('mu', 0, 1, shape=CHEAT_CHAINS)
    err = pm.HalfNormal('sd', 1, shape=CHEAT_CHAINS)
    for j in range(CHEAT_CHAINS):
        # every copy sees the same data
        xi = pm.Normal('x_%d' % j, mu[j], err[j], observed=observed)
    tr = pm.sample(1000, chains=3, cores=3, tune=500)

# 00:44<00:00, 101.14draws/s
# 00:40<00:00, 109.96draws/s
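
The Python loop isn't strictly necessary: the same "extra dimension" trick can be written with broadcasting, by tiling the observations across the chain dimension so that column j is observed under (mu[j], err[j]). A minimal sketch (mod_vec and tr_vec are my names, and I haven't timed this variant):

data = np.tile(observed[:, None], (1, CHEAT_CHAINS))  # shape (10, 50)
with pm.Model() as mod_vec:
    mu = pm.Normal('mu', 0, 1, shape=CHEAT_CHAINS)
    err = pm.HalfNormal('sd', 1, shape=CHEAT_CHAINS)
    # mu and err broadcast against the trailing chain axis of data
    x = pm.Normal('x', mu, err, observed=data)
    tr_vec = pm.sample(1000, chains=3, cores=3, tune=500)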

# old-fashioned: one real chain per desired chain
import resource

# 150 concurrent chain processes can hit the default open-file limit
# (each worker needs pipes), so raise it first
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (2500, hard))
with pm.Model() as mod:
    mu = pm.Normal('mu', 0, 1)
    err = pm.HalfNormal('sd', 1)
    x = pm.Normal('x', mu, err, observed=observed)
    tr = pm.sample(1000, chains=CHEAT_CHAINS*3, cores=3, tune=500)

# 01:04<00:00, 3471.98draws/s
# 01:13<00:00, 3056.69draws/s

So a pretty decent wall-clock speedup even using NUTS: roughly 40-44 s for the cheated model versus 64-73 s for 150 real chains. (The draws/s figures aren't directly comparable: each draw of the cheated model carries 50 chains' worth of parameters, so 101 draws/s is about 5000 effective chain-draws/s.)
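
One wrinkle is getting the per-chain samples back out, since they come stacked along the cheat dimension. A minimal sketch of pooling them, assuming the cheated model above (in pymc3, tr['mu'] concatenates the 3 real chains along the first axis):

mu_samples = tr['mu']               # shape (3 * 1000, CHEAT_CHAINS)
mu_pooled = mu_samples.reshape(-1)  # all 150 effective chains pooled
print(mu_pooled.mean(), mu_pooled.std())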
