Memory error with pm.Metropolis() Bayesian NN pm.ADVI() works fine

Hi,
I am trying to build a NN as per https://twiecki.io/blog/2016/07/05/bayesian-deep-learning/.

When executing:
with neural_network:
    step = pm.Metropolis()
    trace = pm.sample(80000, step=step)[5000:]

I get a memory error.
Am I doing something wrong?

This works fine:
with neural_network:
    inference = pm.ADVI()
    approx = pm.fit(n=15000, method=inference, score=True)

See below for the error and the NN definition

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep

Metropolis: [w6]
Metropolis: [w5]
Metropolis: [w4]
Metropolis: [w3]
Metropolis: [w2]
Metropolis: [w1]


MemoryError Traceback (most recent call last)
in
1 with neural_network:
2 step = pm.Metropolis()
----> 3 trace = pm.sample(80000, step=step)[5000:]

/u01/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
437 _print_step_hierarchy(step)
438 try:
–> 439 trace = _mp_sample(**sample_args)
440 except pickle.PickleError:
441 _log.warning(“Could not pickle model, sampling singlethreaded.”)

/u01/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
977 update_start_vals(start[idx - chain], model.test_point, model)
978 if step.generates_stats and strace.supports_sampler_stats:
–> 979 strace.setup(draws + tune, idx + chain, step.stats_dtypes)
980 else:
981 strace.setup(draws + tune, idx + chain)

/u01/anaconda3/lib/python3.6/site-packages/pymc3/backends/ndarray.py in setup(self, draws, chain, sampler_vars)
197 for varname, shape in self.var_shapes.items():
198 self.samples[varname] = np.zeros((draws, ) + shape,
–> 199 dtype=self.var_dtypes[varname])
200
201 if sampler_vars is None:

MemoryError:

The NN definition:

def build_ann(init, input_var, target_var):
    """Build a 3-layer Bayesian neural network as a PyMC3 model.

    Parameters
    ----------
    init :
        Initializer passed as both ``W`` and ``b`` to every DenseLayer —
        presumably a PyMC3 prior spec, per the Wiecki blog post this code
        follows; TODO confirm against the caller.
    input_var :
        Symbolic input tensor for the InputLayer; the layer shape implies
        batches of 1-channel 28x28 images (e.g. MNIST).
    target_var :
        Observed class labels used by the Categorical likelihood.

    Returns
    -------
    pm.Model
        Model containing the network's weight priors and likelihood.
    """
    # NOTE(review): the original paste had lost its indentation (the
    # `with` block sat outside the function); structure restored here.
    with pm.Model() as neural_network:

        l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                         input_var=input_var)

        # Add a fully-connected layer of 800 units, using the linear
        # rectifier, and initializing weights with Glorot's scheme
        # (which is the default anyway):
        n_hid1 = 800
        l_hid1 = lasagne.layers.DenseLayer(
            l_in, num_units=n_hid1,
            nonlinearity=lasagne.nonlinearities.tanh,
            b=init,
            W=init
        )

        n_hid2 = 800
        # Another 800-unit layer:
        l_hid2 = lasagne.layers.DenseLayer(
            l_hid1, num_units=n_hid2,
            nonlinearity=lasagne.nonlinearities.tanh,
            b=init,
            W=init
        )

        # Finally, the fully-connected output layer of 10 softmax units:
        l_out = lasagne.layers.DenseLayer(
            l_hid2, num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax,
            b=init,
            W=init
        )

        prediction = lasagne.layers.get_output(l_out)

        # 10 discrete output classes -> pymc3 categorical distribution
        out = pm.Categorical('out', prediction, observed=target_var)

    return neural_network

You likely don't have enough memory for chains with 80,000 samples.
You should avoid using Metropolis (it performs very poorly in high-dimensional problems!) and use the default NUTS sampler instead. With NUTS you usually need only ~1000 samples, because the quality (i.e., effective sample size) of the samples is so much better.

Understood!
Thanks.