Hi,
I am trying to build a Bayesian NN as per https://twiecki.io/blog/2016/07/05/bayesian-deep-learning/.
When executing:
with neural_network:
    step = pm.Metropolis()
    trace = pm.sample(80000, step=step)[5000:]
I get a MemoryError. Am I doing something wrong?
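To check whether it is simply the size of the run, my next step is to retry with far fewer draws and a single chain (a sketch of the same call, nothing else changed):

with neural_network:
    step = pm.Metropolis()
    trace = pm.sample(2000, step=step, chains=1, cores=1)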
This works fine:
with neural_network:
    inference = pm.ADVI()
    approx = pm.fit(n=15000, method=inference, score=True)
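Afterwards I draw posterior samples from the fitted approximation, roughly along these lines (using Approximation.sample):

trace = approx.sample(draws=5000)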
See below for the error and the NN definition.
Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
Metropolis: [w6]
Metropolis: [w5]
Metropolis: [w4]
Metropolis: [w3]
Metropolis: [w2]
Metropolis: [w1]
MemoryError Traceback (most recent call last)
<ipython-input> in <module>
1 with neural_network:
2 step = pm.Metropolis()
----> 3 trace = pm.sample(80000, step=step)[5000:]
/u01/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
437 _print_step_hierarchy(step)
438 try:
--> 439 trace = _mp_sample(**sample_args)
440 except pickle.PickleError:
441 _log.warning("Could not pickle model, sampling singlethreaded.")
/u01/anaconda3/lib/python3.6/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
977 update_start_vals(start[idx - chain], model.test_point, model)
978 if step.generates_stats and strace.supports_sampler_stats:
--> 979 strace.setup(draws + tune, idx + chain, step.stats_dtypes)
980 else:
981 strace.setup(draws + tune, idx + chain)
/u01/anaconda3/lib/python3.6/site-packages/pymc3/backends/ndarray.py in setup(self, draws, chain, sampler_vars)
197 for varname, shape in self.var_shapes.items():
198 self.samples[varname] = np.zeros((draws, ) + shape,
--> 199 dtype=self.var_dtypes[varname])
200
201 if sampler_vars is None:
MemoryError:
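From the failing line, the NDArray backend pre-allocates a zeros array of shape (draws + tune,) + var_shape for every variable in every chain. A back-of-envelope estimate for my run (assuming float64 storage and the default tune=500; layer sizes from the definition below):

draws, tune, chains = 80000, 500, 4
# per-variable parameter counts: W1 (784x800), b1, W2 (800x800), b2, W3 (800x10), b3
n_params = 784*800 + 800 + 800*800 + 800 + 800*10 + 10  # ~1.28 million
total_bytes = (draws + tune) * n_params * 8 * chains    # 8 bytes per float64
print(total_bytes / 1e12)                               # ~3.3 TB

So it looks like the full trace simply cannot fit in RAM, which would explain the MemoryError.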
The NN definition:
import lasagne
import pymc3 as pm


def build_ann(init, input_var, target_var):
    with pm.Model() as neural_network:
        l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                         input_var=input_var)

        # First fully-connected layer of 800 units with tanh nonlinearities;
        # weight and bias priors are supplied by init:
        n_hid1 = 800
        l_hid1 = lasagne.layers.DenseLayer(
            l_in, num_units=n_hid1,
            nonlinearity=lasagne.nonlinearities.tanh,
            b=init,
            W=init
        )

        # Another 800-unit layer:
        n_hid2 = 800
        l_hid2 = lasagne.layers.DenseLayer(
            l_hid1, num_units=n_hid2,
            nonlinearity=lasagne.nonlinearities.tanh,
            b=init,
            W=init
        )

        # Finally, the fully-connected output layer of 10 softmax units:
        l_out = lasagne.layers.DenseLayer(
            l_hid2, num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax,
            b=init,
            W=init
        )

        prediction = lasagne.layers.get_output(l_out)

        # 10 discrete output classes -> pymc3 categorical distribution
        out = pm.Categorical('out', prediction, observed=target_var)

    return neural_network
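For completeness, init is the Gaussian-prior initializer from the blog post, and I build the network on theano shared variables roughly like this (reproduced from memory, so treat the details as a sketch; X_train and y_train are the MNIST arrays from the blog):

import numpy as np
import theano


class GaussWeights(object):
    # Each call from Lasagne (one per W or b) creates a fresh Normal prior,
    # which is where the w1..w6 variables in the traceback above come from.
    def __init__(self):
        self.count = 0

    def __call__(self, shape):
        self.count += 1
        return pm.Normal('w%d' % self.count, mu=0, sd=0.1,
                         testval=np.random.normal(size=shape).astype(np.float64),
                         shape=shape)


# X_train / y_train here are the MNIST arrays from the blog post (assumption)
input_var = theano.shared(X_train[:500, ...].astype(np.float64))
target_var = theano.shared(y_train[:500, ...].astype(np.int64))
neural_network = build_ann(GaussWeights(), input_var, target_var)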