Setup environment for using GPU with Theano

Ahh… setting `cores=1` when sampling (PyMC3 v3.5) seems to have solved my issues as well. Alternatively, making Theano use only the CPU through the settings in `.theanorc` (e.g. `device = cpu` under `[global]`) also works.

I was seeing the exception below:

RemoteTraceback                           Traceback (most recent call last)
RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/theano/compile/function_module.py", line 903, in __call__
    self.fn() if output_subset is None else\
  File "pygpu/gpuarray.pyx", line 700, in pygpu.gpuarray.pygpu_empty
  File "pygpu/gpuarray.pyx", line 301, in pygpu.gpuarray.array_empty
pygpu.gpuarray.GpuArrayException: b'cuEventCreate: CUDA_ERROR_NOT_INITIALIZED: initialization error'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/parallel_sampling.py", line 73, in run
    self._start_loop()
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/parallel_sampling.py", line 113, in _start_loop
    point, stats = self._compute_point()
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/parallel_sampling.py", line 139, in _compute_point
    point, stats = self._step_method.step(self._point)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/arraystep.py", line 247, in step
    apoint, stats = self.astep(array)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 112, in astep
    start = self.integrator.compute_state(q0, p0)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/integration.py", line 29, in compute_state
    logp, dlogp = self._logp_dlogp_func(q)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 478, in __call__
    logp, dlogp = self._theano_function(array)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/theano/compile/function_module.py", line 917, in __call__
    storage_map=getattr(self.fn, 'storage_map', None))
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/theano/gof/link.py", line 325, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/six.py", line 692, in reraise
    raise value.with_traceback(tb)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/theano/compile/function_module.py", line 903, in __call__
    self.fn() if output_subset is None else\
  File "pygpu/gpuarray.pyx", line 700, in pygpu.gpuarray.pygpu_empty
  File "pygpu/gpuarray.pyx", line 301, in pygpu.gpuarray.array_empty
pygpu.gpuarray.GpuArrayException: b'cuEventCreate: CUDA_ERROR_NOT_INITIALIZED: initialization error'
Apply node that caused the error: GpuFromHost<None>(__args_joined)
Toposort index: 1
Inputs types: [TensorType(float32, vector)]
Inputs shapes: [(3,)]
Inputs strides: [(4,)]
Inputs values: [array([ 2.5801713 , -0.8394047 ,  0.49180257], dtype=float32)]
Outputs clients: [[GpuSubtensor{int64:int64:}(GpuFromHost<None>.0, Constant{0}, Constant{1}), GpuSubtensor{int64}(GpuFromHost<None>.0, ScalarFromTensor.0), GpuSubtensor{int64}(GpuFromHost<None>.0, ScalarFromTensor.0), GpuSubtensor{int64:int64:}(GpuFromHost<None>.0, Constant{1}, Constant{3})]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py", line 405, in sample
    progressbar=progressbar, **args)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py", line 1508, in init_nuts
    step = pm.NUTS(potential=potential, model=model, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/nuts.py", line 152, in __init__
    super(NUTS, self).__init__(vars, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 63, in __init__
    dtype=dtype, **theano_kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/arraystep.py", line 228, in __init__
    vars, dtype=dtype, **theano_kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 709, in logp_dlogp_function
    return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 440, in __init__
    self._cost, grad_vars, self._ordering.vmap)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 520, in _build_joined
    args_joined = tt.vector('__args_joined')

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
"""

The above exception was the direct cause of the following exception:

GpuArrayException                         Traceback (most recent call last)
GpuArrayException: b'cuEventCreate: CUDA_ERROR_NOT_INITIALIZED: initialization error'
Apply node that caused the error: GpuFromHost<None>(__args_joined)
Toposort index: 1
Inputs types: [TensorType(float32, vector)]
Inputs shapes: [(3,)]
Inputs strides: [(4,)]
Inputs values: [array([ 2.5801713 , -0.8394047 ,  0.49180257], dtype=float32)]
Outputs clients: [[GpuSubtensor{int64:int64:}(GpuFromHost<None>.0, Constant{0}, Constant{1}), GpuSubtensor{int64}(GpuFromHost<None>.0, ScalarFromTensor.0), GpuSubtensor{int64}(GpuFromHost<None>.0, ScalarFromTensor.0), GpuSubtensor{int64:int64:}(GpuFromHost<None>.0, Constant{1}, Constant{3})]]

Backtrace when the node is created(use Theano flag traceback.limit=N to make it longer):
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py", line 405, in sample
    progressbar=progressbar, **args)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py", line 1508, in init_nuts
    step = pm.NUTS(potential=potential, model=model, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/nuts.py", line 152, in __init__
    super(NUTS, self).__init__(vars, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/hmc/base_hmc.py", line 63, in __init__
    dtype=dtype, **theano_kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/step_methods/arraystep.py", line 228, in __init__
    vars, dtype=dtype, **theano_kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 709, in logp_dlogp_function
    return ValueGradFunction(self.logpt, grad_vars, extra_vars, **kwargs)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 440, in __init__
    self._cost, grad_vars, self._ordering.vmap)
  File "/home/jordi/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/model.py", line 520, in _build_joined
    args_joined = tt.vector('__args_joined')

HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.

The above exception was the direct cause of the following exception:

RuntimeError                              Traceback (most recent call last)
<ipython-input-9-e709b6d7ffc3> in <module>()
      1 with pooled_model:
----> 2     pooled_trace = sample(1000, tune=1000)

~/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py in sample(draws, step, init, n_init, start, trace, chain_idx, chains, cores, tune, nuts_kwargs, step_kwargs, progressbar, model, random_seed, live_plot, discard_tuned_samples, live_plot_kwargs, compute_convergence_checks, use_mmap, **kwargs)
    447             _print_step_hierarchy(step)
    448             try:
--> 449                 trace = _mp_sample(**sample_args)
    450             except pickle.PickleError:
    451                 _log.warning("Could not pickle model, sampling singlethreaded.")

~/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/sampling.py in _mp_sample(draws, tune, step, chains, cores, chain, random_seed, start, progressbar, trace, model, use_mmap, **kwargs)
    997         try:
    998             with sampler:
--> 999                 for draw in sampler:
   1000                     trace = traces[draw.chain - chain]
   1001                     if trace.supports_sampler_stats and draw.stats is not None:

~/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/parallel_sampling.py in __iter__(self)
    303 
    304         while self._active:
--> 305             draw = ProcessAdapter.recv_draw(self._active)
    306             proc, is_last, draw, tuning, stats, warns = draw
    307             if self._progress is not None:

~/anaconda3/envs/jwv/lib/python3.5/site-packages/pymc3/parallel_sampling.py in recv_draw(processes, timeout)
    221         if msg[0] == 'error':
    222             old = msg[1]
--> 223             six.raise_from(RuntimeError('Chain %s failed.' % proc.chain), old)
    224         elif msg[0] == 'writing_done':
    225             proc._readable = True

~/anaconda3/envs/jwv/lib/python3.5/site-packages/six.py in raise_from(value, from_value)

RuntimeError: Chain 0 failed.
1 Like