Very slow generation of computation graph with convolution

I wanted to experiment with variational inference on an image segmentation task.
This required me to use Theano’s convolutions (T.nnet.conv2d()) with the filters modelled as Gaussian random variables.

My problem is that it only works for very small networks, because building the computation graph (not compiling or running it) takes a very long time to finish. In one case I interrupted the process after 5 hours.

I use Ubuntu 18.04.1 LTS with the newest versions of both Theano and PyMC3. The problem occurs regardless of whether I use the GPU or the CPU. Time profiling shows that the largest amount of time is spent in the conv2d function.

The following code is a minimal example which encounters the problem:

import numpy as np
import matplotlib.pyplot as plt

import pymc3 as pm
import theano
from theano import tensor as T
from theano import shared, function

# --- setup for the image-segmentation experiment ---

# spatial size of every image and number of images per minibatch
im_height, im_width = 101, 101
batch_size = 32

# synthetic stand-ins for real data: Gaussian-noise inputs and random 0/1 targets,
# both with shape (1000, 1, im_height, im_width)
X_train = np.random.randn(1000, 1, im_height, im_width)
Y_train = np.random.randint(0, high=2, size=(1000, 1, im_height, im_width))

# per-layer architecture: entry i of dil/fil belongs to convolution i,
# which maps cha[i] input channels to cha[i + 1] output channels
dil = [1, 2, 4, 8, 16, 1]        # dilation of each convolution
fil = [3, 3, 3, 3, 3, 1]         # side length of each square filter
cha = [1, 8, 16, 32, 64, 64, 1]  # channel counts between convolutions

def construct_nn(ann_input, ann_output):
  """Build a pymc3 model of stacked dilated convolutions with Gaussian filters.

  One convolution is created per entry of the module-level ``dil``/``fil``
  lists; convolution ``i`` maps ``cha[i]`` input channels to ``cha[i + 1]``
  output channels. The convolutions are chained directly, with no
  non-linearity in between, and the final output goes through a sigmoid to
  give the probabilities of a Bernoulli likelihood.

  ann_input:  network input; conv2d is told its shape is
              (batch_size, cha[0], im_height, im_width).
  ann_output: observed targets passed to the Bernoulli likelihood.

  Returns the pm.Model holding the filter priors and the likelihood.

  NOTE(review): graph construction is reported to be very slow here. A
  possible cause is that pymc3 model contexts enable theano test-value
  computation, so each conv2d would be evaluated eagerly while the graph
  is built -- TODO confirm.
  """
  with pm.Model() as neural_network:

    layer_specs = zip(dil, fil, cha[:-1], cha[1:])
    activations = ann_input

    for layer_idx, (dilation, ksize, n_in, n_out) in enumerate(layer_specs):

      print('number:', layer_idx,
            'filter_size:', ksize,
            'dilation:', dilation,
            'input_channels:', n_in,
            'output_channels:', n_out)

      kernel_shape = (n_out, n_in, ksize, ksize)

      # zero-mean Gaussian prior; sd is 1 / sqrt(number of weights feeding one output value)
      prior_sd = 1/np.sqrt(n_in*ksize*ksize)
      kernel = pm.Normal('F' + str(layer_idx), 0, sd=prior_sd, shape=kernel_shape)

      # border_mode='half' together with the odd filter sizes in `fil`
      # is meant to keep height/width unchanged, as input_shape assumes
      activations = T.nnet.conv2d(
        activations,
        kernel,
        border_mode='half',
        filter_dilation=(dilation, dilation),
        input_shape=(batch_size, n_in, im_height, im_width),
        filter_shape=kernel_shape,
      )

    # output probabilities
    p = pm.math.sigmoid(activations)

    # Bernoulli likelihood on the targets (in effect a cross-entropy loss, per the
    # original author); total_size is the number of samples in the full training set
    out = pm.Bernoulli('out', p, observed=ann_output, total_size=Y_train.shape[0])

  return neural_network

# Wrap the training arrays in pymc3 minibatch containers of batch_size samples
# and build the model on top of them.
minibatch_x = pm.Minibatch(X_train, batch_size=batch_size)
minibatch_y = pm.Minibatch(Y_train, batch_size=batch_size)
neural_network = construct_nn(minibatch_x, minibatch_y)

print('compiling starts here')

with neural_network:
  inference = pm.ADVI()
  # The original line was garbled ("approx =, method=inference)") and did not
  # parse; it is restored as a pm.fit call. The iteration count of the original
  # call is not recoverable, so n is left at pm.fit's default.
  approx = pm.fit(method=inference)

I’m looking forward to your input (1.6 KB)