Super slow sampling

Hello,

I’m trying to migrate from PyMC2 to PyMC3. A simplified version of my code is given below.

In brief, I have a custom Theano Op (for root finding) and a loop. The model takes two inputs, E1 and E3, and outputs y; E2 is derived from y, E3 and the previous time step’s E2. The parameters I’d like to invert are p1 and p2 (and the initial value pE20).
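To be explicit, per time step t the loop computes (writing g for the E2 update that is elided in the code below):

    y[t]  = root of modelf(x, p1, E1[t], E2[t-1]) = 0   # the custom Theano Op
    E2[t] = g(y[t], E3[t], p2)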

My problem is that NUTS is too slow, and it gets slower with every iteration: after 10 iterations it needs about 60 s per iteration on average. Metropolis is slow as well, at about 3.5 s/iteration, which is still much slower than PyMC2. I’ve checked my custom function and its derivatives (with theano.tensor.verify_grad()) and they are all good; the loop gives correct results as well. I would like to know why…
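For reference, the gradient check looked roughly like this (a minimal sketch; Yf is the Op defined in the listing below, and the test-point values here are made up):

import numpy as np
import theano.tensor as tt

# verify_grad compares Yf's symbolic grad() against finite differences
rng = np.random.RandomState(42)
tt.verify_grad(Yf(), [np.array(0.05), np.array(1.2), np.array(0.3)], rng=rng)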

Many thanks for your help!

import numpy as np
from scipy import optimize
import pymc3 as pm
import theano
import theano.tensor as tt
theano.config.optdb.max_use_ratio = 20  # raise the graph optimizer's budget

# PyMC3
model = pm.Model()
with model:
    ''' Priors '''
    p1est = pm.Uniform('p1est', lower = 0.001, upper = 0.2)
    p2est = pm.Uniform('p2est', lower = 0.001, upper = 0.2)
    pE20est = pm.Uniform('pE20est', lower = 0.0, upper = 1.0)
    sigma = pm.HalfNormal('sigma', tau=1)
    
    ''' Inputs & output '''
    E1tt = tt.as_tensor(E1measured)  # E1measured, E3measured & ymeasured are numpy.ndarray
    E3tt = tt.as_tensor(E3measured)
    ytt = tt.as_tensor(ymeasured)
       
    ''' Custom theano functions '''
    def yf(p1, E1, E2): # E2 depends on E3; see below
        def modelf(x, p1, E1, E2):
            return value  # model residual whose root is y (elided)

        def xminf(p1, E1, E2):
            return value  # lower bracket for the root (elided)

        def xmaxf(p1, E1, E2):
            return value  # upper bracket for the root (elided)

        # Root find
        xmin = xminf(p1, E1, E2)
        xmax = xmaxf(p1, E1, E2)
        testmin = modelf(xmin, p1, E1, E2)
        testmax = modelf(xmax, p1, E1, E2)
        if testmin > 0 and testmax < 0:
            return optimize.brenth(modelf, xmin, xmax, args=(p1, E1, E2))
        else:
            print('Bad proposal')
            return -999.0  # sentinel: makes the likelihood tiny so the proposal is rejected
    
    class Yf(tt.Op):
        __props__ = ()
        
        itypes = [tt.dscalar, tt.dscalar, tt.dscalar]
        otypes = [tt.dscalar]
        
        def perform(self, node, inputs, outputs):
            p1, E1, E2 = inputs
            x = yf(p1, E1, E2)  # Python-side root find (scipy.optimize.brenth)
            outputs[0][0] = np.array(x)  # wrap as 0-d array for the dscalar output
            
        def grad(self, inputs, g):
            p1, E1, E2 = inputs
            x = self(p1, E1, E2)
            # Partial derivatives of modelf, evaluated at the root (elided)
            dfdx = ...
            dfdp1 = ...
            dfdE1 = ...
            dfdE2 = ...
            # Implicit function theorem: f(x, p) = 0  =>  dx/dp = -(df/dp)/(df/dx)
            dxdp1 = -dfdp1/dfdx
            dxdE1 = -dfdE1/dfdx
            dxdE2 = -dfdE2/dfdx
            return [g[0]*dxdp1, g[0]*dxdE1, g[0]*dxdE2]
            
    ''' Sap flow simulation '''
    def step(E1, E3, E2previous, p1, p2):  # E2previous is E2 from the last time step
        y = Yf()(p1, E1, E2previous)
        E2 = ...  # depends on y, E3 and p2 (elided)
        return y, E2
    outputs, update = theano.scan(fn = step,
                                  sequences = [E1tt, E3tt],
                                  outputs_info = [None, pE20est],
                                  non_sequences = [p1est, p2est])
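    # Note: each scan step applies Yf, whose perform() calls
    # scipy.optimize.brenth in Python, so every logp/gradient evaluation
    # does one Python-level root solve per element of E1measured.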
    
    ''' Sampling '''
    obs = pm.Normal('obs', mu = outputs[0], sd = sigma, observed = ytt)
    start = pm.find_MAP()
    print(start)
    step = pm.Metropolis()  # also tried pm.NUTS(profile = True) with scaling = start
    trace = pm.sample(int(1e6), step = step, chains = 1)  # also tried trace = db, start = start