Obtaining gradients from a lumped black box output

Hi All,

I want to use pymc3 to sample from a posterior distribution which includes parameters calculated by a “black box” model. Using the ideas outlined in this tutorial I can get it working with Slice, but when I try to include gradients to enable use of NUTS, I get an error:

TypeError: object of type ‘TensorVariable’ has no len()

Because of my reliance on an external “black box” model, I have structured my Theano ops to run all calculations with the black box in one Theano op, which outputs a 2d array, then use additional ops downstream to extract the output and gradients for each of the modelled variables separately.
When I introduce the downstream ops (mp2 and mp3 in the example code below) the code fails:

f2e = pm.Normal(‘f2e’, mu = mp2(bbm(theta)), sigma = 0.3, observed=np.ones(1))
f3e = pm.Normal(‘f3e’, mu = mp3(bbm(theta)), sigma = 0.3, observed=np.ones(1))

If I exclude the downstream ops, and simply extract the scalar output from the blackbox results (no gradient), the code runs successfully with Slice:

f2e = pm.Normal(‘f2e’, mu = bbm(theta)[0][0], sigma = 0.3, observed=np.ones(1))
f3e = pm.Normal(‘f3e’, mu = bbm(theta)[1][0], sigma = 0.3, observed=np.ones(1))

I haven’t been able to work out what is causing this error, any ideas you have would be much appreciated.

Full example code is below. I am using pymc3 3.8 and theano 1.0.5.

import pymc3 as pm
import pandas as pd
import numpy as np
import math
from scipy import optimize
import theano.tensor as tt

# This would actually be a call via COM automation to a simulation package which is a black box for my purposes
# For troubleshooting, I have replaced it here with a mock "black box" model
class simulationObj:
    
    def getSimValue(self, sVal, theta):
        f1e, sep1Cut, sep1Qual = theta
        
        if sVal == 'f1dm':
            out = 800
        elif sVal == 'f2dm':
            out = 850 + ((sep1Cut-35)*sep1Qual)
        elif sVal == 'f3dm':
            out = 720 + ((sep1Cut-35)*sep1Qual)
        elif sVal == 'f2mr':
            out = 1/(1+math.exp(-sep1Qual*(33+1-sep1Cut)))
        
        return out

simObj = simulationObj()

# Retreive required values from the black box model, and do some calculations
def runSim(theta, data, simObj):
    
    f1e, sep1Cut, sep1Qual = theta
    
    x["f1dm"] = simObj.getSimValue('f1dm', theta)
    x["f1vm"] = x["f1v"]*f1e
    x['f1mm'] = x["f1vm"]*x["f1dm"]
    
    x['f2dm'] = simObj.getSimValue('f2dm', theta)
    x['f3dm'] = simObj.getSimValue('f3dm', theta)
    x['f2mr'] = simObj.getSimValue('f2mr', theta)
    x['f2mm'] = x['f1mm']*x['f2mr']
    x['f2vm'] = x['f2mm']/x['f2dm']
    x['f3vm'] = (x['f1mm']-x['f2mm'])/x['f3dm']
    
    x['f2e'] = x['f2vm']/x['f2v']
    x['f3e'] = x['f3vm']/x['f3v']
    
    out = np.array([x['f2e'].median(), x['f3e'].median()])
    
    return out

# custom Theano op to retrieve results (and gradients of those results) from black box model for all modelled variables
class blackBoxModel(tt.Op):

    itypes = [tt.dvector]  # expects a vector of parameter values when called
    otypes = [tt.dmatrix]  # outputs a matrix

    def __init__(self, x, simObj):
        # add inputs as class attributes
        self.x = x
        self.simObj = simObj

    def perform(self, node, inputs, outputs):
        # the method that is used when calling the Op
        (theta,) = inputs
        
        # define a form of the function which only requires theta to be provided
        def simOnTheta(theta):
            return runSim(theta, data=self.x, simObj=self.simObj)
        
        # run the model with the parameters provided
        vals = np.array([simOnTheta(theta)])     # needs to be a 2D array for np.concatenate
        # get the matrix of first derivatives (Jacobian)
        eps = np.sqrt(np.finfo(float).eps)
        grads = optimize.slsqp.approx_jacobian(theta, func=simOnTheta, epsilon = eps)
        # multiply each row by the values of theta to get vector-Jacobian product
        grads = np.multiply(grads, np.array(theta))
        
        # Concat together the output array, which contains the simulation results and their partial gradients
        out = np.concatenate((vals.T, grads), axis=1)

        outputs[0][0] = out

# custom Theano op to extract the model output and gradients for one modelled variable
class modelProcessorWithGrad(tt.Op):

    itypes = [tt.dmatrix]  # expects a matrix when called
    otypes = [tt.dscalar]  # outputs a single scalar value

    def __init__(self, stream):
        # add inputs as class attributes
        self.stream = stream

    def perform(self, node, inputs, outputs):
        # the method that is used when calling the Op
        (arrModel,) = inputs

        # extract the relevant model result based on user choice
        if self.stream == 'f2':
            out = arrModel[0][0]
        elif self.stream == 'f3':
            out = arrModel[1][0]
            
        outputs[0][0] = out  
        
    def grad(self, inputs, g):
        (arrModel,) = inputs
        if self.stream == 'f2':
            gradOut = arrModel[0][1:]
        elif self.stream == 'f3':
            gradOut = arrModel[1][1:]
            
        return [gradOut]

x=pd.DataFrame(data={'f1v':[7.75,7,7.25],'f2v':[1.662, 1.480, 1.533],'f3v':[6.111, 5.520, 5.717]})
ndraws = 250  # number of draws from the distribution
nburn = 50 # number of "burn-in points"

# create our Ops
bbm = blackBoxModel(x=x, simObj=simObj)
mp2 = modelProcessorWithGrad(stream = 'f2')
mp3 = modelProcessorWithGrad(stream = 'f3')

with pm.Model() as model:
    
    # uniform priors on parameters
    f1e = pm.Uniform("f1e", lower=0, upper=2)
    sep1Cut = pm.Uniform("sep1Cut", lower=30.0, upper=40.0)
    sep1Qual = pm.Uniform("sep1Qual", lower=0.05, upper=3.0)
    
    # convert parameters to a tensor vector
    theta = tt.as_tensor_variable([f1e, sep1Cut, sep1Qual])
    
    # This form works but has no gradients
    #f2e = pm.Normal('f2e', mu = bbm(theta)[0][0], sigma = 0.3, observed=np.ones(1))
    #f3e = pm.Normal('f3e', mu = bbm(theta)[1][0], sigma = 0.3, observed=np.ones(1))
    
    # This form throws an error
    f2e = pm.Normal('f2e', mu = mp2(bbm(theta)), sigma = 0.3, observed=np.ones(1))
    f3e = pm.Normal('f3e', mu = mp3(bbm(theta)), sigma = 0.3, observed=np.ones(1))
    
    pm.Potential("likelihood", f1e*f2e*f3e)

    trace = pm.sample(ndraws, tune=nburn, discard_tuned_samples=True, cores=1)
    pm.traceplot(trace)