At first glance that seems like it’s just returning the first element of the vector that you actually want ( tt.dot(grad_outputs, J) )? Hopefully someone else can answer that.
My main reason for chiming in is that I recently wrote an Op that sounds similar to what you’re trying to do so thought I’d share. In my case, I have trained a neural network so solve an ODE at N time points and I need the gradient of each of these points w.r.t. the model parameter \theta (1-dimensional). So my perform method is returning a vector like yours is. Admittedly I couldn’t figure out why my gradient Op needed to output just a scalar, but my PyMC3 model seems to work fine and recover the true (known) parameters in the posterior so I think it’s correct? ¯\(ツ)/¯
Here’s my Op:
# Define the Theano Op for computing the gradient w.r.t. theta
class ODEGradop(Op):
itypes = [tt.dscalar, tt.dvector] # First input is theta, second is for gradient
otypes = [tt.dscalar] # I'm not sure why this is a scalar but it makes my code work :)
def __init__(self, pydens_model, t_obs):
self.pydens_model = pydens_model
# Setup the input for the neural net
self.t_comp = np.stack([t_obs, np.zeros_like(t_obs)]).T
self.theta_comp = np.stack([np.zeros_like(t_obs), np.ones_like(t_obs)]).T
def perform(self, node, inputs, outputs):
theta, g = inputs
nn_input = self.t_comp + theta*self.theta_comp
result = np.float64(np.squeeze(self.pydens_model.solve(nn_input, fetches="dxdalpha")))
outputs[0][0] = np.dot(result, g)
# Define the Theano Op for querying the solution of the ODE for any input theta
class ODEop(Op):
itypes = [tt.dscalar]
otypes = [tt.dvector]
def __init__(self, pydens_model, t_obs):
self.pydens_model = pydens_model
self.t_obs = t_obs
# Setup the input for the neural net
self.t_comp = np.stack([t_obs, np.zeros_like(t_obs)]).T
self.theta_comp = np.stack([np.zeros_like(t_obs), np.ones_like(t_obs)]).T
def perform(self, node, inputs, outputs):
theta, = inputs
nn_input = self.t_comp + theta*self.theta_comp
result = np.float64(np.squeeze(self.pydens_model.solve(nn_input)))
outputs[0][0] = result
def grad(self, inputs, output_grads):
x, = inputs
g, = output_grads
grad_op = ODEGradop(self.pydens_model, self.t_obs)
grad_op_apply = grad_op(x, g)
return [grad_op_apply]