Following the documentation for Using a “black box” likelihood function and the code in def verify_grad, I am trying to set up a custom op for a function with vector input & scalar output. The numpy implementation returns the function value and gradient at the input location. Here is a minimal example:
from numpy import array

def my_func_w_grad(x):
    """
    Calculate a representative R^n -> R function that takes a vector input and returns a
    scalar function value as well as the gradient of the function at that point.
    """
    # Simplest case: a linear function in two dimensions...
    A = array([1.1, 2.5])
    b = 1
    y = array([A @ x + b])
    dydx = A
    return y, dydx

print(f'{my_func_w_grad(array([1.0, 1.0])) = }')  # demo
Which behaves as expected, returning:
my_func_w_grad(array([1.0, 1.0])) = (array([4.6]), array([1.1, 2.5]))
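As a quick sanity check on the numpy side (not part of the Op; the helper name fd_grad is just for this sketch), the analytical gradient agrees with a central finite-difference estimate:

from numpy import zeros_like

def fd_grad(f, x, eps=1e-6):
    """Central finite-difference estimate of the gradient of the scalar output of f."""
    g = zeros_like(x, dtype=float)
    for i in range(x.size):
        step = zeros_like(x, dtype=float)
        step[i] = eps
        g[i] = (f(x + step)[0][0] - f(x - step)[0][0]) / (2 * eps)
    return g

print(fd_grad(my_func_w_grad, array([1.0, 1.0])))  # ~[1.1, 2.5], matching dydx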
And here is my closest attempt at getting this into a pytensor Op…
from pytensor.graph.op import Op
from pytensor.graph.basic import Apply
import pytensor.tensor as pt

class MyGradOp(Op):
    def make_node(self, *inputs):
        x, = inputs
        x = pt.as_tensor(x)  # overwrite x, ensuring it is a tensor
        return Apply(self, [x], [x.type()])  # note: using x.type() for the output is dangerous

    def perform(self, node, inputs, output_storage):
        x = inputs[0][0, 0]  # why were two extra dimensions added?
        z = output_storage[0]
        z[0] = my_func_w_grad(x)[1].reshape((-1, 1, 1))  # why are two extra dimensions required?
        return None

my_grad_op = MyGradOp()  # create the Op object (a symbolic function definition for the gradient)
class MyFuncOp(Op):
    def make_node(self, *inputs):
        x, = inputs
        x = pt.as_tensor_variable(x)  # overwrite x, ensuring it is a tensor
        return Apply(self, [x], [pt.type.TensorType("f8", (1,))()])

    def perform(self, node, inputs, output_storage):
        x = inputs[0][0]  # why was an extra dimension added?
        z = output_storage[0]
        z[0] = my_func_w_grad(x)[0]
        return None

    def grad(self, inputs, gradients):
        """
        For "gradients" they actually want gradient-vector products
        (the gradient itself is computed in pytensor.gradient.grad).
        """
        grad = my_grad_op(inputs)[1]
        return [grad * gradients[0]]  # is this the product they are looking for? (numpy sketch below)

my_func_op = MyFuncOp()
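In plain numpy terms, here is the product I believe grad is supposed to return for an R^n -> R function (this is only my reading of the docs, so it may be wrong): the downstream gradient arriving from the cost is a scalar, and each returned entry should be that scalar times the gradient vector, with the same shape as the corresponding input.

g_out = 1.0                                  # scalar gradient arriving from the cost
_, dydx = my_func_w_grad(array([1.0, 2.0]))
print(g_out * dydx)                          # expected gradient-vector product: [1.1, 2.5]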
Next, to demonstrate to myself that it is working…
from pytensor import function, gradient, dprint
x_point = array([1.0, 2.0])
x_ndarrays = [x_point]
x_svar_onepnt = [pt.vector(shape=(2,), name="input_x")]
f_svar = my_func_op(x_svar_onepnt)
fn_kwargs = dict(accept_inplace=True, allow_input_downcast=True, on_unused_input="ignore", mode=None)
f_func_onepnt = function(x_svar_onepnt, f_svar, name="func: 2D linear", **fn_kwargs)
f_val_onepnt = f_func_onepnt(x_point.copy())
print(f'{f_val_onepnt = }')
cost = f_svar[0] # <big question mark here>
g_svar = gradient.grad(cost, x_svar_onepnt, disconnected_inputs="ignore")
g_func_onepnt = function(x_svar_onepnt, g_svar, name="grad: coeffs of affine transform", **fn_kwargs)
g_val_onepnt = g_func_onepnt(x_point.copy())
print(f'{g_val_onepnt = }')
This seems to get me close, but the gradient only returns the value in the second direction:
f_val_onepnt = array([7.1])
g_val_onepnt = [array([2.5])]
As an example using built-in functions, the following function & cost combination works as expected:
f_svar = pt.sin(x_svar_onepnt[0][0]) + pt.cos(x_svar_onepnt[0][1], name="egg_carton")
cost = f_svar
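For reference, that built-in version spelled out end to end (the _builtin names are only for this sketch; same imports as before) gives the gradient I expect, roughly [cos(1.0), -sin(2.0)] ≈ [0.540, -0.909] at the test point:

x_builtin = pt.vector(shape=(2,), name="x_builtin")
f_builtin = pt.sin(x_builtin[0]) + pt.cos(x_builtin[1])
g_builtin = gradient.grad(f_builtin, x_builtin)
g_func_builtin = function([x_builtin], g_builtin, name="grad: egg carton")
print(g_func_builtin(x_point.copy()))  # ~[0.540, -0.909]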
Can anyone help me?