Dimension error when using pm.Potential

Hi! I’m trying to calibrate a physical model using PyMC. I’ve gone with the approach of wrapping my JAX model in a PyTensor Op. This works with the Metropolis sampler, although Metropolis runs into some issues while sampling. Since I want to take advantage of NUTS, I implemented a grad method and a gradient Op for my model:

import jax
import jax.numpy as jnp
import numpy as np
import pytensor.tensor as pt
from pytensor.graph.op import Op


@jax.jit
def impedance_logp(observed, theta, ws):
    """Compute the (unnormalized) Gaussian log-likelihood of the impedance model."""

    (h1, h2, sigma, mu1, mu2) = theta[0]
    model = impedance_operation(ws, h1, h2, mu1, mu2)
    logp_impedance = -(0.5 / sigma**2) * jnp.sum((observed - model) ** 2)

    return jnp.array(logp_impedance)

class ImpedanceOp(Op):
    itypes = [pt.dvector]  # expects a vector of parameter values when called
    otypes = [pt.dscalar]  # outputs a single scalar value (the log likelihood)

    def __init__(self, ws, data, **kwargs):
        super().__init__(**kwargs)
        self.ws = ws
        self.data = data
        self.logpgrad = ImpedanceGrad(self.data, self.ws)

    def impedance_logp(self, theta):
        return impedance_logp(self.data, theta, self.ws)

    def perform(self, node, inputs, outputs):
        # (h,) = inputs
        result = self.impedance_logp(inputs)
        outputs[0][0] = np.asarray(result, dtype=node.outputs[0].dtype)

    def grad(self, inputs, output_gradients):
        grads = self.logpgrad(*inputs)
        output_gradient = output_gradients[0]
        return [output_gradient * grads]


class ImpedanceGrad(Op):
    itypes = [pt.dvector]
    otypes = [pt.dvector]

    def __init__(self, data, ws) -> None:
        super().__init__()
        self.data = data
        self.ws = ws

    def impedance_logp(self, theta):
        return impedance_logp(self.data, theta, self.ws)

    def impedance_logp_grad(self, theta):
        return jax.grad(self.impedance_logp)(theta)

    def perform(self, node, inputs, outputs):
        grads = self.impedance_logp_grad(inputs)
        outputs[0][0] = np.asarray(grads, dtype=node.outputs[0].dtype)
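
For context, the Op is instantiated once with the frequency grid and the measured data; impedance_mean in the model below refers to that instance. A minimal sketch of the setup (the file name, array shapes, and values here are placeholders):

# Placeholder setup: ws and data come from my measurement
ws = np.linspace(1.0, 100.0, 200)            # frequency grid (placeholder values)
data = np.load("impedance_measurement.npy")  # observed impedance curve (placeholder file)

# The instance used as `impedance_mean` inside the model below
impedance_mean = ImpedanceOp(ws, data)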

And here is the model I’m trying to sample:

import os

import arviz as az
import pymc as pm

with pm.Model() as impedance_calibration:
    h1 = pm.HalfNormal("h1", sigma=1)
    h2 = pm.HalfNormal("h2", sigma=1)
    mu1 = pm.HalfNormal("mu1", sigma=1)
    mu2 = pm.HalfNormal("mu2", sigma=1)
    # sigma = pm.TruncatedNormal("sigma", sigma=1, mu=5, lower=0, upper=10)
    sigma = pm.HalfNormal("sigma", sigma=1)
    theta = pt.as_tensor_variable([h1, h2, sigma, mu1, mu2])
    likelihood = pm.Potential(
        "likelihood",
        impedance_mean(theta),
    )

    if not os.path.exists(TRACE_PATH):
        # step = pm.Metropolis()
        # trace = pm.sample(step=step, tune=10000, draws=5000)
        trace = pm.sample(tune=10000, draws=5000)
        # trace = pm.sample_smc(draws=5000, chains=4, parallel=True)
        trace.to_netcdf(TRACE_PATH)
    else:
        trace = az.from_netcdf(TRACE_PATH)

The problem is that sampling with NUTS yields the following error:

pymc.sampling.parallel.ParallelSamplingError: Chain 0 failed with: Expected 1 dimensions input
Apply node that caused the error: Subtensor{i}(ImpedanceGrad.0, 0)
Toposort index: 10
Inputs types: [TensorType(float64, shape=(None,)), ScalarType(uint8)]
Inputs shapes: [(1, 5), ()]
Inputs strides: [(40, 8), ()]
Inputs values: [array([[ 4.21017600e+08, -1.44086746e+09,  2.44285594e+09,
        -1.65140378e+09, -8.00240256e+08]]), 0]
Outputs clients: [[Composite{(((i0 + i1) * i2) + i3)}(Composite{...}.2, Subtensor{i}.0, h1, (d__logp/dh1_log___logprob){1.0})]]

I need some help interpreting exactly which of the variables is causing the trouble.
Thanks for the help!

It seems pm.sample is passing an additional integer parameter to my PyTensor grad, which is causing the issue:

  File "/home/blueman69/git/vibra-analyzer/impedance_diana.py", line 84, in <module>
    trace = pm.sample(tune=10000, draws=5000, chains=1, cores=1)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/mcmc.py", line 785, in sample
    _sample_many(**sample_args)
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/mcmc.py", line 927, in _sample_many
    _sample(
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/mcmc.py", line 1000, in _sample
    for it, diverging in enumerate(sampling):
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/fastprogress/fastprogress.py", line 50, in __iter__
    raise e
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/fastprogress/fastprogress.py", line 41, in __iter__
    for i,o in enumerate(self.gen):
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/sampling/mcmc.py", line 1068, in _iter_sample
    point, stats = step.step(point)
                   ^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/step_methods/arraystep.py", line 174, in step
    return super().step(point)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/step_methods/arraystep.py", line 100, in step
    apoint, stats = self.astep(q)
                    ^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/step_methods/hmc/base_hmc.py", line 168, in astep
    start = self.integrator.compute_state(q0, p0)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/step_methods/hmc/integration.py", line 56, in compute_state
    logp, dlogp = self._logp_dlogp_func(q)
                  ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pymc/model.py", line 373, in __call__
    cost, *grads = self._pytensor_function(*grad_vars)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Have you tried the utility verify_grad? It should help you debug the gradient with fewer layers in between. It’s suggested throughout this blog post in case it helps: How to use JAX ODEs and Neural Networks in PyMC - PyMC Labs
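
For instance, with your ImpedanceOp the check could look roughly like this (ws and data stand for your actual arrays; the test point values are arbitrary):

import numpy as np
import pytensor

op = ImpedanceOp(ws, data)

# verify_grad compares the Op's symbolic gradient against a
# finite-difference approximation at the given test point
pytensor.gradient.verify_grad(
    op,
    pt=[np.array([1.0, 1.0, 0.5, 1.0, 1.0])],  # one dvector: (h1, h2, sigma, mu1, mu2)
    rng=np.random.default_rng(),
)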

Thanks for the reply! That utility has indeed been helpful. However, I’m now finding that I can’t get the correct inputs to my model:

TypeError: impedance_matrix() missing 4 required positional arguments: 'h1', 'h2', 'mu1', and 'mu2'
Apply node that caused the error: ImpedanceGrad(input 0)
Toposort index: 0
Inputs types: [TensorType(float64, shape=(5,))]
Inputs shapes: [(5,)]
Inputs strides: [(8,)]
Inputs values: [array([0., 1., 2., 3., 4.])]
Outputs clients: [[Mul(ExpandDims{axis=0}.0, ImpedanceGrad.0)]]

During execution, empty values are passed to my grad function, resulting in the following:

VALUE PASSED=> [1.73490671 1.44366054 0.82801176 0.75424598]
VALUE PASSED=> [1.73490671 1.44366054 0.82801176 0.75424598]
VALUE PASSED=> [1.73490671 1.44366054 0.82801176 0.75424598]
VALUE PASSED=> []

I’m getting 4 values since I dropped one when calculating my model (the last one was the standard deviation). When the empty value gets passed, I get an error and the code crashes.

It’s difficult to say without being able to run the code myself. You could consider not packing the 5 inputs into a vector/tuple and instead making your Op have 5 separate inputs.

Maybe that will help you pinpoint the source of the issue?
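
A rough sketch of the separate-inputs approach, with a stand-in computation since I can’t run your actual likelihood (the class name and the formula are placeholders):

import numpy as np
import pytensor.tensor as pt
from pytensor.graph.basic import Apply
from pytensor.graph.op import Op


class ImpedanceLogpOp(Op):
    """Sketch: log-likelihood Op with five separate scalar inputs."""

    def make_node(self, h1, h2, mu1, mu2, sigma):
        inputs = [pt.as_tensor_variable(v) for v in (h1, h2, mu1, mu2, sigma)]
        outputs = [pt.dscalar()]  # a single scalar: the log-likelihood
        return Apply(self, inputs, outputs)

    def perform(self, node, inputs, output_storage):
        h1, h2, mu1, mu2, sigma = inputs  # each is a 0-d numpy array
        # Stand-in computation; here you would call your JAX log-likelihood
        result = -(h1**2 + h2**2 + mu1**2 + mu2**2) / (2 * sigma**2)
        output_storage[0][0] = np.asarray(result, dtype=node.outputs[0].dtype)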

I rewrote the function to make those changes. Here are the files I use to check the gradient:
impedance_matrix_new.py (5.4 KB)
test_gradient.py (1.3 KB)
This now fails with:

Traceback (most recent call last):
  File "/home/blueman69/git/vibra-analyzer/test_gradient.py", line 36, in <module>
    pytensor.gradient.verify_grad(
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 1838, in verify_grad
    symbolic_grad = grad(cost, tensor_pt, disconnected_inputs="ignore")
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 617, in grad    _rval: Sequence[Variable] = _populate_grad_dict(
                                ^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 1420, in _populate_grad_dict
    rval = [access_grad_cache(elem) for elem in wrt]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 1420, in <listcomp>
    rval = [access_grad_cache(elem) for elem in wrt]
            ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 1375, in access_grad_cache
    term = access_term_cache(node)[idx]
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/blueman69/mambaforge/envs/pymc_env/lib/python3.11/site-packages/pytensor/gradient.py", line 1213, in access_term_cache
    raise ValueError(
ValueError: ImpedanceOp returned the wrong number of gradient terms.

If you can check this, it would help me a lot!

Your grad Op will now have to return 5 outputs instead of 1 (one for each input). I haven’t looked at the code, but from the error message I imagine that’s the problem you’re seeing now.
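
Concretely, the grad method of your forward Op then has to return a list with one term per input. A sketch, assuming your gradient Op returns the five partial derivatives in input order:

def grad(self, inputs, output_gradients):
    [gz] = output_gradients
    # logpgrad is assumed to return one partial derivative per input
    g_h1, g_h2, g_mu1, g_mu2, g_sigma = self.logpgrad(*inputs)
    return [gz * g_h1, gz * g_h2, gz * g_mu1, gz * g_mu2, gz * g_sigma]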

That was indeed the problem. However, after correcting the make_node method of the grad Op to the following:

def make_node(self, h1, h2, mu1, mu2, sigma, gz) -> Apply:
    inputs = [
        pt.as_tensor_variable(h1),
        pt.as_tensor_variable(h2),
        pt.as_tensor_variable(mu1),
        pt.as_tensor_variable(mu2),
        pt.as_tensor_variable(sigma),
        pt.as_tensor_variable(gz),
    ]
    outputs = [
        inputs[0].type(),
        inputs[0].type(),
        inputs[0].type(),
        inputs[0].type(),
        inputs[0].type(),
    ]
    return Apply(self, inputs, outputs)

I get the same error.
The image below shows the debugger output. It seems a list of a list of vectors is being passed instead of a list of vectors, as it should be. I don’t know how to correct this.

Bump because of urgency!

You are still not returning the right outputs from the gradient. Here is a simple Op that multiplies two inputs and uses another Op to compute the gradients:

import numpy as np

import pytensor
import pytensor.tensor as pt
from pytensor.graph.op import Op
from pytensor.graph.basic import Apply

class MulScalars(Op):

    def make_node(self, x, y):
        inputs = [
            pt.as_tensor(x),
            pt.as_tensor(y),
        ]
        # Todo: Check inputs have the right type
        outputs = [inputs[0].type()]

        return Apply(self, inputs, outputs)

    def perform(self, node, inputs, output_storage):
        [x, y] = inputs
        result = x * y
        output_storage[0][0] = np.asarray(result)

    def grad(self, inputs, output_grads):
        [x, y] = inputs
        [g_output] = output_grads
        [grad_x, grad_y] = grad_mul_scalars(x, y, g_output)
        return [grad_x, grad_y]


class GradMulScalars(Op):

    def make_node(self, x, y, g_output):
        inputs = [
            pt.as_tensor(x), 
            pt.as_tensor(y),
            pt.as_tensor(g_output),
        ]
        # TODO: Check inputs have the right type
        outputs = [
            inputs[0].type(), 
            inputs[1].type(),
        ]
        return Apply(self, inputs, outputs)

    def perform(self, node, inputs, output_storage):
        [x, y, g_output] = inputs
        res_x = y * g_output
        res_y = x * g_output
        output_storage[0][0] = res_x
        output_storage[1][0] = res_y


mul_scalars = MulScalars()
grad_mul_scalars = GradMulScalars()

pytensor.gradient.verify_grad(
    mul_scalars, 
    pt=(np.array(5.0), np.array(2.0)), 
    rng=np.random.default_rng(),
)
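
Note that GradMulScalars itself does not implement a grad method, so this pattern only provides first-order gradients, which is all NUTS needs.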

If this is still unclear, I suggest checking these other guides as well:

https://pytensor.readthedocs.io/en/latest/extending/creating_an_op.html
