Hi folks!
I’ve been trying to use the DensityDist class in conjunction with a custom Op in order to represent a black-box model likelihood. (The likelihood is calculated by an external library and it would not be feasible to express it as a symbolic function, hence the custom Op. I would also like to be able to run predictive tests on the models, hence DensityDist rather than Potential.) It is currently erroring out with a ValueError relating to the number of parameters given to the custom Op, and I haven’t been able to figure out why. I understand the documentation is still in need of some clarification following the v4 refactor (Expand DensityDist docstrings · Issue #5641 · pymc-devs/pymc · GitHub), so I wanted to ask for help before going looking for a bug.
Let’s suppose you had an Op instance that could be passed to Potential like so:
# var1, var2, ... = Variables
lik_op = MyOp(data)
lik = pm.Potential("lik", lik_op(var1, var2, ...))
Is the following the correct way to use DensityDist with a provided logp?
lik = pm.DensityDist("lik", var1, var2, ..., logp=lik_op)
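(For context, the longer-term goal is to attach observed data and eventually do predictive sampling, roughly along the lines of the sketch below; the observed and random keywords there are just my reading of the v4 API, and my_random_fn / observed_data are placeholder names.)
lik = pm.DensityDist(
    "lik",
    var1, var2, ...,
    logp=lik_op,             # black-box log-likelihood Op
    random=my_random_fn,     # hypothetical sampler for predictive checks
    observed=observed_data,  # hypothetical observed data
)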
As a concrete example, here is the issue reproduced in a script based on the black-box likelihood case study notebook (Using a “black box” likelihood function (numpy) — PyMC documentation). Here’s the condensed code:
import aesara.tensor as at
import numpy as np
import pymc as pm

print(f"Running on PyMC v{pm.__version__}")


def my_model(m, c, x):
    return m * x + c


def my_loglike(m, c, x, data, sigma):
    model = my_model(m, c, x)
    return -(0.5 / sigma**2) * np.sum((data - model) ** 2)


class LogLike(at.Op):
    itypes = [at.dscalar, at.dscalar]
    otypes = [at.dscalar]

    def __init__(self, loglike, data, x, sigma):
        self.likelihood = loglike
        self.data = data
        self.x = x
        self.sigma = sigma

    def perform(self, node, inputs, outputs):
        m, c = inputs
        logl = self.likelihood(m, c, self.x, self.data, self.sigma)
        outputs[0][0] = np.array(logl)


N = 10  # number of data points
sigma = 1.0  # standard deviation of noise
x = np.linspace(0.0, 9.0, N)
mtrue = 0.4  # true gradient
ctrue = 3.0  # true y-intercept
truemodel = my_model(mtrue, ctrue, x)

rng = np.random.default_rng(716743)
data = sigma * rng.normal(size=N) + truemodel

logl = LogLike(my_loglike, data, x, sigma)

with pm.Model():
    m = pm.Uniform("m", lower=-10.0, upper=10.0)
    c = pm.Uniform("c", lower=-10.0, upper=10.0)
    lik = pm.Potential("likelihood", logl(m, c))  # Works
    # lik = pm.DensityDist("likelihood", m, c, logp=logl)  # Does not work
    idata = pm.sample()
If I comment out the Potential and uncomment the DensityDist, I get the following error:
/home/cove/project-dev/testing_blackbox.ipynb Cell 1' in <module>
42 # lik = pm.Potential("likelihood", logl(m, c)) # Works
43 lik = pm.DensityDist("likelihood", m, c, logp=logl) # Does not work
---> 45 idata = pm.sample()
File ~/project-dev/venv-project/lib/python3.10/site-packages/pymc/sampling.py:472, in sample(draws, step, init, n_init, initvals, trace, chain_idx, chains, cores, tune, progressbar, model, random_seed, discard_tuned_samples, compute_convergence_checks, callback, jitter_max_retries, return_inferencedata, idata_kwargs, mp_ctx, **kwargs)
469 auto_nuts_init = False
471 initial_points = None
--> 472 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
474 if isinstance(step, list):
475 step = CompoundStep(step)
File ~/project-dev/venv-project/lib/python3.10/site-packages/pymc/sampling.py:198, in assign_step_methods(model, step, methods, step_kwargs)
195 # Use competence classmethods to select step methods for remaining
196 # variables
197 selected_steps = defaultdict(list)
--> 198 model_logpt = model.logpt()
200 for var in model.value_vars:
201 if var not in assigned_vars:
202 # determine if a gradient can be computed
File ~/project-dev/venv-project/lib/python3.10/site-packages/pymc/model.py:763, in Model.logpt(self, vars, jacobian, sum)
761 rv_logps: List[TensorVariable] = []
762 if rv_values:
--> 763 rv_logps = joint_logpt(list(rv_values.keys()), rv_values, sum=False, jacobian=jacobian)
764 assert isinstance(rv_logps, list)
766 # Replace random variables by their value variables in potential terms
File ~/project-dev/venv-project/lib/python3.10/site-packages/pymc/distributions/logprob.py:223, in joint_logpt(var, rv_values, jacobian, scaling, transformed, sum, **kwargs)
220 transform_map[value_var] = original_value_var.tag.transform
222 transform_opt = TransformValuesOpt(transform_map)
--> 223 temp_logp_var_dict = factorized_joint_logprob(
224 tmp_rvs_to_values, extra_rewrites=transform_opt, use_jacobian=jacobian, **kwargs
225 )
227 # Raise if there are unexpected RandomVariables in the logp graph
228 # Only SimulatorRVs are allowed
229 from pymc.distributions.simulator import SimulatorRV
File ~/project-dev/venv-project/lib/python3.10/site-packages/aeppl/joint_logprob.py:186, in factorized_joint_logprob(rv_values, warn_missing_rvs, extra_rewrites, **kwargs)
183 q_value_vars = remapped_vars[: len(q_value_vars)]
184 q_rv_inputs = remapped_vars[len(q_value_vars) :]
--> 186 q_logprob_vars = _logprob(
187 node.op,
188 q_value_vars,
189 *q_rv_inputs,
190 **kwargs,
191 )
193 if not isinstance(q_logprob_vars, (list, tuple)):
194 q_logprob_vars = [q_logprob_vars]
File /usr/lib/python3.10/functools.py:889, in singledispatch.<locals>.wrapper(*args, **kw)
885 if not args:
886 raise TypeError(f'{funcname} requires at least '
887 '1 positional argument')
--> 889 return dispatch(args[0].__class__)(*args, **kw)
File ~/project-dev/venv-project/lib/python3.10/site-packages/pymc/distributions/distribution.py:813, in DensityDist.__new__.<locals>.density_dist_logp(op, value_var_list, *dist_params, **kwargs)
811 _dist_params = dist_params[3:]
812 value_var = value_var_list[0]
--> 813 return logp(value_var, *_dist_params)
File ~/project-dev/venv-project/lib/python3.10/site-packages/aesara/graph/op.py:294, in Op.__call__(self, *inputs, **kwargs)
252 r"""Construct an `Apply` node using :meth:`Op.make_node` and return its outputs.
253
254 This method is just a wrapper around :meth:`Op.make_node`.
(...)
291
292 """
293 return_list = kwargs.pop("return_list", False)
--> 294 node = self.make_node(*inputs, **kwargs)
296 if config.compute_test_value != "off":
297 compute_test_value(node)
File ~/project-dev/venv-project/lib/python3.10/site-packages/aesara/graph/op.py:234, in Op.make_node(self, *inputs)
228 raise NotImplementedError(
229 "You can either define itypes and otypes,\
230 or implement make_node"
231 )
233 if len(inputs) != len(self.itypes):
--> 234 raise ValueError(
235 f"We expected {len(self.itypes)} inputs but got {len(inputs)}."
236 )
237 if not all(it.in_same_class(inp.type) for inp, it in zip(inputs, self.itypes)):
238 raise TypeError(
239 f"Invalid input types for Op {self}:\n"
240 + "\n".join(
(...)
247 )
248 )
ValueError: We expected 2 inputs but got 3.
Sorry for the long trace for what is just a ValueError. The same behavior occurs with any number of parameters specified in the way shown above.
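In case it helps with diagnosing this: if I’m reading the frame at distribution.py:813 correctly, DensityDist calls the supplied logp as logp(value_var, *dist_params), i.e. with the value variable prepended to the parameters, which would explain the “expected 2 inputs but got 3” mismatch against my two itypes. If that’s right, should the Op be wrapped in a function that accepts the value as its first argument? Something like this untested sketch (the wrapper and its handling of value are just my guess, since the observed data is already baked into the Op):

def loglike_wrapper(value, m, c):
    # value is ignored here because the data already lives inside the Op;
    # not sure whether that is the intended pattern.
    return logl(m, c)

with pm.Model():
    m = pm.Uniform("m", lower=-10.0, upper=10.0)
    c = pm.Uniform("c", lower=-10.0, upper=10.0)
    lik = pm.DensityDist("likelihood", m, c, logp=loglike_wrapper)
    idata = pm.sample()

Or is the intended fix to give the Op a third itype for the value variable?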
P.S.: this is on PyMC v4.0.0b6, Aesara v2.5.1.