Hi,
I wanted to develop a hierarchical model with a black-box likelihood function. Initially I used the emcee package, but I had difficulties burning in so many parameters (about 100), so I wanted to try the NUTS implementation instead. However, I have already implemented the log-likelihood function in a form that is usable by emcee, and I do not want to rewrite everything for PyMC, which should be possible following this tutorial: Using a “black box” likelihood function — PyMC example gallery. The problem is that my log-likelihood function always requires one large vector containing all the parameters, while the tutorial assumes scalar parameters throughout. I could get the non-gradient version to work with vector variables, but not the gradient version.
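For context, my likelihood takes a single flat parameter vector, and I currently call it from emcee roughly like this (just a sketch; final_log_liklihood and all_labels are from my code below, and the start points and step count are made up):

import emcee
import numpy as np

ndim = len(all_labels)  # about 100 parameters in one flat vector
nwalkers = 2 * ndim
p0 = np.random.uniform(0, 1, (nwalkers, ndim))  # hypothetical start points
sampler = emcee.EnsembleSampler(nwalkers, ndim, final_log_liklihood)
sampler.run_mcmc(p0, 5000)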
Do you have any suggestions on what I need to change to make it work with vectors instead of scalars?
My current, non-working version looks as follows:
import numpy as np
import pymc as pm
import pytensor
import pytensor.tensor as pt
from pytensor.graph import Apply, Op
from scipy.optimize import approx_fprime
from typing import List


def final_log_liklihood(parameter):
    return combined_log_liklihood(parameter)[0]  # my likelihood function
class LogLike(Op):
    def make_node(self, m, data) -> Apply:
        m = pt.as_tensor(m)
        data = pt.as_tensor(data)
        inputs = [m, data]  # include data
        # The log-likelihood is a single scalar
        outputs = [pt.scalar(dtype="float64")]
        return Apply(self, inputs, outputs)

    def perform(self, node: Apply, inputs: List[np.ndarray], outputs: List[List[None]]) -> None:
        # This is the method that computes the numerical output
        # given numerical inputs. Everything here is numpy arrays.
        m, data = inputs  # m contains all my parameters as one vector
        # call our numpy log-likelihood function
        loglike_eval = final_log_liklihood(m)
        # Save the result in the outputs list provided by PyTensor.
        # There is one list per output, each containing another list
        # pre-populated with a `None` where the result should be saved.
        outputs[0][0] = np.asarray(loglike_eval)
    def grad(
        self, inputs: List[pt.TensorVariable], g: List[pt.TensorVariable]
    ) -> List[pt.TensorVariable]:
        # NEW!
        # The method that calculates the gradients - it actually
        # returns the vector-Jacobian product
        m, data = inputs
        grad_wrt_m = loglikegrad_op(m, data)
        # out_grad is a tensor of gradients of the Op outputs wrt the function cost
        [out_grad] = g
        return [
            pt.sum(out_grad * grad_wrt_m),
            pytensor.gradient.grad_not_implemented(self, 1, data),
        ]
def finite_differences_loglike(m, data, eps=1e-7):
    """
    Calculate the partial derivatives of the log-likelihood at a set of
    values. The derivatives are calculated using scipy's approx_fprime
    function.

    Parameters
    ----------
    m : array_like
        The parameter vector for which we wish to define partial derivatives

    Returns
    -------
    grad_wrt_m : array_like
        Partial derivatives wrt the m parameter vector
    """

    def inner_func(m, data):
        # data is unused here; the likelihood only needs the parameter vector
        return final_log_liklihood(m)

    grad_wrt_m = approx_fprime(m, inner_func, eps, data)
    return grad_wrt_m
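# For reference, scipy's approx_fprime handles vector inputs natively.
# A quick standalone check with made-up numbers:
#   approx_fprime(np.array([1.0, 2.0]), lambda x: (x ** 2).sum(), 1e-7)
# returns approximately array([2., 4.]), i.e. one partial derivative per
# entry of the input vector, so grad_wrt_m above is a full-length vector.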
class LogLikeGrad(Op):
    def make_node(self, m, data) -> Apply:
        m = pt.as_tensor(m)
        data = pt.as_tensor(data)
        inputs = [m, data]
        # One output with the same type as m: the vector of partial
        # derivatives wrt each entry of the parameter vector
        outputs = [m.type()]
        return Apply(self, inputs, outputs)

    def perform(self, node: Apply, inputs: List[np.ndarray], outputs: List[List[None]]) -> None:
        m, data = inputs
        # calculate the gradient vector by finite differences
        grad_wrt_m = finite_differences_loglike(m, data)
        outputs[0][0] = grad_wrt_m
loglike_op = LogLike()
loglikegrad_op = LogLikeGrad()


def custom_dist_loglike(data, m):
    # data, or observed, is always passed as the first input of CustomDist
    return loglike_op(m, data)


test_out = loglike_op(np.random.uniform(0, 1, len(all_labels)), [])
pytensor.dprint(test_out, print_type=True)
with pm.Model():
    # set uniform priors on the full parameter vector
    params = pm.Uniform("p", lower=0, upper=1, shape=len(all_labels))
    # create the custom distribution with the black-box likelihood
    pm.CustomDist("likelihood", params, observed=[], logp=custom_dist_loglike)
    # sample from the posterior
    trace = pm.sample(1000)
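As a sanity check, I believe the gradient Op could be compared against PyTensor's own finite differences with something like the following sketch (m0 is just a random test point, and the empty list is the same data placeholder as above):

m0 = np.random.uniform(0, 1, len(all_labels))
pytensor.gradient.verify_grad(
    lambda m: loglike_op(m, []),
    [m0],
    rng=np.random.default_rng(42),
)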
Note: I’m working with Python 3.8.8.
- I actually don’t need to pass any observed data; that is already handled inside the log-likelihood function. However, I could not get the non-gradient version to work without a data variable, so I made it an empty list.
The current error I get is:
ValueError: LogLike.grad returned a term with 0 dimensions, but 1 are required.
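I believe this can be reproduced without sampling by asking PyTensor for the gradient directly (sketch):

m_var = pt.vector("m")
out = loglike_op(m_var, [])
grad_of_m = pytensor.grad(out, wrt=m_var)  # raises the ValueError above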
Thanks and best regards
Jan