I have a bunch of correlation coefficients, which I assume are samples of a true underlying correlation, which I’d like to estimate using Bayesian inference. The sampling distribution has an analytical solution but is composed of non-standard functions; consequently I’m having trouble converting them to theano
functions. I’d very much appreciate any help anyone could spare on this. Below is some self-contained example code:
import pymc3 as pm
import numpy as np
import theano.tensor as tt
from scipy.special import hyp2f1 # this function is the problem!
from scipy.stats import multivariate_normal, pearsonr
import matplotlib.pyplot as plt
class SampleCorrelationDistribution(pm.Continuous):
"""Distribution of sample correlations.
"""
def __init__(self, rho, n, *args, **kwargs):
self._rho = tt.as_tensor_variable(rho)
self._n = tt.as_tensor_variable(n)
super(SampleCorrelationDistribution, self).__init__(*args, **kwargs)
def logp(self, value):
if value.ndim != 1:
raise ValueError('Only one-dimensional values are supported.')
rho = self._rho
n = self._n
lognum = tt.log(n - 2) + tt.gammaln(n - 1) + ((n - 1) / 2) * tt.log(
1 - rho ** 2) + ((n - 4) / 2) * tt.log(1 - value ** 2)
logden = tt.log(tt.sqrt(2 * np.pi)) + tt.gammaln(n - 0.5) + (
n - 1.5) * tt.log(1 - rho * value)
loghyp = np.log(hyp2f1(0.5, 0.5, (2 * n - 1) / 2, (rho * value) / 2))
return lognum - logden * loghyp
with pm.Model():
true_rho = 0
true_n = 100
data = []
for it in range(1000):
xy = multivariate_normal.rvs([0, 0], [[1, true_rho], [true_rho, 1]],
size=true_n)
x, y = xy[:, 0], xy[:, 1]
r, p = pearsonr(x, y)
data.append(r)
rho = pm.Uniform(name='rho')
y = SampleCorrelationDistribution(name='y', rho=rho, n=true_n, observed=data)
trace = pm.sample()
pm.traceplot(trace)
plt.savefig('traceplot.png')
This produces the following error:
Traceback (most recent call last):
File "/Users/sm2286/Documents/CognitionGyrification/scripts/model.py", line 54, in <module>
y = SampleCorrelationDistribution(name='y', rho=rho, n=true_n, observed=data)
File "/Users/sm2286/anaconda3/lib/python3.6/site-packages/pymc3/distributions/distribution.py", line 39, in __new__
return model.Var(name, dist, data, total_size)
File "/Users/sm2286/anaconda3/lib/python3.6/site-packages/pymc3/model.py", line 545, in Var
total_size=total_size, model=self)
File "/Users/sm2286/anaconda3/lib/python3.6/site-packages/pymc3/model.py", line 970, in __init__
self.logp_elemwiset = distribution.logp(data)
File "/Users/sm2286/Documents/CognitionGyrification/scripts/model.py", line 34, in logp
loghyp = np.log(hyp2f1(0.5, 0.5, (2 * n - 1) / 2, (rho * value) / 2))
TypeError: ufunc 'hyp2f1' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
So clearly the issue is converting scipy’s hyp2f1
function. I think the source code is here, which looks to be written in c. Is there some way to compile this into a theano
function?