Thank you @junpenglao, with the Jacobian adjustment the model fits correctly.
Unfortunately, I've now run into this bug when trying to sample from the posterior predictive distribution (which requires sampling from the multivariate normal (that works) and from the mus and sigmas (that don't)). I'll post my error on the same issue.
Working model for reference:
import numpy as np
import pymc3 as pm
import scipy.special as ss
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import theano.tensor as tt
from theano import shared
corr=0.6
_sigma=np.array([[1,corr],[corr,1]])
_mus=np.array([-1,2])
_sigmas=np.array([.5,.5])
N=10000
u=np.random.multivariate_normal((0,0),_sigma,size=N)
c=sp.stats.norm.cdf(u)
generated=sp.stats.lognorm.ppf(c,_sigmas,scale=np.exp(_mus))
def lognormal_cdf(values, mu, sigma):
    # lognormal CDF expressed through the probit link
    return pm.math.invprobit((tt.log(values) - mu) / sigma)

def jacobian_det(f_inv_x, x):
    # log|det| of the transform, added as a Potential to account for the change of variables
    grad = tt.reshape(pm.theanof.gradient(tt.sum(f_inv_x), [x]), x.shape)
    return tt.log(tt.abs_(grad))
with pm.Model() as model:
    data = shared(generated)
    # hackish way to create a Cholesky factor that yields a covariance matrix
    # with 1s on the diagonal and corr everywhere else
    corr = pm.LKJCorr('corr', n=2, eta=2)[0]
    chol = tt.set_subtensor(tt.eye(2, 2)[([1, 1], [0, 1])],
                            [corr, tt.sqrt(1 - tt.pow(corr, 2))])
    # r = pm.Uniform('r', lower=-1, upper=1)
    # cov = pm.Deterministic('cov',
    #                        tt.stacklists([[1., r],
    #                                       [r, 1.]]))
    mus = pm.Normal('mus', 0, 1, shape=2)
    sigmas = pm.HalfCauchy('sigmas', beta=2, shape=2)
    ps = lognormal_cdf(data, mus, sigmas)  # marginal CDFs -> (0, 1)
    cs = pm.math.probit(ps)                # back to the standard normal scale
    us_dist = pm.MvNormal('copula', mu=np.zeros(2), chol=chol, observed=cs)
    pm.Potential('jacob_det', jacobian_det(cs, data))
    trace = pm.sample(1000)
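For reference, here is roughly what I plan to do with the posterior predictive draws once I can get them, mirroring the data-generating code above. A rough sketch (assuming `sample` holds arrays keyed by 'copula', 'mus' and 'sigmas'; broadcasting mus/sigmas over the observation axis is my guess at the returned shapes):

# hypothetical post-processing of the posterior predictive draws
simulated_c = sp.stats.norm.cdf(sample['copula'])                  # latent normals -> uniforms
simulated = sp.stats.lognorm.ppf(simulated_c,
                                 sample['sigmas'][:, None, :],     # assumed shape (draws, 1, 2)
                                 scale=np.exp(sample['mus'][:, None, :]))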
When I then try to sample:
sample=pm.sample_posterior_predictive(trace,model=model,vars=[us_dist,mus,sigmas],samples=10000)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-14dbe60ba96f> in <module>()
----> 1 sample=pm.sample_posterior_predictive(trace,model=model,vars=[us_dist,mus,sigmas],samples=10000)
2
3 simulated_c=sp.stats.norm.cdf(sample['copula'])
4 simulated=np.exp(sp.stats.norm.ppf(simulated_c)*sample['sigmas']+sample['mus'])
~\AppData\Local\Continuum\Anaconda3\envs\newroot\lib\site-packages\pymc3\sampling.py in sample_posterior_predictive(trace, samples, model, vars, size, random_seed, progressbar)
1118 # draw once to inspect the shape
1119 var_values = list(zip(varnames,
-> 1120 draw_values(vars, point=model.test_point, size=size)))
1121 ppc_trace = defaultdict(list)
1122 for varname, value in var_values:
~\AppData\Local\Continuum\Anaconda3\envs\newroot\lib\site-packages\pymc3\distributions\distribution.py in draw_values(params, point, size)
310 while to_eval or missing_inputs:
311 if to_eval == missing_inputs:
--> 312 raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval]))
313 to_eval = set(missing_inputs)
314 missing_inputs = set()
ValueError: Cannot resolve inputs for ['copula']
Thanks again @junpenglao for the help!
