The trick is to treat the cov matrix as known (i.e., Sigma in your model) and compute the conditional, I am trying to follow conditional distribution from wikipedia but the code is not quite work yet:
import numpy as np
import pymc3 as pm
import theano.tensor as tt
np.random.seed(42)
n = 10000
threshold = -0.15
# Here I define original Mean and Var-Covar Matrix
cov_matrix_original = np.array([[1, 0.8, 0.9], [0.8, 1., 0.75], [0.9, 0.75, 1.]])
Y_mu = np.array([0., 0., 0.])
# Generating sample n x 3
Y = np.random.multivariate_normal(mean=Y_mu, cov=cov_matrix_original, size=n)
# Do "censoring" for 3rd variate
mask_col = Y[:, -1] <= threshold
Y[mask_col, -1] = threshold
# compute the conditional CDF to account for the censoring
def mv_cond(idx, nd, mu, cov, cond_a):
idx1 = np.arange(nd) != idx
idx2 = np.arange(nd) == idx
mu_1 = mu[idx1]
mu_2 = mu[idx2]
S11 = cov[idx1, idx1]
S12 = cov[idx1, idx2]
S21 = cov[idx2, idx1]
S22 = cov[idx1, :][:, idx1]
mu_ = mu_1 + tt.dot(S12.dot(tt.nlinalg.matrix_inverse(S22)), cond_a-mu_2)
sd_ = S11 - tt.dot(S12.dot(tt.nlinalg.matrix_inverse(S22)), S21)
return mu_, sd_
def normal_lcdf(mu, sigma, x):
z = (x - mu) / sigma
return tt.switch(
tt.lt(z, -1.0),
tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
)
# Censored model
with pm.Model():
packed_L = pm.LKJCholeskyCov(
'packed_L', n=3, eta=2., sd_dist=pm.HalfNormal.dist(2.5))
L = pm.expand_packed_triangular(3, packed_L)
Sigma = pm.Deterministic('Sigma', L.dot(L.T))
Data = pm.MvNormal('Data', mu=Y_mu, chol=L, observed=Y)
mu_c, sd_c = mv_cond(2, 3, Y_mu, Sigma, Y_mu[:-1])
pm.Potential('censor_cdf', normal_lcdf(mu_c, sd_c, threshold) * mask_col.sum())
trace = pm.sample()