Dealing with imputation of left censored multivariate normal distribution

The trick is to treat the cov matrix as known (i.e., Sigma in your model) and compute the conditional, I am trying to follow conditional distribution from wikipedia but the code is not quite work yet:

import numpy as np
import pymc3 as pm
import theano.tensor as tt

np.random.seed(42)
n = 10000
threshold = -0.15

# Here I define original Mean and Var-Covar Matrix
cov_matrix_original = np.array([[1, 0.8, 0.9], [0.8, 1., 0.75], [0.9, 0.75, 1.]])
Y_mu = np.array([0., 0., 0.])

# Generating sample n x 3
Y = np.random.multivariate_normal(mean=Y_mu, cov=cov_matrix_original, size=n)

# Do "censoring" for 3rd variate
mask_col = Y[:, -1] <= threshold
Y[mask_col, -1] = threshold

# compute the conditional CDF to account for the censoring
def mv_cond(idx, nd, mu, cov, cond_a):
    idx1 = np.arange(nd) != idx
    idx2 = np.arange(nd) == idx
    mu_1 = mu[idx1]
    mu_2 = mu[idx2]
    S11 = cov[idx1, idx1]
    S12 = cov[idx1, idx2]
    S21 = cov[idx2, idx1]
    S22 = cov[idx1, :][:, idx1]
    mu_ = mu_1 + tt.dot(S12.dot(tt.nlinalg.matrix_inverse(S22)), cond_a-mu_2)
    sd_ = S11 - tt.dot(S12.dot(tt.nlinalg.matrix_inverse(S22)), S21)
    return mu_, sd_

def normal_lcdf(mu, sigma, x):
    z = (x - mu) / sigma
    return tt.switch(
        tt.lt(z, -1.0),
        tt.log(tt.erfcx(-z / tt.sqrt(2.)) / 2.) - tt.sqr(z) / 2.,
        tt.log1p(-tt.erfc(z / tt.sqrt(2.)) / 2.)
    )

# Censored model
with pm.Model():
    packed_L = pm.LKJCholeskyCov(
        'packed_L', n=3, eta=2., sd_dist=pm.HalfNormal.dist(2.5))
    L = pm.expand_packed_triangular(3, packed_L)
    Sigma = pm.Deterministic('Sigma', L.dot(L.T))

    Data = pm.MvNormal('Data', mu=Y_mu, chol=L, observed=Y)
    mu_c, sd_c = mv_cond(2, 3, Y_mu, Sigma, Y_mu[:-1])
    pm.Potential('censor_cdf', normal_lcdf(mu_c, sd_c, threshold) * mask_col.sum())
    trace = pm.sample()
1 Like