Many thanks once more for your helpful reply. I’ve managed to get the MWE up and running (see below).
Two things are still unclear to me:
- You write that you approximate the posteriors as “block multivariate normal.” What do you mean by “block”? Does it mean that only variables within the same block are correlated? (But then I only see a single block.)
- What is the rationale behind setting the regularization for chol_m and Lvec_chol to 1e-3 and 1e-4, respectively?
Thanks a lot!
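To make the second question more concrete: my guess is that the added diagonal terms are just jitter, there to keep np.linalg.cholesky from failing when the empirical covariance is numerically singular, along the lines of this small standalone check (the 1e-6 value is arbitrary):

import numpy as np

# A numerically singular covariance (e.g. two almost perfectly correlated
# posterior dimensions) makes the plain Cholesky factorization fail ...
singular_cov = np.ones((2, 2))
try:
    np.linalg.cholesky(singular_cov)
except np.linalg.LinAlgError as err:
    print("without jitter:", err)

# ... while a small diagonal jitter term restores positive definiteness.
print("with jitter:")
print(np.linalg.cholesky(singular_cov + 1e-6 * np.eye(2)))

If that is the idea, I still don’t see how the specific magnitudes 1e-3 and 1e-4 were chosen.

The MWE: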
import numpy as np
import pymc3 as pm
import theano
import theano.tensor as tt
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
from scipy import stats
warnings.simplefilter(action='ignore', category=FutureWarning)
dimension = 2
numIterations = 8
packedMatrixDimension = {2: 3}  # length of the packed lower triangle: n * (n + 1) // 2
np.random.seed(30)
μ_actual = np.random.normal(0, 1, size=(dimension,))
randMatrix = np.random.normal(0, 1, size=(dimension, dimension))
Σ_actual = (randMatrix + randMatrix.T) / 2 + dimension * np.diag(np.ones(dimension))  # symmetric, with a diagonal shift to make it positive definite
def getNewObservations(N=1000):
    return np.random.multivariate_normal(μ_actual, Σ_actual, size=N)
# initial model
with pm.Model() as init_model:
    L_packed = pm.LKJCholeskyCov('L_packed', n=dimension, eta=2, sd_dist=pm.HalfCauchy.dist(2.5))
    mu = pm.Cauchy('mu', 0., dimension, shape=(dimension,))
    obs = pm.MvNormal('obs', mu, chol=pm.expand_packed_triangular(dimension, L_packed),
                      observed=getNewObservations())
    trace = pm.sample(cores=1)
traces = [trace]
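# moment-match the first posterior: store the marginal means of mu and L_packed and the
# Cholesky factors of their empirical covariances (plus a small diagonal jitter) in shared
# variables, so they can be updated after every round of new observations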
mu_marginal_mean = theano.shared(np.mean(trace['mu'], axis=0), name="mu_marginal_mean")
mu_cholesky = theano.shared(np.linalg.cholesky(np.cov(trace['mu'].T) + np.diag(np.ones(dimension)) * 1e-3), name="mu_cholesky")
L_packed_marginal_mean = theano.shared(np.mean(trace['L_packed'], axis=0), name="L_packed_marginal_mean")
L_packed_cholesky = theano.shared(np.linalg.cholesky(np.cov(trace['L_packed'].T) + np.diag(np.ones(packedMatrixDimension[dimension])) * 1e-4), name="L_packed_cholesky")
observations = theano.shared(getNewObservations(), name="observations")
with pm.Model() as update_model:
    # non-centered parametrization
    mean_error = pm.Normal('mean_error', 0, 1, shape=dimension)
    mu = pm.Deterministic('mu', mu_marginal_mean + tt.dot(mu_cholesky, mean_error))
    L_packed_error = pm.Normal('L_packed_error', 0, 1, shape=packedMatrixDimension[dimension])
    L_packed = pm.Deterministic('L_packed', L_packed_marginal_mean + tt.dot(L_packed_cholesky, L_packed_error))
    obs = pm.MvNormal('obs', mu, chol=pm.expand_packed_triangular(dimension, L_packed), observed=observations)
# iteration 1
with update_model:
    trace = pm.sample(cores=1)
    traces.append(trace)
# iterations 2+
for _ in range(2, numIterations):
    # update model parameters based on previous trace
    mu_marginal_mean.set_value(np.mean(trace['mu'], axis=0))
    mu_cholesky.set_value(np.linalg.cholesky(np.cov(trace['mu'].T) + np.diag(np.ones(dimension)) * 1e-3))
    L_packed_marginal_mean.set_value(np.mean(trace['L_packed'], axis=0))
    L_packed_cholesky.set_value(np.linalg.cholesky(np.cov(trace['L_packed'].T) + np.diag(np.ones(packedMatrixDimension[dimension])) * 1e-4))
    # update observations
    observations.set_value(getNewObservations())
    with update_model:
        trace = pm.sample(cores=1)
        traces.append(trace)
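# plot the marginal posterior KDEs for every update (autumn colormap: early updates yellow,
# later updates red); black vertical lines mark the true parameter values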
cmap = mpl.cm.autumn
params = ['mu', 'L_packed']
plt.figure(figsize=(20,10))
for paramId, param in enumerate(params):
    plt.subplot(len(params), 1, paramId + 1)
    for update_i, trace in enumerate(traces):
        samples = trace[param]
        for scalar in zip(*samples):  # samples of one scalar component of the parameter
            smin, smax = np.min(scalar), np.max(scalar)
            x = np.linspace(smin, smax, 100)
            y = stats.gaussian_kde(scalar)(x)
            plt.plot(x, y, color=cmap(1 - update_i / len(traces)))
    if param == "mu":
        for mu_scalar in μ_actual:
            plt.axvline(mu_scalar, c="k")
    elif param == "L_packed":
        for L_scalar in np.linalg.cholesky(Σ_actual)[np.tril_indices(dimension)]:
            plt.axvline(L_scalar, c="k")
    plt.ylabel('Density')
    plt.title(param)
plt.show()
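Regarding my first question, here is how I currently read “block”: in the MWE above, mu and L_packed each get their own multivariate-normal approximation, i.e. two independent blocks, so correlations between mu and L_packed are dropped. If a single joint block over all parameters is meant instead, I imagine it would look roughly like the sketch below (untested; it reuses the names from the script above, joint_update_model / joint_mean / joint_chol / joint_error are just names I made up, and the 1e-4 jitter is carried over from above):

# hypothetical single-block variant: one multivariate normal over the stacked
# vector [mu, L_packed], so correlations between mu and L_packed are kept as well
joint_samples = np.concatenate([trace['mu'], trace['L_packed']], axis=1)
joint_mean = theano.shared(np.mean(joint_samples, axis=0), name="joint_mean")
joint_chol = theano.shared(
    np.linalg.cholesky(np.cov(joint_samples.T) + 1e-4 * np.eye(joint_samples.shape[1])),
    name="joint_chol")
with pm.Model() as joint_update_model:
    joint_error = pm.Normal('joint_error', 0, 1, shape=joint_samples.shape[1])
    joint = joint_mean + tt.dot(joint_chol, joint_error)
    mu = pm.Deterministic('mu', joint[:dimension])
    L_packed = pm.Deterministic('L_packed', joint[dimension:])
    obs = pm.MvNormal('obs', mu, chol=pm.expand_packed_triangular(dimension, L_packed),
                      observed=observations)

Is this the kind of single block you have in mind, or do you count each of mu and L_packed as its own block?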