Coregionalization model for two separable multidimensional Gaussian Process

So it looks like a Kronecker based implementation isn’t in PyMC3 (PR welcome!), but you can implement it without in a way that doesn’t take advantage of that computational speedup. Here is an example that should help get you going of a coregionalization GP model. Notice that you won’t have any issues with multidimensional inputs, they’ll work as normally.

generate and then plot the data

import numpy as np
import pymc3 as pm

# set the seed
np.random.seed(1)

n = 100 
x = np.linspace(0, 10, n)[:, None] 

# true covariance function and its true parameters
ell_true = 1.0
eta_true = 3.0
cov_func = eta_true**2 * pm.gp.cov.Matern52(1, ℓ_true)
mean_func = pm.gp.mean.Zero()

# two samples from the same gaussian process
f_true1 = np.random.multivariate_normal(mean_func(x).eval(),
                                        cov_func(x).eval() + 1e-8*np.eye(n), 1).flatten()
f_true2 = np.random.multivariate_normal(mean_func(x).eval(),
                                        cov_func(x).eval() + 1e-8*np.eye(n), 1).flatten()


sigma_true = 2.0
f1 = 0.5*f_true1 - 1.0*f_true2
f2 = 1.0*f_true1 + 0.2*f_true2
y1 = f1 + sigma_true * np.random.randn(n)
y2 = f2 + sigma_true * np.random.randn(n)

## Plot the data and the unobserved latent function
fig = plt.figure(figsize=(12,5)); ax = fig.gca()
ax.plot(x, f1, "dodgerblue", lw=3, label="True f1");
ax.plot(x, f2, "tomato", lw=3, label="True f2");
ax.plot(x, y1, 'ok', ms=3, alpha=0.5, label="Data 1");
ax.plot(x, y2, 'ok', ms=3, alpha=0.5, label="Data 2");
ax.set_xlabel("x"); ax.set_ylabel("The true f(x)"); plt.legend();

Format the data for the coregionalized GP

xx = np.concatenate((x, x), axis=0)
idx = np.concatenate((np.zeros(n), np.ones(n)))[:,None]
X = np.concatenate((xx, idx), axis=1)

y = np.concatenate((y1, y2))
X.shape, y.shape

pymc3 model

with pm.Model() as model:
    eta = pm.Gamma("eta", alpha=2, beta=0.5)
    ell = pm.Gamma("ell", alpha=2, beta=0.5)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell, active_dims=[0])
    
    W = pm.Normal("W", mu=0, sd=3, shape=(2,2), testval=np.random.randn(2,2))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=2)
    coreg = pm.gp.cov.Coregion(input_dim=2, active_dims=[1], kappa=kappa, W=W)
    cov_func = coreg * cov

    sigma = pm.HalfNormal("sigma", sd=3)
    gp = pm.gp.Marginal(cov_func=cov_func)
    y_ = gp.marginal_likelihood("f", X, y, noise=sigma) 

MAP estimate and generate some predictions

with model:
    mp = pm.find_MAP()

x_new = np.linspace(0, 20, 200)[:,None]
xx_new = np.concatenate((x_new, x_new), axis=0)
idx2 = np.concatenate((np.zeros(200), np.ones(200)))[:,None]
X_new = np.concatenate((xx_new, idx2), axis=1)

with model:
    f_pred = gp.conditional("f_pred", X_new)

# To use the MAP values, you can just replace the trace with a length-1 list with `mp`
with model:
    pred_samples = pm.sample_ppc([mp], vars=[f_pred], samples=300)

plot

from pymc3.gp.util import plot_gp_dist
fig = plt.figure(figsize=(12,5)); ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
from pymc3.gp.util import plot_gp_dist

f_pred = pred_samples["f_pred"]
plot_gp_dist(ax, f_pred[:, :200], X_new[:200,0], palette="Blues", fill_alpha=0.1, samples_alpha=0.1);
plot_gp_dist(ax, f_pred[:, 200:], X_new[200:,0], palette="Reds", fill_alpha=0.1, samples_alpha=0.1);

5 Likes