# Coregionalization model for two separable multidimensional Gaussian Processes

So it looks like a Kronecker-based implementation isn't in PyMC3 (PR welcome!), but you can still implement a coregionalization GP model in a way that doesn't take advantage of that computational speedup. Here is an example of a coregionalized GP model that should help get you going. Note that you won't have any issues with multidimensional inputs; they'll work as usual.
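For context on what the `Coregion` covariance used below is doing: it builds the intrinsic coregionalization matrix B = W Wᵀ + diag(κ) over the output indices, and multiplying it by an ordinary kernel gives k((x, i), (x', j)) = B[i, j] · k(x, x'). Here is a minimal NumPy sketch of that matrix, with placeholder values:

```python
import numpy as np

# Illustrative values only: B is the 2x2 inter-output covariance matrix that
# pm.gp.cov.Coregion builds internally as B = W W^T + diag(kappa).
# The combined kernel is then k((x, i), (x', j)) = B[i, j] * k(x, x').
W = np.random.randn(2, 2)      # low-rank factor, one row per output
kappa = np.array([0.5, 0.5])   # positive diagonal term, one entry per output
B = W @ W.T + np.diag(kappa)   # positive definite coregionalization matrix
```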

#### generate and then plot the data

```python
import numpy as np
import pymc3 as pm
import matplotlib.pyplot as plt

# set the seed
np.random.seed(1)

n = 100
x = np.linspace(0, 10, n)[:, None]

# true covariance function and its true parameters
ell_true = 1.0
eta_true = 3.0
cov_func = eta_true**2 * pm.gp.cov.Matern52(1, ell_true)
mean_func = pm.gp.mean.Zero()

# two samples from the same gaussian process
f_true1 = np.random.multivariate_normal(
    mean_func(x).eval(), cov_func(x).eval() + 1e-8 * np.eye(n), 1
).flatten()
f_true2 = np.random.multivariate_normal(
    mean_func(x).eval(), cov_func(x).eval() + 1e-8 * np.eye(n), 1
).flatten()

sigma_true = 2.0
f1 = 0.5 * f_true1 - 1.0 * f_true2
f2 = 1.0 * f_true1 + 0.2 * f_true2
y1 = f1 + sigma_true * np.random.randn(n)
y2 = f2 + sigma_true * np.random.randn(n)

# plot the data and the unobserved latent functions
fig = plt.figure(figsize=(12, 5)); ax = fig.gca()
ax.plot(x, f1, "dodgerblue", lw=3, label="True f1")
ax.plot(x, f2, "tomato", lw=3, label="True f2")
ax.plot(x, y1, "ok", ms=3, alpha=0.5, label="Data 1")
ax.plot(x, y2, "ok", ms=3, alpha=0.5, label="Data 2")
ax.set_xlabel("x"); ax.set_ylabel("The true f(x)"); plt.legend();
```

#### Format the data for the coregionalized GP

```python
# stack the inputs for the two outputs, and append a column of 0s and 1s
# indicating which output each row belongs to
xx = np.concatenate((x, x), axis=0)
idx = np.concatenate((np.zeros(n), np.ones(n)))[:, None]
X = np.concatenate((xx, idx), axis=1)

y = np.concatenate((y1, y2))
X.shape, y.shape
```
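To be explicit about the layout: column 0 of `X` holds the actual inputs and column 1 holds the output index (0 for the first output, 1 for the second). With multidimensional inputs the pattern is the same, input columns first and the index column last. A hedged sketch for a hypothetical 3-dimensional input (the names here are made up for illustration):

```python
# hypothetical 3-dimensional inputs, shared by both outputs (illustration only)
x3 = np.random.rand(n, 3)
idx3 = np.concatenate((np.zeros(n), np.ones(n)))[:, None]
X3 = np.concatenate((np.concatenate((x3, x3), axis=0), idx3), axis=1)

# the kernels would then select columns via active_dims, e.g.
# cov = eta**2 * pm.gp.cov.ExpQuad(3, ls=ell, active_dims=[0, 1, 2])
# coreg = pm.gp.cov.Coregion(input_dim=4, active_dims=[3], kappa=kappa, W=W)
```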

#### pymc3 model

```python
with pm.Model() as model:
    # amplitude and lengthscale of the covariance over the input column
    eta = pm.Gamma("eta", alpha=2, beta=0.5)
    ell = pm.Gamma("ell", alpha=2, beta=0.5)
    cov = eta**2 * pm.gp.cov.ExpQuad(1, ls=ell, active_dims=[0])

    # coregionalization covariance over the output-index column
    W = pm.Normal("W", mu=0, sd=3, shape=(2, 2), testval=np.random.randn(2, 2))
    kappa = pm.Gamma("kappa", alpha=1.5, beta=1, shape=2)
    coreg = pm.gp.cov.Coregion(input_dim=2, active_dims=[1], kappa=kappa, W=W)
    cov_func = coreg * cov

    # observation noise and marginal likelihood
    sigma = pm.HalfNormal("sigma", sd=3)
    gp = pm.gp.Marginal(cov_func=cov_func)
    y_ = gp.marginal_likelihood("f", X, y, noise=sigma)
```
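If you want to convince yourself that the product `coreg * cov` has the intended block structure, you can evaluate it outside the model with fixed placeholder parameters (a sanity-check sketch, not part of the model):

```python
# evaluate the combined kernel with fixed (non-random) placeholder parameters
coreg_fixed = pm.gp.cov.Coregion(
    input_dim=2, active_dims=[1], kappa=np.ones(2), W=np.random.randn(2, 2)
)
cov_fixed = pm.gp.cov.ExpQuad(1, ls=1.0, active_dims=[0])
K = (coreg_fixed * cov_fixed)(X).eval()
K.shape  # (200, 200): four n x n blocks, each scaled by an entry of B
```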

#### MAP estimate and generate some predictions

```python
with model:
    mp = pm.find_MAP()

# predict on a grid that extends past the training data, for both outputs
x_new = np.linspace(0, 20, 200)[:, None]
xx_new = np.concatenate((x_new, x_new), axis=0)
idx2 = np.concatenate((np.zeros(200), np.ones(200)))[:, None]
X_new = np.concatenate((xx_new, idx2), axis=1)

with model:
    f_pred = gp.conditional("f_pred", X_new)

# to use the MAP values, you can just replace the trace with a length-1 list containing `mp`
with model:
    pred_samples = pm.sample_ppc([mp], vars=[f_pred], samples=300)
```
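One thing worth checking at this point is the estimated relationship between the two outputs. Since `find_MAP` returns a dict of values for the model variables, you can reconstruct the implied coregionalization matrix (a small sketch using the variable names from the model above):

```python
# implied inter-output covariance at the MAP point: B = W W^T + diag(kappa)
B_map = mp["W"] @ mp["W"].T + np.diag(mp["kappa"])
print(B_map)  # off-diagonal entries show how strongly the two outputs covary
```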

#### plot

```python
from pymc3.gp.util import plot_gp_dist

fig = plt.figure(figsize=(12, 5)); ax = fig.gca()

# plot the samples from the gp posterior with samples and shading
f_samples = pred_samples["f_pred"]
plot_gp_dist(ax, f_samples[:, :200], X_new[:200, 0], palette="Blues", fill_alpha=0.1, samples_alpha=0.1)
plot_gp_dist(ax, f_samples[:, 200:], X_new[200:, 0], palette="Reds", fill_alpha=0.1, samples_alpha=0.1)
```
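Finally, if you'd rather have full posterior uncertainty than a MAP point estimate, you can swap `find_MAP` for NUTS sampling and pass the trace instead. Note that in later PyMC3 releases `pm.sample_ppc` was renamed to `pm.sample_posterior_predictive`. A sketch:

```python
with model:
    trace = pm.sample(1000, tune=1000, chains=2)
    pred_samples = pm.sample_ppc(trace, vars=[f_pred], samples=300)
    # in newer PyMC3 versions the equivalent call is:
    # pred_samples = pm.sample_posterior_predictive(trace, var_names=["f_pred"], samples=300)
```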
