Sparse GP implementation in GP examples gallery

Hi All,

I have been going through the GP examples, in particular Heteroscedastic Gaussian Processes.

I would like very much to get the Sparse GP working in PyMC5.

In the section “Sparse Heteroskedastic GP”, a class SparseLatent is defined with a prior method:

def prior(self, name, X, Xu):
    Kuu = self.cov(Xu)
    self.L = pm.gp.util.cholesky(pm.gp.util.stabilize(Kuu))

Trying the code, I get an error.

Indeed, if I check the gp.util source code, there is no cholesky function there anymore.
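A quick way to confirm, assuming a recent PyMC install:

import pymc as pm

hasattr(pm.gp.util, "cholesky")  # False here; the helper seems to have been removed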

Is there a Sparse GP implementation example elsewhere?
I could not find much besides this example, which uses PyMC3.

I made some PyMC5-related changes, but I get the error

AttributeError: 'TensorVariable' object has no attribute 'endswith'

The code I am using is as close as possible to the original PyMC3 example:

import matplotlib.pyplot as plt
import numpy as np
import pymc as pm


# set the seed
np.random.seed(1)

n = 2000  # The number of data points
X = 10 * np.sort(np.random.rand(n))[:, None]

# Define the true covariance function and its parameters
ℓ_true = 1.0
η_true = 3.0
cov_func = η_true**2 * pm.gp.cov.Matern52(1, ℓ_true)

# A mean function that is zero everywhere
mean_func = pm.gp.mean.Zero()

# The latent function values are one sample from a multivariate normal
# Note that we have to call `eval()` because PyMC is built on top of PyTensor
f_true = np.random.multivariate_normal(
    mean_func(X).eval(), cov_func(X).eval() + 1e-8 * np.eye(n), 1
).flatten()

# The observed data is the latent function plus a small amount of IID Gaussian noise
# The standard deviation of the noise is `sigma`
σ_true = 2.0
y = f_true + σ_true * np.random.randn(n)

with pm.Model() as model:
    ℓ = pm.Gamma("ℓ", alpha=2, beta=1)
    η = pm.HalfCauchy("η", beta=5)

    cov = η**2 * pm.gp.cov.Matern52(1, ℓ)
    gp = pm.gp.MarginalSparse(cov_func=cov, approx="FITC")

    # initialize 20 inducing points with K-means
    Xu = pm.gp.util.kmeans_inducing_points(20, X)

    σ = pm.HalfCauchy("σ", beta=5)
    y_ = gp.marginal_likelihood("y", X=X, Xu=Xu, y=y, noise=σ)

    trace = pm.sample(1000, chains=1)
X_new = np.linspace(-1, 11, 200)[:, None]

# add the GP conditional to the model, given the new X values
with model:
    f_pred = gp.conditional("f_pred", X_new)

# To use the MAP values instead, you can replace the trace with a length-1 list containing the MAP point `mp`
with model:
    pred_samples = pm.sample_posterior_predictive(trace, var_names=[f_pred])

and the last line triggers the error above.

Thank you

If you just need a Cholesky decomposition, use:

import pytensor.tensor as pt

def prior(self, name, X, Xu):
    Kuu = self.cov(Xu)
    self.L = pt.linalg.cholesky(pm.gp.util.stabilize(Kuu))
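As for the AttributeError: 'TensorVariable' object has no attribute 'endswith': var_names in pm.sample_posterior_predictive expects variable names as strings, not the variables themselves, so that last line should most likely be:

with model:
    # pass the name as a string, not the TensorVariable f_pred
    pred_samples = pm.sample_posterior_predictive(trace, var_names=["f_pred"])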

Thank you very much. I looked at pytensor and it now works, with replacements in both the prior and conditional methods.

I attach the class and the sampling code below, should it ever be useful to anybody.

import pytensor.tensor as pt

class SparseLatent:
    def __init__(self, cov_func):
        self.cov = cov_func

    def prior(self, name, X, Xu):
        Kuu = self.cov(Xu)
        # pm.gp.util.cholesky no longer exists; use pytensor's cholesky instead
        self.L = pt.linalg.cholesky(pm.gp.util.stabilize(Kuu))

        self.v = pm.Normal(f"u_rotated_{name}", mu=0.0, sigma=1.0, shape=len(Xu))
        self.u = pm.Deterministic(f"u_{name}", pt.dot(self.L, self.v))

        Kfu = self.cov(X, Xu)
        # tt.slinalg.solve_upper_triangular / solve_lower_triangular are replaced
        # by pt.slinalg.solve_triangular with the `lower` keyword
        self.Kuiu = pt.slinalg.solve_triangular(
            self.L.T, pt.slinalg.solve_triangular(self.L, self.u, lower=True), lower=False
        )
        self.mu = pm.Deterministic(f"mu_{name}", pt.dot(Kfu, self.Kuiu))
        return self.mu

    def conditional(self, name, Xnew, Xu):
        Ksu = self.cov(Xnew, Xu)
        mus = pt.dot(Ksu, self.Kuiu)
        tmp = pt.slinalg.solve_triangular(self.L, Ksu.T, lower=True)
        Qss = pt.dot(tmp.T, tmp)  # Qss = Ksu @ pinv(Kuu) @ Ksu.T
        Kss = self.cov(Xnew)
        Lss = pt.linalg.cholesky(pm.gp.util.stabilize(Kss - Qss))
        mu_pred = pm.MvNormal(name, mu=mus, chol=Lss, shape=len(Xnew))
        return mu_pred

# Explicitly specify inducing points by downsampling our input vector
Xu = X[1::2]

with pm.Model() as model_hts:
    ℓ = pm.InverseGamma("ℓ", mu=ℓ_μ, sigma=ℓ_σ)
    η = pm.Gamma("η", alpha=2, beta=1)
    cov = η**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=ℓ)

    μ_gp = SparseLatent(cov)
    μ_f = μ_gp.prior("μ", X_obs, Xu)

    σ_ℓ = pm.InverseGamma("σ_ℓ", mu=ℓ_μ, sigma=ℓ_σ)
    σ_η = pm.Gamma("σ_η", alpha=2, beta=1)
    σ_cov = σ_η**2 * pm.gp.cov.ExpQuad(input_dim=1, ls=σ_ℓ)

    lg_σ_gp = SparseLatent(σ_cov)
    lg_σ_f = lg_σ_gp.prior("lg_σ_f", X_obs, Xu)
    σ_f = pm.Deterministic("σ_f", pm.math.exp(lg_σ_f))

    lik_hts = pm.Normal("lik_hts", mu=μ_f, sigma=σ_f, observed=y_obs_)
    trace_hts = pm.sample(target_accept=0.95, return_inferencedata=True, random_seed=SEED, chains=1)

with model_hts:
    μ_pred = μ_gp.conditional("μ_pred", Xnew, Xu)
    lg_σ_pred = lg_σ_gp.conditional("lg_σ_pred", Xnew, Xu)
    samples_hts = pm.sample_posterior_predictive(trace_hts, var_names=["μ_pred", "lg_σ_pred"])
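For completeness, a minimal sketch of how the predictions could be summarized and plotted afterwards (my own addition, assuming the default InferenceData return and the Xnew grid from the notebook):

# average the posterior predictive draws over chains and draws
μ_mean = samples_hts.posterior_predictive["μ_pred"].mean(dim=("chain", "draw"))
σ_mean = np.exp(samples_hts.posterior_predictive["lg_σ_pred"]).mean(dim=("chain", "draw"))

plt.plot(Xnew, μ_mean, label="predicted mean")
plt.fill_between(
    Xnew.flatten(), μ_mean - 2 * σ_mean, μ_mean + 2 * σ_mean,
    alpha=0.3, label="±2σ noise band",
)
plt.legend()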

If this is broken in the examples, a PR would be very welcome 🙂 Even just opening an issue in the pymc-examples repo would be great.

I will certainly do so. Thanks again, much appreciated.
