# Prediction with Latent GP using 2D kernel definition

Trying to predict new values using `.conditional` of a Latent GP using my own 2D kernel definition results in a incompatible shape error (MWE at the end).

In a nutshell, I’m creating a 2 dimensional (spatial) Gaussian process model using Haversine instead of Euclidian distance. My kernel definition is as follows:

``````
class Matern32Haversine(pm.gp.cov.Stationary):
def __init__(self, input_dims, ls, r=6378.137, active_dims=None):
if input_dims != 2:
raise ValueError("Great circle distance is only defined on 2 dimensions")
super().__init__(input_dims, ls=ls, active_dims=active_dims)
self.r = r

def great_circle_distance(self, X, Xs=None):
if Xs is None:
Xs = X

# Assume first column is longitude and second is latitude

# Elementwise differnce of lats and lons
dlat = lat2[:, None] - lat1
dlon = lon2[:, None] - lon1

# Compute haversine
d = (
pt.sin(dlat / 2) ** 2
+ pt.cos(lat1[:, None]) * pt.cos(lat2) * pt.sin(dlon / 2) ** 2
)
return self.r * 2 * pt.arcsin(pt.sqrt(d)) + 1e-12

def full(self, X, Xs=None):
X, Xs = self._slice(X, Xs)
r = self.great_circle_distance(X, Xs)
return (1.0 + np.sqrt(3.0) * r / self.ls) * pt.exp(-np.sqrt(3.0) * r / self.ls)
``````

In the following a minimal working example to reproduce the error:

``````
lonlats = np.array(
[
[15.0, 56.0],
[16.0, 56.5],
[17.0, 57.0],
[14.0, 57.5],
[12.0, 58.0],
[15.0, 58.5],
]
)
lonlats_new = np.array(
[
[17.5, 56.25],
[15.5, 58.75],
]
)

counts = np.array([2, 3, 4, 5, 2, 2, 1, 3, 3])
idx = np.array([0, 1, 2, 3, 4, 5, 0, 1, 2])

with pm.Model() as model:
_idx = pm.ConstantData("idx", idx)
_X = pm.ConstantData("X", lonlats)

# Covariance function for the GP
rho_1 = pm.InverseGamma("rho_1", alpha=3, beta=100)
eta2_1 = pm.Exponential("eta2_1", lam=1.5)
cov_func = eta2_1 * Matern32Haversine(2, ls=rho_1) + pm.gp.cov.WhiteNoise(1e-4)
# cov_func = eta2_1 * pm.gp.cov.Matern32(1, ls=rho_1) + pm.gp.cov.WhiteNoise(1e-4)

# Latent GP
gp = pm.gp.Latent(cov_func=cov_func)
log_mu = gp.prior("log_mu", _X)
mu = pm.Deterministic("mu", pt.exp(log_mu[_idx]))

observed = pm.NegativeBinomial(
"TA",
mu=mu,
alpha=2,
observed=counts,
)

with model:
idata = pm.sample(
500,
tune=4000,
target_accept=0.9,
nuts_sampler="numpyro",
)

with model:
cond_log_mu = gp.conditional("cond_log_mu", lonlats_new, jitter=1e-4)
pp = pm.sample_posterior_predictive(idata, var_names=["cond_log_mu"], random_seed=42)
``````

The `gp.conditional` call results in the following error:

``````ValueError: Incompatible Elemwise input shapes [(2, 2), (6, 2)]

``````

I guess I’m missing some additionally required implementation in my kernel definition? Anyone can tell me what I’m missing? Thanks a lot in advance!

1 Like

The problem is incorrectly implemented broadcasting within the great circle distance which should be computed as follow:

``````    def great_circle_distance(self, X, Xs=None):
if Xs is None:
Xs = X

# Assume first column is longitude and second is latitude

# Reshape lon/lat into 2D
lat1 = lat1_[:, None]
lon1 = lon1_[:, None]

# Elementwise differnce of lats and lons
dlat = lat2_ - lat1
dlon = lon2_ - lon1

# Compute haversine
d = (
pt.sin(dlat / 2) ** 2 + pt.cos(lat1) * pt.cos(lat2_) * pt.sin(dlon / 2) ** 2
)
return self.r * 2 * pt.arcsin(pt.sqrt(d)) + 1e-12
``````
3 Likes