Hi,
I am trying to replicate the latent Gaussian process example from the docs: Latent Variable Implementation — PyMC3 3.11.4 documentation.
My main question is why my predictions (obtained by sampling) are so different from the dataset.
This is my generated dataset:
This is my Gaussian process prediction:
The full code is in a Colab notebook; the relevant parts are below.
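To make the snippets below runnable without the notebook, here is the kind of setup they assume (a sketch only: the seed, xmax, lengthscale, and noise level are illustrative placeholders rather than my exact values):

import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm

# Placeholder synthetic data in the style of the doc example:
# a single draw from a Matern52 GP plus Gaussian noise.
np.random.seed(1)
n = 100
xmax = 10.0
x = np.linspace(0, xmax, n)

l_true = 1.0
cov_true = pm.gp.cov.Matern52(1, ls=l_true)
f_true = np.random.multivariate_normal(
    np.zeros(n), cov_true(x[:, None]).eval() + 1e-8 * np.eye(n)
)
sigma_true = 0.5
y = f_true + sigma_true * np.random.randn(n)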
# define the model
gp_model = pm.Model()

# define the GP prior
with gp_model:
    # Specify the covariance function.
    l = pm.Gamma("l", alpha=2, beta=1)
    cov_func = pm.gp.cov.Matern52(1, ls=l)
    # Specify the GP. The default mean function is `Zero`.
    gp = pm.gp.Latent(cov_func=cov_func)
    # Place a GP prior over the function f.
    f = gp.prior("f", X=x[:, None])
# define the likelihood
with gp_model:
    # specify the noise sigma
    sigma = pm.HalfCauchy("sigma", beta=5)
    # specify the likelihood
    y_observed = pm.Normal("y_observed", mu=f, sigma=sigma, observed=y[:, None])
# make inference based on MCMC
with gp_model:
    # specify the sampler
    step = pm.HamiltonianMC()
    # sample the posterior distribution
    trace = pm.sample(2000, step=step, return_inferencedata=False)

# burn-in and thin the trace
BURN_IN = 1000
THINNING = 2
trace = trace[BURN_IN::THINNING]
with gp_model:
    pm.plot_trace(trace, figsize=(5, 5))
![image|507x500](upload://rO8S9IKoXCeTgInWHoqQNZGix4W.jpeg)
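For completeness, the hyperparameter estimates can also be checked numerically (a sketch using pm.summary; run inside the model context because the trace here is a plain MultiTrace):

# Posterior summary of the lengthscale and noise hyperparameters.
with gp_model:
    print(pm.summary(trace, var_names=["l", "sigma"]))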
# predict the latent function at new input locations
x_star = np.linspace(0, xmax, 100)[:, None]
with gp_model:
    f_star = gp.conditional("f_star", x_star)

with gp_model:
    prediction = pm.sample_posterior_predictive(trace, var_names=["f_star"])
# plot the data together with the predictive mean and one standard deviation
plt.scatter(x, y)
plt.plot(x_star, prediction["f_star"].mean(axis=0))
plt.errorbar(
    x_star.flatten(),
    prediction["f_star"].mean(axis=0),
    yerr=prediction["f_star"].std(axis=0),
)
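For reference, the same conditional draws can also be visualized with the plot_gp_dist helper used in the linked doc (this assumes prediction["f_star"] has shape (n_samples, 100)):

# Plot the posterior GP draws with the helper from the docs, plus the data.
fig, ax = plt.subplots(figsize=(8, 4))
pm.gp.util.plot_gp_dist(ax, prediction["f_star"], x_star)
ax.scatter(x, y, c="k", label="data")
ax.legend()
plt.show()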
Any help is appreciated!!