I wanted to learn inducing points by optimising the evidence lower bound in Titsias, (2009).
I guess, setting a flat prior on Xu and doing MAP is as close as one can get to that?
Xu_init = 10 * np.random.rand(20)
with pm.Model() as model:
ℓ = pm.Gamma("ℓ", alpha=2, beta=1)
η = pm.HalfCauchy("η", beta=5)
cov = η ** 2 * pm.gp.cov.Matern52(1, ℓ)
gp = pm.gp.MarginalSparse(cov_func=cov, approx="VFE")
# set flat prior for Xu
Xu = pm.Flat("Xu", shape=20, testval=Xu_init)
σ = pm.HalfCauchy("σ", beta=5)
y_ = gp.marginal_likelihood("y", X=X, Xu=Xu[:, None], y=y, noise=σ)
mp = pm.find_MAP()