I am no completely sure what is the implementation behind the kernel regression in sklearn, but I manage to make your example work:
# bayesian rbf kernel
with pm.Model() as model:
# hyper-prior for ls
ls = pm.Beta("ls", alpha=1e-2, beta=1e-4)
cov_func = pm.gp.cov.ExpQuad(X.shape[1], ls=ls)
# Specify the GP.
gp = pm.gp.Marginal(cov_func=cov_func)
# Place a GP prior over the function f.
sigma = pm.HalfNormal("sigma", 1)
y_ = gp.marginal_likelihood("y", X=X, y=y, noise=sigma)
# inference
map1 = pm.find_MAP()
pred = gp.predict(X_new, point=map1)