I got this working with Metropolis sampling in the end. After reading Robust Gaussian Processes in Stan, I made my priors more informative, and (somewhat surprisingly) this resolved the broadcasting issue.
I also realised that it is not necessary to define the conditional distribution explicitly, since the prediction function builds it internally. That speeds things up a bit. Updated code below:
```python
import numpy as np
import pymc3 as pm
import theano

np.random.seed(1)
data = np.random.rand(60, 60, 30)   # 60 x 60 grid of series, 30 observations each
X = np.random.rand(30, 100)         # 30 data points, 100 input dimensions
Xnew = np.random.rand(1, 100)       # single prediction point

# Shared variable so the observed data can be swapped without rebuilding the model
shared_data = theano.shared(data[0, 0, :], borrow=True)

with pm.Model() as model:
    # Hyperpriors on the kernel hyperparameters
    ℓ_alpha = pm.HalfNormal('ℓ_alpha', sd=3)
    ℓ_beta = pm.HalfNormal('ℓ_beta', sd=4e-7)
    σf_sd = pm.HalfNormal('σf_sd', sd=0.1)
    σn_sd = pm.HalfNormal('σn_sd', sd=0.01)

    # Lengthscale, signal scale and noise scale
    ℓ = pm.InverseGamma('ℓ', alpha=ℓ_alpha, beta=ℓ_beta)
    σf = pm.HalfNormal('σf', sd=σf_sd)
    σn = pm.HalfNormal('σn', sd=σn_sd)

    μprior = pm.gp.mean.Zero()
    Σprior = σf * pm.gp.cov.ExpQuad(input_dim=X.shape[1],
                                    active_dims=np.arange(X.shape[1]),
                                    ls=ℓ)

    # GP function
    gp = pm.gp.Marginal(mean_func=μprior, cov_func=Σprior)
    y_ = gp.marginal_likelihood('y_', X=X, y=shared_data, noise=σn)

for i in range(60):
    for j in range(60):  # dimensions of the 'data' matrix above
        ID = i * 60 + j
        shared_data.set_value(data[i, j, :])
        with model:
            # Write each trace to its own text backend, keyed by ID
            db = pm.backends.Text('./backends/' + str(ID))
            pm.sample(draws=1000, chains=5, step=pm.Metropolis(),
                      random_seed=1, progressbar=False, trace=db)
            trace = pm.backends.text.load('./backends/' + str(ID))

            # Posterior mean of each variable, discarding the first half as burn-in
            point = {}
            for obj in trace[-1]:
                array = np.asarray([np.nanmean(np.nanmean(
                    trace.get_values(str(obj), burn=1000 // 2, combine=False),
                    axis=1))])
                point[str(obj)] = array

            μpost, Σpost = gp.predict(Xnew, point=point, pred_noise=True)
```
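
For anyone comparing the two approaches: the explicit conditional I removed would look roughly like the sketch below. This is only an illustration of the equivalence, not part of the pipeline above; `f_pred` and the sample count are placeholders I've introduced here, and `var_names` assumes a reasonably recent PyMC3 (older versions use `pm.sample_ppc`). `gp.predict` evaluates the same conditional analytically at a fixed point, which is why defining it by hand is unnecessary.

```python
# Sketch only: the explicit-conditional route that gp.predict makes redundant.
# 'f_pred' is a placeholder name used for illustration.
with model:
    f_pred = gp.conditional('f_pred', Xnew, pred_noise=True)
    pred_samples = pm.sample_posterior_predictive(
        trace, var_names=['f_pred'], samples=500)

# Monte Carlo estimate of the predictive mean; should agree with μpost
# from gp.predict up to sampling error.
μ_mc = pred_samples['f_pred'].mean(axis=0)
```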