How to make test set prediction with BART. Below is the code used.
import theano
import pymc3 as pm
from sklearn.datasets import load_boston*
from sklearn.model_selection import train_test_split*
from sklearn.metrics import mean_squared_error*
import numpy as np *
import arviz as az*
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(data['data'], data['target'], test_size=0.2)
X_train_shared = theano.shared(X_train)
y_train_shared = theano.shared(y_train)
with pm.Model() as model:
σ = pm.HalfNormal('σ', 1)
μ = pm.BART('μ', X_train_shared.get_value(), y_train_shared.get_value(), m=65)
y = pm.Normal('y', μ, σ, observed=y_train)
trace = pm.sample(2000, return_inferencedata=False)
ppc_train = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
pred_train = ppc_train['y'].mean(axis=0)
std_train = ppc_train['y'].std(axis=0)
So far so good. But when I try to use the model to make prediction on the test data, the returned prediction vector has the same length as the train data.
with model:
X_train_shared.set_value(X_test)
y_train_shared.set_value(y_test)
ppc_test = pm.sample_posterior_predictive(trace, samples=500, progressbar=False)
pred_test = ppc_test['y'].mean(axis=0)
std_test = ppc_test['y'].std(axis=0)
Any help to solve this problem would be greatly appreciated.