I’m trying to implement a BART model with a CAR prior as a spatial model. To test it, I’ve filtered a larger dataset down to ~1600 records. I’m working from the Conditional Autoregressive (CAR) Models for Spatial Data example in the PyMC example gallery and running in a clean environment on an HPC node. The final model will ideally also have a hierarchical structure, since I have two levels of spatial data; I’m testing that part separately and posted a question in response to an existing topic here.
The aspatial model works (runtime ~6 minutes):
with pm.Model() as independent_model:
    sigma = pm.HalfCauchy("sigma", beta=10)
    beta0 = pm.Normal("beta0", mu=0.0, tau=1.0e-5)
    beta1 = pm.Normal("beta1", mu=0.0, tau=1.0e-5)
    # variance parameter of the independent random effect
    tau_ind = pm.Gamma("tau_ind", alpha=3.2761, beta=1.81)
    # independent random effect
    theta = pm.Normal("theta", mu=0, tau=tau_ind)
    # mean of the likelihood
    mu = pm.Deterministic("mu", pt.exp(beta0 + beta1 * X + theta))
    # likelihood of the observed data
    y_i = pm.Gamma("y_i", mu=mu, sigma=sigma, observed=y)
    # saving the residual between the observation and the response for the CBG
    res = pm.Deterministic("res", y - y_i)
    # sampling the model
    independent_idata = pm.sample(2000, tune=2000)
The spatial/CAR model with no BART also seems to run okay (runtime ~2 hours, after some updates to the priors and checking that the data is scaled well; see the scaling sketch after the model):
n_obs = epa.shape[0]

with pm.Model() as car_model:
    sigma = pm.HalfCauchy("sigma", beta=10)
    beta0 = pm.Normal("beta0", mu=0.0, sigma=10)
    beta1 = pm.Normal("beta1", mu=0.0, sigma=10)
    # variance parameter of the independent random effect
    tau_ind = pm.Gamma("tau_ind", alpha=3.2761, beta=1.81)
    # variance parameter of the spatially dependent random effects
    tau_spat = pm.Gamma("tau_spat", alpha=10.0, beta=1.0)
    # prior for alpha
    alpha = pm.Beta("alpha", alpha=2, beta=2)
    # area-specific model parameters
    # independent random effect
    theta = pm.Normal("theta", mu=0, tau=tau_ind)
    # spatially dependent random effect
    phi = pm.CAR("phi", mu=np.zeros(n_obs), tau=tau_spat, alpha=alpha, W=adj_matrix)
    # mean of the likelihood
    mu = pm.Deterministic("mu", pt.exp(beta0 + beta1 * X + theta + phi))
    # likelihood of the observed data
    y_i = pm.Gamma("y_i", mu=mu, sigma=sigma, observed=y)
    # sampling the model
    car_idata = pm.sample(200, tune=200)
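By "checking that the data is scaled well" I mean roughly the following (a rough sketch, assuming X and y are 1-D NumPy arrays):

import numpy as np

# standardize the covariate so beta1 acts on a unit-scale predictor
X = (X - X.mean()) / X.std()

# quick sanity check on the response scale before exponentiating in mu
print(f"X: mean={X.mean():.2f}, sd={X.std():.2f}")
print(f"y: min={y.min():.2f}, max={y.max():.2f}, mean={y.mean():.2f}")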
However, I don’t quite have the BART implementation working. I’ve tried two sampling approaches. First, using nutpie, which gives:
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Failed in nopython mode pipeline (step: native lowering)
<numba.core.base.OverloadSelector object at 0x147b9f48a8a0>, (csr_matrix,)
During: lowering "$454load_global.65 = global(sparse_constant: <Compressed Sparse Row sparse matrix of dtype 'float64'
with 8754 stored elements and shape (1633, 1633)>
My understanding of the error is that numba doesn’t always play well with sparse matrices. The adjacency matrix isn’t huge in this example, but it will be much larger for the full dataset.
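Since the lowering error points at a csr_matrix constant, one workaround I’m considering is passing the adjacency matrix to pm.CAR as a dense array rather than a scipy sparse matrix. A minimal sketch, assuming adj_matrix is a scipy.sparse CSR matrix (this trades memory for compatibility and probably won’t scale to the full dataset):

import numpy as np
import scipy.sparse as sp

# adj_matrix is a 1633 x 1633 CSR matrix in this test case
if sp.issparse(adj_matrix):
    adj_dense = adj_matrix.toarray()  # dense float64 array, ~1633**2 * 8 bytes
else:
    adj_dense = np.asarray(adj_matrix)

# then build the CAR term from the dense matrix instead:
# phi = pm.CAR("phi", mu=np.zeros(n_obs), tau=tau_spat, alpha=alpha, W=adj_dense)

I don’t know whether this actually avoids the numba issue, though.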
Second, using the standard PyMC sampler gives me:
File ~/.conda/envs/pymc_env/lib/python3.12/site-packages/pymc/sampling/mcmc.py:713, in sample(draws, tune, chains, cores, random_seed, progressbar, progressbar_theme, step, var_names, nuts_sampler, initvals, init, jitter_max_retries, n_init, trace, discard_tuned_samples, compute_convergence_checks, keep_warning_stat, return_inferencedata, idata_kwargs, nuts_sampler_kwargs, callback, mp_ctx, blas_cores, model, **kwargs)
710 auto_nuts_init = False
712 initial_points = None
--> 713 step = assign_step_methods(model, step, methods=pm.STEP_METHODS, step_kwargs=kwargs)
715 if nuts_sampler != "pymc":
716 if not isinstance(step, NUTS):
File ~/.conda/envs/pymc_env/lib/python3.12/site-packages/pymc/sampling/mcmc.py:237, in assign_step_methods(model, step, methods, step_kwargs)
229 selected = max(
230 methods_list,
231 key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence( # type: ignore
232 var, has_gradient
233 ),
234 )
235 selected_steps.setdefault(selected, []).append(var)
--> 237 return instantiate_steppers(model, steps, selected_steps, step_kwargs)
File ~/.conda/envs/pymc_env/lib/python3.12/site-packages/pymc/sampling/mcmc.py:138, in instantiate_steppers(model, steps, selected_steps, step_kwargs)
136 args = step_kwargs.get(name, {})
137 used_keys.add(name)
--> 138 step = step_class(vars=vars, model=model, **args)
139 steps.append(step)
141 unused_args = set(step_kwargs).difference(used_keys)
File ~/.conda/envs/pymc_env/lib/python3.12/site-packages/pymc_bart/pgbart.py:156, in PGBART.__init__(self, vars, num_particles, batch, model)
153 self.leaves_shape = self.shape if not self.bart.separate_trees else 1
155 if self.bart.split_prior.size == 0:
--> 156 self.alpha_vec = np.ones(self.X.shape[1])
157 else:
158 self.alpha_vec = self.bart.split_prior
IndexError: tuple index out of range
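If I’m reading this right, the IndexError comes from np.ones(self.X.shape[1]), which suggests PGBART is seeing my X as one-dimensional (X.shape has no second entry). A minimal sketch of the check I plan to try, assuming X is currently a 1-D NumPy array or pandas Series holding a single covariate:

import numpy as np

X = np.asarray(X)
if X.ndim == 1:
    # pmb.BART expects a 2-D design matrix of shape (n_obs, n_covariates)
    X = X.reshape(-1, 1)

print(X.shape)  # should be (1633, 1) for this test dataset

If that is the problem, I’d presumably also need to rethink how w[1] * X broadcasts in mu below.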
This is the model with BART:
with pm.Model() as bart_car_model:
    # beta, variance parameter of the independent random effect. beta0 incorporated with theta
    w = pmb.BART("w", X=X, Y=y, m=10, shape=(3, n_obs))
    # variance parameter of the spatially dependent random effects
    tau_spat = pm.Gamma("tau_spat", alpha=1.0, beta=1.0)
    # prior for alpha
    alpha = pm.Beta("alpha", alpha=2, beta=2)
    # area-specific model parameters
    # spatially dependent random effect
    phi = pm.CAR("phi", mu=np.zeros(n_obs), tau=tau_spat, alpha=alpha, W=adj_matrix)
    # mean of the likelihood
    mu = pm.Deterministic("mu", pt.exp(w[0] + w[1] * X + phi))
    # likelihood of the observed data
    y_i = pm.Gamma("y_i", mu=mu, sigma=w[2], observed=y)
    # sampling the model
    car_idata_bart = pm.sample()