Hello all,
Im trying to implement a spike and slab prior for the beta coefficients in logistic regression.
This works, but is very slow:
with pm.Model() as uniform_model:
alfa = pm.Normal(‘alfa’, mu=0, sd=3)
beta = beta_spike_slab(X.shape[1], 0.001) #spike/slab
X_shared = pm.Data(‘x_shared’, X)
mu = alfa + pm.math.dot(X_shared,beta)
θ = pm.Deterministic('θ', 1 / (1 + pm.math.exp(-mu)))
y_pred = pm.Bernoulli('y_pred', p=θ, observed=y)
trace = pm.sample(1000,tune=1000, cores=1, target_accept=0.99)
def beta_spike_slab(shape,spike):
inclusion_prop = 0.05
beta_spike = pm.Normal(‘beta_spike’, 0, spike, shape=shape)
tau2 = pm.HalfNormal('tau2', 10, shape=shape)
beta_tilde = pm.Normal('beta_tilde', 0 ,1, shape=shape)
beta_slab = pm.Deterministic('beta_slab', 0 + tau2 * beta_tilde)
gamma = pm.Bernoulli('gamma', inclusion_prop, shape=shape)
beta_spike_slab = pm.Deterministic('beta_spike_slab',(beta_spike * (1-gamma)) + ((beta_slab * gamma)))
return beta_spike_slab
I understand that it is slow, because you need to marginalize over the bernoulli distribution (I read some topics on here and the docs https://docs.pymc.io/notebooks/marginalized_gaussian_mixture_model.html, which were pretty clear I thought).
I tried:
def beta_spike_slab(shape, spike):
spike_prior = pm.Normal.dist(mu=0, sigma=0.001)
slab_prior = pm.Normal.dist(mu=0, sigma=10)w = pm.Beta("w", alpha=0.5, beta=0.5, shape=shape) weights = pm.Deterministic("weights", pm.math.stack([1.-w, w], axis=1)) beta_spike_slab = pm.Mixture("beta_spike_slab", w=weights, comp_dists=[spike_prior, slab_prior], shape=shape) return beta_spike_slab
…but I keep getting divergences, non convergence etc etc.
I tried varying the sigma of the normal distributions, but no luck.
What am I doing wrong?
Many thanks!