Hi all. I’ve been struggling for a while trying to adapt this model (GitHub - flaxter/gunshot-contagion: Replication files for Loeffler and Flaxman, "Is gun violence contagious?") to PyMC. The Stan code in that repo works very well:
functions {
real gaussian(real x, real lengthscale) {
return(1/(2*pi())*exp(-.5 * x^2/lengthscale^2)/lengthscale^2);
}
}
data {
int<lower=1> n;
int hour[n];
matrix[n,2] Space;
vector[n] Time; // NB: these must be sorted from smallest to largest!
real time_window;
real space_window;
vector[n] muST;
real muSTintegral;
}
transformed data {
matrix[n,n] timeD;
matrix[n,n] spaceD;
vector[n] timeD2;
for(i in 1:n) {
for(j in 1:n) {
timeD[i,j] = -(Time[i] - Time[j]);
spaceD[i,j] = distance(Space[i], Space[j]);
}
timeD2[i] = -(time_window - Time[i]);
}
}
parameters {
real<lower=0> lengthscaleS;
real<lower=0> lengthscaleT;
real<lower=0> a;
real<lower=0> mu;
real<lower=0> mu0;
}
transformed parameters {
vector[n] ll;
real lp;
ll = mu0 + muST * mu;
for(i in 2:n) { // todo: vectorize these calculations
for(j in 1:(i-1)) {
ll[i] = ll[i] + (a * lengthscaleT * exp(timeD[i,j] * lengthscaleT) * gaussian(spaceD[i,j],lengthscaleS));
}
}
lp = sum(log(ll)) - muSTintegral * mu - mu0 * space_window * time_window +
a * (sum(exp(timeD2*lengthscaleT)) - n);
}
model {
increment_log_prob(lp);
lengthscaleS ~ normal(0,10); #sigma
lengthscaleT ~ normal(0,10); #omega
a ~ normal(0,10); #theta
mu0 ~ normal(0,1); #m0
mu ~ normal(0,1);
}
generated quantities {
vector[n] background;
real lengthscale_minutes;
lengthscale_minutes = 24*60/lengthscaleT;
background = (mu0 + muST *mu ) ./ ll;
}
Running in RStan as in the repo, gives this output:
Inference for Stan model: anon_model.
4 chains, each with iter=1000; warmup=500; thin=1;
post-warmup draws per chain=500, total post-warmup draws=2000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
lengthscaleS 0.12 0.0 0.03 0.08 0.10 0.12 0.14 0.19 1750 1
lengthscaleT 24.00 0.1 5.05 14.96 20.38 23.83 27.17 34.92 2791 1
a 0.10 0.0 0.02 0.06 0.09 0.10 0.12 0.15 2165 1
mu 0.90 0.0 0.06 0.78 0.86 0.90 0.94 1.02 2068 1
However, with my clumsy attempt using PyMC I get this non-converging output:
T = xyt['T'].values - min(xyt['T'].values) #I don't quite understand the reason for this
timeD = np.array([T - T[i] for i in range(T.shape[0])])
timeD2 = time_window - T
#Gaussian kernel function
def gk(x, ls):
return (0.5*np.pi)*at.exp((-0.5 * x**2)/ls**2)/ls**2
with pm.Model() as mod:
lengthscaleS = pm.TruncatedNormal("sigma", 0, 10, lower=0) #length scale space
lengthscaleT = pm.TruncatedNormal("omega", 0, 10, lower=0) #length scale time
a = pm.TruncatedNormal("theta", 0, 10, lower=0) #excitation parameter
gs = gk(spaceD, lengthscaleS) #gaussian kernel
mu0 = pm.TruncatedNormal("mu0", 0, 1, lower=0)
mu = pm.TruncatedNormal("mu", 0, 1, lower=0)
lam0 = mu0 + mu*muhat
lam = lam0[1:] + (a * lengthscaleT * at.exp(-timeD[1:,:250] * lengthscaleT)) * gs[1:,:250]
muSTintegral = len(xyt)
def lp(name, lam, a, lengthscaleT, mu, mu0, time, integral):
l = [at.sum(at.log(lam)) - integral * mu - mu0 * space_window * time_window +
a * at.sum(at.exp(-time*lengthscaleT)) - integral]
return l[0]
y = pm.Potential("y", lp("ylp", lam, a, lengthscaleT, mu, mu0, timeD2, muSTintegral))
with mod:
idata = pm.sample(1000, tune=1000, chains=4, cores=12)#, target_accept=0.95)
pos = idata.stack(sample = ['chain', 'draw']).posterior
summ = az.summary(idata, var_names=["theta", "mu", "omega", "sigma"])
print(summ)
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 12 jobs)
NUTS: [lengthscaleS, lengthscaleT, a, mu0, mu]
|██████████| 100.00% [8000/8000 05:01<00:00 Sampling 4 chains, 113 divergences]Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 326 seconds.
mean sd hdi_3% hdi_97% ... mcse_sd ess_bulk ess_tail r_hat
lengthscaleS 1.190 0.521 0.557 1.905 ... 0.198 5.0 15.0 3.19
lengthscaleT 6.691 1.820 3.847 10.146 ... 0.649 5.0 21.0 2.50
a 0.765 0.307 0.401 1.241 ... 0.117 5.0 24.0 2.84
mu 1.512 0.810 0.431 2.466 ... 0.309 5.0 16.0 3.08
[4 rows x 9 columns]
I have also attempted a version of the model looping over the parameters, as in the Stan code, like this:
with pm.Model() as mod:
lengthscaleS = pm.TruncatedNormal("lengthscaleS", 0, 10, lower=0) #length scale space
lengthscaleT = pm.TruncatedNormal("lengthscaleT", 0, 10, lower=0) #length scale time
a = pm.TruncatedNormal("a", 0, 10, lower=0) #excitation parameter
gs = gk(spaceD, lengthscaleS) #gaussian kernel
mu0 = pm.TruncatedNormal("mu0", 0, 1, lower=0)
mu = pm.TruncatedNormal("mu", 0, 1, lower=0)
lam0 = mu0 + mu*muhat
lam = at.zeros(n-1)
for i in tqdm(range(1,n)):
ll = at.zeros(n-1)
for j in range(i-1):
l = lam0[i] + (a * lengthscaleT * at.exp(-timeD[i,j] * lengthscaleT )) * gk(spaceD[i,j], lengthscaleS)
ll = at.inc_subtensor(ll[i], l)
lam = at.inc_subtensor(lam[i], at.sum(ll))
muSTintegral = len(xyt)
def lp(name, lam, a, lengthscaleT, mu, mu0, time, integral):
l = [at.sum(at.log(lam)) - integral * mu - mu0 * space_window * time_window +
a * at.sum(at.exp(-time*lengthscaleT)) - integral]
return l[0]
y = pm.Potential("y", lp("ylp", lam, a, lengthscaleT, mu, mu0, timeD2, muSTintegral))
However the model doesn’t start sampling, as pytensor seems to get stuck with the evaluation of the lam parameter, for some reason I don’t understand. When I try lam.eval() it also gets stuck.
All in all, I must be doing something very wrongly, but I cannot figure out what.
I will be very thankful for any help or advice on how to properly translate the Stan model to PyMC. Many thanks in advance.