I’m sorry, you’re totally right: I forgot that the file is not in the repo. I generated and saved it by running these lines from the R code:
# Script setup: attach packages, configure rstan, load project helpers and
# data, then parse the command-line options into plain variables.
library(spatstat)
library(rstan)
library(docopt)

# auto_write asks rstan to save compiled Stan programs so they are not
# recompiled on every run; mc.cores is set to the number of detected cores.
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

# Project helpers and data. Presumably these provide the `data` and `xyt`
# objects used later in the script -- confirm against utils.r and the .rdata.
source("utils.r")
load("WashingtonDC-agls.rdata")

# Command-line interface.
# docopt only recognises a `[default: ...]` when it is part of the option's
# description, and the description must be separated from the option by at
# least two spaces. The usage pattern and the option list are also kept as
# separate, indented sections divided by a blank line.
doc <- "Usage:
  model-mismatch.r [--start start] [--end end] [--bwSpace bwSpace] [--bwTime bwTime] [--nonseparable] [--duplicates] [--model model]

Options:
  --start start        [default: 2010-01-01]
  --end end            [default: 2010-01-31]
  --bwSpace bw-space   [default: 1.609]
  --nonseparable
  --duplicates
  --bwTime bw-time     [default: 14]
  --model model        [default: hawkes-model.stan]
"
opts <- docopt(doc)

# Typed copies of the options. `opts$model` is not read in this section.
start_date <- as.Date(opts$start)
end_date <- as.Date(opts$end)
bw_space <- as.numeric(opts$bwSpace)
bw_time <- as.numeric(opts$bwTime)

# Both flags are "opt-out": passing --nonseparable turns `separable` off and
# passing --duplicates turns duplicate removal off.
separable <- !opts$nonseparable
remove.duplicates <- !opts$duplicates
## Preprocess the data based on the command line arguments

# Keep events dated in [start_date, end_date) whose holiday flag is FALSE.
# which() also drops rows where the condition evaluates to NA.
# NOTE(review): indexing `xyt` with row numbers computed from `data` assumes
# the two objects are row-aligned -- confirm against how they are built.
keep <- which(
  data$date >= start_date & data$date < end_date & data$holiday == FALSE
)
xyt <- xyt[keep, ]
xy <- subset(xyt, select = c("X", "Y"))
nrow(xyt)

if (remove.duplicates) {
  # Pairwise distances on column 3 (presumably time) and columns 1:2
  # (presumably X/Y) -- column meaning is assumed, confirm against `xyt`.
  time_dist <- dist(xyt[, 3])
  space_dist <- dist(xyt[, 1:2])
  time_mat <- as.matrix(time_dist)
  space_mat <- as.matrix(space_dist)

  # igraph is attached (not only namespaced) so that any later code relying
  # on it being on the search path keeps working.
  library(igraph)

  # Link two events when they are closer than .1 in space and at most .5
  # apart in time; the lower-triangle mask keeps one edge per pair and
  # excludes the diagonal.
  near_duplicate <- space_mat < .1 & time_mat <= .5 & lower.tri(space_mat)
  ig <- igraph::graph_from_adjacency_matrix(near_duplicate)
  n.orig <- nrow(xyt)

  # Events in the same connected component count as duplicates of one
  # another; only the first event of each component is kept.
  clust <- igraph::components(ig)$membership
  keep <- !duplicated(clust)
  xyt <- xyt[keep, ]
  xy <- xy[keep, ]
}

# Rescale the T column by 1/24 (presumably hours to days -- confirm units).
# This runs after duplicate removal, so the .5 time threshold above applies
# to the unscaled values.
xyt$T <- xyt$T / 24
# Save the preprocessed events to CSV in the working directory (row names
# are included, as is the write.csv default).
write.csv(x = xyt, file = "preprocessed_xyt.csv")
Here’s the output file:
preprocessed_xyt.csv (9.0 KB)
Thank you for spotting the typo! I always have great difficulty with typos.
Just in case, I reran with the typo fixed, but the problem persists (as expected).