So using the code below, I am trying to model fire truck arrival times. As you can see from the loo-pit graphic, my model appears overdispersed so I am unsure what do do in response. Any guidance would be appreciated. I have also included the trace outputs as well.
model_data = incident_data[['service_time', 'alarms_per_hour', 'travel_time']].copy(deep=True)
st_max = model_data['service_time'].max() + (1/3600) #data is in hours
model_data['service_time'] = (model_data['service_time'] + (1/3600))/ st_max #normalize
tt_max = model_data['travel_time'].max() + (1/60) #data is in seconds
model_data['travel_time'] = (model_data['travel_time'] + (1/60))/ tt_max #normalize
coords = {
'alarms_per_hour': model_data['alarms_per_hour'], #this variable is the number of alarms that occured in the same hour as the incident whose service and travle time are noted for a particualr row in the data
'service_time': model_data['service_time'],
'travel_time': model_data['travel_time']
}
third_model = pm.Model(coords=coords)
with third_model:
#Intervals
interval_alpha = pm.distributions.transforms.Interval(lower=0, upper=10) #need to justify this
interval_beta = pm.distributions.transforms.Interval(lower=1, upper=3.5) #need to justify this
interval_gamma = pm.distributions.transforms.Interval(lower=.1, upper=.9) #assuming this to be a fractional exponent, .1 to .9 is a reasonable
#constants
A = pm.Data('A', 61.13) #miles square
n = pm.Data('n', 33) #number of engine companies
# Priors for unknown model parameters
α = pm.Normal('α', transform =interval_alpha) #pm.Uniform('α',0,50) #pm.HalfNormal('α',sigma=100) #not a lot known about this varibale as it would requier experimental data 0
β = pm.Normal('β',transform =interval_beta) #pm.Uniform('β',1,10) #pm.HalfNormal('β',sigma=100) #not a lot known about this varibale as it would requier experimental data 2.2
γ = pm.Normal('gamma',transform =interval_gamma) # has to be a fractional exponent according to Kolesar [1975] .3
λ = pm.Gamma('λ', alpha=1, beta=1)
alarms_obs = pm.Poisson('alarms_obs', mu= λ, observed = model_data['alarms_per_hour'])
#ES
mu = pm.Normal('mu', mu=0,sigma=1)
alpha_raw = pm.Normal("alpha_raw", mu=0, sigma=0.1)
alpha_w = pm.Deterministic("alpha_w", pt.exp(alpha_raw))
beta_n_w = pm.Deterministic("beta_n_w", pt.exp(mu / alpha_w))
beta_w = pm.Deterministic("beta_w", (beta_n_w *st_max )- (1/3600))
# Sampling distribution of observed service times
service_time_obs = pm.Weibull('service_time_obs',alpha=alpha_w, beta=beta_n_w, observed = model_data['service_time'])
ES = pm.Deterministic('ES', beta_w*pt.gamma(1 + 1/alpha_w))
# Expected value of travel_time
ET = pm.Deterministic('ET',α + β*(A/(n-λ*ES))**γ)
# Sampling distribution of observed travel times
ϕ = pm.HalfNormal('ϕ', sigma=1) #shape parameter for Wald distribution is likely to be less than 1 given the sharp peak seen in the travel time data
travel_time_obs = pm.Wald('travel_time_obs', mu=(ET+(1/60))/tt_max, phi=ϕ, observed=model_data['travel_time'])
#pm.Normal("Y_obs", mu=ET, sigma=sigma_y, observed=incident_data['travel_time'])
#idata = pm.sample_prior_predictive(draws=50)
idata2 = pm.sample(draws=4000, chains=4, nuts_sampler='nutpie')
with third_model:
idata_posterior = pm.sample_posterior_predictive(idata2, var_names=['travel_time_obs', 'service_time_obs', 'alarms_obs'],
return_inferencedata=True,
extend_inferencedata=True,
predictions=False)
with third_model:
ll = pm.compute_log_likelihood(idata_posterior, extend_inferencedata=True)