Hi everyone,
I’m currently working on validating the posterior distribution of a SEIRS model using the az.plot_ppc
function from the ArviZ library. However, when I generate the posterior predictive checks, the plot only shows a single straight line, which is not what I expected.(Everything else works properly) .
Just like this(I only sampled 90 times for showing what it like, but the same as more times):
Typically, the az.plot_ppc function should display multiple lines representing the distribution of simulated data from the posterior, allowing me to compare it with the observed data. A single straight line might indicate that there’s an issue with the model, the data, or how the posterior predictive samples are being generated. When I tryed the example code (simple linear regression model), it works. But when I moved it into this Non-linear Differential Equations, it failed to work properly.
Here is the code:
#the former is a class for processing data, I moved it out in case of too long
class SERIS_model():
def __init__(self,region,data) -> None:
self.covid_dates = data['covid_dates']
self.covid_cases = data['covid_cases']
self.influenza_dates = data['influenza_dates']
self.influenza_cases = data['influenza_cases']
self.parameter = {
'sigma_1' : 0.15,
'sigma_2' : 0.5,
'gamma_1' : 1/7,
'gamma_2' : 0.2,
'gamma_3' : 0.1,
'theta1' : 0.001,
'theta2' : 0.001,
'xi' : 1/365 ,
}
#Crowd proportion
self.init_y = [
1 - 0.00016177807486631015 - 5.922459893048128e-07 - 0.38,
0,
0,
0,
0,
0.00016177807486631015,
5.922459893048128e-07,
0,
0,
0,
0,
0,
0,
0,
0.38
]
self.samples_params = {
'n_samples' : 200,
'n_tune' : 100,
'cores' : 6,
}
self.region_population = {
'EURO' : 744000000,
'AMRO' : 1018000000,
'AFRO' : 1216000000,
'SEARO' : 1984000000,
'WPRO' : 1650000000,
'EMRO' : 654000000,
}
self.region = region
# SEIRS model
def SEIRS_non_normalized(self, y, t, p):
'''''''
Need to keep it a secret here
'''''''
result = [dSdt, dE1dt, dE2dt, dI1dt, dI2dt, dS12dt, dS21dt, dE12dt, dE21dt, dI12dt, dI21dt, dRdt, dE120dt, dE210dt, dI0dt]
return result
def setup_SEIRS_model(self):
seirs_model = pm.ode.DifferentialEquation(
func=self.SEIRS_non_normalized,
times=np.arange(0,max(len(self.covid_dates), len(self.influenza_dates)),1),
n_states=15,
n_theta=4,
t0=0,
)
return seirs_model
def run_SEIRS_model(self, seirs_model):
# model
with pm.Model() as self.model:
# Priors for unknown model parameters
beta1 = pm.Normal('beta1', 0.5 ,1)
beta2 = pm.Normal('beta2', 0.5 ,1)
delta1 = pm.Normal('delta1', 0.5 ,1)
delta2 = pm.Normal('delta2', 0.5 ,1)
res = seirs_model(y0=self.init_y, theta=[beta1, beta2, delta1, delta2])
covid_new = self.parameter['sigma_1']*(res[:,1] + res[:,8] + res[:,14])
influenza_new = self.parameter['sigma_2']*(res[:,2] + res[:,7] + res[:,13])
self.covid_relative_cases = np.array(self.covid_cases)/self.region_population[self.region]
self.influenza_relative_cases = np.array(self.influenza_cases)/self.region_population[self.region]
sigma = pm.HalfCauchy('sigma', 1, shape=1)
covid_obs = pm.StudentT('covid_obs',nu=10, mu=(covid_new), sigma=sigma, observed=self.covid_relative_cases) #1000
influenza_obs = pm.StudentT('influenza_obs',nu=10, mu=(influenza_new), sigma=sigma, observed=self.influenza_relative_cases)
trace = pm.sample_prior_predictive(50)
trace.extend(pm.sample(self.samples_params['n_samples'], tune=self.samples_params['n_tune'], cores=self.samples_params['cores'], target_accept=0.95)) #Metropolis
return trace
def analysis(self, trace):
# result
az.plot_trace(trace)
# print results
print(az.summary(trace))
az.plot_forest(trace, r_hat=True)
az.plot_posterior(trace)
with self.model:
check = pm.sample_posterior_predictive(trace,extend_inferencedata=True)
az.plot_ppc(check)
plt.show()
if __name__ == '__main__':
region = 'EURO'
data_visualizer = DiseaseDataVisualizer('WHO-COVID-19-global-data6.30.csv', 'VIW_FNT6.30.csv')
data = data_visualizer.get_region_data(region)
model = SERIS_model(region,data)
seirs_model = model.setup_SEIRS_model()
trace = model.run_SEIRS_model(seirs_model)
model.analysis(trace)
Has anyone encountered a similar problem? I searched almost all the posts and didn’t get the answer. I wanted could this be due to the model not converging properly, or could there be an issue with the data passed to az.plot_ppc? If this function can’t be used here, how can I check this model? Any insights or suggestions on how to troubleshoot this would be greatly appreciated!
And other one more question is whether I could use
covid_new = self.parameter['sigma_1']*(res[:,1] + res[:,8] + res[:,14])
influenza_new = self.parameter['sigma_2']*(res[:,2] + res[:,7] + res[:,13])
this kind of methods to calculate the I_new(this two)? Is it permitted in pymc(I mean can I add them together straightly)?
Thanks in advance for your help!