Hello,
I’m using az.compare
to compare some traces between three different models. I get the following user warning:
UserWarning: Estimated shape parameter of Pareto distribution is greater than 0.7 for one or more samples. You should consider using a more robust model, this is because importance sampling is less likely to work well if the marginal posterior and LOO posterior are very different. This is more likely to happen with a non-robust model and highly influential observations.
What does that actually mean? How do I make a model “more robust” exactly?
I’m happy with the model results but since it will eventually be put into production, I want to make sure I’m not missing something.
For context, the model is below:
with pm.Model(coords=coords_) as full_p_hierarchy_months:
#add data
t_ = pm.Data('t', time_idxs / T, mutable=True, dims=['obs_id'])
A_ = pm.Data('A', A, mutable=True, dims=['obs_idx', 'changepoints'])
s_ = pm.Data('s', s, mutable=True, dims=['changepoints'])
X_fourier_ = pm.Data('X_fourier', yearly_fourier, mutable=True, dims=['time', 'yearly_components'])
item_idx_ = pm.Data('item_idx', item_idxs, mutable = True, dims=['obs_id'])
time_idx_ = pm.Data('time_idx', time_idxs, mutable = True, dims=['obs_id'])
#set hierarchical intercept
intercept_mu = pm.Normal('intercept_mu', mu=7, sigma=2)
bl_intercept = make_next_level_hierarchy_variable(name='bl_intercept', mu=intercept_mu, alpha=2, beta=1, index_=business_line_idx, dims=['business_line'])
cat_intercept = make_next_level_hierarchy_variable(name='cat_intercept', mu=bl_intercept, alpha=2, beta=1, index_=category_idx, dims=['category'])
subcat_intercept = make_next_level_hierarchy_variable(name='subcat_intercept', mu=cat_intercept, alpha=2, beta=1, index_=sub_category_idx, dims=['sub_category'])
item_intercept = make_next_level_hierarchy_variable(name='item_intercept', mu=subcat_intercept, alpha=2, beta=1, index_=item_idxs, dims=['items'])
#set hierarchical slope
slope_mu = pm.Normal('slope_mu', mu=0, sigma=1)
bl_slope = make_next_level_hierarchy_variable(name='bl_slope', mu=slope_mu, alpha=2, beta=1, index_=business_line_idx, dims=['business_line'])
cat_slope = make_next_level_hierarchy_variable(name='cat_slope', mu=bl_slope, alpha=2, beta=1, index_=category_idx, dims=['category'])
subcat_slope = make_next_level_hierarchy_variable(name='subcat_slope', mu=cat_slope, alpha=2, beta=1, index_=sub_category_idx, dims=['sub_category'])
item_slope = make_next_level_hierarchy_variable(name='item_slope', mu=subcat_slope, alpha=2, beta=1, index_=item_idxs, dims=['items'])
#make changepoints
delta = make_noncentered_variable(name='delta', mu=0, sigma=1, alpha=2, beta=1/2, dims=['items', 'changepoints'])
intercept = pm.Deterministic('intercept', item_intercept[item_idx_] + ((-s_ * A_)[time_idx_, :] * delta[item_idx_]).sum(axis=1), dims=['obs_id'])
slope = pm.Deterministic('slope', item_slope[item_idx_] + (A_[time_idx_, :] * delta[item_idx_]).sum(axis=1), dims=['obs_id'])
#add seasonality
seasonal_mu = pm.Normal('seasonal_mu', mu=0, sigma=1)
bl_seasonal = make_next_level_hierarchy_variable(name='bl_seasonal', mu=seasonal_mu, alpha=2, beta=1, index_=business_line_idx, dims=['business_line','yearly_components'])
cat_seasonal = make_next_level_hierarchy_variable(name='cat_seasonal', mu=bl_seasonal, alpha=2, beta=1, index_=category_idx, dims=['category','yearly_components'])
subcat_betas = make_next_level_hierarchy_variable(name='subcat_betas', mu=cat_seasonal, alpha=2, beta=1, index_=sub_category_idx, dims=['sub_category', 'yearly_components'])
seasonal_betas = make_next_level_hierarchy_variable(name='seasonal_beta', mu=subcat_betas, alpha=2, beta=1, index_=item_idxs, dims=['items', 'yearly_components'])
seasonal = pm.Deterministic('seasonal', (X_fourier_[time_idxs] * seasonal_betas).sum(axis=1))
mu__ = pm.Deterministic('mu', (intercept + slope*t_ + seasonal[time_idx_]), dims='obs_id') # + months_beta[month_idxs])
#compute likelihood
make_likelihood(mu = mu__, beta = 1/30, observed = y)