Sure, here you go!
Here, CV is a “tidy” dataframe where each row is one metric on one particular test/train subset, with columns for Sample (‘Test’ or ‘Train’, i.e. evaluated on the test set or the training set), Metric (‘RMSE’/‘NLPD’), Score (the actual value), Model (the name of several different model variations I was comparing), SubsetSize (size of the training set), and Seed (the random seed used to define the specific test/train split). Note that this is quite large: each model has ~80 different SubsetSizes, ~100 different Seeds, and two metrics evaluated on each of the two sets. I did the plotting below on my laptop, but I ran all those tests on a computing cluster. Cross-validation is naturally parallelizable, so if you have access to a computing cluster that will make your life much easier!
# Per-model plot colors drawn from seaborn's "Set2" palette.
# 'Random' is a neutral matplotlib gray; every other model gets one palette
# slot, with each RGB channel scaled by `shade` (1 = palette color as-is).
colors = sns.color_palette("Set2")
shade = 1
color = dict(
    Random='0.5',
    Average=[c * shade for c in colors[0]],
    Coregion=[c * shade for c in colors[1]],
    Spatial=[c * shade for c in colors[2]],
    GLM=[c * shade for c in colors[3]],
    Individual=[c * shade for c in colors[-2]],
)
#palette = sns.light_palette(color['Spatial'])
def lq(x):
    """Lower bound of the central 95% interval: the 2.5th percentile of x."""
    return np.percentile(x, 2.5)
def uq(x):
    """Upper bound of the central 95% interval: the 97.5th percentile of x."""
    return np.percentile(x, 97.5)
# For each (sample, metric) pair, draw one figure: a large "Major" panel on
# the left overlaying every model's median score vs. SubsetSize, and a 2x2
# grid of small per-model panels on the right, each showing that model's
# median with its central-95% (2.5-97.5 percentile) band across Seeds.
for sample in ['Train','Test']:
    for metric in ['RMSE','NLPD']:
        # Aggregate Score across Seeds per (Model, SubsetSize): median plus
        # lq/uq percentile bounds. Score<10 presumably drops divergent-fit
        # outliers -- confirm the cutoff is intended for both metrics.
        # NOTE(review): passing np.median to .agg is deprecated in recent
        # pandas; the string 'median' is the supported spelling.
        grouped = cv[(cv.Sample==sample)&(cv.Metric==metric)&(cv.Score<10)].groupby(['Model','SubsetSize'])['Score'].agg([np.median, lq, uq]).reset_index()
        fig = plt.figure(tight_layout=True, figsize=(8,6))
        # Left half of the figure is the combined panel; the right half is a
        # 2x2 grid of per-model panels keyed by model name.
        axs = {
            'Major' : fig.add_subplot(1,2,1),
            'Random' : fig.add_subplot(2,4,3),
            'Average' : fig.add_subplot(2,4,4),
            'Coregion' : fig.add_subplot(2,4,7),
            'Spatial' : fig.add_subplot(2,4,8),}
        # Fixed y-limits per metric so panels are comparable across figures.
        yl = {'NLPD':[-0.5,7],'RMSE':[0,2.]}[metric]
        # assumes major_models matches the small-panel keys above
        # ('Random','Average','Coregion','Spatial'); any other entry would
        # KeyError at axs[model] below -- TODO confirm.
        for model in major_models:
            this = grouped[grouped.Model==model]
            # Thick median curve for this model on the combined panel.
            axs['Major'].plot(this.SubsetSize.values,this['median'].values, color=color[model], lw=3)
            # Ghost this model's median in light gray on every OTHER small
            # panel (zorder=-1 puts it behind that panel's own curve).
            for model_,ax_ in axs.items():
                if model_ not in [model,'Major']:
                    ax_.plot(this.SubsetSize.values,this['median'].values, color='0.8', zorder=-1)
            # This model's own small panel: median line over its 95% band.
            ax = axs[model]
            ax.plot(this.SubsetSize.values,this['median'].values, color=color[model], zorder=1)
            ax.fill_between(this.SubsetSize.values,this['lq'].values,this['uq'].values, color=color[model], alpha=0.5, zorder=0)
            # Link each small panel's x and y axes to the Major panel.
            # NOTE(review): get_shared_*_axes().join() is deprecated and was
            # removed in matplotlib 3.8 -- use ax.sharex(...)/ax.sharey(...).
            axs['Major'].get_shared_x_axes().join(axs['Major'], ax)
            axs['Major'].get_shared_y_axes().join(axs['Major'], ax)
            # Small panels keep tick marks but hide labels; the Major panel
            # carries the axis labels for the whole figure.
            ax.set_yticklabels('')
            ax.set_xticklabels('')
            ax.set_xticks(np.arange(0,81,20))
            ax.set_title(model, color=color[model])
            ax.set_ylim(yl)
        axs['Major'].set_xlabel('Subset Size')
        axs['Major'].set_ylabel(metric)
        axs['Major'].set_title(f'{sample}ing Set')  # 'Training Set' / 'Testing Set'
        axs['Major'].set_xticks(np.arange(0,81,20))
        # assumes savemyfig is a local helper that saves the current figure
        # under the given name -- not shown in this chunk.
        savemyfig(f'Cross-validation major model comparison - {sample} {metric}')