I think long-form data is indeed the way to go here. I would do something like this:
cols = [f'Blue_player_{i}' for i in range(1, 6)]
new_df = None
for col in cols:
idx_col = df[[col + '_idx']]
kills_col = df[[col + '_kills']]
tmp = (idx_col
.join(kills_col)
.set_axis(axis=1, labels = ['player_id', 'kills'])
.reset_index()
.rename(columns={'index':'match_id'}))
new_df = tmp if new_df is None else pd.concat([new_df, tmp], ignore_index=True)
match_total = new_df.groupby('match_id').kills.sum().rename('match_total').astype('int')
new_df = pd.merge(new_df, match_total, left_on='match_id', right_index=True)
This results in the following structure:
match_id player_id kills match_total
0 0 41 3 4
1 1 42 2 19
2 2 42 2 8
3 3 41 2 18
4 4 41 0 19
From here I model each player’s contribution as Binomial (i.e. the fraction of match_total attributable to a given player), using a player effect and a match effect. Obviously you can bring hierarchy back into either or both of these, or get fancy and consider network effects between pairs or players or whatever:
match_idx, matches = pd.factorize(new_df.match_id)
player_idx, players = pd.factorize(new_df.player_id)
coords = {
'match': matches,
'player': players,
'obs_idx': new_df.index
}
with pm.Model(coords=coords) as hierarchical_model:
player_idx_pt = pm.Data("player_idx",
player_idx,
dims=['obs_idx'])
match_idx_pt = pm.Data("match_idx",
match_idx,
dims=['obs_idx'])
kills = pm.Data("kills", new_df.kills.values, dims=['obs_idx'])
total_kills = pm.Data('total_kills', new_df.match_total.values.ravel(), dims=['obs_idx'])
match_effect = pm.Normal('match_effect', mu=0, sigma=3, dims='match')
player_effect = pm.Normal('player_effect', mu=0, sigma=3, dims='player')
logit_p = match_effect[match_idx_pt] + player_effect[player_idx_pt]
p = pm.math.invlogit(logit_p)
y_obs = pm.Binomial('y_obs',
n=total_kills,
p=p,
observed=kills,
dims=['obs_idx'])
idata = pm.sample()
Let me know if I missed the important point when I removed the Multinomial