Extremely new to modeling and pymc. But I’m trying to blend different categories to predict mma fight outcomes (decision, knockout, submission). Things seems to work pretty well when Im using Dirichlet to describe characteristics like “style” and “camp.” But now I want to add a feature for the fighters “record” . Their records are normalized over their fights and stored in a (,3) arrary (so a fighter with 2 decisions, 2 kos, and 1 dec is : [.4, .4, .2]. I just keep getting infinitesimally small log_probs for initial values. any help (and advice elsewhere would be much much appreciated)
with pm.Model(coords=coords) as hierarchical_model:
#Data Inputs
weightclasses = pm.MutableData("weightclasses", weightclass_idx)
a_camp = pm.MutableData("a fight camps", fighter_a_Fighter_Camp_idx)
b_camp = pm.MutableData("b fight camps", fighter_b_Fighter_Camp_idx)
a_style = pm.MutableData("a fight styles", fighter_a_Primary_Fight_Style_idx)
b_style = pm.MutableData("b fight styles", fighter_b_Primary_Fight_Style_idx)
a_record = pm.MutableData('a records', fighter_a_record_clean)
b_record = pm.MutableData('b records', fighter_b_record_clean)
#hyperpriors
weightclass_hyperprior = pm.Dirichlet('weightclass_hyperprior', a=np.array([1, 1, 1]))
camp_hyperprior = pm.Dirichlet('camp_hyperprior', a=np.array([1, 1, 1]))
style_hyperprior = pm.Dirichlet('style_hyperprior', a=np.array([1, 1, 1]))
record_hyperprior = pm.Dirichlet('record_hyperprior', a=np.array([1, 1, 1]))
# Priors for weight class-level features
prior_weightclass = pm.Dirichlet("p_outcome_prior_weightclass", a=weightclass_hyperprior, dims=('weightclass', 'fight outcomes'))
prior_a_camp = pm.Dirichlet("p_outcome_prior_a_camp", a=camp_hyperprior, dims=('fight camps', 'fight outcomes'))
prior_b_camp = pm.Dirichlet("p_outcome_prior_b_camp", a=camp_hyperprior, dims=('fight camps', 'fight outcomes'))
prior_a_style = pm.Dirichlet("p_outcome_prior_a_style", a=style_hyperprior, dims=('fight styles', 'fight outcomes'))
prior_b_style = pm.Dirichlet("p_outcome_prior_b_style", a=style_hyperprior, dims=('fight styles', 'fight outcomes'))
prior_a_record = pm.Dirichlet("p_outcome_prior_a_record", a=record_hyperprior, dims=('records', 'fight outcomes'))
prior_b_record = pm.Dirichlet("p_outcome_prior_b_record", a=record_hyperprior, dims=('records', 'fight outcomes'))
prior_a_record_coeff = pm.Normal('a_record_coeff', mu=0, sigma=5, shape=(3,))
prior_b_record_coeff = pm.Normal('b_record_coeff', mu=0, sigma=5, shape=(3,))
# Select the probabilities based on weight class
outcome_weightclass = prior_weightclass[weightclasses]
outcome_a_camp = prior_a_camp[a_camp]
outcome_b_camp = prior_b_camp[b_camp]
outcome_a_style = prior_a_style[a_style]
outcome_b_style = prior_b_style[b_style]
outcome_a_record = pm.math.dot(a_record, prior_a_record_coeff)
outcome_b_record = pm.math.dot(b_record, prior_b_record_coeff)
log_p_outcome_weightclass = pm.math.log(outcome_weightclass)
log_p_outcome_a_camp = pm.math.log(outcome_a_camp)
log_p_outcome_b_camp = pm.math.log(outcome_b_camp)
log_p_outcome_a_style = pm.math.log(outcome_a_style)
log_p_outcome_b_style = pm.math.log(outcome_b_style)
log_p_outcome_a_record = pm.math.log(a_record)
log_p_outcome_b_record = pm.math.log(b_record)
# Combine log-probabilities
log_combined_prob = (
log_p_outcome_weightclass
+ log_p_outcome_a_camp
+ log_p_outcome_b_camp
+ log_p_outcome_a_style
+ log_p_outcome_b_style
+ log_p_outcome_a_record
+ log_p_outcome_b_record
)
# Convert back to probabilities and normalize
combined_prob = pm.math.softmax(log_combined_prob, axis=-1)
# Likelihood for the first model
obs = pm.Categorical("obs", p=combined_prob, observed=fight_outcome_idx)
trace = pm.sample()