Hello. I’m pretty new to PyMC and I would like your help about setting up the model for the following problem:
I want to model the time taken by formula 1 drivers to finish the race. I’m not interested in the modeling part of this, at the moment I’m just considering the distance as a factor, but the aim of this topic is that I would like to get your help about building the model architecture, such that, the prior for the same drivers in race B are the posterior of the race A. Since I’m pretty new it’s hard for me to setup the model architecture properly and I’m looking for a very basic implementation that I can start with and improve in the future once the architecture has been created properly.
I attach you my sample code.
import pymc as pm
import numpy as np
class F1Model:
def __init__(self, times, distances, driver_ids, previous_posterior=None):
"""
Parameters:
times (array-like): Observed lap times.
distances (array-like): Distances for the corresponding lap times (could be lap number or track segment).
driver_ids (array-like): IDs representing different drivers.
previous_posterior (dict, optional): A dictionary with previous posterior estimates for each driver.
"""
self.times = times
self.distances = distances
self.driver_ids = driver_ids
self.model = None
self.trace = None
self.unique_drivers = np.unique(driver_ids)
self.num_drivers = len(self.unique_drivers)
self.driver_id_map = {driver_id: idx for idx, driver_id in enumerate(self.unique_drivers)}
self.mapped_driver_ids = np.array([self.driver_id_map[driver_id] for driver_id in driver_ids])
self.previous_posterior = previous_posterior
def build_model(self):
"""
Builds the PyMC model for race pace estimation.
"""
with pm.Model() as model:
if self.previous_posterior is not None:
beta_mu = np.zeros(self.num_drivers)
beta_sigma = np.zeros(self.num_drivers)
sigma = np.zeros(self.num_drivers)
for driver_id, idx in self.driver_id_map.items():
if driver_id in self.previous_posterior['driver_posteriors']:
beta_mu[idx] = self.previous_posterior['driver_posteriors'][driver_id]['beta_mean']
beta_sigma[idx] = self.previous_posterior['driver_posteriors'][driver_id]['beta_std']
sigma = self.previous_posterior['driver_posteriors'][driver_id]['sigma']
else:
beta_mu[idx] = 0.06 # Default initial values if no previous data
beta_sigma[idx] = 0.05
sigma[idx] = 5
else:
beta_mu = np.full(self.num_drivers, 0.06)
beta_sigma = np.full(self.num_drivers, 0.05)
sigma = np.full(self.num_drivers, 5)
beta = pm.Normal('beta', mu=beta_mu, sigma=beta_sigma, shape=self.num_drivers)
sigma = pm.HalfNormal('sigma', sigma=sigma)
distances_data = pm.Data('distances', self.distances)
driver_ids_data = pm.Data('driver_ids', self.mapped_driver_ids)
mu = beta[driver_ids_data] * distances_data
s = sigma[driver_ids_data]
time_obs = pm.Normal('time_obs', mu=mu, sigma=s, observed=self.times)
self.model = model
def fit(self, **sample_kwargs):
"""
Fits the PyMC model to the data.
"""
if self.model is None:
self.build_model()
with self.model:
self.trace = pm.sample(**sample_kwargs)
def predict(self, new_distances, new_driver_ids):
"""
Predicts lap times for new distances and drivers.
Parameters:
new_distances (array-like): Distances for which to predict times.
new_driver_ids (array-like): IDs of the drivers for the new distances.
Returns:
Predicted lap times based on the posterior predictive distribution.
"""
with self.model:
pm.set_data({'distances': new_distances, 'driver_ids': new_driver_ids})
posterior_predictive = pm.sample_posterior_predictive(self.trace)
predicted_times = posterior_predictive.posterior_predictive['time_obs'].values
return predicted_times
def get_posterior_estimates(self):
"""
Returns the posterior estimates for each driver.
Returns:
A dictionary containing posterior estimates for each driver, including
mean and standard deviation for both beta and sigma parameters.
"""
beta_mean = self.trace.posterior['beta'].mean(dim=['chain', 'draw']).values
beta_std = self.trace.posterior['beta'].std(dim=['chain', 'draw']).values
sigma_mean = self.trace.posterior['sigma'].mean(dim=['chain', 'draw']).values
sigma_std = self.trace.posterior['sigma'].std(dim=['chain', 'draw']).values
driver_posteriors = {
driver_id: {
'beta_mean': beta_mean[idx],
'beta_std': beta_std[idx],
'sigma_mean': sigma_mean[idx], # Driver-specific sigma means
'sigma_std': sigma_std[idx] # Driver-specific sigma std deviations
}
for driver_id, idx in self.driver_id_map.items()
}
return {
'driver_posteriors': driver_posteriors
}