Hi, I was looking for an example of Thompson sampling in PYMC but couldn’t find one. So I put a simple example together. I am very new Thompson sampling and was hoping I could get another pair of eyeballs on my example. I think it would be nice to have an example of this in PYMC.
Here is my implementation of a simple two-arm bandits using conjugate priors beta-binomial. Please share any suggestions to improve the example.
import arviz as az
import numpy as np
import pymc as pm
import pytensor.tensor as pt
print(f"Running on PyMC v{pm.__version__}")
class ThompsonAgent:
def __init__(self, true_beta0: float, true_beta1: float, beta0_posterior: float = 0.0, beta1_posterior: float = 0.0):
self.trials = dict(
arm1 = dict(
success = [],
failure = [],
trials = []
),
arm2 = dict(
success = [],
failure = [],
trials = []
)
)
self.true_beta0 = true_beta0
self.true_beta1 = true_beta1
self.beta0_posterior = beta0_posterior
self.beta1_posterior = beta1_posterior
def pull_arms(self, num_pulls: int, beta0: float, beta1: float) -> None:
# arm pull
if beta0 > beta1:
arm1 = pm.Binomial.dist(n = num_pulls, p = beta0).eval()
arm2 = num_pulls - arm1
elif beta0 < beta1:
arm2 = pm.Binomial.dist(n = num_pulls, p = beta1).eval()
arm1 = num_pulls - arm2
else:
arm1 = np.array([int(num_pulls / 2)]).reshape(-1, 1)
arm2 = num_pulls - arm1
# observed rewards
Y_0 = pm.Binomial.dist(n = arm1, p = self.true_beta0).eval()
Y_1 = pm.Binomial.dist(n = arm2, p = self.true_beta1).eval()
self.trials['arm1']['success'].append(Y_0)
self.trials['arm1']['failure'].append(arm1 - Y_0)
self.trials['arm1']['trials'].append(arm1)
self.trials['arm2']['success'].append(Y_1)
self.trials['arm2']['failure'].append(arm2 - Y_1)
self.trials['arm2']['trials'].append(arm2)
def init_model(self, num_pulls: int) -> None:
self.pull_arms(num_pulls = num_pulls, beta0 = self.beta0_posterior, beta1 = self.beta1_posterior)
with pm.Model() as basic_model:
# Data Containers
beta0_success = pm.MutableData(name = 'beta0_success', value=np.sum(self.trials['arm1']['success']))
beta1_success = pm.MutableData(name = 'beta1_success', value=np.sum(self.trials['arm2']['success']))
beta0_failure = pm.MutableData(name = 'beta0_failure', value=np.sum(self.trials['arm1']['failure']))
beta1_failure = pm.MutableData(name = 'beta1_failure', value=np.sum(self.trials['arm2']['failure']))
Y0_trials = pm.MutableData(name = 'Y0_trials', value=np.sum(self.trials['arm1']['trials']))
Y1_trials = pm.MutableData(name = 'Y1_trials', value=np.sum(self.trials['arm2']['trials']))
# Priors for unknown model parameters
beta0 = pm.Beta("beta0", alpha = 1 + beta0_success, beta = 1 + beta0_failure)
beta1 = pm.Beta("beta1", alpha = 1 + beta1_success, beta = 1 + beta1_failure)
# Likelihood
Y0_obs = pm.Binomial("Y0_obs", n = Y0_trials, p = beta0, observed=beta0_success)
Y1_obs = pm.Binomial("Y1_obs", n = Y1_trials, p = beta1, observed=beta1_success)
# draw a posterior sample from each arm
trace = pm.sample(draws=1, chains=1, progressbar = False)
# Get arm samples
self.beta0_posterior = trace['posterior']['beta0'].values
self.beta1_posterior = trace['posterior']['beta1'].values
def update_priors(self, num_trials: int, num_pulls: int) -> None:
for _ in range(num_trials):
self.pull_arms(num_pulls = num_pulls, beta0 = beta0_posterior, beta1 = beta1_posterior)
with basic_model:
# Update data with posterior
beta0_success = pm.set_data({'beta0_success': np.sum(trials['arm1']['success'])})
beta1_success = pm.set_data({'beta1_success': np.sum(trials['arm2']['success'])})
beta0_failure = pm.set_data({'beta0_failure': np.sum(trials['arm1']['failure'])})
beta1_failure = pm.set_data({'beta1_failure': np.sum(trials['arm2']['failure'])})
Y0_trials = pm.set_data({'Y0_trials': np.sum(trials['arm1']['trials'])})
Y1_trials = pm.set_data({'Y1_trials': np.sum(trials['arm2']['trials'])})
trace = pm.sample(draws=1, chains=1, progressbar = False)
self.beta0_posterior = trace['posterior']['beta0'].values
self.beta1_posterior = trace['posterior']['beta1'].values
agent = ThompsonAgent(true_beta0 = 0.4, true_beta1 = 0.7)
agent.init_model(num_pulls = 100)
agent.update_priors(num_trials = 10, num_pulls = 100)
agent.beta0_posterior
agent.beta1_posterior