Modelling Click Behavior Using a Beta-Binomial Model

Hi,

I am trying to model user click behavior using a simple Beta-Binomial model.

The code to generate synthetic data is:

# ----- Synthetic data generation -----
# 100 users, 10 items, 5-dimensional latent factors.
n_users = 100
n_items = 10
lf_dim = 5

# Per-item exposure propensities, normalized so the largest equals 1.
propensities = np.random.gamma(1, 0.005, size=n_items)
propensities /= propensities.max()  # idiomatic max instead of sorted(...)[0]

# Latent user and item factors.
# NOTE(review): q_r is drawn from normal(loc=-1, scale=1) while p_r is
# uniform(-1, 1) — confirm the -1 location is intended and not a typo
# for normal(0, 1).
p_r = np.random.uniform(-1, 1, size=(n_users, lf_dim))
q_r = np.random.normal(-1, 1, size=(n_items, lf_dim))

# Generate user (true) preference matrix (rating matrix).
R = np.dot(p_r, q_r.T)
# Generate relevance parameter for the click model; expit maps R into (0, 1).
gamma = expit(R)

# Expected impressions are inversely proportional to propensity; Poisson
# lam broadcasts over the (n_users, n_items) tiled rate, so the extra
# size= argument of the original was redundant.
impr_rate = np.tile(1 / propensities, (n_users, 1))
Impressions = np.random.poisson(impr_rate)

# Clicks | impressions ~ Binomial(n=Impressions, p=propensity * relevance).
Clicks = np.random.binomial(Impressions, propensities * gamma)

Code for packaging data into a data frame:

# ----- Package the simulated matrices into long-format data frames -----

# Per-item exposure propensities keyed by item id.
df_exp = pd.DataFrame({'pid': range(0, n_items), 'propensities': propensities})

# Keep only (user, item) cells above a threshold; np.where on the boolean
# mask yields the row (uid) and column (pid) indices of the surviving cells.
impr_mask = Impressions > 5
impr_uid, impr_pid = np.where(impr_mask)
df_impressions = pd.DataFrame(
    {'uid': impr_uid, 'pid': impr_pid, 'impr': Impressions[impr_mask]}
)

# NOTE(review): clicks use a different cutoff (> 1) than impressions (> 5),
# so single-click cells are dropped — confirm this asymmetry is intended.
click_mask = Clicks > 1
click_uid, click_pid = np.where(click_mask)
df_clicks = pd.DataFrame(
    {'uid': click_uid, 'pid': click_pid, 'click': Clicks[click_mask]}
)

# Attach impression counts to each click record, drop click rows whose
# impression cell was filtered out, then join the item propensities.
data = df_clicks.merge(df_impressions, on=['uid', 'pid'], how='left')
data_clean = data[data['impr'].notnull()]
data_final = data_clean.merge(df_exp, how='left')

# 80/20 random split for fitting vs. evaluation.
train, test = train_test_split(data_final, test_size=0.2)

Code to estimate 'ctr' at the user and item level (with some smoothing):

# ----- Empirical per-item and per-user CTR with additive smoothing -----

# Aggregate impressions/clicks per item and per user; the (click+1)/(impr+3)
# smoothing keeps the ratio defined and shrunk toward 1/3 for tiny counts.
pid_global = train.groupby('pid')[['impr', 'click']].sum().reset_index()
pid_global['ctr'] = (pid_global['click'] + 1) / (pid_global['impr'] + 3)
uid_global = train.groupby('uid')[['impr', 'click']].sum().reset_index()
uid_global['ctr'] = (uid_global['click'] + 1) / (uid_global['impr'] + 3)

# Attach both ctr columns to the training rows. NOTE(review): pandas
# disambiguates the duplicated 'ctr' name as 'ctr_x' (item) / 'ctr_y' (user);
# pass suffixes= to merge if clearer names are wanted downstream.
train = pd.merge(train, pid_global[['pid', 'ctr']], on='pid', how='left').merge(uid_global[['uid', 'ctr']], on='uid', how='left')

# Global averages used to impute ids absent from the training split.
avg_ctr_pid = pid_global['ctr'].mean()
avg_ctr_uid = uid_global['ctr'].mean()
avg_impr_pid = pid_global['impr'].mean()
avg_impr_uid = uid_global['impr'].mean()
avg_click_pid = pid_global['click'].mean()
avg_click_uid = uid_global['click'].mean()

# Ids in the full universe that never appear in the (filtered) train data.
missing_pids = list(set(range(0, n_items)) - set(pid_global['pid'].values))
missing_uids = list(set(range(0, n_users)) - set(uid_global['uid'].values))

# Fill missing ids with the global averages. DataFrame.append was removed
# in pandas 2.0 — use pd.concat instead (guarded so we never concatenate
# an empty frame).
if missing_pids:
    missing_pid_rows = pd.DataFrame(
        [{'pid': pid, 'impr': avg_impr_pid, 'click': avg_click_pid, 'ctr': avg_ctr_pid}
         for pid in missing_pids]
    )
    pid_global = pd.concat([pid_global, missing_pid_rows], ignore_index=True)
pid_global = pid_global.sort_values('pid').reset_index(drop=True)

if missing_uids:
    missing_uid_rows = pd.DataFrame(
        [{'uid': uid, 'impr': avg_impr_uid, 'click': avg_click_uid, 'ctr': avg_ctr_uid}
         for uid in missing_uids]
    )
    uid_global = pd.concat([uid_global, missing_uid_rows], ignore_index=True)
uid_global = uid_global.sort_values('uid').reset_index(drop=True)

# Priors for the Bayesian model, aligned by sorted id order.
beta_mean_pid = pid_global['ctr'].values
beta_var_pid = 1

# Per-(user, item) prior mean: each user's ctr repeated across all items.
beta_mean_uid = np.tile(uid_global['ctr'].values, (n_items, 1)).T
beta_var_uid = 0.001

Now the main model:

# Beta-Binomial-style click model: each observed click count is
# Binomial(n=impressions, p=theta[pid] * gamma[uid, pid]).
#
# NOTE(review): the likelihood depends on theta and gamma only through their
# PRODUCT, so the two factors are not separately identifiable — rescaling
# theta by c and gamma by 1/c (within (0, 1)) leaves the likelihood
# unchanged. This is the most likely reason the posterior does not recover
# the true propensities/gamma; an informative prior on one factor (or fixing
# its scale) is needed to break the symmetry.
with pm.Model() as model:
    # Uninformative prior for alpha and beta
    #param_theta = pm.Beta('theta', beta_mean_pid/beta_var_pid, (1-beta_mean_pid)/beta_mean_pid, shape=n_items)
    #param_gamma = pm.Beta('gamma', beta_mean_uid/beta_var_uid, (1-beta_mean_uid)/beta_mean_uid, shape=(n_users, n_items))
    # Flat priors on (0, 1) for the per-item and per-(user, item) factors.
    param_theta = pm.Uniform('theta', 0, 1, shape=n_items)

    param_gamma = pm.Uniform('gamma', 0, 1, shape=(n_users, n_items))

    # Index the latent parameters by each training row's (uid, pid) pair so
    # every observation gets its own success probability.
    theta_data = param_theta[train['pid'].values]
    gamma_data = param_gamma[train['uid'].values, train['pid'].values]

    #theta = pm.Beta("theta", alpha=1, beta=0.2, shape=N)
    #theta = pm.Uniform("theta", 0.1, 1, shape=N)
    # Observed clicks given impressions; p is the product of the two factors.
    p = pm.Binomial("y", p=theta_data * gamma_data, observed=train['click'], n=train['impr'])
    # NOTE(review): chains=1 disables between-chain diagnostics (R-hat);
    # prefer multiple chains. find_MAP runs after sampling here and is
    # independent of the trace.
    trace = pm.sample(1000, tune=2000, target_accept=0.95, chains=1)
    map_estimate = pm.find_MAP()

I have tried both empirical Bayes (to estimate priors for alpha and beta) and a uniform prior, but after sampling is done, the estimated parameters don’t match the “true” parameters (propensities and gamma).

I am new to both pymc3 and Bayesian modeling, so please excuse me if I am missing something very obvious.

Thanks

There is a merged pymc-examples notebook that is not yet live on the site: pymc-examples/bayesian_ab_testing.ipynb at main · pymc-devs/pymc-examples · GitHub. Maybe this will be of use.