Hi Team,
I’m trying to use PyMC 5.16.2 for Bayesian A/B analysis of some e-commerce experiments, with the goal of detecting any change in GMV per visitor between two groups. An experiment may have a large number of visitors, so I am binning the data following this blog (Bayesian inference at scale: Running A/B tests with millions of observations - PyMC Labs). I am using a zero-inflated log-normal data model.
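For context, this is roughly how I build the binned inputs for one arm (the data below is fake and just shows the shapes; in reality gmv_nonzero comes from our logs and the number of bins is tuned):

import numpy as np

# Fake GMV values for the visitors in one arm who purchased (illustrative only);
# zeros are handled by the Binomial conversion part of the model, not by the bins.
rng = np.random.default_rng(0)
visitors_c = 1_000_000
gmv_nonzero = rng.lognormal(mean=3.0, sigma=1.0, size=30_000)
conversions_c = len(gmv_nonzero)

# Histogram the non-zero GMV and use bin centres + counts in place of raw observations
bin_counts_c, bin_edges = np.histogram(gmv_nonzero, bins=2000)
bin_centres_c = 0.5 * (bin_edges[:-1] + bin_edges[1:])

# Drop empty bins so the Potential only sums over occupied bins
occupied = bin_counts_c > 0
bin_counts_c = bin_counts_c[occupied]
bin_centres_c = bin_centres_c[occupied]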
Since I have multiple experiments, I am trying to use nutpie to compile the model once and run it with different sets of observations (I’ve put a sketch of the with_data alternative I’m wondering about at the end of this post).
But the results after sampling are still for the old data. Can you help me understand what I’m doing wrong?
Thanks in advance
import pymc as pm
import pytensor
import nutpie

# Defining shared variables
visitors_c_shared = pytensor.shared(visitors_c, name='visitors_c')
conversions_c_shared = pytensor.shared(conversions_c, name='conversions_c')
bin_centres_c_shared = pytensor.shared(bin_centres_c, name='bin_centres_c')
bin_counts_c_shared = pytensor.shared(bin_counts_c, name='bin_counts_c')
visitors_t_shared = pytensor.shared(visitors_t, name='visitors_t')
conversions_t_shared = pytensor.shared(conversions_t, name='conversions_t')
bin_centres_t_shared = pytensor.shared(bin_centres_t, name='bin_centres_t')
bin_counts_t_shared = pytensor.shared(bin_counts_t, name='bin_counts_t')
with pm.Model() as model0:
    # Priors
    conv_rate_c = pm.Beta('conv_rate_c', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_c = pm.Normal('gmv_mu_c', mu=gmv_mu_prior, sigma=gmv_sd_prior)
    gmv_tau_c = pm.HalfNormal("gmv_tau_c", sigma=gmv_sd_prior)
    conv_rate_t = pm.Beta('conv_rate_t', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_t = pm.Normal('gmv_mu_t', mu=gmv_mu_prior, sigma=gmv_sd_prior)
    gmv_tau_t = pm.HalfNormal("gmv_tau_t", sigma=gmv_sd_prior)

    # Likelihood: Binomial for conversions, binned log-normal Potential for non-zero GMV
    # (logp at each bin centre weighted by that bin's count)
    conv_like_c = pm.Binomial('conv_like_c', n=visitors_c_shared, p=conv_rate_c, observed=conversions_c_shared)
    pm.Potential("likelihood_c", pm.logp(pm.LogNormal.dist(mu=gmv_mu_c, tau=gmv_tau_c), bin_centres_c_shared) * bin_counts_c_shared)
    conv_like_t = pm.Binomial('conv_like_t', n=visitors_t_shared, p=conv_rate_t, observed=conversions_t_shared)
    pm.Potential("likelihood_t", pm.logp(pm.LogNormal.dist(mu=gmv_mu_t, tau=gmv_tau_t), bin_centres_t_shared) * bin_counts_t_shared)

    # Recording outputs: mean of a log-normal with precision tau is exp(mu + 1/(2*tau)),
    # and GMV per visitor is that mean times the conversion rate
    gmv_mean_nz_c = pm.Deterministic("gmv_mean_nz_c", pm.math.exp(gmv_mu_c + 1 / (2 * gmv_tau_c)))
    gmv_mean_c = pm.Deterministic("gmv_mean_c", gmv_mean_nz_c * conv_rate_c)
    gmv_mean_nz_t = pm.Deterministic("gmv_mean_nz_t", pm.math.exp(gmv_mu_t + 1 / (2 * gmv_tau_t)))
    gmv_mean_t = pm.Deterministic("gmv_mean_t", gmv_mean_nz_t * conv_rate_t)
    diff_conv = pm.Deterministic("diff_conv", conv_rate_t - conv_rate_c)
    diff_gmv = pm.Deterministic("diff_gmv", gmv_mean_t - gmv_mean_c)
compiled_model = nutpie.compile_pymc_model(model0)
trace_pymc = nutpie.sample(compiled_model)
# New values
visitors_c_shared.set_value(visitors_c_new)
conversions_c_shared.set_value(conversions_c_new)
bin_centres_c_shared.set_value(bin_centres_c_new)
bin_counts_c_shared.set_value(bin_counts_c_new)
visitors_t_shared.set_value(visitors_t_new)
conversions_t_shared.set_value(conversions_t_new)
bin_centres_t_shared.set_value(bin_centres_t_new)
bin_counts_t_shared.set_value(bin_counts_t_new)
trace_pymc_new = nutpie.sample(compiled_model)
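In case it's relevant: from the nutpie docs I got the impression that new observations might need to be attached to the compiled model itself rather than set on the PyTensor shared variables, roughly like the sketch below. I haven't confirmed that this with_data method (keyed by the shared variables' names) is the right approach, so please correct me if that's not how it works:

# Sketch of the alternative I'm considering (assuming nutpie's compiled model
# exposes with_data keyed by the shared variables' names; not verified)
compiled_model_new = compiled_model.with_data(
    visitors_c=visitors_c_new,
    conversions_c=conversions_c_new,
    bin_centres_c=bin_centres_c_new,
    bin_counts_c=bin_counts_c_new,
    visitors_t=visitors_t_new,
    conversions_t=conversions_t_new,
    bin_centres_t=bin_centres_t_new,
    bin_counts_t=bin_counts_t_new,
)
trace_pymc_new_alt = nutpie.sample(compiled_model_new)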