Shared variables not reflecting

Hi Team,
I’m trying to use PyMC 5.16.2 for Bayesian AB analysis for some e-commerce experiments. Trying to detect any change in GMV per visitor between 2 groups. An experiment may have large number of visitors, so I am binning the data following this blog (Bayesian inference at scale: Running A/B tests with millions of observations - PyMC Labs). I am using a Zero Inflated log normal data model.

Since I have multiple experiments, I am trying to use nutpie to compile the model once and run it with different set of observations.
But the results after sampling are still for the old data. Can you help me understand what I’m doing wrong.

Thanks in advance

# Defining shared variables
visitors_c_shared = pytensor.shared(visitors_c,name='visitors_c')
conversions_c_shared = pytensor.shared(conversions_c,name='conversions_c')
bin_centres_c_shared = pytensor.shared(bin_centres_c,name='bin_centres_c')
bin_counts_c_shared = pytensor.shared(bin_counts_c,name='bin_counts_c')
visitors_t_shared = pytensor.shared(visitors_t,name='visitors_t')
conversions_t_shared = pytensor.shared(conversions_t,name='conversions_t')
bin_centres_t_shared = pytensor.shared(bin_centres_t,name='bin_centres_t')
bin_counts_t_shared = pytensor.shared(bin_counts_t,name='bin_counts_t')


with pm.Model() as model0:
    # Priors
    conv_rate_c = pm.Beta('conv_rate_c', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_c = pm.Normal('gmv_mu_c',mu=gmv_mu_prior, sigma = gmv_sd_prior)
    gmv_tau_c = pm.HalfNormal("gmv_tau_c", sigma = gmv_sd_prior)
    conv_rate_t = pm.Beta('conv_rate_t', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_t = pm.Normal('gmv_mu_t',mu=gmv_mu_prior, sigma = gmv_sd_prior)
    gmv_tau_t = pm.HalfNormal("gmv_tau_t", sigma = gmv_sd_prior)    
    
    #likelihood
    conv_like_c = pm.Binomial('conv_like_c', n=visitors_c_shared, p=conv_rate_c, observed=conversions_c_shared)
    pm.Potential("likelihood_c", pm.logp(pm.LogNormal('gmv_like_c', mu=gmv_mu_c, tau=gmv_tau_c), bin_centres_c_shared) * bin_counts_c_shared)
    conv_like_t = pm.Binomial('conv_like_t', n=visitors_t_shared, p=conv_rate_t, observed=conversions_t_shared)
    pm.Potential("likelihood_t", pm.logp(pm.LogNormal('gmv_like_t', mu=gmv_mu_t, tau=gmv_tau_t), bin_centres_t_shared) * bin_counts_t_shared)


    #Recording outputs
    gmv_mean_nz_c = pm.Deterministic("gmv_mean_nz_c",np.exp(gmv_mu_c + 1/(2*gmv_tau_c)))    
    gmv_mean_c = pm.Deterministic("gmv_mean_c", gmv_mean_nz_c*conv_rate_c)    
    gmv_mean_nz_t = pm.Deterministic("gmv_mean_nz_t",np.exp(gmv_mu_t + 1/(2*gmv_tau_t)))    
    gmv_mean_t = pm.Deterministic("gmv_mean_t", gmv_mean_nz_t*conv_rate_t)    
    diff_conv = pm.Deterministic("diff_conv", conv_rate_t - conv_rate_c)
    diff_gmv = pm.Deterministic("diff_gmv", gmv_mean_t - gmv_mean_c)




compiled_model = nutpie.compile_pymc_model(model)


trace_pymc = nutpie.sample(compiled_model)


# New values 
visitors_c_shared.set_value(visitors_c_new)
conversions_c_shared.set_value(conversions_c_new)
bin_centres_c_shared.set_value(bin_centres_c_new)
bin_counts_c_shared.set_value(bin_counts_c_new)
visitors_t_shared.set_value(visitors_t_new)
conversions_t_shared.set_value(conversions_t_new)
bin_centres_t_shared.set_value(bin_centres_t_new)
bin_counts_t_shared.set_value(bin_counts_t_new)


trace_pymc_new = nutpie.sample(compiled_model)

I tried using pm.Data to define shared variables but this is not helping either.

with pm.Model() as model:
    # Data
    visitors_c=pm.Data('visitors_c',visitors_c)
    conversions_c=pm.Data('conversions_c',conversions_c)
    bin_counts_c=pm.Data('bin_counts_c',bin_counts_c)
    bin_centres_c=pm.Data('bin_centres_c',bin_centres_c)
    visitors_t=pm.Data('visitors_t',visitors_t)
    conversions_t=pm.Data('conversions_t',conversions_t)
    bin_counts_t=pm.Data('bin_counts_t',bin_counts_t)
    bin_centres_t=pm.Data('bin_centres_t',bin_centres_t)
    
    # Priors
    conv_rate_c = pm.Beta('conv_rate_c', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_c = pm.Normal('gmv_mu_c',mu=gmv_mu_prior, sigma = gmv_sd_prior)
    gmv_tau_c = pm.HalfNormal("gmv_tau_c", sigma = gmv_sd_prior)
    conv_rate_t = pm.Beta('conv_rate_t', alpha=conv_alpha_prior, beta=conv_beta_prior)
    gmv_mu_t = pm.Normal('gmv_mu_t',mu=gmv_mu_prior, sigma = gmv_sd_prior)
    gmv_tau_t = pm.HalfNormal("gmv_tau_t", sigma = gmv_sd_prior)    
    
    #likelihood
    conv_like_c = pm.Binomial('conv_like_c', n=visitors_c, p=conv_rate_c, observed=conversions_c)
    pm.Potential("likelihood_c", pm.logp(pm.LogNormal('gmv_like_c', mu=gmv_mu_c, tau=gmv_tau_c), bin_centres_c) * bin_counts_c)
    conv_like_t = pm.Binomial('conv_like_t', n=visitors_t, p=conv_rate_t, observed=conversions_t)
    pm.Potential("likelihood_t", pm.logp(pm.LogNormal('gmv_like_t', mu=gmv_mu_t, tau=gmv_tau_t), bin_centres_t) * bin_counts_t)

    #Recording outputs
    gmv_mean_nz_c = pm.Deterministic("gmv_mean_nz_c",np.exp(gmv_mu_c + 1/(2*gmv_tau_c)))    
    gmv_mean_c = pm.Deterministic("gmv_mean_c", gmv_mean_nz_c*conv_rate_c)    
    gmv_mean_nz_t = pm.Deterministic("gmv_mean_nz_t",np.exp(gmv_mu_t + 1/(2*gmv_tau_t)))    
    gmv_mean_t = pm.Deterministic("gmv_mean_t", gmv_mean_nz_t*conv_rate_t)    

    diff_conv = pm.Deterministic("diff_conv", conv_rate_t - conv_rate_c)
    diff_gmv = pm.Deterministic("diff_gmv", gmv_mean_t - gmv_mean_c)

compiled_model = nutpie.compile_pymc_model(model)
trace_pymc = nutpie.sample(compiled_model)

compiled2 = compiled_model.with_data(visitors_c=visitors_c_new,conversions_c=conversions_c_new,bin_centres_c=bin_centres_c_new,bin_counts_c=bin_counts_c_new,visitors_t=visitors_t_new,conversions_t=conversions_t_new,bin_centres_t=bin_centres_t_new,bin_counts_t=bin_counts_t_new)
trace2 = nutpie.sample(compiled2)

nutpie compiled models have their own method to update shared data that you must use. I don’t recall exactly but something like.with_data or .with_shared

Thanks Ricardo.
From what I read, with_data only changes the observerd= params. Since I’m using a custom likelihood function I’m not sure if that will work. I’ve shared a code in the first comment where I tried with_data and it did not work. I’ll check if with_shared exists

Are you aware of a way to do compile once and run inference with different data sets without recompiling in pymc? I’m open to not using nutpie