Bayesian model development: porting from PyMC3 to PyMC 5

Hello Everyone,

I am facing some issues while developing a Bayesian marketing mix model (MMM) for a project.
It is a long story, and I hope to take you all through it step by step.

  1. The data:

The target variable here is Joins, while the spend for each channel sits in its own column.

  2. The transformations:
    For any MMM, we have adstock and saturation transformations, which are defined by the functions below:
import numpy as np

def saturation_hill_pymc(x, alpha, gamma):
    # Hill saturation: response grows with spend but with diminishing returns
    x_s_hill = x ** alpha / (x ** alpha + gamma ** alpha)
    return x_s_hill
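
For intuition, here is a quick check of the Hill curve on plain NumPy values (the spend values and parameters below are arbitrary examples):

spend = np.linspace(0, 1, 5)
print(saturation_hill_pymc(spend, alpha=2.0, gamma=0.5))
# Rises quickly at first, then flattens as spend approaches 1 -
# the diminishing-returns behaviour the saturation step is meant to capture.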



#from https://github.com/ikatsov/tensor-house/blob/master/promotions/mediamix-bayesian.ipynb

def adstock_geometric_numpy(x, theta):
    """
    NumPy implementation of the geometric adstock transformation.

    :param x: NumPy array containing the input data (e.g., marketing spend over time).
    :param theta: Decay parameter for the adstock effect.
    :return: NumPy array with the adstock-transformed values.
    """
    # Initialize an array to store the adstock-transformed values
    x_decayed = np.zeros_like(x)
    
    # The first element remains the same as the original series
    x_decayed[0] = x[0]

    # Apply the adstock transformation to each element
    for i in range(1, len(x)):
        x_decayed[i] = x[i] + theta * x_decayed[i - 1]

    return x_decayed
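
The PyMC3 version of the model further down calls adstock_geometric_aesara_pymc, which isn't included in this post. For reference, a symbolic counterpart that works with PyMC 5 random variables could be written with pytensor.scan roughly like this (a sketch under my own naming, not the original helper):

import pytensor.tensor as pt
from pytensor import scan

def adstock_geometric_pytensor(x, theta):
    """
    Sketch of a PyTensor (PyMC 5) counterpart of adstock_geometric_numpy,
    so that theta can stay a random variable instead of a fixed number.
    Recurrence: y[t] = x[t] + theta * y[t-1], with y[0] = x[0].
    """
    x = pt.as_tensor_variable(x)

    def step(x_t, carry, theta):
        # Same recurrence as the NumPy version
        return x_t + theta * carry

    x_decayed, _ = scan(
        fn=step,
        sequences=[x],
        outputs_info=[pt.zeros_like(x[0])],
        non_sequences=[theta],
    )
    return x_decayed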
  3. The model:

Below is the code I am using to define the parameters:

from sklearn.preprocessing import MinMaxScaler

transform_variables = ["trend", "season", "holiday", 'Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']

delay_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']
media_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']

control_variables = ["trend", "season", "holiday"]

target = "Joins"

data_transformed = final_data.copy()

numerical_encoder_dict = {}

for feature in transform_variables:
    scaler = MinMaxScaler()
    original = final_data[feature].values.reshape(-1, 1)
    transformed = scaler.fit_transform(original)
    data_transformed[feature] = transformed
    numerical_encoder_dict[feature] = scaler

dependent_transformation = None
original = final_data[target].values
data_transformed[target] = original 

import pymc as pm
import arviz as az

response_mean = []
with pm.Model() as base_model:
    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")
        
        x = data_transformed[channel_name].values
        
        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 3)
        
        # Sample from the priors :
        
        pidata = pm.sample(500,random_seed=43)
        stacked = az.extract(pidata)
        
        adstock_params = np.average(stacked[f"{channel_name}_adstock"].values)

        
        x_new = adstock_geometric_numpy(x, adstock_params)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)
        channel_b = np.average(stacked[f"{channel_name}_media_coef"].values)

        response_mean.append(saturation_tensor * channel_b)
        
        
    for control_var in control_variables:
        print(f"Control Variables: Adding {control_var}")
        
        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma = 3)
        pidata = pm.sample(500,random_seed=43)
        stacked = az.extract(pidata)
        
        control_beta_med = np.average(stacked[f"{control_var}_control_coef"].values)
        control_x = control_beta_med * x
        response_mean.append(control_x)
        
    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values), sigma = 3)   
    sigma = pm.HalfNormal("sigma", 4)
    
    pidata = pm.sample(500,random_seed=43) 
    stacked = az.extract(pidata)
    
    sigma = np.average(stacked["sigma"].values)
    intercept = np.average(stacked["intercept"].values)
    
    likelihood = pm.Normal("likelihood", mu = intercept + sum(response_mean), sigma = sigma, observed = data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])
  4. The earlier code in PyMC3:
response_mean = []
with pm.Model() as base_model:
    for channel_name in delay_channels:
        
        
        print(f"Delay Channels: Adding {channel_name}")
        
        x = data_transformed[channel_name].values
        
        adstock_param = pm.Beta(f"{channel_name}_adstock",3,3)
        
        
        saturation_gamma = pm.Beta(f"{channel_name}_gamma",2,2)
        saturation_alpha = pm.Beta(f"{channel_name}_alpha",3,1)
        
        pidata = pm.sample(500,random_seed=43)
        
        x_new = adstock_geometric_aesara_pymc(x, adstock_param)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)
        
        
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 3)
        response_mean.append(saturation_tensor * channel_b)
        
    for control_var in control_variables:
        
        print(f"Control Variables: Adding {control_var}")
        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma = 3)
        control_x = control_beta * x
        response_mean.append(control_x)
        

    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]), sigma = 3)
    sigma = pm.HalfNormal("sigma", 4)
    
    likelihood = pm.Normal("likelihood",mu = intercept + sum(response_mean), sigma = sigma, observed = data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])
    trace_p = pm.sample(1000)

I believe I am doing something wrong with the parameter definitions in step 3, because after sampling, my posterior predictive check

with base_model:
    pm.sample_posterior_predictive(pidata, extend_inferencedata=True, random_seed=43)
    
az.plot_ppc(pidata, num_pp_samples=100);

returns the plot below, which doesn't look right.

The reason I am using np.average is to get a single value for each parameter in the cases where I need to pass it through the custom adstock and saturation functions, and then append the result to the response_mean list, which is finally used in the likelihood.

I am not sure what I am doing wrong, as I am absolutely new to PyMC (4 weeks now), so I would be really glad if someone could help me with this!

Just two comments - I'm on mobile and just skimming. One of the bits of code you've pasted is missing its formatting; could you fix that?

A great way to ensure that you have a similar model structure before and after migrating is to use pm.model_to_graphviz, which is available in both PyMC3 and PyMC 5. It lets you plot the model structure as a graph and will help you spot issues more easily.
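
For example (a minimal usage sketch):

graph = pm.model_to_graphviz(base_model)
graph  # displays the graph in a notebook
# graph.render("model_graph", format="png")  # or save it to a file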


Below is the code I am using to define the parameters:

transform_variables = ["trend", "season", "holiday", 'Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']

delay_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']
media_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']

control_variables = ["trend", "season", "holiday"]

target = "Joins"

data_transformed = final_data.copy()

numerical_encoder_dict = {}

for feature in transform_variables:
    scaler = MinMaxScaler()
    original = final_data[feature].values.reshape(-1, 1)
    transformed = scaler.fit_transform(original)
    data_transformed[feature] = transformed
    numerical_encoder_dict[feature] = scaler

dependent_transformation = None
original = final_data[target].values
data_transformed[target] = original

After the data transformations, I define the model parameters as below:

response_mean = []
with pm.Model() as base_model:
    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")
        
        x = data_transformed[channel_name].values
        
        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma = 3)
        
        # Sample from the priors :
        
        pidata = pm.sample(500,random_seed=43)
        stacked = az.extract(pidata)
        
        adstock_params = np.average(stacked[f"{channel_name}_adstock"].values)

        
        x_new = adstock_geometric_numpy(x, adstock_params)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)
        channel_b = np.average(stacked[f"{channel_name}_media_coef"].values)

        response_mean.append(saturation_tensor * channel_b)
        
        
    for control_var in control_variables:
        print(f"Control Variables: Adding {control_var}")
        
        xc = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma = 3)
        
        control_x = control_beta * xc
        response_mean.append(control_x)
        
    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values), sigma = 3)   
    sigma = pm.HalfNormal("sigma", 4)
 
    likelihood = pm.Poisson("likelihood", mu = intercept + sum(response_mean), observed = data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])

And below is the result from pm.model_to_graphviz(model=base_model)

The model graph appears to be fine, and below is the posterior distribution:

[posterior distribution plot]

Now the issue is that if I use PyMC-Marketing, the model graph below is what gets generated:

This is more along the lines of what I am expecting, but I have no idea how to do this in plain PyMC.

I am not sure if this is even possible, as the earlier code (written about 3 years ago by someone who is no longer in the org) is outdated (it uses PyMC3).
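
For reference, this is my rough, untested sketch of what I think the model would need to look like in PyMC 5 if the parameters stay symbolic (it mirrors the old PyMC3 code and reuses the hypothetical adstock_geometric_pytensor helper sketched in the transformations section above):

response_mean = []
with pm.Model() as base_model:
    for channel_name in delay_channels:
        x = data_transformed[channel_name].values

        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma=3)

        # Keep everything symbolic: no pm.sample / np.average inside the model block
        x_new = adstock_geometric_pytensor(x, adstock_param)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)
        response_mean.append(saturation_tensor * channel_b)

    for control_var in control_variables:
        xc = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma=3)
        response_mean.append(control_beta * xc)

    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values), sigma=3)
    sigma = pm.HalfNormal("sigma", 4)

    likelihood = pm.Normal(
        "likelihood",
        mu=intercept + sum(response_mean),
        sigma=sigma,
        observed=data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX],
    )
    trace = pm.sample(1000, random_seed=43)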