Hello everyone,
I am facing some issues while developing a Bayesian marketing mix model (MMM) for a project.
It is a fairly long story, so I will walk you through it step by step.
- Data:
The target variable is Joins, and the spend for each channel sits in its own column.
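Roughly, the layout of final_data is something like the below (the values are illustrative placeholders only, not the real data; the column names are the ones used in the code further down):
import pandas as pd

# Illustrative layout only -- the real final_data is loaded elsewhere
final_data = pd.DataFrame({
    "date":    pd.date_range("2022-01-03", periods=3, freq="W"),
    "Audio_s": [1200.0, 950.0, 0.0],        # weekly spend per channel
    "TV_s":    [50000.0, 47500.0, 52000.0],
    # ... remaining channel spend columns (Cable_s, META_s, ...) ...
    "trend":   [1, 2, 3],                   # control variables
    "season":  [0.1, 0.3, 0.2],
    "holiday": [0, 0, 1],
    "Joins":   [830, 790, 915],             # target variable
})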
- The transformations:
As in any MMM, there are adstock and saturation transformations, defined by the functions below (a small usage sketch follows the definitions):
import numpy as np

def saturation_hill_pymc(x, alpha, gamma):
    # Hill saturation: maps spend into [0, 1) with shape alpha and half-saturation point gamma
    x_s_hill = x ** alpha / (x ** alpha + gamma ** alpha)
    return x_s_hill

# from https://github.com/ikatsov/tensor-house/blob/master/promotions/mediamix-bayesian.ipynb
def adstock_geometric_numpy(x, theta):
    """
    NumPy implementation of the geometric adstock transformation.
    :param x: NumPy array containing the input data (e.g., marketing spend over time).
    :param theta: Decay parameter for the adstock effect.
    :return: NumPy array with the adstock-transformed values.
    """
    # Initialize an array to store the adstock-transformed values
    x_decayed = np.zeros_like(x)
    # The first element remains the same as the original series
    x_decayed[0] = x[0]
    # Apply the adstock transformation to each element
    for i in range(1, len(x)):
        x_decayed[i] = x[i] + theta * x_decayed[i - 1]
    return x_decayed
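Just to show how the two transforms compose on a spend series (theta, alpha, and gamma are hand-picked here purely for illustration, not fitted values):
import numpy as np

spend = np.array([0.0, 10.0, 0.0, 0.0, 5.0])

# Step 1: geometric adstock -- each period keeps theta of the previous carry-over
adstocked = adstock_geometric_numpy(spend, theta=0.5)
# -> [0., 10., 5., 2.5, 6.25]

# Step 2: Hill saturation -- squashes the adstocked spend into [0, 1)
saturated = saturation_hill_pymc(adstocked, alpha=2.0, gamma=4.0)
print(saturated)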
- The model:
Below is the code I am using to set up the variables, scale the data, and define the model parameters:
from sklearn.preprocessing import MinMaxScaler

transform_variables = ["trend", "season", "holiday", 'Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']
delay_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']
media_channels = ['Audio_s','Cable_s','CTV_Hulu_s','Display_s','META_s','Radio_s','SA360_s','Snapchat_s','TikTok_s','TV_s','YouTube_s']
control_variables = ["trend", "season", "holiday"]
target = "Joins"

data_transformed = final_data.copy()
numerical_encoder_dict = {}

# Min-max scale each media and control variable, keeping the fitted scalers for later inverse transforms
for feature in transform_variables:
    scaler = MinMaxScaler()
    original = final_data[feature].values.reshape(-1, 1)
    transformed = scaler.fit_transform(original)
    data_transformed[feature] = transformed
    numerical_encoder_dict[feature] = scaler

# The target is left untransformed
dependent_transformation = None
original = final_data[target].values
data_transformed[target] = original
with pm.Model() as base_model:
    response_mean = []  # collects each term of the regression mean

    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")
        x = data_transformed[channel_name].values

        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Gamma(f"{channel_name}_alpha", 3, 1)
        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma=3)

        # Sample from the priors:
        pidata = pm.sample(500, random_seed=43)
        stacked = az.extract(pidata)

        adstock_params = np.average(stacked[f"{channel_name}_adstock"].values)
        x_new = adstock_geometric_numpy(x, adstock_params)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)

        channel_b = np.average(stacked[f"{channel_name}_media_coef"].values)
        response_mean.append(saturation_tensor * channel_b)

    for control_var in control_variables:
        print(f"Control Variables: Adding {control_var}")
        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma=3)

        pidata = pm.sample(500, random_seed=43)
        stacked = az.extract(pidata)
        control_beta_med = np.average(stacked[f"{control_var}_control_coef"].values)
        control_x = control_beta_med * x
        response_mean.append(control_x)

    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values), sigma=3)
    sigma = pm.HalfNormal("sigma", 4)

    pidata = pm.sample(500, random_seed=43)
    stacked = az.extract(pidata)
    sigma = np.average(stacked["sigma"].values)
    intercept = np.average(stacked["intercept"].values)

    likelihood = pm.Normal("likelihood", mu=intercept + sum(response_mean), sigma=sigma,
                           observed=data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])
- The earlier code in PyMC3:
with pm.Model() as base_model:
    response_mean = []  # collects each term of the regression mean

    for channel_name in delay_channels:
        print(f"Delay Channels: Adding {channel_name}")
        x = data_transformed[channel_name].values

        adstock_param = pm.Beta(f"{channel_name}_adstock", 3, 3)
        saturation_gamma = pm.Beta(f"{channel_name}_gamma", 2, 2)
        saturation_alpha = pm.Beta(f"{channel_name}_alpha", 3, 1)
        pidata = pm.sample(500, random_seed=43)

        x_new = adstock_geometric_aesara_pymc(x, adstock_param)
        x_new_sliced = x_new[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        saturation_tensor = saturation_hill_pymc(x_new_sliced, saturation_alpha, saturation_gamma)

        channel_b = pm.HalfNormal(f"{channel_name}_media_coef", sigma=3)
        response_mean.append(saturation_tensor * channel_b)

    for control_var in control_variables:
        print(f"Control Variables: Adding {control_var}")
        x = data_transformed[control_var].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]
        control_beta = pm.Normal(f"{control_var}_control_coef", sigma=3)
        control_x = control_beta * x
        response_mean.append(control_x)

    intercept = pm.Normal("intercept", np.mean(data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX]), sigma=3)
    sigma = pm.HalfNormal("sigma", 4)

    likelihood = pm.Normal("likelihood", mu=intercept + sum(response_mean), sigma=sigma,
                           observed=data_transformed[target].values[START_ANALYSIS_INDEX:END_ANALYSIS_INDEX])

    trace_p = pm.sample(1000)
I believe I am doing something wrong with the parameter definitions in the model step above, because after sampling,
with base_model:
    pm.sample_posterior_predictive(pidata, extend_inferencedata=True, random_seed=43)
az.plot_ppc(pidata, num_pp_samples=100);
returns the plot below, which doesn't look right.
The reason I am using np.average is to get a single value for each parameter in the cases where I need to pass it through the custom adstock and saturation functions, and to append the resulting terms to the response_mean list that is finally used in the likelihood.
I am not sure what I am doing wrong, as I am completely new to PyMC (4 weeks now), so I would be really glad if someone can help me with this!