updating code script as there are some bugs when I was anonymizing it.
import pymc as pm
import aesara
import aesara.tensor as at
import pandas as pd
import numpy as np
#generate some random data
num_channels= 10
num_cont_vars= 4
num_weeks= 24
num_regions= 5
target= 'Sales'
dict_1= {
'Region': np.repeat(np.arange(num_regions), num_weeks ),
'Week': np.repeat(np.arange(num_weeks), num_regions),
'Sales': np.random.randint(low= 10, high= 100, size= num_weeks*num_regions),
}
dict_2= {f'Channel_{col}': np.random.randint(low=1, high=20, size=(num_weeks*num_regions)) for col in range(num_channels)}
dict_3= {f'cont_var{cont}': np.random.random(size= (num_weeks * num_regions)) for cont in range(num_cont_vars)}
dict_1.update(dict_2)
dict_1.update(dict_3)
df= pd.DataFrame(dict_1)
#defining adstock and saturation functions
def geometric_adstock(x, alpha: float, l: int):
"""Geometric adstock transformation."""
cycles = [
at.concatenate(
[at.zeros(i), x[: x.shape[0] - i]]
)
for i in range(l)
]
x_cycle = at.stack(cycles)
w = at.as_tensor_variable([at.power(alpha, i) for i in range(l)])
return at.dot(w, x_cycle)
def logistic_saturation(x, lam: float):
"""Logistic saturation transformation."""
return (1 - at.exp(-lam * x)) / (1 + at.exp(-lam * x))
# Building the model:
coords= {'Region' : list(df['Region'].unique())}
with pm.Model(coords= coords) as model:
#defining the hyperpriors
adstock_a= pm.Exponential('adstock_a', lam=10.0)
adstock_b= pm.Exponential('adstock_b', lam=10.0)
sat_lam_lam= pm.Exponential('sat_lam', lam= 10.0)
channel_beta_lam= pm.Exponential('coef_lam', lam=10.0)
cont_beta_mu= pm.Normal('cont_lam', mu=0, sigma=10.0)
cont_beta_sigma= pm.HalfCauchy('cont_sig', beta= 10.0)
#then I loop over the marketing channels and calculate their priors and add them to the response list
response_lst= []
for channel in [channel for channel in df.columns if channel.startswith('Channel')]:
x= df[channel].values
adstock_param= pm.Beta(f'{channel}_adstock', alpha= adstock_a, beta=
adstock_b, dims= 'Region' )
sat_lam= pm.Exponential(f'{channel}_sat',
lam= sat_lam_lam,
dims= 'Region')
channel_b = pm.Exponential(f"{channel}_coef",
lam=channel_beta_lam,
dims= 'Region')
x_new = geometric_adstock(x, adstock_param, 6)
saturation_tensor = logistic_saturation(x_new, sat_lam )
channel_contribution= pm.Deterministic(
f'contribution_{channel}',
channel_b * saturation_tensor)
response_lst.append(channel_contribution)
for control_var in [var for var in df.columns if var.startswith('cont_var')]:
x = df[control_var].values
control_beta = pm.Normal(f"{control_var}_control_coef",
mu=cont_beta_mu,
sigma=cont_beta_sigma,
dims= 'Region')
control_x = control_beta * x
response_lst.append(control_x)\
intercept= pm.Exponential('intercept', lam= 0.1)
sigma = pm.Exponential('sigma', lam=0.01)
likelihood = pm.Normal("outcome", mu = sum(response_lst) + intercept , sigma = sigma,
observed = df[target].values,
dims= 'Region'
)
trace= pm.sample(1000, tune= 500)