Hello!
I am a newbie to Bayesian modeling and PyMC. I recently switched from cmdstanr to PyMC and have been attempting to fit some expensive models. I want to develop an MMM with geometric adstock of my data, which is available at the DMA-day level (for non-marketers, this is like state-day data). I want to apply hierarchical modeling to my betas across DMAs for each channel, but fit a single global theta for the geometric adstock transformation, i.e. $x^{\text{adstock}}_t = \sum_{l=0}^{L} \theta^l x_{t-l}$. ChatGPT has suggested I write a loop within my PyMC model that looks something like this:
# Adstock transformation, written with PyTensor ops so it works on a
# symbolic theta inside a PyMC model (ChatGPT's original used torch tensors,
# which can't consume a PyMC random variable):
import numpy as np
import pymc as pm
import pytensor.tensor as pt

def adstock_transform(spend, theta, max_lag):
    """Geometric adstock for one DMA's (time, channel) spend array.

    spend is a numpy array ordered by date; theta is a (possibly symbolic)
    scalar in [0, 1]. Returns a symbolic (time, channel) tensor:
    adstock[t] = sum_{l=0..max_lag} theta**l * spend[t - l].
    """
    # Scale each channel by its max so betas are comparable
    # (guard against division by zero for channels with no spend)
    max_spend = spend.max(axis=0)
    spend = spend / np.where(max_spend > 0, max_spend, 1.0)
    spend = pt.as_tensor_variable(spend)
    adstocked = pt.zeros_like(spend)
    for lag in range(max_lag + 1):
        # Create a version of the spend tensor shifted forward in time by `lag`
        shifted = pt.roll(spend, shift=lag, axis=0)
        if lag > 0:
            # Zero out values that wrapped around the start of the series;
            # calling this once per DMA also prevents spill-over across DMAs
            shifted = pt.set_subtensor(shifted[:lag], 0.0)
        # Add the decayed, shifted spend for this lag
        adstocked = adstocked + theta ** lag * shifted
    return adstocked
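To sanity-check the shapes, I've been evaluating it with a fixed theta on a toy array (the names and sizes here are just illustrative):

# Toy check: 100 days x 3 channels for one DMA, fixed theta
toy_spend = np.random.rand(100, 3)
toy_adstock = adstock_transform(toy_spend, theta=0.5, max_lag=8)
print(toy_adstock.eval().shape)  # -> (100, 3)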
# Retrieve unique DMA codes
unique_dmas = df['dma_code'].unique()
dma_to_index = {dma: idx for idx, dma in enumerate(unique_dmas)}
# Hierarchical model, looping over DMAs (geographies)
with pm.Model() as dma_hierarchical_adstock_model:
    # Global model parameters
    intercept = pm.Normal('intercept', mu=0, sigma=1)
    sigma = pm.HalfNormal('sigma', sigma=10)
    theta = pm.Beta('theta', alpha=2, beta=2)  # Single global theta
    # Hierarchical betas for each DMA
    # Hyperpriors for the betas. Note that HalfNormal has no mean parameter,
    # so only the scale is pooled for the constrained betas.
    sigma_beta_con = pm.HalfNormal('sigma_beta_con', sigma=1, shape=nX_direct)
    mu_beta_uncon = pm.Normal('mu_beta_uncon', mu=0, sigma=1, shape=nX_unconstrained)
    sigma_beta_uncon = pm.HalfNormal('sigma_beta_uncon', sigma=1, shape=nX_unconstrained)
    # DMA-specific betas
    beta_con = pm.HalfNormal('beta_con', sigma=sigma_beta_con,
                             shape=(len(unique_dmas), nX_direct))
    beta_uncon = pm.Normal('beta_uncon', mu=mu_beta_uncon, sigma=sigma_beta_uncon,
                           shape=(len(unique_dmas), nX_unconstrained))
    # Process data for each DMA (assumes rows are sorted by date within a DMA)
    for dma in unique_dmas:
        dma_data = df[df['dma_code'] == dma]
        y = dma_data['Y'].values
        x_con = dma_data[rp_columns].values  # media spend columns to be adstocked
        x_uncon = dma_data[['feelslike', 'precip', 'windspeed', 'light_hours',
                            'areasqmi', 'neighbor_shock']].values
        # Apply the adstock transform to this DMA's spend
        adstocked_spend = adstock_transform(x_con, theta, max_lag=max_lag)
        # DMA index for selecting this DMA's betas
        dma_idx = dma_to_index[dma]
        # Expected sales: dot products over the predictor matrices
        expected_sales = (intercept
                          + pm.math.dot(adstocked_spend, beta_con[dma_idx])
                          + pm.math.dot(x_uncon, beta_uncon[dma_idx]))
        # Likelihood (one observed RV per DMA)
        sales_obs = pm.Normal(f'sales_obs_{dma}', mu=expected_sales,
                              sigma=sigma, observed=y)
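For completeness, this is how I've been sampling it (default NUTS; these settings are just what I've been using, nothing principled):

with dma_hierarchical_adstock_model:
    idata = pm.sample(draws=1000, tune=1000, target_accept=0.9, random_seed=42)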
Is this appropriate to do, or is this not actually estimating a hierarchical model? ChatGPT loves to create an easy way out that doesn't work, so I thought I'd ask the experts!
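In case it helps clarify what I'm after, I also sketched a vectorized version that stacks the data into a (dma, time, channel) panel so the likelihood is a single observed variable. This assumes a balanced panel (every DMA has the same number of days T, sorted by date), a 'date' column, and uses uncon_columns as my shorthand for the unconstrained predictor names above; channel scaling is omitted for brevity. Would something like this be preferable to the loop?

# Vectorized sketch (my assumptions: balanced panel, df sorted by dma_code
# then date, a 'date' column exists, uncon_columns = the unconstrained
# predictor names used above)
n_dma, T = len(unique_dmas), df['date'].nunique()
X_con = df[rp_columns].values.reshape(n_dma, T, len(rp_columns))
X_uncon = df[uncon_columns].values.reshape(n_dma, T, len(uncon_columns))
Y = df['Y'].values.reshape(n_dma, T)

with pm.Model() as vectorized_model:
    intercept = pm.Normal('intercept', mu=0, sigma=1)
    sigma = pm.HalfNormal('sigma', sigma=10)
    theta = pm.Beta('theta', alpha=2, beta=2)
    sigma_beta_con = pm.HalfNormal('sigma_beta_con', sigma=1, shape=nX_direct)
    beta_con = pm.HalfNormal('beta_con', sigma=sigma_beta_con,
                             shape=(n_dma, nX_direct))
    mu_beta_uncon = pm.Normal('mu_beta_uncon', mu=0, sigma=1, shape=nX_unconstrained)
    sigma_beta_uncon = pm.HalfNormal('sigma_beta_uncon', sigma=1, shape=nX_unconstrained)
    beta_uncon = pm.Normal('beta_uncon', mu=mu_beta_uncon, sigma=sigma_beta_uncon,
                           shape=(n_dma, nX_unconstrained))
    # Geometric adstock on the whole panel at once: roll along the time axis;
    # zeroing the first `lag` days per DMA prevents cross-DMA spill-over
    spend = pt.as_tensor_variable(X_con)
    adstocked = pt.zeros_like(spend)
    for lag in range(max_lag + 1):
        shifted = pt.roll(spend, shift=lag, axis=1)
        if lag > 0:
            shifted = pt.set_subtensor(shifted[:, :lag], 0.0)
        adstocked = adstocked + theta ** lag * shifted
    # (n_dma, T, ch) * (n_dma, 1, ch) summed over channels -> (n_dma, T)
    mu = (intercept
          + (adstocked * beta_con[:, None, :]).sum(axis=-1)
          + (pt.as_tensor_variable(X_uncon) * beta_uncon[:, None, :]).sum(axis=-1))
    pm.Normal('sales_obs', mu=mu, sigma=sigma, observed=Y)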
edit: I had incorrectly been estimating the effect of one beta instead of a vector of betas.