Adstock function in Hierarchical Model

Hi everyone,

I am implementing a Marketing Mix Model using Bayesian regression accounting for non linear structure in the data like the saturation function and adstock/carryover effect.

In a nutshell, I want to estimate the parameters of the following regression:

sales = c + \sum \beta_{media} adstock(saturation(x_{media}; k, s); \alpha, \theta, L) + \beta_{control} \cdot control + noise

Where:

adstock(x, \alpha, \theta, L) = \frac{\sum_{l=0}^{L-1}w_{t-l}\cdot x_{t-l}}{\sum_{l=0}^{L-1}w_{t-l}}
w_{t-l} = \alpha^{(l-\theta)^2}

and:

saturation(x; k, s) = \frac{1}{1+(\frac{x}{k})^{-s}}

I have written the two functions as follows:

def geometric_adstock(x, theta, alpha,L=12):
    w = tt.as_tensor_variable([tt.power(alpha,tt.power(i-theta,2)) for i in range(L)])
    xx = tt.stack([tt.concatenate([tt.zeros(i), x[:x.shape[0] -i]]) for i in range(L)])
    return tt.dot(w/tt.sum(w), xx)
def hill(x, k: float = 0.5, s: float = 0.5):
  return (1 / (1 + tt.power((x / k), (-s))))

Finally I built the model to estimate the parameters passing the data for a single brand. The following one is a simplified version with only one media:

with pm.Model(check_bounds=False, coords=coords_canale) as adstock_saturation_model:
  
  data = pm.Data('data_TV', train_canale_scaled['TV'].values)
  constant = pm.Normal('constant', mu=0, sigma=1)
  b_control = pm.Normal('b_control', 1, 0.1)
  beta = pm.HalfNormal('beta_TV', 1)
  alpha = pm.Beta('alpha_TV', 1, 1)
  theta = pm.Uniform('theta_TV', 0, 12)
  s = pm.Beta('ks_{}'.format(col), 2, 2)
  k = pm.Gamma('ss_{}'.format(col), 3, 1)
  channel = pm.Deterministic('channel_TV', beta*hill(geometric_adstock(data, alpha=alpha, theta=theta, L=12), s=s, k=k))
  
  control = pm.Deterministic('control', b_control * np.log(train_canale_scaled['PREDICTION'].values))
  
  mu = constant + channel + control
  likelihood = pm.Normal('likelihood', mu=mu, sigma=0.01, observed=np.log(train_canale_scaled['TRUE'].values))

This model works perfectly fine. However, I wanted to improve the model performance by building a Hierarchical model where the parameter priors are learned from a dataset composed of several brands of the same category. For simplicity, assume that the parameter:

\alpha \sim Normal(hyper\_alpha, 0.1)

where “hyper_alpha” is learned from the dataset with all the brands. I set this model up:

canale_idxs, brand= pd.factorize(train_canale_scaled['BRAND'])
n_canale = len(canale)
total_size = len(train_canale_scaled)
coords_canale = {
    "brand": brand,
    "obs_id": np.arange(len(brand_idxs)),
}
with pm.Model(check_bounds=False, coords=coords_canale) as adstock_saturation_model:
  brand_idx = pm.Data("brand_idx", brand_idxs, dims="obs_id")
  
  data = pm.Data('data_TV', train_canale_scaled['TV'].values)
  constant = pm.Normal('constant', mu=0, sigma=1, dims='brand')
  b_control = pm.Normal('b_control', 1, 0.1, dims="brand")
  beta = pm.HalfNormal('beta_TV', 1, dims="brand")
  hyper_alpha = pm.Beta('alpha_TV', 1, 1)
  alpha = pm.Normal('alpha_TV', hyper_beta, 0.1, dims="brand")
  theta = pm.Uniform('theta_TV', 0, 12, dims="brand")
  s = pm.Beta('ks_{}'.format(col), 2, 2, dims="brand")
  k = pm.Gamma('ss_{}'.format(col), 3, 1, dims="brand")
  channel = pm.Deterministic('channel_TV', beta*hill(geometric_adstock(data, alpha=alpha[brand_idx], theta=theta[brand_idx], L=12), s=s[brand_idx], k=k[brand_idx]))
  
  control = pm.Deterministic('control', b_control[brand_idx] * np.log(train_canale_scaled['PREDICTION'].values))

But I get the error:

ValueError: shapes (12,909) and (12,909) not aligned: 909 (dim 1) != 12 (dim 0)

ValueError                                Traceback (most recent call last)
<command-1859173143859096> in <module>
     27     k[col] = pm.Beta('ks_{}'.format(col), 2, 2)
     28     s[col] = pm.Gamma('ss_{}'.format(col), 3, 1)
---> 29     channel[col] = pm.Deterministic('channel_{}'.format(col), betas[col][canale_idx]*hill(geometric_adstock(data[col], alpha=alpha[col][canale_idx], theta=theta[col][canale_idx]), k=k[col], s=s[col]))
     30 
     31   control = pm.Deterministic('control', b_control[canale_idx] * np.log(train_canale_scaled['PREDICTION'].values))

<command-1074351251838473> in geometric_adstock(x, theta, alpha, L)
     30     w = tt.as_tensor_variable([tt.power(alpha,tt.power(i-theta,2)) for i in range(L)])
     31     xx = tt.stack([tt.concatenate([tt.zeros(i), x[:x.shape[0] -i]]) for i in range(L)])
---> 32     return tt.dot(w/tt.sum(w), xx)
     33 
     34 def hill(x, k: float = 0.5, s: float = 0.5):

/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in dot(l, r)
   6166 
   6167     try:
-> 6168         res = l.__dot__(r)
   6169         if res is NotImplemented:
   6170             raise NotImplementedError

/databricks/python/lib/python3.8/site-packages/theano/tensor/var.py in __dot__(left, right)
    661 
    662     def __dot__(left, right):
--> 663         return theano.tensor.basic.dense_dot(left, right)
    664 
    665     def __rdot__(right, left):

/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in dense_dot(a, b)
   6221         return tensordot(a, b, [[a.ndim - 1], [np.maximum(0, b.ndim - 2)]])
   6222     else:
-> 6223         return _dot(a, b)
   6224 
   6225 

/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in __call__(self, *inputs, **kwargs)
    251 
    252         if config.compute_test_value != "off":
--> 253             compute_test_value(node)
    254 
    255         if self.default_output is not None:

/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in compute_test_value(node)
    128     thunk.outputs = [storage_map[v] for v in node.outputs]
    129 
--> 130     required = thunk()
    131     assert not required  # We provided all inputs
    132 

/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in rval(p, i, o, n)
    474             # default arguments are stored in the closure of `rval`
    475             def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 476                 r = p(n, [x[0] for x in i], o)
    477                 for o in node.outputs:
    478                     compute_map[o][0] = True

/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in perform(self, node, inp, out)
   6060         # gives a numpy float object but we need to return a 0d
   6061         # ndarray
-> 6062         z[0] = np.asarray(np.dot(x, y))
   6063 
   6064     def grad(self, inp, grads):

/databricks/python/lib/python3.8/site-packages/numpy/core/overrides.py in dot(*args, **kwargs)

ValueError: shapes (12,909) and (12,909) not aligned: 909 (dim 1) != 12 (dim 0)

that is linked to the geometric_adstock function. Probably, it does not support multiple dimension even though I do not clearly understand how the model works under the hood when the setting is Hierarchical.

The problem seems to poi to the:

return tt.dot(w, x_lags)

of the geometric_adstock function.

My question is:is there a way to write the adstock function that can be used within this hierarchical setting? Should I change the way I am modeling everithing?

Thank you in advance.