Hi everyone,
I am implementing a Marketing Mix Model using Bayesian regression accounting for non linear structure in the data like the saturation function and adstock/carryover effect.
In a nutshell, I want to estimate the parameters of the following regression:
Where:
and:
I have written the two functions as follows:
def geometric_adstock(x, theta, alpha,L=12):
w = tt.as_tensor_variable([tt.power(alpha,tt.power(i-theta,2)) for i in range(L)])
xx = tt.stack([tt.concatenate([tt.zeros(i), x[:x.shape[0] -i]]) for i in range(L)])
return tt.dot(w/tt.sum(w), xx)
def hill(x, k: float = 0.5, s: float = 0.5):
return (1 / (1 + tt.power((x / k), (-s))))
Finally I built the model to estimate the parameters passing the data for a single brand. The following one is a simplified version with only one media:
with pm.Model(check_bounds=False, coords=coords_canale) as adstock_saturation_model:
data = pm.Data('data_TV', train_canale_scaled['TV'].values)
constant = pm.Normal('constant', mu=0, sigma=1)
b_control = pm.Normal('b_control', 1, 0.1)
beta = pm.HalfNormal('beta_TV', 1)
alpha = pm.Beta('alpha_TV', 1, 1)
theta = pm.Uniform('theta_TV', 0, 12)
s = pm.Beta('ks_{}'.format(col), 2, 2)
k = pm.Gamma('ss_{}'.format(col), 3, 1)
channel = pm.Deterministic('channel_TV', beta*hill(geometric_adstock(data, alpha=alpha, theta=theta, L=12), s=s, k=k))
control = pm.Deterministic('control', b_control * np.log(train_canale_scaled['PREDICTION'].values))
mu = constant + channel + control
likelihood = pm.Normal('likelihood', mu=mu, sigma=0.01, observed=np.log(train_canale_scaled['TRUE'].values))
This model works perfectly fine. However, I wanted to improve the model performance by building a Hierarchical model where the parameter priors are learned from a dataset composed of several brands of the same category. For simplicity, assume that the parameter:
where “hyper_alpha” is learned from the dataset with all the brands. I set this model up:
canale_idxs, brand= pd.factorize(train_canale_scaled['BRAND'])
n_canale = len(canale)
total_size = len(train_canale_scaled)
coords_canale = {
"brand": brand,
"obs_id": np.arange(len(brand_idxs)),
}
with pm.Model(check_bounds=False, coords=coords_canale) as adstock_saturation_model:
brand_idx = pm.Data("brand_idx", brand_idxs, dims="obs_id")
data = pm.Data('data_TV', train_canale_scaled['TV'].values)
constant = pm.Normal('constant', mu=0, sigma=1, dims='brand')
b_control = pm.Normal('b_control', 1, 0.1, dims="brand")
beta = pm.HalfNormal('beta_TV', 1, dims="brand")
hyper_alpha = pm.Beta('alpha_TV', 1, 1)
alpha = pm.Normal('alpha_TV', hyper_beta, 0.1, dims="brand")
theta = pm.Uniform('theta_TV', 0, 12, dims="brand")
s = pm.Beta('ks_{}'.format(col), 2, 2, dims="brand")
k = pm.Gamma('ss_{}'.format(col), 3, 1, dims="brand")
channel = pm.Deterministic('channel_TV', beta*hill(geometric_adstock(data, alpha=alpha[brand_idx], theta=theta[brand_idx], L=12), s=s[brand_idx], k=k[brand_idx]))
control = pm.Deterministic('control', b_control[brand_idx] * np.log(train_canale_scaled['PREDICTION'].values))
But I get the error:
ValueError: shapes (12,909) and (12,909) not aligned: 909 (dim 1) != 12 (dim 0)
ValueError Traceback (most recent call last)
<command-1859173143859096> in <module>
27 k[col] = pm.Beta('ks_{}'.format(col), 2, 2)
28 s[col] = pm.Gamma('ss_{}'.format(col), 3, 1)
---> 29 channel[col] = pm.Deterministic('channel_{}'.format(col), betas[col][canale_idx]*hill(geometric_adstock(data[col], alpha=alpha[col][canale_idx], theta=theta[col][canale_idx]), k=k[col], s=s[col]))
30
31 control = pm.Deterministic('control', b_control[canale_idx] * np.log(train_canale_scaled['PREDICTION'].values))
<command-1074351251838473> in geometric_adstock(x, theta, alpha, L)
30 w = tt.as_tensor_variable([tt.power(alpha,tt.power(i-theta,2)) for i in range(L)])
31 xx = tt.stack([tt.concatenate([tt.zeros(i), x[:x.shape[0] -i]]) for i in range(L)])
---> 32 return tt.dot(w/tt.sum(w), xx)
33
34 def hill(x, k: float = 0.5, s: float = 0.5):
/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in dot(l, r)
6166
6167 try:
-> 6168 res = l.__dot__(r)
6169 if res is NotImplemented:
6170 raise NotImplementedError
/databricks/python/lib/python3.8/site-packages/theano/tensor/var.py in __dot__(left, right)
661
662 def __dot__(left, right):
--> 663 return theano.tensor.basic.dense_dot(left, right)
664
665 def __rdot__(right, left):
/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in dense_dot(a, b)
6221 return tensordot(a, b, [[a.ndim - 1], [np.maximum(0, b.ndim - 2)]])
6222 else:
-> 6223 return _dot(a, b)
6224
6225
/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in __call__(self, *inputs, **kwargs)
251
252 if config.compute_test_value != "off":
--> 253 compute_test_value(node)
254
255 if self.default_output is not None:
/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in compute_test_value(node)
128 thunk.outputs = [storage_map[v] for v in node.outputs]
129
--> 130 required = thunk()
131 assert not required # We provided all inputs
132
/databricks/python/lib/python3.8/site-packages/theano/graph/op.py in rval(p, i, o, n)
474 # default arguments are stored in the closure of `rval`
475 def rval(p=p, i=node_input_storage, o=node_output_storage, n=node):
--> 476 r = p(n, [x[0] for x in i], o)
477 for o in node.outputs:
478 compute_map[o][0] = True
/databricks/python/lib/python3.8/site-packages/theano/tensor/basic.py in perform(self, node, inp, out)
6060 # gives a numpy float object but we need to return a 0d
6061 # ndarray
-> 6062 z[0] = np.asarray(np.dot(x, y))
6063
6064 def grad(self, inp, grads):
/databricks/python/lib/python3.8/site-packages/numpy/core/overrides.py in dot(*args, **kwargs)
ValueError: shapes (12,909) and (12,909) not aligned: 909 (dim 1) != 12 (dim 0)
that is linked to the geometric_adstock
function. Probably, it does not support multiple dimension even though I do not clearly understand how the model works under the hood when the setting is Hierarchical.
The problem seems to poi to the:
return tt.dot(w, x_lags)
of the geometric_adstock
function.
My question is:is there a way to write the adstock function that can be used within this hierarchical setting? Should I change the way I am modeling everithing?
Thank you in advance.