How to use transform multiple pandas dataframe columns using random variables

I am trying to implement a Bayesian media mixture model based on this paper. Thanks to this post Optimizing a parameter for an outside function, I was able to implement a toy example, but I got stuck when I try to generalize the result to multiple variables.

Here is my code:

import pandas as pd 
import numpy as np
import theano 
import theano.tensor as tt
import pymc3 as pm

from theano.compile.ops import as_op
from statsmodels.tsa.filters.filtertools import recursive_filter
import pandas_flavor as pf

@as_op(itypes=[tt.dvector,tt.dscalar], otypes=[tt.dvector])
def adstock(x,rate=0.09):
    return (recursive_filter(x,rate))

@as_op(itypes=[tt.dvector, tt.dscalar, tt.dscalar], otypes=[tt.dvector])
def hill(x, kappa, s):
    x = x / np.max(x)
    return 1 - 1 / (1 + (x / kappa) ** s)

@pf.register_series_method
def np_adstock(x,rate=0.09):
    return (recursive_filter(x,rate))

@pf.register_series_method
def np_hill(x, kappa, s):
    x = x / np.max(x)
    return 1 - 1 / (1 + (x / kappa) ** s)

data = np.array([[      0. ,       0. , 1652952. ],
       [3355871.4,  183899. , 1942495. ],
       [      0. ,  162984. , 1579585. ],
       [      0. ,  143584. , 1386392. ],
       [      0. ,  137564. , 1333346. ],
       [  75160.2,  156301. , 1526148. ],
       [      0. ,  166691. , 1616383. ],
       [3111239.2,  161410. , 1695017. ],
       [      0. ,  124715. , 1210506. ],
       [4120092.2,  135098. , 1423179. ],
       [      0. ,  117674. , 1139066. ],
       [4142374.6,  144046. , 1523823. ],
       [      0. ,       0. , 1451893. ],
       [4093398.3,       0. , 1495192. ],
       [      0. ,  108441. , 1054854. ],
       [ 686319.8,  101583. , 1021071. ],
       [1232425.2,  103806. , 1061404. ],
       [      0. ,  114343. , 1109392. ],
       [      0. ,   96245. ,  931935. ],
       [ 111736.9,   87099. ,  852896. ],
       [      0. ,   82226. ,  795643. ],
       [3083622.4,   74655. ,  779460. ],
       [      0. ,   74020. ,  713904. ],
       [      0. ,   79599. ,  769926. ],
       [      0. ,   75796. ,  729324. ],
       [      0. ,       0. ,  786024. ],
       [      0. ,       0. ,  611066. ],
       [      0. ,   70239. ,  663881. ],
       [      0. ,   68333. ,  652547. ],
       [2257739.9,   83251. ,  865098. ],
       [      0. ,   81644. ,  783180. ],
       [      0. ,   72919. ,  700838. ],
       [      0. ,   78168. ,  753723. ],
       [      0. ,   87157. ,  843364. ],
       [2854421.7,  104226. , 1092602. ],
       [      0. ,  104729. , 1006488. ],
       [2704242.4,   71379. ,  742475. ],
       [      0. ,   85159. ,  822263. ],
       [1158996.3,       0. , 1147618. ],
       [      0. ,       0. , 1186726. ],
       [2552291. ,  142928. , 1496283. ],
       [1087887.5,  125834. , 1293971. ],
       [ 842411.2,  165917. , 1681426. ],
       [      0. ,  152234. , 1467794. ],
       [6698581.4,  154944. , 1638917. ],
       [      0. ,  173961. , 1685441. ],
       [3631470.6,  181839. , 1924961. ],
       [      0. ,  176321. , 1703518. ],
       [3266184.5,  176166. , 1852866. ],
       [ 391631.4,  173609. , 1731400. ]])

df = pd.DataFrame(data, columns=['tv_spend','search_clicks','brand_sales'])

with pm.Model() as model:
    tv_spend = pm.Data('tv_spend', df.tv_spend.values)
    search_clicks = pm.Data('search_clicks', df.search_clicks.values)
    sales = pm.Data('sales', df.brand_sales.values)
    
    intercept = pm.Normal('intercept', mu=1000, sd=100)
    bA1 = pm.Normal('bA1', mu=0, sd=5)
    bA2 = pm.Normal('bA2', mu=0, sd=5)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    adstock1 = pm.Uniform('adstock1', lower=0.5, upper=0.99)
    adstock2 = pm.Uniform('adstock2', lower=0.5, upper=0.99)
    kappa1 = pm.Uniform('kappa1', lower=0.05, upper=1)
    kappa2 = pm.Uniform('kappa2', lower=0.05, upper=1)
    s1 = pm.Uniform('s1', lower=0.05, upper=1)
    s2 = pm.Uniform('s2', lower=0.05, upper=1)
    mu = intercept + bA1*(adstock(hill(tv_spend, kappa1, s1), adstock1)) + bA2*(adstock(hill(search_clicks, kappa2, s2), adstock2))
    
    sales_hat = pm.Normal('sales_hat', mu=mu, sd=sigma, observed=sales)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace=pm.sample(5000,step=step, start=start)

If I want to generalize this, I probably want to declare

bA = pm.Normal('bA', mu=0, sd=5, shape=2)
adstock = pm.Uniform('adstock', lower=0.5, upper=0.99, shape =2)

etc. But I am not sure how to implement the rest using the theano.scan. So, I write this in pandas instead, just to give it a try:

with pm.Model() as model:
    sales = pm.Data('sales', df.brand_sales.values)
    
    intercept = pm.Normal('intercept', mu=1000, sd=100)
    bA1 = pm.Normal('bA1', mu=0, sd=5)
    bA2 = pm.Normal('bA2', mu=0, sd=5)
    sigma = pm.Uniform('sigma', lower=0, upper=50)
    s = [pm.Uniform(f's{i}', lower=0.5, upper=1) for i in range(2)]
    kappa = [pm.Uniform(f'kappa{i}', lower=0.5, upper=1) for i in range(2)]
    adstock = [pm.Uniform(f'adstock{i}', lower=0.5, upper=1) for i in range(2)]
    for i, col in enumerate(['tv_spend', 'search_clicks']):
        df[col+'_transform']=df[col].np_hill(kappa[i],s[i]).np_adstock(adstock[i])
    mu = intercept + bA1*df['tv_spend_transform'] + bA2*df['search_clicks_transform']
    sales_hat=pm.Normal('sales_hat', mu=mu, sd=sigma, observed=sales)
    start = pm.find_MAP()
    step = pm.Metropolis()
    trace = pm.sample(5000, step=step, start=start)

but this get me the value length not know error:


ValueError Traceback (most recent call last)
in
10 adstock = [pm.Uniform(f’adstock{i}’, lower=0.5, upper=1) for i in range(2)]
11 for i, col in enumerate([‘tv_spend’, ‘search_clicks’]):
—> 12 df[col+’_transform’]=df[col].np_hill(kappa[i],s[i]).np_adstock(adstock[i])
13 mu = intercept + bA1df[‘tv_spend_transform’] + bA2df[‘search_clicks_transform’]
14 sales_hat=pm.Normal(‘sales_hat’, mu=mu, sd=sigma, observed=sales)

~/anaconda3/lib/python3.7/site-packages/pandas_flavor/register.py in call(self, *args, **kwargs)
49 @wraps(method)
50 def call(self, *args, **kwargs):
—> 51 return method(self._obj, *args, **kwargs)
52
53 register_series_accessor(method.name)(AccessorMethod)

in np_hill(x, kappa, s)
35 def np_hill(x, kappa, s):
36 x = x / np.max(x)
—> 37 return 1 - 1 / (1 + (x / kappa) ** s)
38
39 @pf.register_dataframe_method

~/anaconda3/lib/python3.7/site-packages/pandas/core/ops.py in wrapper(left, right)
1583 result = safe_na_op(lvalues, rvalues)
1584 return construct_result(left, result,
-> 1585 index=left.index, name=res_name, dtype=None)
1586
1587 wrapper.name = op_name

~/anaconda3/lib/python3.7/site-packages/pandas/core/ops.py in _construct_result(left, result, index, name, dtype)
1472 not be enough; we still need to override the name attribute.
1473 “”"
-> 1474 out = left._constructor(result, index=index, dtype=dtype)
1475
1476 out.name = name

~/anaconda3/lib/python3.7/site-packages/pandas/core/series.py in init(self, data, index, dtype, name, copy, fastpath)
227 elif (isinstance(data, compat.Iterable)
228 and not isinstance(data, compat.Sized)):
–> 229 data = list(data)
230 else:
231

~/anaconda3/lib/python3.7/site-packages/theano/tensor/var.py in iter(self)
626 def iter(self):
627 try:
–> 628 for i in xrange(theano.tensor.basic.get_vector_length(self)):
629 yield self[i]
630 except TypeError:

~/anaconda3/lib/python3.7/site-packages/theano/tensor/basic.py in get_vector_length(v)
4826 else:
4827 msg = str(v)
-> 4828 raise ValueError(“length not known: %s” % msg)
4829
4830

ValueError: length not known: Elemwise{true_div,no_inplace} [id A] ‘’
|TensorConstant{[0. …05846483]} [id B]
|InplaceDimShuffle{x} [id C] ‘’
|ViewOp [id D] ‘kappa0’
|Elemwise{add,no_inplace} [id E] ‘’
|Elemwise{mul,no_inplace} [id F] ‘’
| |Elemwise{sub,no_inplace} [id G] ‘’
| | |TensorConstant{1.0} [id H]
| | |TensorConstant{0.5} [id I]
| |sigmoid [id J] ‘’
| |kappa0_interval__ [id K]
|TensorConstant{0.5} [id I]

Ignore my parameter setting for now, I want to know how to generalize the above calculation either in a for loop or using the theano.scan.

Thanks in advance for your help!