Optimizing a parameter for an outside function

This question was migrated from GitHub.

I am trying to fit a simple linear regression as a proof of concept for a larger problem, in which I transform the X variable with an adstock transformation.
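(For context: the adstock transform is a geometric carry-over of past advertising, y[t] = x[t] + rate * y[t-1], which is what recursive_filter computes. A tiny illustration with made-up numbers:)

import numpy as np
from statsmodels.tsa.filters.filtertools import recursive_filter

x = np.array([100.0, 0.0, 0.0, 50.0])
print(recursive_filter(x, 0.5))   # [100.  50.  25.  62.5]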

My question is: how do I optimize a parameter that is passed to an external function during fitting, and is there a way for that fitted value to then be taken into account (i.e. used) when predicting new data with sample_posterior_predictive()?

Here is a self-contained program, with notes on what works and where it fails.

from statsmodels.tsa.filters.filtertools import recursive_filter
import pymc3 as pm
import numpy as np
import pandas as pd

# Rescale x linearly to the range 0-100
def normalize(x, min_=0, max_=100):
    min_x = np.min(x)
    max_x = np.max(x)
    z = (max_ - min_) / (max_x - min_x) * (x - max_x) + max_
    return z

# Adstock: geometric carry-over of advertising, y[t] = x[t] + rate * y[t-1]
def adstock(x, rate=0.09):
    return recursive_filter(x, rate)
sales=np.array([1018,0,236,490,1760,443,1670,526,4522,2524,400,2527,4602,168,2795,7195,6277,2974,
5268,4310,2127,1081,4794,806,2565,1223,4141,2994,4079,1883,635,1980,1275,4497,1579,2726,
1901,4683,1686,1745,1404,1096,2825,2331,1711,2041,1210,914,4162,1166,4228,914])

advert=np.array([0,0,0,0,0,0,0,0,0,117.91,120.11,125.83,115.35,177.09,141.65,137.89,0,0,0,0,0,0,0,0,0,
0,158.51,109.39,91.08,79.25,102.71,78.49,135.11,114.55,87.34,107.83,125.02,82.96,60.81,83.15,0,0,
0,0,0,0,129.51,105.49,111.49,107.1,0,0])

pd_sales=pd.DataFrame(np.column_stack([sales,advert]), columns=['sales','advert'])

#HERE THIS WORKS WHEN HARD-CODING THE PARAMETER##########################

with pm.Model() as mod_as:
    a = pm.Normal('a', mu=1000, sd=500)
    bA = pm.Normal('bA', mu=0, sd=2)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
        
    mu = a + bA * normalize(adstock(pd_sales.advert.values,0.09))
    sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
    trace_5_3_S = pm.sample(1000, tune=1000,cores=4)



#HERE THIS FAILS##########################

with pm.Model() as mod_as:
    a = pm.Normal('a', mu=1000, sd=500)
    bA = pm.Normal('bA', mu=0, sd=2)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    ad_rate = pm.Uniform('ad_rate', lower=0, upper=1)
    
    mu = a + bA * normalize(adstock(pd_sales.advert.values,ad_rate))
    sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
    trace_5_3_S = pm.sample(1000, tune=1000,cores=4)

It was mentioned that I need to use the Theano decorator as_op, presumably because recursive_filter is a plain NumPy/statsmodels function and cannot handle the symbolic ad_rate. If I set the function up like this,

from theano.compile.ops import as_op
import theano.tensor as tt

@as_op(itypes=[tt.dvector,tt.lscalar], otypes=[tt.dvector])
def adstock(x,rate=0.09):
    return (recursive_filter(x,rate))

I get another error. Can anyone give me a hint on how to set this up properly?

 ---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-85-d00211698585> in <module>()
      5     ad_rate = pm.Uniform('ad_rate', lower=0, upper=1)
      6 
----> 7     mu = a + bA * normalize(adstock(pd_sales.advert.values,ad_rate))
      8     sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
      9 

~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
    613         """
    614         return_list = kwargs.pop('return_list', False)
--> 615         node = self.make_node(*inputs, **kwargs)
    616 
    617         if config.compute_test_value != 'off':

~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in make_node(self, *inputs)
    981             raise ValueError("We expected %d inputs but got %d." %
    982                              (len(self.itypes), len(inputs)))
--> 983         if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
    984             raise TypeError(
    985                 "We expected inputs of types '%s' but got types '%s' " %

~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in <genexpr>(.0)
    981             raise ValueError("We expected %d inputs but got %d." %
    982                              (len(self.itypes), len(inputs)))
--> 983         if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
    984             raise TypeError(
    985                 "We expected inputs of types '%s' but got types '%s' " %

AttributeError: 'numpy.ndarray' object has no attribute 'type'

Looks like you're on the right track, but you might also need to convert pd_sales.advert.values to a Theano variable (e.g. a shared variable or constant). Also, you'll either need to make normalize a Theano Op as well, or convert those explicit NumPy operations into their Theano equivalents.

In general, if you use NumPy/Theano array/tensor methods, rather than functions/operators, you can get away with a lot more, since their APIs are designed to match (somewhat).
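For example, a rough sketch of normalize written directly with Theano tensor operations (normalize_tt is just an illustrative name); because it stays symbolic, it needs no as_op wrapper:

import theano
import theano.tensor as tt

def normalize_tt(x, min_=0.0, max_=100.0):
    # same rescaling as the NumPy normalize, but with symbolic tensor ops
    min_x = tt.min(x)
    max_x = tt.max(x)
    return (max_ - min_) / (max_x - min_x) * (x - max_x) + max_

advert_t = theano.shared(pd_sales.advert.values)   # Theano variable holding the data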


@brandonwillard

Thanks! I think this worked.

I changed both functions:

from theano.compile.ops import as_op
import theano.tensor as tt

# rate is the sampled ad_rate, a continuous Theano scalar, so dscalar (not lscalar)
@as_op(itypes=[tt.dvector, tt.dscalar], otypes=[tt.dvector])
def adstock(x, rate=0.09):
    return recursive_filter(x, rate)

@as_op(itypes=[tt.dvector], otypes=[tt.dvector])
def normalize(x):
    min_ = 0
    max_ = 100
    min_x = np.min(x)
    max_x = np.max(x)
    z = (max_ - min_) / (max_x - min_x) * (x - max_x) + max_
    return z

and created the data as shared variables:

from theano import shared

sales = shared(pd_sales.sales.values)
advert = shared(pd_sales.advert.values)


with pm.Model() as mod_as:
    a = pm.Normal('a', mu=1000, sd=500)
    bA = pm.Normal('bA', mu=0, sd=2)
    sigma = pm.Uniform('sigma', lower=0, upper=10)
    ad_rate = pm.Uniform('ad_rate', lower=0, upper=1)
    
    mu = a + bA * normalize(adstock(advert,ad_rate))
    sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=sales)

    trace_5_3_S = pm.sample(1000, tune=1000)
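With the data in shared variables, I think the prediction part of my original question could work roughly like this; this is an untested sketch that just swaps in an all-zero advert series of the same length as a counterfactual:

# swap new predictor values into the shared variable, then sample the posterior predictive
advert.set_value(np.zeros_like(pd_sales.advert.values))

with mod_as:
    post_pred = pm.sample_posterior_predictive(trace_5_3_S)

post_pred['sales']   # predicted sales draws under the new advert values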

I tried this code, but I get an error:

Traceback (most recent call last):
  File "c:\Users\DF\.vscode\extensions\ms-python.python-2019.6.24221\pythonFiles\ptvsd_launcher.py", line 43, in <module>
    main(ptvsdArgs)
  File "c:\Users\DF\.vscode\extensions\ms-python.python-2019.6.24221\pythonFiles\lib\python\ptvsd\__main__.py", line 434, in main
    run()
  File "c:\Users\DF\.vscode\extensions\ms-python.python-2019.6.24221\pythonFiles\lib\python\ptvsd\__main__.py", line 312, in run_file
    runpy.run_path(target, run_name='__main__')
  File "D:\Miniconda3\envs\tf-gpu\lib\runpy.py", line 263, in run_path
    pkg_name=pkg_name, script_name=fname)
  File "D:\Miniconda3\envs\tf-gpu\lib\runpy.py", line 96, in _run_module_code
    mod_name, mod_spec, pkg_name, script_name)
  File "D:\Miniconda3\envs\tf-gpu\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\Users\DF\source\repos\PythonApplication1\PythonApplication1\bgc-parameters-ana\test.py", line 118, in <module>
    mu = a + bA * normalize(adstock(advert, ad_rate))
  File "D:\Miniconda3\envs\tf-gpu\lib\site-packages\theano\gof\op.py", line 615, in __call__
    node = self.make_node(*inputs, **kwargs)
  File "D:\Miniconda3\envs\tf-gpu\lib\site-packages\theano\gof\op.py", line 983, in make_node
    if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
  File "D:\Miniconda3\envs\tf-gpu\lib\site-packages\theano\gof\op.py", line 983, in <genexpr>
    if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
AttributeError: 'numpy.ndarray' object has no attribute 'type'

Why must variables like sales and advert be shared?

It should be:
sales_hat = pm.Normal('sales_hat', mu=mu, sd=sigma, observed=sales)
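They need to be Theano variables because the as_op wrapper declares itypes=[tt.dvector, tt.dscalar], and Theano checks each input's .type when it builds the graph; a plain NumPy array has no .type attribute, which is exactly the AttributeError in the traceback above. A quick illustration:

import numpy as np
import theano
import theano.tensor as tt

advert_np = np.array([0.0, 117.91, 120.11])
advert_sh = theano.shared(advert_np)      # wrapping gives the array a Theano type

print(hasattr(advert_np, 'type'))         # False -> the AttributeError above
print(advert_sh.type)                     # TensorType(float64, vector), matches tt.dvector

So the error in the second traceback suggests that advert was still a raw NumPy array (or DataFrame column) when it was passed to adstock.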