This is migrated from github.
I am trying to fit a simple linear regression as proof of concept for a larger problem, whereby I am transforming the X variable according to the ad stock transformation.
My question is how to optimize a parameter that is passed to an external function during fitting and is there any way then to have that found optimal value be taken into consideration (i.e. used) when you
predict new data using sample_posterior_predictive()?
Here is a self-contained program and a note about what seems to work and when this fails.
from statsmodels.tsa.filters.filtertools import recursive_filter
import pymc3 as pm
import numpy as np
import pandas as pd
#normalize to range 0-100
def normalize(x,min_=0,max_=100):
print(x)
min_x=np.min(x)
max_x=np.max(x)
z=(max_-min_)/(max_x-min_x)*(x-max_x)+max_
return(z)
def adstock(x,rate=0.09):
return (recursive_filter(x,rate))
sales=np.array([1018,0,236,490,1760,443,1670,526,4522,2524,400,2527,4602,168,2795,7195,6277,2974,
5268,4310,2127,1081,4794,806,2565,1223,4141,2994,4079,1883,635,1980,1275,4497,1579,2726,
1901,4683,1686,1745,1404,1096,2825,2331,1711,2041,1210,914,4162,1166,4228,914])
advert=np.array([0,0,0,0,0,0,0,0,0,117.91,120.11,125.83,115.35,177.09,141.65,137.89,0,0,0,0,0,0,0,0,0,
0,158.51,109.39,91.08,79.25,102.71,78.49,135.11,114.55,87.34,107.83,125.02,82.96,60.81,83.15,0,0,
0,0,0,0,129.51,105.49,111.49,107.1,0,0])
pd_sales=pd.DataFrame(np.column_stack([sales,advert]), columns=['sales','advert'])
#HERE THIS WORKS WHEN HARD-CODING THE PARAMETER##########################
with pm.Model() as mod_as:
a = pm.Normal('a', mu=1000, sd=500)
bA = pm.Normal('bA', mu=0, sd=2)
sigma = pm.Uniform('sigma', lower=0, upper=10)
mu = a + bA * normalize(adstock(pd_sales.advert.values,0.09))
sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
trace_5_3_S = pm.sample(1000, tune=1000,cores=4)
#HERE THIS FAILS##########################
with pm.Model() as mod_as:
a = pm.Normal('a', mu=1000, sd=500)
bA = pm.Normal('bA', mu=0, sd=2)
sigma = pm.Uniform('sigma', lower=0, upper=10)
ad_rate = pm.Uniform('ad_rate', lower=0, upper=1)
mu = a + bA * normalize(adstock(pd_sales.advert.values,ad_rate))
sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
trace_5_3_S = pm.sample(1000, tune=1000,cores=4)
It was mentioned that I need to use a theano decorator as_op. If I set the function to this,
from theano.compile.ops import as_op
import theano.tensor as tt
@as_op(itypes=[tt.dvector,tt.lscalar], otypes=[tt.dvector])
def adstock(x,rate=0.09):
return (recursive_filter(x,rate))
I get another error. Can anyone give me a hint on how to set this up properly?
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-85-d00211698585> in <module>()
5 ad_rate = pm.Uniform('ad_rate', lower=0, upper=1)
6
----> 7 mu = a + bA * normalize(adstock(pd_sales.advert.values,ad_rate))
8 sales_hat = pm.Normal('sales', mu=mu, sd=sigma, observed=pd_sales.sales)
9
~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in __call__(self, *inputs, **kwargs)
613 """
614 return_list = kwargs.pop('return_list', False)
--> 615 node = self.make_node(*inputs, **kwargs)
616
617 if config.compute_test_value != 'off':
~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in make_node(self, *inputs)
981 raise ValueError("We expected %d inputs but got %d." %
982 (len(self.itypes), len(inputs)))
--> 983 if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
984 raise TypeError(
985 "We expected inputs of types '%s' but got types '%s' " %
~/anaconda3/lib/python3.6/site-packages/theano/gof/op.py in <genexpr>(.0)
981 raise ValueError("We expected %d inputs but got %d." %
982 (len(self.itypes), len(inputs)))
--> 983 if not all(inp.type == it for inp, it in zip(inputs, self.itypes)):
984 raise TypeError(
985 "We expected inputs of types '%s' but got types '%s' " %
AttributeError: 'numpy.ndarray' object has no attribute 'type'