Hello,
I’m trying to replicate the following from the Rolling Regression example ( Rolling Regression — PyMC example gallery):
# from pandas_datareader import data
# prices = data.GoogleDailyReader(symbols=['GLD', 'GFI'], end='2014-8-1').read().loc['Open', :, :]
try:
prices = pd.read_csv(os.path.join("..", "data", "stock_prices.csv")).dropna()
except FileNotFoundError:
prices = pd.read_csv(pm.get_data("stock_prices.csv")).dropna()
prices["Date"] = pd.DatetimeIndex(prices["Date"])
prices = prices.set_index("Date")
prices_zscored = (prices - prices.mean()) / prices.std()
prices.head()
with pm.Model(coords={"time": prices.index.values}) as model_randomwalk:
# std of random walk
sigma_alpha = pm.Exponential("sigma_alpha", 50.0)
sigma_beta = pm.Exponential("sigma_beta", 50.0)
alpha = pm.GaussianRandomWalk(
"alpha", sigma=sigma_alpha, init_dist=pm.Normal.dist(0, 10), dims="time"
)
beta = pm.GaussianRandomWalk(
"beta", sigma=sigma_beta, init_dist=pm.Normal.dist(0, 10), dims="time"
)
# Define regression
regression = alpha + beta * prices_zscored.GFI.values
# Assume prices are Normally distributed, the mean comes from the regression.
sd = pm.HalfNormal("sd", sigma=0.1)
likelihood = pm.Normal("y", mu=regression, sigma=sd, observed=prices_zscored.GLD.to_numpy())
trace_rw = pm.sample(tune=2000, target_accept=0.9)
as follows:
yobs = prices['GLD'].values
T = len(yobs)
data = prices.copy().drop(columns='GLD').assign(Constant = 1)
p = 2
with pm.Model(coords={"time": prices.index.values, 'xvars':data.columns.values}) as model_randomwalk:
X = pm.MutableData("X",data.values,dims=("time","xvars"))
# std of random walk
beta_sigma = pm.Exponential("beta_sigma", 50.0,dims='xvars')
beta = pm.MvGaussianRandomWalk("beta",
mu=np.zeros(p),
dims=("time","xvars"),
cov=pytensor.tensor.diag(beta_sigma),
init_dist=pm.MvNormal.dist(0, 10*np.eye(p)))
# Define regression
regression = pm.math.sum(X*beta,axis=1)
# Assume prices are Normally distributed, the mean comes from the regression.
sd = pm.HalfNormal("sd", sigma=0.1)
likelihood = pm.Normal("y", mu=regression, sigma=sd, observed=yobs)
trace = pm.sample(2000,tune=1000, target_accept=0.9)
My thinking is beta should be T x 2, with the first column equal to beta in the original example and the second column equal to alpha, and then I can multiply X and beta elementwise and then sum the two elements in each row to get the regression mean.
This models samples fine (though much more slowly) and also produces the same posterior predictive as the Rolling regression example, and yet the values for alpha and beta don’t look as “clean” as they do in the Rolling Regression example and also their scale is very off; e.g. the betas are between 3 and 9 and the alphas reach almost into the 100s.
Is there something obvious that I’m doing wrong? Thank you in advance for any suggestions.