I got it to run, but the results don't make sense, and I don't think this is the right way to solve the problem. The extra standard-normal random variable (error_s) that has to be passed around to connect the observed returns to the volatility process is undesirable. There has to be a way to write a multivariate distribution class that supports this directly.
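For context, the model I'm aiming for is a square-root stochastic-volatility process in which the return shock and the variance shock are correlated with coefficient $\rho$ (this is just my reading of lin_sde below, so treat the notation as a sketch). In discretized Euler–Maruyama form with $dt = 1$ and $\epsilon_t, z_t$ independent standard normals:

$$
V_{t+1} = V_t + (\alpha + \beta V_t) + \sigma_v \sqrt{V_t}\left(\rho\,\epsilon_t + \sqrt{1-\rho^2}\,z_t\right),
\qquad
r_t = \sqrt{V_t}\,\epsilon_t .
$$

lin_sde splits the variance shock along those lines: the $\rho\,\epsilon_t$ piece is folded into the drift (since $\epsilon_t$ is the explicit error_s variable) and the $\sqrt{1-\rho^2}$ factor stays in the diffusion. That is exactly why error_s has to live in the model as a free variable, which is the part I want to eliminate (note also that as written error_s is a single scalar shock shared across every time step, not a per-step $\epsilon_t$, which I suspect contributes to the odd results).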
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from pymc3.distributions.timeseries import EulerMaruyama
returns = np.genfromtxt(pm.get_data("SP500.csv"))
def lin_sde(Vt, sigma_v, rho, beta_v, alpha_v, error_s):
    # drift absorbs the correlated part of the variance shock (rho * error_s);
    # the diffusion keeps the independent sqrt(1 - rho^2) part
    drift = alpha_v + beta_v * Vt + sigma_v * pm.math.sqrt(Vt) * rho * error_s
    return drift, sigma_v * pm.math.sqrt(Vt) * pm.math.sqrt(1 - rho * rho)
with pm.Model() as sp500_model:
    alpha_v = pm.Normal('alpha', 0, sd=100)
    beta_v = pm.Normal('beta', 0, sd=10)
    sigma_v = pm.InverseGamma('sigmaV', 2.5, 0.1)
    rho = pm.Uniform('rho', -.9, .9)
    error_s = pm.Normal('error_s', mu=0.0, sd=1)  # single shared return shock

    # sde_pars must be in the same order as the lin_sde signature
    Vt = EulerMaruyama('Vt', 1.0, lin_sde,
                       [sigma_v, rho, beta_v, alpha_v, error_s],
                       shape=len(returns),
                       testval=np.repeat(.01, len(returns)))

    # observed returns tied to sqrt(Vt) * error_s with a small observation noise
    expected = pm.Deterministic('expected', pm.math.sqrt(Vt) * error_s)
    Yt = pm.Normal('Yt', mu=expected, sd=0.01, observed=returns)
with sp500_model:
    trace = pm.sample(2000, chains=1, cores=1, init='adapt_diag')
pm.traceplot(trace, ['alpha', 'beta', 'sigmaV', 'rho']);
fig, ax = plt.subplots(figsize=(14, 8))
ax.plot(returns)
ax.plot(trace['Vt',::5].T, 'r', alpha=.03);
ax.set(xlabel='time', ylabel='returns')
ax.legend(['S&P500', 'stoch vol']);
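For reference, this is roughly how I'd sanity-check the static parameters after sampling (sketch only; on older PyMC3 releases the keyword is varnames, on the ArviZ-backed ones it is var_names, so I pass the names positionally):

# posterior summary of the static parameters (sketch, not part of the run above)
print(pm.summary(trace, ['alpha', 'beta', 'sigmaV', 'rho']))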