Hello everyone,
I’m currently working on fitting a random volatility model using PyMC. Here’s my code:
import multiprocessing
import numpy as np
import pandas as pd
import pymc as pm
from pymc.pytensorf import collect_default_updates
import xarray as xr
import arviz as az
import pytensor
import pytensor.tensor as pt
def main():
BURNIN = 5000
N = 15000
dt = 1/240
stock_price_data = pd.read_csv("convvalue_data//2023-05-12&110043.SH.csv")
Y = np.diff(np.log(stock_price_data["convvalue"]))
T = len(Y)
def heston_dist(V0, cov_matrix, kappa, theta, mu, sigma_err, size):
def step(*args):
V_tml, cov_matrix, kappa, theta, mu, sigma_err = args
epsilons = pm.MvNormal.dist(mu=0, cov=cov_matrix)
V = epsilons[1] * pt.sqrt(V_tml * dt) + (1 - kappa * dt) * V_tml + kappa * theta * dt
mu_Y = epsilons[0] * pt.sqrt(V_tml * dt) - 0.5 * V_tml * dt + mu * dt
y = pm.Normal.dist(mu=mu_Y, sigma=sigma_err)
return (V, y), collect_default_updates(inputs=args, outputs=[y])
(V, y), updates = pytensor.scan(fn=step,
outputs_info=[V0, None],
non_sequences=[cov_matrix, kappa, theta, mu, sigma_err],
strict=True,
n_steps=T)
return y
def calc_cov_matrix(phi, omega):
cov_matrix = pm.math.stack([
[1., phi],
[phi, phi**2 + omega]
])
return cov_matrix
with pm.Model() as mod:
mu = pm.Normal('mu', 0, sigma=1)
kappa = pm.Normal('kappa', 0, sigma=1)
theta = pm.Normal('theta', 0, sigma=1)
omega = pm.InverseGamma('omega', alpha=2, beta=0.005)
std_dev = pm.Deterministic('std_dev', omega / 2)
phi = pm.Normal('phi', 0, sigma=std_dev)
V0 = pm.TruncatedNormal('V0', mu=0.0225, sigma=0.005, lower=0)
cov_matrix = pm.Deterministic('cov_matrix', calc_cov_matrix(phi, omega))
y_obs = pm.ConstantData('y_obs', Y)
sigma_err = pm.HalfNormal('sigma', 0.1) # variance
y = pm.CustomDist('obs',
V0, cov_matrix, kappa, theta, mu, sigma_err,
dist=heston_dist,
observed=y_obs)
idata = pm.sample()
if __name__ == '__main__':
multiprocessing.freeze_support()
main()
I have a few questions:
-
My observed data is related to
Y
. Do I need to convertY
intopm.ConstantData
orpm.MutableData
? What’s the difference between the two? -
An issue has been bothering me for quite a while. In the
scan
function,Y
is obtained throughT
sets of random numbers following a bivariate normal distributionepsilons
. As a result, after each iteration, I should haveT
values ofY
, forming a time series of lengthT
. Meanwhile, my observed data forY
also has a length ofT
. My goal is to fit the generatedY
using the observed data in order to estimate the parameters. I’m wondering if thepm.CustomDist
function I’ve written can address this problem effectively? -
When I run the code, I encounter the following error. But I don’t think I have missed any input.
pytensor.graph.utils.MissingInputError: OpFromGraph is missing an input: *1-<RandomGeneratorType>