I am trying to work with pymc-experimental ModelBuilder for deployments. The example on the pymc website is very simple and I am trying to get it to work with a more complex model. The model I am trying to work into ModelBuilder is from the AR structural time series example on the pymc website. Here is my implementation:
class ARModel(ModelBuilder):
    """First attempt at wrapping the AR + trend + seasonality model in ``ModelBuilder``.

    This is the version that raises the ``RuntimeError`` quoted further down in the
    post; the code is kept exactly as posted and only annotated.
    """

    # Give the model a name
    _model_type = "ARModel"
    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the PyMC model: AR process + linear trend + Fourier seasonality,
        followed by the forecasting nodes for the future time steps.

        Parameters:
        X : pd.DataFrame
            Fourier features, one row per observation. They are handed to
            ``_generate_and_preprocess_model_data``, which stores the transpose
            on ``self.X``.
        y : pd.Series
            The target data for the model. A Series is unwrapped to its
            ``.values``; anything else is passed through unchanged.
        kwargs : dict
            Accepted but not read by this method.

        Priors are taken from ``self.model_config``. Nothing is returned.
        """
        # Check the type of y and adjust access accordingly
        y_values = y.values if isinstance(y, pd.Series) else y
        # The forecast horizon is fixed at 250 time steps here.
        self._generate_and_preprocess_model_data(y=y_values, X=X, prediction_steps=250)
        # NOTE: the model is bound to the local name ``AR`` only and is never
        # assigned to ``self.model``; the first update in the post names this as
        # the cause of the "model hasn't been built yet" RuntimeError.
        with pm.Model() as AR:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            AR.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for addition fourier features to be
            ## passed in the prediction step.
            # self.X holds the transposed input here, so len(self.X) counts rows
            # of the transpose rather than observations.
            AR.add_coord("fourier_features", np.arange(len(self.X)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            # NOTE: rebinds the ``y`` argument to the data container.
            y = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            coefs = pm.Normal("coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"])
            # The same sigma is used for the AR process and for the likelihood noise below.
            sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"], self.model_config["init"]["sigma"], size=self.model_config["init"]["size"]
            )
            # Steps of the AR model minus the lags required given specification
            ar1 = pm.AR(
                "ar",
                coefs,
                sigma=sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            alpha = pm.Normal("alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"])
            beta = pm.Normal("beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"])
            trend = pm.Deterministic("trend", alpha + beta * t, dims="obs_id")
            ## Priors for seasonality
            beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.X)
            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu = ar1 + trend + seasonality
            # The Likelihood
            outcome = pm.Normal("likelihood", mu=mu, sigma=sigma, observed=y, dims="obs_id")
            ## Sampling
            # idata_ar = pm.sample_prior_predictive()
            # idata_ar.extend(pm.sample(samples, random_seed=100, target_accept=0.95, nuts_sampler="numpyro"))
            # idata_ar.extend(pm.sample_posterior_predictive(idata_ar))

            # ---- Forecasting nodes, registered in the same model ----
            # "obs_id_fut_1" starts one step earlier than "obs_id_fut" because the
            # future AR process is initialised at the last observed time step.
            AR.add_coords({"obs_id_fut_1": range(self.y.shape[0] - 1, self.prediction_steps, 1)})
            AR.add_coords({"obs_id_fut": range(self.y.shape[0], self.prediction_steps, 1)})
            t_fut = pm.MutableData("t_fut", list(range(self.y.shape[0], self.prediction_steps, 1)))
            ff_fut = pm.MutableData("ff_fut", self.fourier_features_new.to_numpy().T)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(ar1[..., -1]),
                rho=coefs,
                sigma=sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend (rebinds the local name ``trend``)
            trend = pm.Deterministic("trend_fut", alpha + beta * t_fut, dims="obs_id_fut")
            # Compute future seasonality (rebinds the local name ``seasonality``)
            seasonality = pm.Deterministic(
                "seasonality_fut", pm.math.dot(beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1
            # ar1_fut[1:] drops the initial value, which is the last observed AR state.
            mu = ar1_fut[1:] + trend + seasonality
            yhat_fut = pm.Normal("yhat_fut", mu=mu, sigma=sigma, dims="obs_id_fut")
            # use the updated values and predict outcomes and probabilities:
            # idata_preds = pm.sample_posterior_predictive(
            # idata_ar, var_names=["likelihood", "yhat_fut"], predictions=True, random_seed=100
            # )

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
    ):
        """
        Push new data into the model's data containers via ``pm.set_data``.

        NOTE(review): ``build_model`` declares the containers "t", "y",
        "fourier_terms", "t_fut" and "ff_fut"; nothing in this class declares
        "x_data" or "y_data", so the names used below look carried over from a
        different example -- confirm before relying on this method.
        """
        with self.model:
            pm.set_data({"x_data": X})
            if y is not None:
                pm.set_data({"y_data": y.values if isinstance(y, pd.Series) else y})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns a class default config dict for model builder if no model_config is provided on class initialization.
        The model config dict is generally used to specify the prior values we want to build the model with.
        It supports more complex data structures like lists, dictionaries, etc.
        It will be passed to the class instance on initialization, in case the user doesn't provide any model_config of their own.
        """
        # "coefs" carries two entries (size 2); build_model treats the first one
        # as the intercept term.
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }
        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns a class default sampler dict for model builder if no sampler_config is provided on class initialization.
        The sampler config dict is used to send parameters to the sampler.
        It will be used during fitting in case the user doesn't provide any sampler_config of their own.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name reported as the model's output variable: the forecast node "yhat_fut"."""
        return "yhat_fut"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        _serializable_model_config is a property that returns a dictionary with all the model parameters that we want to save.
        as some of the data structures are not json serializable, we need to convert them to json serializable objects.
        Some models will need them, others can just define them to return the model_config.

        Here the model config is returned unchanged.
        """
        return self.model_config

    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute the Fourier
        features for the future time steps.

        Sets ``t_data``, ``X`` (the transposed input), ``y``, ``prediction_steps``,
        ``n``, ``n_order``, ``periods`` and ``fourier_features_new``; also resets
        ``model_coords`` to None.
        """
        # No coords are passed through model_coords; build_model adds them itself.
        self.model_coords = None
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # NOTE: self.X holds the TRANSPOSE of the X argument; the first update in
        # the post reports that this conflicted with the X argument.
        self.X = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of future steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Fourier order is fixed at 10 (one sin and one cos column per order).
        self.n_order = 10
        # Future time index divided by 7, matching the "/ 7" used for the
        # training features in the fitting snippet.
        self.periods = (y.shape[0] + np.arange(self.n)) / 7
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )
Then I try to fit the model with the following generated data:
def simulate_ar(
    intercept: float,
    coef1: float,
    coef2: float,
    noise: float = 0.3,
    warmup: int = 10,
    steps: int = 200,
    *,
    rng: "np.random.Generator | None" = None,
) -> NDArray:
    """
    Simulate autoregressive AR(2) data.

    Parameters
    ----------
    intercept : float
        The intercept for the simulated data.
    coef1 : float
        The AR1 (lag-1) coefficient.
    coef2 : float
        The AR2 (lag-2) coefficient.
    noise : float
        The standard deviation of the Gaussian noise.
    warmup : int
        Number of extra leading draws that are simulated and then discarded
        so the process can stabilise.
    steps : int
        Number of data points to return.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so existing callers (and ``np.random.seed``) behave as before.

    Returns
    -------
    NDArray
        The simulated series with the warmup draws removed.
    """
    # Both ``np.random`` and a ``Generator`` expose ``normal(loc, scale)``.
    sampler = np.random if rng is None else rng

    # We sample some extra warmup steps, to let the AR process stabilize
    total = warmup + steps
    draws = np.zeros(total)
    # Initialize first draws at intercept
    draws[:2] = intercept
    for step in range(2, total):
        draws[step] = (
            intercept
            + coef1 * draws[step - 1]
            + coef2 * draws[step - 2]
            + sampler.normal(0, noise)
        )
    # Discard the warmup draws
    return draws[warmup:]
# AR component: coef2=0 switches the second lag off, leaving an AR(1) series.
ar1_data = simulate_ar(intercept=10, coef1=-0.9, coef2=0)

# Linear trend (offset -0.3, slope -0.2 per step) plus Gaussian noise.
trend = -0.3 + -0.2 * np.arange(200) + np.random.normal(0, 10, 200)
y_t = ar1_data + trend

# Fourier features: one sin and one cos column per order, built from t / 7.
t_data = list(range(200))
n_order = 10
periods = np.asarray(t_data) / 7
fourier_features = pd.DataFrame(
    {
        f"{func_name}_order_{order}": func(2 * np.pi * periods * order)
        for order in range(1, n_order + 1)
        for func_name, func in (("sin", np.sin), ("cos", np.cos))
    }
)

# Add a seasonal signal driven by the first-order sine feature.
y_t_s = 20 * fourier_features["sin_order_1"] + y_t

# Fit the model on the Fourier features and the simulated target.
m = ARModel()
idata = m.fit(X=fourier_features, y=y_t_s)
I am getting the following error:
RuntimeError: The model hasn't been built yet, call .build_model() first or call .fit() instead.
When I call build_model() directly the code does not error out but I am not sure how to get samples after that.
I apologize for such a long post. Any insight/guidance is appreciated.
Update:
Okay, I was able to resolve a couple of things. First, the model was erroneously bound to the name AR; it needs to be assigned to self.model instead. Second, the self.X attribute was conflicting with the X argument of the function, so I now store the transposed Fourier terms in self.ff instead of self.X.
Here is the updated code:
class ARModel(ModelBuilder):
    """Second attempt: the model is now assigned to ``self.model``.

    This is the version that raises the ``SamplingError`` quoted after it in the
    post; the code is kept exactly as posted and only annotated.
    """

    # Give the model a name
    _model_type = "ARModel"
    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the PyMC model on ``self.model``: AR process + linear trend +
        Fourier seasonality, followed by the forecasting nodes for the future
        time steps.

        Parameters:
        X : pd.DataFrame
            Fourier features, one row per observation. They are handed to
            ``_generate_and_preprocess_model_data``, which stores them on
            ``self.X`` and their transpose on ``self.ff``.
        y : pd.Series
            The target data for the model. A Series is unwrapped to its
            ``.values``; anything else is passed through unchanged.
        kwargs : dict
            Accepted but not read by this method.

        Priors are taken from ``self.model_config``. Nothing is returned.
        """
        # Check the type of y and adjust access accordingly
        y_values = y.values if isinstance(y, pd.Series) else y
        # The forecast horizon is fixed at 250 time steps here.
        self._generate_and_preprocess_model_data(y=y_values, X=X, prediction_steps=250)
        with pm.Model() as self.model:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            self.model.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for addition fourier features to be
            ## passed in the prediction step.
            # self.ff is the transposed input, so len(self.ff) counts rows of
            # the transpose rather than observations.
            self.model.add_coord("fourier_features", np.arange(len(self.ff)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            # NOTE: rebinds the ``y`` argument to the data container.
            y = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            coefs = pm.Normal("coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"])
            # The same sigma is used for the AR process and for the likelihood noise below.
            sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"], self.model_config["init"]["sigma"], size=self.model_config["init"]["size"]
            )
            # Steps of the AR model minus the lags required given specification
            ar1 = pm.AR(
                "ar",
                coefs,
                sigma=sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            alpha = pm.Normal("alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"])
            beta = pm.Normal("beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"])
            trend = pm.Deterministic("trend", alpha + beta * t, dims="obs_id")
            ## Priors for seasonality
            beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.ff)
            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu = ar1 + trend + seasonality
            # The Likelihood
            outcome = pm.Normal("likelihood", mu=mu, sigma=sigma, observed=y, dims="obs_id")
            ## Sampling
            # idata_ar = pm.sample_prior_predictive()
            # idata_ar.extend(pm.sample(samples, random_seed=100, target_accept=0.95, nuts_sampler="numpyro"))
            # idata_ar.extend(pm.sample_posterior_predictive(idata_ar))

            # ---- Forecasting nodes, registered in the same model that gets fitted ----
            # NOTE(review): the post reports "SamplingError: Initial evaluation of
            # model at starting point failed!" for this layout, and the final update
            # resolves it by moving these nodes into predict_posterior. Presumably
            # the unobserved "ar1_fut" / "yhat_fut" variables take part in sampling
            # when declared here -- confirm.
            self.model.add_coords({"obs_id_fut_1": range(self.y.shape[0] - 1, self.prediction_steps, 1)})
            self.model.add_coords({"obs_id_fut": range(self.y.shape[0], self.prediction_steps, 1)})
            t_fut = pm.MutableData("t_fut", list(range(self.y.shape[0], self.prediction_steps, 1)))
            ff_fut = pm.MutableData("ff_fut", self.fourier_features_new.to_numpy().T)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(ar1[..., -1]),
                rho=coefs,
                sigma=sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend (rebinds the local name ``trend``)
            trend = pm.Deterministic("trend_fut", alpha + beta * t_fut, dims="obs_id_fut")
            # Compute future seasonality (rebinds the local name ``seasonality``)
            seasonality = pm.Deterministic(
                "seasonality_fut", pm.math.dot(beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1
            # ar1_fut[1:] drops the initial value, which is the last observed AR state.
            mu = ar1_fut[1:] + trend + seasonality
            yhat_fut = pm.Normal("yhat_fut", mu=mu, sigma=sigma, dims="obs_id_fut")
            # use the updated values and predict outcomes and probabilities:
            # idata_preds = pm.sample_posterior_predictive(
            # idata_ar, var_names=["likelihood", "yhat_fut"], predictions=True, random_seed=100
            # )

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
    ):
        """
        Push new data into the model's data containers via ``pm.set_data``.

        NOTE(review): ``build_model`` declares the containers "t", "y",
        "fourier_terms", "t_fut" and "ff_fut"; nothing in this class declares
        "x_data" or "y_data", so the names used below look carried over from a
        different example -- confirm before relying on this method.
        """
        with self.model:
            pm.set_data({"x_data": X})
            if y is not None:
                pm.set_data({"y_data": y.values if isinstance(y, pd.Series) else y})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns a class default config dict for model builder if no model_config is provided on class initialization.
        The model config dict is generally used to specify the prior values we want to build the model with.
        It supports more complex data structures like lists, dictionaries, etc.
        It will be passed to the class instance on initialization, in case the user doesn't provide any model_config of their own.
        """
        # "coefs" carries two entries (size 2); build_model treats the first one
        # as the intercept term.
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }
        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns a class default sampler dict for model builder if no sampler_config is provided on class initialization.
        The sampler config dict is used to send parameters to the sampler.
        It will be used during fitting in case the user doesn't provide any sampler_config of their own.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name reported as the model's output variable: the observed node "likelihood"."""
        return "likelihood"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        _serializable_model_config is a property that returns a dictionary with all the model parameters that we want to save.
        as some of the data structures are not json serializable, we need to convert them to json serializable objects.
        Some models will need them, others can just define them to return the model_config.

        Here the model config is returned unchanged.
        """
        return self.model_config

    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute the Fourier
        features for the future time steps.

        Sets ``t_data``, ``X``, ``ff`` (the transposed input), ``y``,
        ``prediction_steps``, ``n``, ``n_order``, ``periods`` and
        ``fourier_features_new``; also resets ``model_coords`` to None.
        """
        # No coords are passed through model_coords; build_model adds them itself.
        self.model_coords = None
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # Keep the input as given on self.X and its transpose separately on self.ff.
        self.X = X
        self.ff = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of future steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Fourier order is fixed at 10 (one sin and one cos column per order).
        self.n_order = 10
        # Future time index divided by 7, matching the "/ 7" used for the
        # training features in the fitting snippet.
        self.periods = (y.shape[0] + np.arange(self.n)) / 7
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )
I am able to get this to run through half of the model where the AR1 is fit and the likelihood is sampled. However, I am still not able to get the second part of the model where we use the fitted AR1 to forecast into the future. I am getting the following error:
SamplingError: Initial evaluation of model at starting point failed!
Again any help is appreciated.
final update:
I have figured out how to get this to work: I needed to pull the future-forecast part out of build_model and put it into an overridden predict_posterior() method. Here is the updated code. I am also assuming that the data needs to be set in the _data_setter method in order to make multiple calls to predict:
class ARModel(ModelBuilder):
    """Autoregressive structural time-series model: AR process + linear trend + Fourier seasonality.

    ``build_model`` declares only the part of the graph that is fitted to the
    observed data. The forecasting nodes are attached to the fitted model by
    ``predict_posterior`` the first time it is called and are resized on later
    calls, so forecasts can be requested repeatedly.
    """

    # Give the model a name
    _model_type = "ARModel"
    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the PyMC model that is fitted to the observed data.

        Parameters
        ----------
        X : pd.DataFrame
            Fourier features, one row per observation and one column per feature.
        y : pd.Series
            The target data for the model. A Series is unwrapped to its values;
            anything else is used as given.
        kwargs : dict
            Optional ``prediction_steps`` (int, default 250): the time index up to
            which ``_generate_and_preprocess_model_data`` prepares future Fourier
            features. Other keys are ignored.

        Notes
        -----
        Priors are read from ``self.model_config``. The random variables needed
        later for forecasting are kept on the instance (``coefs``, ``sigma``,
        ``ar1``, ``alpha``, ``beta``, ``beta_fourier``).
        """
        # Check the type of y and adjust access accordingly
        y_values = y.values if isinstance(y, pd.Series) else y
        self._generate_and_preprocess_model_data(
            y=y_values, X=X, prediction_steps=kwargs.get("prediction_steps", 250)
        )
        with pm.Model() as self.model:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            self.model.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for additional fourier features to be
            ## passed in the prediction step.
            self.model.add_coord("fourier_features", np.arange(len(self.ff)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            y_obs = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            self.coefs = pm.Normal(
                "coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"]
            )
            # One sigma is shared by the AR process and the likelihood noise.
            self.sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"],
                self.model_config["init"]["sigma"],
                size=self.model_config["init"]["size"],
            )
            # Steps of the AR model minus the lags required given specification
            self.ar1 = pm.AR(
                "ar",
                self.coefs,
                sigma=self.sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            self.alpha = pm.Normal(
                "alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"]
            )
            self.beta = pm.Normal(
                "beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"]
            )
            trend = pm.Deterministic("trend", self.alpha + self.beta * t, dims="obs_id")
            ## Priors for seasonality
            self.beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.ff)
            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(self.beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu = self.ar1 + trend + seasonality
            # The Likelihood
            pm.Normal("likelihood", mu=mu, sigma=self.sigma, observed=y_obs, dims="obs_id")

    def _register_forecast_nodes(self, t_future, ar_index, ff_future) -> None:
        """Attach the forecasting nodes to the fitted model (done once per built model)."""
        with self.model:
            # Mutable coordinates so that a later call can resize the horizon.
            self.model.add_coord("obs_id_fut_1", ar_index, mutable=True)
            self.model.add_coord("obs_id_fut", t_future, mutable=True)
            t_fut = pm.MutableData("t_fut", t_future)
            ff_fut = pm.MutableData("ff_fut", ff_future)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(self.ar1[..., -1]),
                rho=self.coefs,
                sigma=self.sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend
            trend_fut = pm.Deterministic(
                "trend_fut", self.alpha + self.beta * t_fut, dims="obs_id_fut"
            )
            # Compute future seasonality
            seasonality_fut = pm.Deterministic(
                "seasonality_fut", pm.math.dot(self.beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1; ar1_fut[1:] drops the initial
            # value, which is the last observed AR state rather than a forecast.
            mu_fut = ar1_fut[1:] + trend_fut + seasonality_fut
            pm.Normal("yhat_fut", mu=mu_fut, sigma=self.sigma, dims="obs_id_fut")

    def predict_posterior(
        self,
        fourier_features_new: pd.DataFrame,
        prediction_steps: int = 200,
        random_seed: int = 100,
    ):
        """
        Forecast beyond the training data, conditioned on the fitted posterior.

        Parameters
        ----------
        fourier_features_new : pd.DataFrame
            Fourier features for the future time steps: one row per future step,
            one column per feature used in training.
        prediction_steps : int
            Time index (exclusive) up to which to forecast. It must be larger than
            the number of training observations; the forecast covers
            ``range(n_obs, prediction_steps)``.
        random_seed : int
            Seed passed to ``pm.sample_posterior_predictive``.

        Returns
        -------
        The result of ``pm.sample_posterior_predictive`` for "likelihood" and
        "yhat_fut", sampled with ``predictions=True``.

        Raises
        ------
        ValueError
            If ``prediction_steps`` does not extend past the training data, or if
            ``fourier_features_new`` does not have shape (horizon, n_features).
        """
        n_obs = self.y.shape[0]
        if prediction_steps <= n_obs:
            raise ValueError(
                f"prediction_steps ({prediction_steps}) must be larger than the number "
                f"of training observations ({n_obs})."
            )
        horizon = prediction_steps - n_obs
        n_features = len(self.ff)
        ff_future = np.asarray(fourier_features_new).T
        if ff_future.shape != (n_features, horizon):
            raise ValueError(
                f"fourier_features_new must have shape ({horizon}, {n_features}), "
                f"got {ff_future.T.shape}."
            )
        t_future = np.arange(n_obs, prediction_steps)
        # The future AR process starts at the last observed time step.
        ar_index = np.arange(n_obs - 1, prediction_steps)

        if "yhat_fut" in self.model.named_vars:
            # The forecast nodes already exist from an earlier call. Registering
            # them again would collide on the variable names, so only resize the
            # dims and swap in the new data.
            with self.model:
                self.model.set_dim("obs_id_fut_1", horizon + 1, coord_values=ar_index)
                self.model.set_dim("obs_id_fut", horizon, coord_values=t_future)
                pm.set_data({"t_fut": t_future, "ff_fut": ff_future})
        else:
            self._register_forecast_nodes(t_future, ar_index, ff_future)

        with self.model:
            # use the updated values and predict outcomes and probabilities:
            idata_preds = pm.sample_posterior_predictive(
                self.idata,
                var_names=["likelihood", "yhat_fut"],
                predictions=True,
                random_seed=random_seed,
            )
        return idata_preds

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray, None] = None
    ):
        """
        Replace the in-sample data held by the model's data containers.

        Parameters
        ----------
        X : pd.DataFrame or np.ndarray
            Fourier features, one row per observation (same layout as in ``fit``).
        y : pd.Series or np.ndarray, optional
            New target values; the stored ones are left untouched when omitted.

        Notes
        -----
        The containers written here are the ones ``build_model`` declares:
        "t", "fourier_terms" and "y". Forecasting past the training window goes
        through ``predict_posterior`` instead.
        """
        # build_model stores the features transposed: (n_features, n_obs).
        ff_values = np.asarray(X).T
        n_obs = ff_values.shape[1]
        t_values = np.arange(n_obs)
        with self.model:
            # Resize the shared time dimension first so that every container
            # attached to "obs_id" sees a consistent length.
            self.model.set_dim("obs_id", n_obs, coord_values=t_values)
            pm.set_data({"t": t_values, "fourier_terms": ff_values})
            if y is not None:
                pm.set_data({"y": y.values if isinstance(y, pd.Series) else y})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns a class default config dict for model builder if no model_config is provided on class initialization.
        The model config dict is generally used to specify the prior values we want to build the model with.
        It supports more complex data structures like lists, dictionaries, etc.
        It will be passed to the class instance on initialization, in case the user doesn't provide any model_config of their own.
        """
        # "coefs" carries two entries (size 2); build_model treats the first one
        # as the intercept term.
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }
        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns a class default sampler dict for model builder if no sampler_config is provided on class initialization.
        The sampler config dict is used to send parameters to the sampler.
        It will be used during fitting in case the user doesn't provide any sampler_config of their own.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name of the model's output variable.

        A single name is returned (not a tuple): "likelihood" is the observed
        variable declared by ``build_model``, whereas "yhat_fut" only exists
        after ``predict_posterior`` has been called.
        """
        return "likelihood"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        _serializable_model_config is a property that returns a dictionary with all the model parameters that we want to save.
        as some of the data structures are not json serializable, we need to convert them to json serializable objects.
        Some models will need them, others can just define them to return the model_config.

        Here the model config is returned unchanged.
        """
        return self.model_config

    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute the Fourier
        features for the future time steps.

        Sets ``t_data``, ``X``, ``ff`` (the transposed input), ``y``,
        ``prediction_steps``, ``n``, ``n_order``, ``periods`` and
        ``fourier_features_new``; also resets ``model_coords`` to None.
        """
        # No coords are passed through model_coords; build_model adds them itself.
        self.model_coords = None
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # Keep the input as given on self.X and its transpose separately on self.ff.
        self.X = X
        self.ff = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of future steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Fourier order is fixed at 10 (one sin and one cos column per order).
        self.n_order = 10
        # Future time index divided by 7, matching the "/ 7" used for the
        # training features in the fitting snippet.
        self.periods = (y.shape[0] + np.arange(self.n)) / 7
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )