ModelBuilder Guidance

Dekermanjian · November 29, 2023, 1:58am

I am trying to work with pymc-experimental ModelBuilder for deployments. The example on the pymc website is very simple and I am trying to get it to work with a more complex model. The model I am trying to work into ModelBuilder is from the AR structural time series example on the pymc website. Here is my implementation:

class ARModel(ModelBuilder):
    """
    First attempt at wrapping an AR + linear-trend + Fourier-seasonality model in ModelBuilder.

    NOTE(review): this is the snapshot reported to fail with "The model hasn't been built
    yet"; the code is left exactly as posted and only the comments are updated.
    """

    # Give the model a name
    _model_type = "ARModel"

    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the PyMC model: an AR process plus a linear trend and Fourier seasonality,
        followed by a forecast branch ("ar1_fut", "trend_fut", "seasonality_fut", "yhat_fut").

        Prior parameters are read from self.model_config; nothing is sampled here.

        Parameters:
        X : pd.DataFrame
            The Fourier features. They are handed to _generate_and_preprocess_model_data,
            which stores the transpose on self.X.

        y : pd.Series
            The target data for the model. A Series is unwrapped to y.values; any other
            type is used as-is.

        kwargs : dict
            Accepted but not used in this method.

        NOTE(review): the model context is bound to the local name AR only; this method
        never assigns self.model, which _data_setter reads.
        """
        # A Series is unwrapped to its underlying array; anything else is passed through
        y_values = y.values if isinstance(y, pd.Series) else y
        # Stores t_data, X (transposed), y and the future Fourier features on self;
        # the forecast horizon end is fixed at 250 here
        self._generate_and_preprocess_model_data(y=y_values, X=X, prediction_steps=250)

        with pm.Model() as AR:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            AR.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for addition fourier features to be
            ## passed in the prediction step.
            ## len(self.X) is the row count of the transposed frame stored during preprocessing.
            AR.add_coord("fourier_features", np.arange(len(self.X)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            y = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            coefs = pm.Normal("coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"])
            sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"], self.model_config["init"]["sigma"], size=self.model_config["init"]["size"]
            )
            # Steps of the AR model minus the lags required given specification
            ar1 = pm.AR(
                "ar",
                coefs,
                sigma=sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            alpha = pm.Normal("alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"])
            beta = pm.Normal("beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"])
            trend = pm.Deterministic("trend", alpha + beta * t, dims="obs_id")
            ## Priors for seasonality
            beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.X)
            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu =  ar1 + trend + seasonality
            # The Likelihood
            outcome = pm.Normal("likelihood", mu=mu, sigma=sigma, observed=y, dims="obs_id")
            ## Sampling
            # idata_ar = pm.sample_prior_predictive()
            # idata_ar.extend(pm.sample(samples, random_seed=100, target_accept=0.95, nuts_sampler="numpyro"))
            # idata_ar.extend(pm.sample_posterior_predictive(idata_ar))
            ## Forecast branch: "obs_id_fut_1" starts one step earlier than "obs_id_fut"
            ## because the future AR process is initialised at the last observed step.
            AR.add_coords({"obs_id_fut_1": range(self.y.shape[0] - 1, self.prediction_steps, 1)})
            AR.add_coords({"obs_id_fut": range(self.y.shape[0], self.prediction_steps, 1)})
            t_fut = pm.MutableData("t_fut", list(range(self.y.shape[0], self.prediction_steps, 1)))
            ff_fut = pm.MutableData("ff_fut", self.fourier_features_new.to_numpy().T)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(ar1[..., -1]),
                rho=coefs,
                sigma=sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend (rebinds the local name `trend`)
            trend = pm.Deterministic("trend_fut", alpha + beta * t_fut, dims="obs_id_fut")
            # Compute future seasonality (rebinds the local name `seasonality`)
            seasonality = pm.Deterministic(
                "seasonality_fut", pm.math.dot(beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1; the first AR element is the
            # initial (last observed) step, so it is dropped
            mu = ar1_fut[1:] + trend + seasonality
            yhat_fut = pm.Normal("yhat_fut", mu=mu, sigma=sigma, dims="obs_id_fut")
            # use the updated values and predict outcomes and probabilities:
            # idata_preds = pm.sample_posterior_predictive(
            #     idata_ar, var_names=["likelihood", "yhat_fut"], predictions=True, random_seed=100
            # )

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
    ):
        """
        Push new data into the model stored on self.model.

        X is written to "x_data"; when y is given it is written to "y_data" (a Series is
        unwrapped to y.values first).

        NOTE(review): build_model registers data containers named "t", "y",
        "fourier_terms", "t_fut" and "ff_fut"; none is called "x_data" or "y_data" --
        confirm these names before relying on this method.
        """
        with self.model:
            pm.set_data({"x_data": X})
            if y is not None:
                pm.set_data({"y_data": y.values if isinstance(y, pd.Series) else y})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns the default prior configuration used when no model_config is provided on class initialization.

        Keys as consumed by build_model:
        "coefs" (mu/sigma of the AR coefficients, "size" used to derive the AR steps),
        "alpha" and "beta" (mu/sigma of the trend intercept and slope),
        "beta_fourier" (mu/sigma of the seasonality weights),
        "sigma" (argument of the HalfNormal noise prior),
        "init" (mu/sigma/size of the AR initial distribution).
        """
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }

        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns the default sampler settings used when no sampler_config is provided on class initialization.

        The dict holds the number of draws, tuning steps and chains, plus target_accept.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name of the output variable; here the forecast variable "yhat_fut"."""
        return "yhat_fut"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        Returns the model configuration to be saved.

        The config is returned unchanged; it contains only numbers, lists and dicts.
        """
        return self.model_config


    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute the Fourier features
        for the forecast horizon.

        Sets t_data, X (transposed), y, prediction_steps, n, n_order, periods and
        fourier_features_new on self.
        """
        self.model_coords = None  # coords are not stored here; build_model adds them directly to the model
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # Stored transposed; build_model feeds this to the "fourier_terms" container
        self.X = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of forecast steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Number of Fourier orders (one sin and one cos column each)
        self.n_order = 10
        # Future time indices, scaled by the same / 7 used for the training features in the example script
        self.periods = (y.shape[0] + np.arange(self.n)) / 7

        # Columns are named "<func>_order_<order>", ordered sin/cos within each order
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )

Then I try to fit the model with the following generated data:

def simulate_ar(intercept: float, coef1: float, coef2: float, noise: float=0.3, warmup: int=10, steps: int=200) -> NDArray:
    """
    Simulate data from a second-order autoregressive process.

    Params:
        intercept: the intercept for the simulated data
        coef1: the coefficient on the previous value (lag 1)
        coef2: the coefficient on the value before that (lag 2)
        noise: the standard deviation of the Gaussian noise
        warmup: number of initial draws that are simulated and then discarded
        steps: number of data points to return

    Returns:
        The simulated series with the warmup draws removed.
    """
    total = warmup + steps
    series = np.zeros(total)
    # The first two entries seed the recursion at the intercept
    series[:2] = intercept
    idx = 2
    while idx < total:
        prev1 = series[idx - 1]
        prev2 = series[idx - 2]
        # One noise draw per step from the global NumPy random state
        shock = np.random.normal(0, noise)
        series[idx] = intercept + coef1 * prev1 + coef2 * prev2 + shock
        idx += 1
    # Drop the warmup draws, which only let the process stabilize
    return series[warmup:]

# AR component: coef2=0 switches off the second lag; the default 200 steps are used
ar1_data = simulate_ar(intercept=10, coef1=-0.9, coef2=0)
# Noisy linear trend over the same 200 time points
trend = -0.3 + np.arange(200) * -0.2 + np.random.normal(0, 10, 200)
y_t = trend + ar1_data
t_data = [*range(200)]
n_order = 10
periods = np.asarray(t_data) / 7

# One sin and one cos column per Fourier order, named "<func>_order_<order>"
fourier_columns = {}
for order in range(1, n_order + 1):
    for func in ("sin", "cos"):
        fourier_columns[f"{func}_order_{order}"] = getattr(np, func)(2 * np.pi * periods * order)
fourier_features = pd.DataFrame(fourier_columns)

# Add a seasonal signal built from the first-order sine feature
y_t_s = y_t + fourier_features["sin_order_1"] * 20

m = ARModel()
idata = m.fit(X=fourier_features, y=y_t_s)

I am getting the following error:

RuntimeError: The model hasn't been built yet, call .build_model() first or call .fit() instead.

When I call build_model() directly the code does not error out but I am not sure how to get samples after that.

I apologize for such a long post. Any insight/guidance is appreciated.

Update:
Okay, I was able to resolve a couple of things. First, the model was erroneously bound to the name AR; it needs to be self.model instead. Second, the self.X attribute set during preprocessing conflicted with the X argument of the functions, so I now store the transposed Fourier features in self.ff instead of self.X.

Here is the updated code:

class ARModel(ModelBuilder):
    """
    Second attempt at wrapping an AR + linear-trend + Fourier-seasonality model in ModelBuilder.

    NOTE(review): this is the snapshot reported to raise "SamplingError: Initial evaluation
    of model at starting point failed!"; the code is left exactly as posted and only the
    comments are updated.
    """

    # Give the model a name
    _model_type = "ARModel"

    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the PyMC model on self.model: an AR process plus a linear trend and Fourier
        seasonality, followed by a forecast branch ("ar1_fut", "trend_fut",
        "seasonality_fut", "yhat_fut") defined in the same model.

        Prior parameters are read from self.model_config; nothing is sampled here.

        Parameters:
        X : pd.DataFrame
            The Fourier features. They are handed to _generate_and_preprocess_model_data,
            which stores them on self.X and their transpose on self.ff.

        y : pd.Series
            The target data for the model. A Series is unwrapped to y.values; any other
            type is used as-is.

        kwargs : dict
            Accepted but not used in this method.
        """
        # A Series is unwrapped to its underlying array; anything else is passed through
        y_values = y.values if isinstance(y, pd.Series) else y
        # Stores t_data, X, ff, y and the future Fourier features on self;
        # the forecast horizon end is fixed at 250 here
        self._generate_and_preprocess_model_data(y=y_values, X=X, prediction_steps=250)

        with pm.Model() as self.model:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            self.model.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for addition fourier features to be
            ## passed in the prediction step.
            ## len(self.ff) is the row count of the transposed feature frame.
            self.model.add_coord("fourier_features", np.arange(len(self.ff)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            y = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            coefs = pm.Normal("coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"])
            sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"], self.model_config["init"]["sigma"], size=self.model_config["init"]["size"]
            )
            # Steps of the AR model minus the lags required given specification
            ar1 = pm.AR(
                "ar",
                coefs,
                sigma=sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            alpha = pm.Normal("alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"])
            beta = pm.Normal("beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"])
            trend = pm.Deterministic("trend", alpha + beta * t, dims="obs_id")
            ## Priors for seasonality
            beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.ff)

            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu =  ar1 + trend + seasonality
            # The Likelihood
            outcome = pm.Normal("likelihood", mu=mu, sigma=sigma, observed=y, dims="obs_id")
            ## Sampling
            # idata_ar = pm.sample_prior_predictive()
            # idata_ar.extend(pm.sample(samples, random_seed=100, target_accept=0.95, nuts_sampler="numpyro"))
            # idata_ar.extend(pm.sample_posterior_predictive(idata_ar))
            ## Forecast branch: "obs_id_fut_1" starts one step earlier than "obs_id_fut"
            ## because the future AR process is initialised at the last observed step.
            ## NOTE(review): these forecast variables live in the same model that gets sampled
            ## during fitting; this may be related to the reported SamplingError -- confirm.
            self.model.add_coords({"obs_id_fut_1": range(self.y.shape[0] - 1, self.prediction_steps, 1)})
            self.model.add_coords({"obs_id_fut": range(self.y.shape[0], self.prediction_steps, 1)})
            t_fut = pm.MutableData("t_fut", list(range(self.y.shape[0], self.prediction_steps, 1)))
            ff_fut = pm.MutableData("ff_fut", self.fourier_features_new.to_numpy().T)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(ar1[..., -1]),
                rho=coefs,
                sigma=sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend (rebinds the local name `trend`)
            trend = pm.Deterministic("trend_fut", alpha + beta * t_fut, dims="obs_id_fut")
            # Compute future seasonality (rebinds the local name `seasonality`)
            seasonality = pm.Deterministic(
                "seasonality_fut", pm.math.dot(beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1; the first AR element is the
            # initial (last observed) step, so it is dropped
            mu = ar1_fut[1:] + trend + seasonality
            yhat_fut = pm.Normal("yhat_fut", mu=mu, sigma=sigma, dims="obs_id_fut")
            # use the updated values and predict outcomes and probabilities:
            # idata_preds = pm.sample_posterior_predictive(
            #     idata_ar, var_names=["likelihood", "yhat_fut"], predictions=True, random_seed=100
            # )

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
    ):
        """
        Push new data into the model stored on self.model.

        X is written to "x_data"; when y is given it is written to "y_data" (a Series is
        unwrapped to y.values first).

        NOTE(review): build_model registers data containers named "t", "y",
        "fourier_terms", "t_fut" and "ff_fut"; none is called "x_data" or "y_data" --
        confirm these names before relying on this method.
        """
        with self.model:
            pm.set_data({"x_data": X})
            if y is not None:
                pm.set_data({"y_data": y.values if isinstance(y, pd.Series) else y})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns the default prior configuration used when no model_config is provided on class initialization.

        Keys as consumed by build_model:
        "coefs" (mu/sigma of the AR coefficients, "size" used to derive the AR steps),
        "alpha" and "beta" (mu/sigma of the trend intercept and slope),
        "beta_fourier" (mu/sigma of the seasonality weights),
        "sigma" (argument of the HalfNormal noise prior),
        "init" (mu/sigma/size of the AR initial distribution).
        """
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }

        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns the default sampler settings used when no sampler_config is provided on class initialization.

        The dict holds the number of draws, tuning steps and chains, plus target_accept.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Name of the output variable; here the observed variable "likelihood"."""
        return "likelihood"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        Returns the model configuration to be saved.

        The config is returned unchanged; it contains only numbers, lists and dicts.
        """
        return self.model_config


    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute the Fourier features
        for the forecast horizon.

        Sets t_data, X, ff (X transposed), y, prediction_steps, n, n_order, periods and
        fourier_features_new on self.
        """
        self.model_coords = None  # coords are not stored here; build_model adds them directly to the model
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # X is kept as given; the transposed copy goes to a separate attribute
        self.X = X
        # Transposed features; build_model feeds this to the "fourier_terms" container
        self.ff = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of forecast steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Number of Fourier orders (one sin and one cos column each)
        self.n_order = 10
        # Future time indices, scaled by the same / 7 used for the training features in the example script
        self.periods = (y.shape[0] + np.arange(self.n)) / 7

        # Columns are named "<func>_order_<order>", ordered sin/cos within each order
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )

I am able to get this to run through half of the model where the AR1 is fit and the likelihood is sampled. However, I am still not able to get the second part of the model where we use the fitted AR1 to forecast into the future. I am getting the following error:

SamplingError: Initial evaluation of model at starting point failed!

Again any help is appreciated.

final update:

I have figured out how to get this to work. I needed to pull the future forecasts out of build_model and put them into an overridden predict_posterior() method. Here is the updated code. I am also assuming that the data needs to be set in the _data_setter method in order to make multiple calls to predict:

class ARModel(ModelBuilder):
    """
    AR + linear-trend + Fourier-seasonality model wrapped in ModelBuilder.

    build_model defines only the in-sample model. The forecast variables are added later
    by predict_posterior, which also draws the posterior-predictive samples.
    """

    # Give the model a name
    _model_type = "ARModel"

    # And a version
    version = "0.1"

    def build_model(self, X: pd.DataFrame, y: pd.Series, **kwargs):
        """
        Build the in-sample PyMC model on self.model: an AR process plus a linear trend
        and Fourier seasonality, observed through a Normal likelihood.

        Prior parameters are read from self.model_config. The random variables needed
        for forecasting (coefs, sigma, ar1, alpha, beta, beta_fourier) are kept on self
        so that predict_posterior can reuse them.

        Parameters:
        X : pd.DataFrame
            The Fourier features. They are handed to _generate_and_preprocess_model_data,
            which stores them on self.X and their transpose on self.ff.

        y : pd.Series
            The target data for the model. A Series is unwrapped to y.values; any other
            type is used as-is.

        kwargs : dict
            Accepted but not used in this method.
        """
        # A Series is unwrapped to its underlying array; anything else is passed through
        y_values = y.values if isinstance(y, pd.Series) else y
        self._generate_and_preprocess_model_data(y=y_values, X=X, prediction_steps=250)

        with pm.Model() as self.model:
            ## Add the time interval as a mutable coordinate to the model to allow for future predictions
            self.model.add_coord("obs_id", self.t_data, mutable=True)
            ## The fourier features must be mutable to allow for addition fourier features to be
            ## passed in the prediction step.
            self.model.add_coord("fourier_features", np.arange(len(self.ff)), mutable=True)
            ## Data containers to enable prediction
            t = pm.MutableData("t", self.t_data, dims="obs_id")
            y = pm.MutableData("y", self.y, dims="obs_id")
            # The first coefficient will be the intercept term
            self.coefs = pm.Normal("coefs", self.model_config["coefs"]["mu"], self.model_config["coefs"]["sigma"])
            self.sigma = pm.HalfNormal("sigma", self.model_config["sigma"])
            # We need one init variable for each lag, hence size is variable too
            init = pm.Normal.dist(
                self.model_config["init"]["mu"], self.model_config["init"]["sigma"], size=self.model_config["init"]["size"]
            )
            # Steps of the AR model minus the lags required given specification
            self.ar1 = pm.AR(
                "ar",
                self.coefs,
                sigma=self.sigma,
                init_dist=init,
                constant=True,
                steps=t.shape[0] - (self.model_config["coefs"]["size"] - 1),
                dims="obs_id",
            )
            ## Priors for the linear trend component
            self.alpha = pm.Normal("alpha", self.model_config["alpha"]["mu"], self.model_config["alpha"]["sigma"])
            self.beta = pm.Normal("beta", self.model_config["beta"]["mu"], self.model_config["beta"]["sigma"])
            trend = pm.Deterministic("trend", self.alpha + self.beta * t, dims="obs_id")
            ## Priors for seasonality
            self.beta_fourier = pm.Normal(
                "beta_fourier",
                mu=self.model_config["beta_fourier"]["mu"],
                sigma=self.model_config["beta_fourier"]["sigma"],
                dims="fourier_features",
            )
            fourier_terms = pm.MutableData("fourier_terms", self.ff)

            seasonality = pm.Deterministic(
                "seasonality", pm.math.dot(self.beta_fourier, fourier_terms), dims="obs_id"
            )
            ## Combine components ar1 trend seasonality
            mu = self.ar1 + trend + seasonality
            # The Likelihood
            pm.Normal("likelihood", mu=mu, sigma=self.sigma, observed=y, dims="obs_id")

    def predict_posterior(self, fourier_features_new: pd.DataFrame, prediction_steps: int = 200):
        """
        Add the forecast variables to the fitted model and sample them.

        Parameters:
        fourier_features_new : pd.DataFrame
            Fourier features for the forecast horizon. The frame is transposed before
            use; it is expected to hold one row per forecast step so that it lines up
            with the "obs_id_fut" coordinate.

        prediction_steps : int
            End (exclusive) of the forecast time index. The forecast covers
            range(len(self.y), prediction_steps), which is empty when prediction_steps
            is not larger than the number of observations.

        Returns the result of pm.sample_posterior_predictive for "likelihood" and
        "yhat_fut", drawn from self.idata with predictions=True and a fixed random seed.

        NOTE(review): every call registers "t_fut", "ff_fut", "ar1_fut", "trend_fut",
        "seasonality_fut" and "yhat_fut" on self.model again, so a second call on the
        same model will presumably fail on duplicate names -- confirm.
        """
        with self.model:
            # "obs_id_fut_1" starts one step earlier than "obs_id_fut" because the future
            # AR process is initialised at the last observed step.
            self.model.add_coords({"obs_id_fut_1": range(self.y.shape[0] - 1, prediction_steps, 1)})
            self.model.add_coords({"obs_id_fut": range(self.y.shape[0], prediction_steps, 1)})
            t_fut = pm.MutableData("t_fut", list(range(self.y.shape[0], prediction_steps, 1)))
            ff_fut = pm.MutableData("ff_fut", fourier_features_new.to_numpy().T)
            # condition on the learned values of the AR process
            # initialise the future AR process precisely at the last observed value in the AR process
            # using the special feature of the dirac delta distribution to be 0 probability everywhere else.
            ar1_fut = pm.AR(
                "ar1_fut",
                init_dist=pm.DiracDelta.dist(self.ar1[..., -1]),
                rho=self.coefs,
                sigma=self.sigma,
                constant=True,
                dims="obs_id_fut_1",
            )
            # Compute future trend
            trend = pm.Deterministic("trend_fut", self.alpha + self.beta * t_fut, dims="obs_id_fut")
            # Compute future seasonality
            seasonality = pm.Deterministic(
                "seasonality_fut", pm.math.dot(self.beta_fourier, ff_fut), dims="obs_id_fut"
            )
            # Combine trend and seasonality with AR1; the first AR element is the
            # initial (last observed) step, so it is dropped
            mu = ar1_fut[1:] + trend + seasonality
            pm.Normal("yhat_fut", mu=mu, sigma=self.sigma, dims="obs_id_fut")
            # use the updated values and predict outcomes and probabilities:
            idata_preds = pm.sample_posterior_predictive(
                self.idata, var_names=["likelihood", "yhat_fut"], predictions=True, random_seed=100
            )
            return idata_preds

    def _data_setter(
        self, X: Union[pd.DataFrame, np.ndarray], y: Union[pd.Series, np.ndarray] = None
    ):
        """
        Replace the in-sample data held by the model on self.model.

        The data containers created in build_model are named "fourier_terms", "t" and
        "y", so those are the names updated here.

        Parameters:
        X : pd.DataFrame or np.ndarray
            Fourier features in the same layout that was passed for fitting. They are
            transposed before being stored, mirroring self.ff = X.T.

        y : pd.Series or np.ndarray, optional
            New target values. When omitted, the "y" container is left untouched.
        """
        # Same layout as self.ff: the observations run along the second axis
        ff = X.T
        # Rebuild the integer time index so "t" and the "obs_id" coordinate match the new data
        t_new = np.arange(ff.shape[1])
        new_data = {"fourier_terms": ff, "t": t_new}
        if y is not None:
            new_data["y"] = y.values if isinstance(y, pd.Series) else y
        with self.model:
            # New coord values are passed along because "obs_id" already has coord
            # values registered and its length may change with the new data.
            pm.set_data(new_data, coords={"obs_id": t_new})

    @staticmethod
    def get_default_model_config() -> Dict:
        """
        Returns the default prior configuration used when no model_config is provided on class initialization.

        Keys as consumed by build_model:
        "coefs" (mu/sigma of the AR coefficients, "size" used to derive the AR steps),
        "alpha" and "beta" (mu/sigma of the trend intercept and slope),
        "beta_fourier" (mu/sigma of the seasonality weights),
        "sigma" (argument of the HalfNormal noise prior),
        "init" (mu/sigma/size of the AR initial distribution).
        """
        model_config: Dict = {
            "coefs": {"mu": [0.2, 0.2], "sigma": [0.5, 0.03], "size": 2},
            "alpha": {"mu": -4, "sigma": 0.1},
            "beta": {"mu": -0.1, "sigma": 0.2},
            "beta_fourier": {"mu": 0, "sigma": 2},
            "sigma": 8,
            "init": {"mu": -4, "sigma": 0.1, "size": 1},
        }

        return model_config

    @staticmethod
    def get_default_sampler_config() -> Dict:
        """
        Returns the default sampler settings used when no sampler_config is provided on class initialization.

        The dict holds the number of draws, tuning steps and chains, plus target_accept.
        """
        sampler_config: Dict = {
            "draws": 1_000,
            "tune": 1_000,
            "chains": 3,
            "target_accept": 0.95,
        }
        return sampler_config

    @property
    def output_var(self):
        """Names of the output variables: the observed "likelihood" and the forecast "yhat_fut"."""
        return "likelihood", "yhat_fut"

    @property
    def _serializable_model_config(self) -> Dict[str, Union[int, float, Dict]]:
        """
        Returns the model configuration to be saved.

        The config is returned unchanged; it contains only numbers, lists and dicts.
        """
        return self.model_config


    def _generate_and_preprocess_model_data(
        self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray], prediction_steps: int = 250
    ) -> None:
        """
        Store the training data on the instance and pre-compute Fourier features for a
        forecast horizon ending at prediction_steps.

        Sets t_data, X, ff (X transposed), y, prediction_steps, n, n_order, periods and
        fourier_features_new on self. build_model does not read fourier_features_new;
        predict_posterior takes the future features as an argument instead.
        """
        self.model_coords = None  # coords are not stored here; build_model adds them directly to the model
        # Integer time index 0 .. len(y) - 1
        self.t_data = list(range(len(y)))
        # X is kept as given; the transposed copy goes to a separate attribute
        self.X = X
        # Transposed features; build_model feeds this to the "fourier_terms" container
        self.ff = X.T
        self.y = y
        self.prediction_steps = prediction_steps
        # Number of forecast steps beyond the observed data
        self.n = prediction_steps - y.shape[0]
        # Number of Fourier orders (one sin and one cos column each)
        self.n_order = 10
        # Future time indices, scaled by the same / 7 used for the training features in the example script
        self.periods = (y.shape[0] + np.arange(self.n)) / 7

        # Columns are named "<func>_order_<order>", ordered sin/cos within each order
        self.fourier_features_new = pd.DataFrame(
            {
                f"{func}_order_{order}": getattr(np, func)(2 * np.pi * self.periods * order)
                for order in range(1, self.n_order + 1)
                for func in ("sin", "cos")
            }
        )

Topic		Replies	Views
Using Pymc3 to do forecasting and numerical integration Questions	11	2718	May 5, 2020
How to predict new values on hold-out data Questions	24	13447	July 22, 2020
Feedback on new ModelBuilder class for deploying PyMC models Development modeling	1	561	August 16, 2022
Regression model sampling solely one sample every 5th second version agnostic modeling	2	517	August 29, 2022
ModelBuilder not work	3	316	March 16, 2024

ModelBuilder Guidance

Related topics