Example #1
def test_run_template_1():
    """Runs default template"""
    data = generate_df_for_tests(
        freq="H",
        periods=700 * 24)
    df = data["train_df"]
    forecast_horizon = data["test_df"].shape[0]

    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.037, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(0.836, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.004, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.800, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
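These snippets are extracted test functions and omit their module-level imports. A minimal sketch of the imports they assume, based on a recent Greykite release (exact module paths may differ between versions):

import warnings

import numpy as np
import pandas as pd
import pytest

from greykite.common.constants import PREDICTION_BAND_COVERAGE
from greykite.common.constants import TIME_COL
from greykite.common.constants import VALUE_COL
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.common.python_utils import assert_equal
from greykite.common.testing_utils import generate_df_for_tests
from greykite.common.testing_utils import generate_df_with_reg_for_tests
from greykite.framework.templates.autogen.forecast_config import ComputationParam
from greykite.framework.templates.autogen.forecast_config import EvaluationMetricParam
from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
# Testing helpers used below (check_forecast_pipeline_result, assert_forecast_pipeline_result_equal,
# generate_holiday_events, get_event_pred_cols) come from Greykite's test utilities;
# their exact module locations depend on the installed version.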
Example #2
def test_run_forecast_json():
    """Tests:
     - no coverage
     - hourly data (2+ years)
     - default `hyperparameter_grid` (all interaction terms enabled)
    """
    # sets random state for consistent comparison
    data = generate_df_for_tests(freq="H", periods=700 * 24)
    df = data["train_df"]

    json_str = """{
        "model_template": "SILVERKITE",
        "forecast_horizon": 3359,
        "model_components_param": {
            "custom": {
                "fit_algorithm_dict": {
                    "fit_algorithm": "linear"
                }
            }
        }
    }"""

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_json(df=df, json_str=json_str)

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.120, rel=0.03)
        assert result.backtest.test_evaluation[q80] == pytest.approx(0.863, rel=0.02)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(1.975, rel=0.02)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.786, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=None,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #3
def test_run_template_4():
    """Runs custom template with monthly data and auto-regression"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=["ct2"]),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.95, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #4
def test_run_forecast_config():
    """Tests `run_forecast_config`"""
    data = generate_df_for_tests(freq="H", periods=14 * 24)
    df = data["df"]

    # Checks if exception is raised
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_config(
            df=df, config=ForecastConfig(model_template="unknown_template"))
    with pytest.raises(ValueError, match="is not recognized"):
        forecaster = Forecaster()
        forecaster.run_forecast_json(
            df=df, json_str="""{ "model_template": "unknown_template" }""")

    # All run_forecast_config* functions return the same result for the default config,
    # call forecast_pipeline, and return a result with the proper format.
    np.random.seed(123)
    forecaster = Forecaster()
    default_result = forecaster.run_forecast_config(df=df)
    score_func = EvaluationMetricEnum.MeanAbsolutePercentError.name
    check_forecast_pipeline_result(default_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_equal(forecaster.forecast_result, default_result)

    np.random.seed(123)
    forecaster = Forecaster()
    json_result = forecaster.run_forecast_json(df=df)
    check_forecast_pipeline_result(json_result,
                                   coverage=None,
                                   strategy=None,
                                   score_func=score_func,
                                   greater_is_better=False)
    assert_forecast_pipeline_result_equal(json_result,
                                          default_result,
                                          rel=0.02)
Example #5
def test_run_template_2():
    """Runs custom template with all options"""
    data = generate_df_with_reg_for_tests(
        freq="D",
        periods=400,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    daily_event_df_dict = generate_holiday_events(
        countries=["UnitedStates"],
        holidays_to_model_separately=["New Year's Day"],
        year_start=2017,
        year_end=2022,
        pre_num=2,
        post_num=2)
    event_pred_cols = get_event_pred_cols(daily_event_df_dict)
    model_components = ModelComponentsParam(
        seasonality={
            "fs_components_df": pd.DataFrame({
                "name": ["tow", "tom", "toq", "toy"],
                "period": [7.0, 1.0, 1.0, 1.0],
                "order": [2, 1, 1, 5],
                "seas_names": ["weekly", "monthly", "quarterly", "yearly"]
            })
        },
        events={
            "daily_event_df_dict": daily_event_df_dict
        },
        changepoints={
            "changepoints_dict": {
                "method": "auto",
                "yearly_seasonality_order": 3,
                "regularization_strength": 0.5,
                "resample_freq": "14D",
                "potential_changepoint_distance": "56D",
                "no_changepoint_proportion_from_end": 0.2
            },
            "seasonality_changepoints_dict": {
                "potential_changepoint_distance": "60D",
                "regularization_strength": 0.5,
                "no_changepoint_proportion_from_end": 0.2
            },
        },
        autoregression=None,
        uncertainty={
            "uncertainty_dict": None,
        },
        custom={
            "origin_for_time_vars": None,
            "extra_pred_cols": [["ct1"] + reg_cols + event_pred_cols],  # growth, regressors, events
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {"cv": 2}
            },
            "min_admissible_value": min(df[VALUE_COL]) - abs(max(df[VALUE_COL])),
            "max_admissible_value": max(df[VALUE_COL]) * 2,
        }
    )
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.692, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.531, rel=1e-2)
        assert result.backtest.test_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.823, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.304, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.921, rel=1e-2)
        assert result.forecast.train_evaluation[PREDICTION_BAND_COVERAGE] == pytest.approx(0.897, rel=1e-2)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
Example #6
def test_run_template_5():
    """Runs custom template with monthly data, auto-regression and lagged regressors"""
    data = generate_df_with_reg_for_tests(
        freq="MS",
        periods=48,
        remove_extra_cols=True,
        mask_test_actuals=True)
    reg_cols_all = ["regressor1", "regressor2", "regressor_categ"]
    reg_cols = ["regressor1"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols_all
    df = data["df"][keep_cols]
    forecast_horizon = data["test_df"].shape[0]

    model_components = ModelComponentsParam(
        custom=dict(
            fit_algorithm_dict=dict(fit_algorithm="linear"),
            extra_pred_cols=reg_cols),
        autoregression=dict(autoreg_dict=dict(lag_dict=dict(orders=[1]))),
        lagged_regressors={
            "lagged_regressor_dict": [
                {"regressor2": "auto"},
                {"regressor_categ": {"lag_dict": {"orders": [5]}}}
            ]},
        uncertainty=dict(uncertainty_dict=None))
    config = ForecastConfig(
        model_template=ModelTemplateEnum.SK.name,
        forecast_horizon=forecast_horizon,
        coverage=0.9,
        model_components_param=model_components,
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = Forecaster().run_forecast_config(
            df=df,
            config=config,
        )
        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(4.46, rel=1e-1)
        check_forecast_pipeline_result(
            result,
            coverage=0.9,
            strategy=None,
            score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
            greater_is_better=False)
        # Checks lagged regressor columns
        actual_pred_cols = set(result.model[-1].model_dict["pred_cols"])
        actual_x_mat_cols = set(result.model[-1].model_dict["x_mat"].columns)
        expected_pred_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5'
        }
        expected_x_mat_cols = {
            'regressor1',
            'y_lag1',
            'regressor_categ_lag5[T.c2]'
        }
        assert expected_pred_cols.issubset(actual_pred_cols)
        assert expected_x_mat_cols.issubset(actual_x_mat_cols)
Example #7
def test_run_forecast_config_custom():
    """Tests `run_forecast_config` on weekly data with custom config:

     - numeric and categorical regressors
     - coverage
     - null model
    """
    data = generate_df_with_reg_for_tests(freq="W-MON",
                                          periods=140,
                                          remove_extra_cols=True,
                                          mask_test_actuals=True)
    reg_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + reg_cols
    df = data["df"][keep_cols]

    metric = EvaluationMetricEnum.MeanAbsoluteError
    evaluation_metric = EvaluationMetricParam(cv_selection_metric=metric.name,
                                              agg_periods=7,
                                              agg_func=np.max,
                                              null_model_params={
                                                  "strategy": "quantile",
                                                  "constant": None,
                                                  "quantile": 0.5
                                              })

    evaluation_period = EvaluationPeriodParam(test_horizon=10,
                                              periods_between_train_test=5,
                                              cv_horizon=4,
                                              cv_min_train_periods=80,
                                              cv_expanding_window=False,
                                              cv_periods_between_splits=20,
                                              cv_periods_between_train_test=3,
                                              cv_max_splits=3)

    model_components = ModelComponentsParam(
        regressors={"regressor_cols": reg_cols},
        custom={
            "fit_algorithm_dict": {
                "fit_algorithm": "ridge",
                "fit_algorithm_params": {
                    "cv": 2
                }
            }
        })
    computation = ComputationParam(verbose=2)
    forecast_horizon = 27
    coverage = 0.90

    forecast_config = ForecastConfig(
        model_template=ModelTemplateEnum.SILVERKITE.name,
        computation_param=computation,
        coverage=coverage,
        evaluation_metric_param=evaluation_metric,
        evaluation_period_param=evaluation_period,
        forecast_horizon=forecast_horizon,
        model_components_param=model_components)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(df=df, config=forecast_config)

        rmse = EvaluationMetricEnum.RootMeanSquaredError.get_metric_name()
        q80 = EvaluationMetricEnum.Quantile80.get_metric_name()
        assert result.backtest.test_evaluation[rmse] == pytest.approx(2.976, rel=1e-2)
        assert result.backtest.test_evaluation[q80] == pytest.approx(1.360, rel=1e-2)
        assert result.forecast.train_evaluation[rmse] == pytest.approx(2.224, rel=1e-2)
        assert result.forecast.train_evaluation[q80] == pytest.approx(0.941, rel=1e-2)
        check_forecast_pipeline_result(result,
                                       coverage=coverage,
                                       strategy=None,
                                       score_func=metric.name,
                                       greater_is_better=False)

    with pytest.raises(KeyError, match="missing_regressor"):
        model_components = ModelComponentsParam(
            regressors={"regressor_cols": ["missing_regressor"]})
        forecaster = Forecaster()
        result = forecaster.run_forecast_config(
            df=df,
            config=ForecastConfig(
                model_template=ModelTemplateEnum.SILVERKITE.name,
                model_components_param=model_components))
        check_forecast_pipeline_result(result,
                                       coverage=None,
                                       strategy=None,
                                       score_func=metric.get_metric_func(),
                                       greater_is_better=False)