Exemplo n.º 1
0
def test_get_regressor_cols():
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    template.df = pd.DataFrame(columns=["p1", "p2"])
    regressor_cols = template.get_regressor_cols()
    assert regressor_cols is None

    template.config.model_components_param = ModelComponentsParam(
        custom={}
    )
    regressor_cols = template.get_regressor_cols()
    assert regressor_cols is None

    template.config.model_components_param = ModelComponentsParam(
        custom={
            "extra_pred_cols": ["p1", "p2", "p3", template.config.metadata_param.time_col]
        }
    )
    regressor_cols = template.get_regressor_cols()
    assert set(regressor_cols) == {"p1", "p2"}

    template.config.model_components_param = ModelComponentsParam(
        custom={
            "extra_pred_cols": [["p1"], ["p2", "p3"], None, []]
        }
    )
    regressor_cols = template.get_regressor_cols()
    assert set(regressor_cols) == {"p1", "p2"}
Exemplo n.º 2
0
def test_apply_template_decorator():
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = SilverkiteTemplate()
    with pytest.raises(
            ValueError,
            match="SilverkiteTemplate only supports config.model_template='SK', found 'PROPHET'"):
        template.apply_template_for_pipeline_params(
            df=df,
            config=ForecastConfig(model_template="PROPHET")
        )
Exemplo n.º 3
0
def test_silverkite_template():
    """Tests test_silverkite_template with default config"""
    data = generate_df_for_tests(freq="D", periods=10)
    df = data["df"]
    template = SilverkiteTemplate()
    config = ForecastConfig(model_template="SK")
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=config
    )
    assert config == ForecastConfig(model_template="SK")  # not modified
    pipeline = params.pop("pipeline", None)

    metric = EvaluationMetricEnum.MeanAbsolutePercentError
    expected_params = dict(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        date_format=None,
        freq=None,
        train_end_date=None,
        anomaly_info=None,
        # model
        regressor_cols=None,
        lagged_regressor_cols=None,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=None,
        n_jobs=COMPUTATION_N_JOBS,
        verbose=1,
        # forecast
        forecast_horizon=None,
        coverage=None,
        test_horizon=None,
        periods_between_train_test=None,
        agg_periods=None,
        agg_func=None,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=CV_REPORT_METRICS_ALL,
        null_model_params=None,
        relative_error_tolerance=None,
        # CV
        cv_horizon=None,
        cv_min_train_periods=None,
        cv_expanding_window=True,
        cv_periods_between_splits=None,
        cv_periods_between_train_test=None,
        cv_max_splits=3
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Exemplo n.º 4
0
def test_get_lagged_regressor_info():
    # Without lagged regressors
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    expected_lagged_regressor_info = {
        "lagged_regressor_cols": None,
        "overall_min_lag_order": None,
        "overall_max_lag_order": None
    }
    assert template.get_lagged_regressor_info() == expected_lagged_regressor_info

    # With lagged regressors
    template.config.model_components_param = ModelComponentsParam(
        lagged_regressors={
            "lagged_regressor_dict": [{
                "regressor2": {
                    "lag_dict": {"orders": [5]},
                    "agg_lag_dict": {
                        "orders_list": [[7, 7 * 2, 7 * 3]],
                        "interval_list": [(8, 7 * 2)]},
                    "series_na_fill_func": lambda s: s.bfill().ffill()}
            }, {
                "regressor_bool": {
                    "lag_dict": {"orders": [1]},
                    "agg_lag_dict": {
                        "orders_list": [[7, 7 * 2]],
                        "interval_list": [(8, 7 * 2)]},
                    "series_na_fill_func": lambda s: s.bfill().ffill()}
            }]
        })
    lagged_regressor_info = template.get_lagged_regressor_info()
    assert set(lagged_regressor_info["lagged_regressor_cols"]) == {"regressor2", "regressor_bool"}
    assert lagged_regressor_info["overall_min_lag_order"] == 1
    assert lagged_regressor_info["overall_max_lag_order"] == 21
Exemplo n.º 5
0
def test_property():
    """Tests properties"""
    assert SilverkiteTemplate().allow_model_template_list is False
    assert SilverkiteTemplate().allow_model_components_param_list is False

    template = SilverkiteTemplate()
    assert template.DEFAULT_MODEL_TEMPLATE == "SK"
    assert isinstance(template.estimator, SilverkiteEstimator)
    assert template.estimator.coverage is None
    assert template.apply_forecast_config_defaults().model_template == "SK"

    estimator = SilverkiteEstimator(coverage=0.99)
    template = SilverkiteTemplate(estimator=estimator)
    assert template.estimator is estimator
Exemplo n.º 6
0
def test_silverkite_template_custom(model_components_param):
    """"Tests simple_silverkite_template with custom parameters,
    and data that has regressors"""
    data = generate_df_with_reg_for_tests(
        freq="H",
        periods=300*24,
        remove_extra_cols=True,
        mask_test_actuals=True)
    df = data["df"]
    time_col = "some_time_col"
    value_col = "some_value_col"
    df.rename({
        TIME_COL: time_col,
        VALUE_COL: value_col
    }, axis=1, inplace=True)

    metric = EvaluationMetricEnum.MeanAbsoluteError
    # anomaly adjustment adds 10.0 to every record
    adjustment_size = 10.0
    anomaly_df = pd.DataFrame({
        START_DATE_COL: [df[time_col].min()],
        END_DATE_COL: [df[time_col].max()],
        ADJUSTMENT_DELTA_COL: [adjustment_size],
        METRIC_COL: [value_col]
    })
    anomaly_info = {
        "value_col": VALUE_COL,
        "anomaly_df": anomaly_df,
        "start_date_col": START_DATE_COL,
        "end_date_col": END_DATE_COL,
        "adjustment_delta_col": ADJUSTMENT_DELTA_COL,
        "filter_by_dict": {METRIC_COL: VALUE_COL},
        "adjustment_method": "add"
    }
    metadata = MetadataParam(
        time_col=time_col,
        value_col=value_col,
        freq="H",
        date_format="%Y-%m-%d-%H",
        train_end_date=datetime.datetime(2019, 7, 1),
        anomaly_info=anomaly_info
    )
    evaluation_metric = EvaluationMetricParam(
        cv_selection_metric=metric.name,
        cv_report_metrics=[EvaluationMetricEnum.MedianAbsolutePercentError.name],
        agg_periods=24,
        agg_func=np.max,
        null_model_params={
            "strategy": "quantile",
            "constant": None,
            "quantile": 0.8
        },
        relative_error_tolerance=0.01
    )
    evaluation_period = EvaluationPeriodParam(
        test_horizon=1,
        periods_between_train_test=2,
        cv_horizon=3,
        cv_min_train_periods=4,
        cv_expanding_window=True,
        cv_periods_between_splits=5,
        cv_periods_between_train_test=6,
        cv_max_splits=7
    )
    computation = ComputationParam(
        hyperparameter_budget=10,
        n_jobs=None,
        verbose=1
    )
    forecast_horizon = 20
    coverage = 0.7
    template = SilverkiteTemplate()
    params = template.apply_template_for_pipeline_params(
        df=df,
        config=ForecastConfig(
            model_template=ModelTemplateEnum.SK.name,
            metadata_param=metadata,
            forecast_horizon=forecast_horizon,
            coverage=coverage,
            evaluation_metric_param=evaluation_metric,
            evaluation_period_param=evaluation_period,
            model_components_param=model_components_param,
            computation_param=computation
        )
    )
    pipeline = params.pop("pipeline", None)
    expected_params = dict(
        df=df,
        time_col=time_col,
        value_col=value_col,
        date_format=metadata.date_format,
        freq=metadata.freq,
        train_end_date=metadata.train_end_date,
        anomaly_info=metadata.anomaly_info,
        # model
        regressor_cols=template.regressor_cols,
        estimator=None,
        hyperparameter_grid=template.hyperparameter_grid,
        hyperparameter_budget=computation.hyperparameter_budget,
        n_jobs=computation.n_jobs,
        verbose=computation.verbose,
        # forecast
        forecast_horizon=forecast_horizon,
        coverage=coverage,
        test_horizon=evaluation_period.test_horizon,
        periods_between_train_test=evaluation_period.periods_between_train_test,
        agg_periods=evaluation_metric.agg_periods,
        agg_func=evaluation_metric.agg_func,
        relative_error_tolerance=evaluation_metric.relative_error_tolerance,
        # evaluation
        score_func=metric.name,
        score_func_greater_is_better=metric.get_metric_greater_is_better(),
        cv_report_metrics=evaluation_metric.cv_report_metrics,
        null_model_params=evaluation_metric.null_model_params,
        # CV
        cv_horizon=evaluation_period.cv_horizon,
        cv_min_train_periods=evaluation_period.cv_min_train_periods,
        cv_expanding_window=evaluation_period.cv_expanding_window,
        cv_periods_between_splits=evaluation_period.cv_periods_between_splits,
        cv_periods_between_train_test=evaluation_period.cv_periods_between_train_test,
        cv_max_splits=evaluation_period.cv_max_splits
    )
    assert_basic_pipeline_equal(pipeline, template.pipeline)
    assert_equal(params, expected_params)
Exemplo n.º 7
0
def test_get_silverkite_hyperparameter_grid(model_components_param, silverkite, silverkite_diagnostics):
    template = SilverkiteTemplate()
    template.config = template.apply_forecast_config_defaults()
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [SilverkiteForecast()],
        "estimator__silverkite_diagnostics": [SilverkiteDiagnostics()],
        "estimator__origin_for_time_vars": [None],
        "estimator__extra_pred_cols": [["ct1"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None}],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [pd.DataFrame({
            "name": ["tod", "tow", "tom", "toq", "toy"],
            "period": [24.0, 7.0, 1.0, 1.0, 1.0],
            "order": [3, 3, 1, 1, 5],
            "seas_names": ["daily", "weekly", "monthly", "quarterly", "yearly"]})],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [None],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [None],
        "estimator__uncertainty_dict": [None],
    }
    assert_equal(hyperparameter_grid, expected_grid, ignore_keys={"estimator__silverkite": None, "estimator__silverkite_diagnostics": None})
    assert hyperparameter_grid["estimator__silverkite"][0] != silverkite
    assert hyperparameter_grid["estimator__silverkite_diagnostics"][0] != silverkite_diagnostics

    # Tests auto-list conversion
    template.config.model_components_param = model_components_param
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid = {
        "estimator__silverkite": [silverkite],
        "estimator__silverkite_diagnostics": [silverkite_diagnostics],
        "estimator__origin_for_time_vars": [2020],
        "estimator__extra_pred_cols": [["ct1"], ["ct2"], ["regressor1", "regressor3"]],
        "estimator__train_test_thresh": [None],
        "estimator__training_fraction": [None],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear",
            "fit_algorithm_params": None,
        }],
        "estimator__daily_event_df_dict": [None],
        "estimator__fs_components_df": [None],
        "estimator__autoreg_dict": [None],
        "estimator__changepoints_dict": [{
            "method": "uniform",
            "n_changepoints": 20,
        }],
        "estimator__seasonality_changepoints_dict": [None],
        "estimator__changepoint_detector": [None],
        "estimator__min_admissible_value": [None],
        "estimator__max_admissible_value": [4],
        "estimator__uncertainty_dict": [{
            "uncertainty_method": "simple_conditional_residuals"
        }],
    }
    assert_equal(hyperparameter_grid, expected_grid)

    # Tests hyperparameter_override
    template.config.model_components_param.hyperparameter_override = [
        {
            "input__response__null__max_frac": 0.1,
            "estimator__min_admissible_value": [2],
            "estimator__extra_pred_cols": ["override_estimator__extra_pred_cols"],
        },
        {},
        {
            "estimator__extra_pred_cols": ["val1", "val2"],
            "estimator__origin_for_time_vars": [2019],
        },
        None
    ]
    template.time_properties = {"origin_for_time_vars": 2020}
    hyperparameter_grid = template.get_hyperparameter_grid()
    expected_grid["estimator__origin_for_time_vars"] = [2020]
    updated_grid1 = expected_grid.copy()
    updated_grid1["input__response__null__max_frac"] = [0.1]
    updated_grid1["estimator__min_admissible_value"] = [2]
    updated_grid1["estimator__extra_pred_cols"] = [["override_estimator__extra_pred_cols"]]
    updated_grid2 = expected_grid.copy()
    updated_grid2["estimator__extra_pred_cols"] = [["val1", "val2"]]
    updated_grid2["estimator__origin_for_time_vars"] = [2019]
    expected_grid = [
        updated_grid1,
        expected_grid,
        updated_grid2,
        expected_grid]
    assert_equal(hyperparameter_grid, expected_grid)