def test_score_function_null(daily_data):
    """Tests fit and its compatibility with predict/score.
    Checks score function accuracy with null model
    """
    model = SilverkiteEstimator(
        null_model_params={"strategy": "mean"},
        fit_algorithm_dict={
            "fit_algorithm_params": {"fit_intercept": False}
        }
    )
    assert model.fit_algorithm_dict == {
        "fit_algorithm_params": {"fit_intercept": False}
    }
    train_df = daily_data["train_df"]

    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.fit_algorithm_dict == {
        "fit_algorithm": "linear",
        "fit_algorithm_params": {"fit_intercept": False}
    }
    score = model.score(
        daily_data["test_df"],
        daily_data["test_df"][cst.VALUE_COL])
    assert score == pytest.approx(0.90, rel=1e-2)
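# NOTE: `daily_data` (and `daily_data_with_reg`, used below) are assumed to be
# pytest fixtures defined elsewhere in the test module, returning dicts with
# "train_df"/"test_df" keys. A minimal hypothetical sketch:
#
# @pytest.fixture
# def daily_data():
#     return generate_df_for_tests(freq="D", periods=500)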
def test_validate_inputs():
    """Test validate_inputs"""

    with pytest.warns(None) as record:
        SilverkiteEstimator()
    assert len(record) == 0  # no warnings

    fs_cols_not_found = {"order", "seas_names"}
    with pytest.raises(ValueError) as record:
        fs_components_df = pd.DataFrame({
            "name": ["tod", "tow"],
            "period": [24.0, 7.0]})
        SilverkiteEstimator(fs_components_df=fs_components_df)
    # the assertion must be outside the `with` block, otherwise it never runs
    assert (f"fs_components_df is missing the following columns: "
            f"{fs_cols_not_found}" in str(record.value))

    with pytest.raises(ValueError) as record:
        fs_components_df = pd.DataFrame({
            "name": ["tod", "tow", "tow"],
            "period": [24.0, 7.0, 10.0],
            "order": [12, 4, 3],
            "seas_names": ["daily", "weekly", "weekly"]})
        SilverkiteEstimator(fs_components_df=fs_components_df)
    assert ("Found multiple rows in fs_components_df with same `names` and `seas_names`. "
            "Make sure these are unique." in str(record.value))
def test_uncertainty(daily_data):
    """Runs a basic model with uncertainty intervals
    and checks coverage"""
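    # `simple_conditional_residuals` builds prediction intervals from model
    # residuals grouped by `conditional_cols`; slices with fewer than
    # `sample_size_thresh` points fall back to `small_sample_size_method`.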
    uncertainty_dict = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": {
            "conditional_cols": ["dow_hr"],
            "quantiles": [0.025, 0.975],
            "quantile_estimation_method": "normal_fit",
            "sample_size_thresh": 10,
            "small_sample_size_method": "std_quantiles",
            "small_sample_size_quantile": 0.98}}
    model = SilverkiteEstimator(uncertainty_dict=uncertainty_dict)
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]

    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    predictions = model.predict(test_df)
    expected_forecast_cols = \
        {"ts", "y", "y_quantile_summary", "err_std", "forecast_lower", "forecast_upper"}
    assert expected_forecast_cols.issubset(list(model.forecast.columns))

    actual = daily_data["test_df"][cst.VALUE_COL]
    forecast_lower = predictions[cst.PREDICTED_LOWER_COL]
    forecast_upper = predictions[cst.PREDICTED_UPPER_COL]
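    # Empirical coverage: percentage of actuals falling inside the prediction interval.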
    calc_pred_coverage = 100 * (
        (actual <= forecast_upper)
        & (actual >= forecast_lower)
        ).mean()
    assert round(calc_pred_coverage) == 97, "forecast coverage is incorrect"
def test_validate_fs_components_df():
    """Tests validate_fs_components_df function"""
    model = SilverkiteEstimator()
    with pytest.warns(None) as record:
        fs_components_df = pd.DataFrame({
            "name": ["tod", "tow"],
            "period": [24.0, 7.0],
            "order": [12, 4],
            "seas_names": ["daily", "weekly"]})
        model.validate_fs_components_df(fs_components_df)
    assert len(record) == 0

    fs_cols_not_found = ["order", "seas_names"]
    with pytest.raises(ValueError) as record:
        fs_components_df = pd.DataFrame({
            "name": ["tod", "tow"],
            "period": [24.0, 7.0]})
        model.validate_fs_components_df(fs_components_df)
    assert (f"fs_components_df is missing the following columns: {fs_cols_not_found}"
            in str(record.value))

    with pytest.raises(ValueError, match="Found multiple rows in fs_components_df with the same "
                                         "`names` and `seas_names`. Make sure these are unique."):
        fs_components_df = pd.DataFrame({
            "name": ["tod", "tow", "tow"],
            "period": [24.0, 7.0, 10.0],
            "order": [12, 4, 3],
            "seas_names": ["daily", "weekly", "weekly"]})
        model.validate_fs_components_df(fs_components_df)
def test_get_basic_pipeline_custom():
    """Tests get_basic_pipeline with custom estimator"""
    pipeline = get_basic_pipeline(
        estimator=SilverkiteEstimator(),
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.name,
        score_func_greater_is_better=False,
        agg_periods=10,
        agg_func=np.sum,
        relative_error_tolerance=None,
        coverage=None,
        null_model_params={"strategy": "mean"})

    expected_score_func, _, _ = get_score_func_with_aggregation(
        score_func=EvaluationMetricEnum.MeanAbsolutePercentError.get_metric_func(),
        agg_periods=10,
        agg_func=np.sum,
        greater_is_better=False)

    # checks estimator parameters
    assert isinstance(pipeline.steps[-1][-1], SilverkiteEstimator)
    assert pipeline.steps[-1][-1].fit_algorithm_dict is None
    assert pipeline.steps[-1][-1].extra_pred_cols is None
    assert pipeline.steps[-1][-1].coverage is None
    assert pipeline.steps[-1][-1].null_model_params["strategy"] == "mean"
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        assert_eval_function_equal(pipeline.steps[-1][-1].score_func,
                                   expected_score_func)
def test_setup(params):
    """Tests __init__ and attributes set during fit"""
    coverage = 0.95
    silverkite = SilverkiteForecast()
    model = SilverkiteEstimator(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params)

    assert model.silverkite == silverkite
    assert model.score_func == mean_squared_error
    assert model.coverage == coverage
    assert model.null_model_params is None

    # set_params must be able to replicate the init
    model2 = SilverkiteEstimator()
    model2.set_params(**dict(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params))
    assert model2.__dict__ == model.__dict__

    initialized_params = model.__dict__
    initialized_params_subset = {
        k: v for k, v in initialized_params.items()
        if k in params.keys()}
    assert_equal(initialized_params_subset, params)

    assert model.model_dict is None
    assert model.pred_cols is None
    assert model.feature_cols is None
    assert model.coef_ is None

    train_df = daily_data_reg().get("train_df").copy()
    model.fit(train_df)
    assert model.fit_algorithm_dict == {
        "fit_algorithm": "sgd",
        "fit_algorithm_params": {"alpha": 0.1},
    }
    assert model.model_dict is not None
    assert isinstance(model.model_dict["ml_model"], SGDRegressor)
    assert model.model_dict["ml_model"].alpha == (
        params["fit_algorithm_dict"]["fit_algorithm_params"]["alpha"])
    assert model.model_dict["training_evaluation"] is not None
    assert model.model_dict["test_evaluation"] is None
    assert model.pred_cols is not None
    assert model.feature_cols is not None
    assert_frame_equal(model.df, train_df)
    assert model.coef_ is not None
def test_property():
    """Tests properties"""
    assert SilverkiteTemplate().allow_model_template_list is False
    assert SilverkiteTemplate().allow_model_components_param_list is False

    template = SilverkiteTemplate()
    assert template.DEFAULT_MODEL_TEMPLATE == "SK"
    assert isinstance(template.estimator, SilverkiteEstimator)
    assert template.estimator.coverage is None
    assert template.apply_forecast_config_defaults().model_template == "SK"

    estimator = SilverkiteEstimator(coverage=0.99)
    template = SilverkiteTemplate(estimator=estimator)
    assert template.estimator is estimator
def test_get_basic_pipeline_apply_reg():
    """Tests get_basic_pipeline fit and predict methods on
    a dataset with regressors, and checks if pipeline parameters
    can be set.
    """
    df = generate_df_with_reg_for_tests("D", 50)
    # adds degenerate columns
    df["train_df"]["cst1"] = "constant"
    df["train_df"]["cst2"] = 1.0
    df["test_df"]["cst1"] = "constant"
    df["test_df"]["cst2"] = 1.0
    pipeline = get_basic_pipeline(
        estimator=SilverkiteEstimator(),
        score_func=EvaluationMetricEnum.MeanSquaredError.name,
        score_func_greater_is_better=False,
        agg_periods=None,
        agg_func=None,
        relative_error_tolerance=None,
        coverage=0.95,
        null_model_params=None,
        regressor_cols=[
            "regressor1", "regressor2", "regressor3", "regressor_bool",
            "regressor_categ", "cst1", "cst2"
        ])
    pipeline.fit(df["train_df"])
    assert pipeline.named_steps["degenerate"].drop_cols == []
    pipeline.predict(df["test_df"])

    # drops degenerate columns, normalizes
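    # (sklearn's double-underscore convention routes each name to a nested
    # pipeline component: step__substep__param)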
    pipeline.set_params(
        degenerate__drop_degenerate=True,
        input__regressors_numeric__normalize__normalize_algorithm="PowerTransformer",
    )
    pipeline.fit(df["train_df"])
    # (column order is swapped by column selectors and feature union)
    assert pipeline.named_steps["degenerate"].drop_cols == ["cst2", "cst1"]
    predictions = pipeline.predict(df["test_df"])
    assert predictions.shape[0] == df["test_df"].shape[0]

    with pytest.raises(
            ValueError,
            match="Invalid parameter unknown_param for estimator NormalizeTransformer"):
        pipeline.set_params(
            degenerate__drop_degenerate=True,
            input__regressors_numeric__normalize__unknown_param="PowerTransformer",
        )
def test_setup2(params2):
    """Tests __init__ and attributes set during fit"""
    coverage = 0.95
    silverkite = SilverkiteForecast()
    model = SilverkiteEstimator(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params2)

    assert model.silverkite == silverkite
    assert model.score_func == mean_squared_error
    assert model.coverage == coverage
    assert model.null_model_params is None

    # set_params must be able to replicate the init
    model2 = SilverkiteEstimator()
    model2.set_params(**dict(
        silverkite=silverkite,
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        **params2))
    assert model2.__dict__ == model.__dict__

    initialized_params = model.__dict__
    initialized_params_subset = {
        k: v for k, v in initialized_params.items()
        if k in params2.keys()}
    assert_equal(initialized_params_subset, params2)

    assert model.model_dict is None
    assert model.pred_cols is None
    assert model.feature_cols is None
    assert model.coef_ is None

    train_df = daily_data_reg().get("train_df").copy()
    model.fit(train_df)
    assert model.model_dict is not None
    assert model.model_dict["training_evaluation"] is not None
    assert model.model_dict["test_evaluation"] is None
    assert model.pred_cols is not None
    assert model.feature_cols is not None
    assert_frame_equal(model.df, train_df)
    assert model.coef_ is not None
def test_plot_components():
    """Test plot_components of UnivariateForecast class"""
    X = pd.DataFrame({
        cst.TIME_COL:
        pd.date_range("2018-01-01", periods=10, freq="D"),
        cst.VALUE_COL:
        np.arange(1, 11)
    })
    coverage = 0.95

    # Test Silverkite
    trained_model = Pipeline([("estimator",
                               SilverkiteEstimator(coverage=coverage))])
    with pytest.warns(Warning) as record:
        trained_model.fit(X, X[cst.VALUE_COL])
        assert "No slice had sufficient sample size" in record[0].message.args[0]
    forecast = get_forecast(X, trained_model)

    with pytest.warns(Warning) as record:
        title = "Custom component plot"
        fig = forecast.plot_components(
            names=["trend", "YEARLY_SEASONALITY", "DUMMY"], title=title)

        expected_rows = 3
        assert len(fig.data) == expected_rows
        assert [fig.data[i].name for i in range(expected_rows)] == \
            [cst.VALUE_COL, "trend", "YEARLY_SEASONALITY"]

        assert fig.layout.xaxis.title["text"] == cst.TIME_COL
        assert fig.layout.xaxis2.title["text"] == cst.TIME_COL
        assert fig.layout.xaxis3.title["text"] == "Time of year"

        assert fig.layout.yaxis.title["text"] == cst.VALUE_COL
        assert fig.layout.yaxis2.title["text"] == "trend"
        assert fig.layout.yaxis3.title["text"] == "yearly"

        assert fig.layout.title["text"] == title

        assert f"The following components have not been specified in the model: " \
               f"{{'DUMMY'}}, plotting the rest." in record[0].message.args[0]

    # Test Prophet
    trained_model = Pipeline([("estimator",
                               ProphetEstimator(coverage=coverage))])
    trained_model.fit(X, X[cst.VALUE_COL])
    forecast = get_forecast(X, trained_model)
def test_score_function(daily_data_with_reg):
    """Tests fit and its compatibility with predict/score.
    Checks score function accuracy without null model
    """
    model = SilverkiteEstimator(
        extra_pred_cols=["ct1", "regressor1", "regressor2"],
        impute_dict={
            "func": impute_with_lags,
            "params": {"orders": [7]}}
    )
    train_df = daily_data_with_reg["train_df"]
    test_df = daily_data_with_reg["test_df"]

    model.fit(
        X=train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    score = model.score(test_df, test_df[cst.VALUE_COL])
    pred_df = model.predict(test_df)
    assert list(pred_df.columns) == [cst.TIME_COL, cst.PREDICTED_COL]
    assert score == pytest.approx(mean_squared_error(
        pred_df[cst.PREDICTED_COL],
        test_df[cst.VALUE_COL]))
    assert score == pytest.approx(4.6, rel=1e-2)
def test_get_forecast():
    """Tests get_forecast function"""
    X = pd.DataFrame({
        cst.TIME_COL:
        pd.date_range("2018-01-01", periods=10, freq="D"),
        cst.VALUE_COL:
        np.arange(10)
    })
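    # Two future periods with NaN actuals, used below to test forecasting
    # beyond the training data.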
    X_future = pd.DataFrame({
        cst.TIME_COL:
        pd.date_range("2018-01-11", periods=2, freq="D"),
        cst.VALUE_COL:
        np.repeat(np.nan, 2)
    })
    # coverage is sufficient to request uncertainty interval,
    # even with ``uncertainty_dict=None``
    coverage = 0.95

    # test forecast with bands
    trained_model = Pipeline([("estimator",
                               SilverkiteEstimator(coverage=coverage))])
    trained_model.fit(X, X[cst.VALUE_COL])

    with pytest.warns(UserWarning) as record:
        forecast = get_forecast(X,
                                trained_model,
                                relative_error_tolerance=0.01)
        assert forecast.df.shape == (X.shape[0], 5)
        assert forecast.time_col == cst.TIME_COL
        assert forecast.actual_col == cst.ACTUAL_COL
        assert forecast.predicted_col == cst.PREDICTED_COL
        assert forecast.predicted_lower_col == cst.PREDICTED_LOWER_COL
        assert forecast.predicted_upper_col == cst.PREDICTED_UPPER_COL
        assert forecast.null_model_predicted_col is None  # there is no null model by default
        assert forecast.ylabel == cst.VALUE_COL
        assert forecast.train_end_date == X[cst.TIME_COL].max()
        assert forecast.forecast_horizon is None
        assert forecast.coverage == coverage
        assert forecast.r2_loss_function == mean_squared_error
        assert forecast.estimator
        assert forecast.relative_error_tolerance == 0.01
        assert "y_true contains 0. MAPE is undefined." in record[
            0].message.args[0]
        assert "y_true contains 0. MedAPE is undefined." in record[
            1].message.args[0]
        assert "denominator contains very small values. sMAPE is likely highly volatile." in record[
            2].message.args[0]

    # test forecast into future with bands and null model, custom labels, custom loss
    trained_model = Pipeline([
        ("estimator",
         ProphetEstimator(coverage=coverage,
                          score_func=mean_absolute_error,
                          null_model_params={"strategy": "mean"}))
    ])

    trained_model.fit(X, X[cst.VALUE_COL])

    with pytest.warns(UserWarning) as record:
        X_forecast = pd.concat([X, X_future])
        train_end_date = X[cst.TIME_COL].max()
        test_start_date = X[cst.TIME_COL].min()
        forecast = get_forecast(X_forecast,
                                trained_model,
                                train_end_date=train_end_date,
                                test_start_date=test_start_date,
                                forecast_horizon=X_future.shape[0],
                                xlabel="xlabel",
                                ylabel="ylabel")
        assert forecast.df.shape == (X_forecast.shape[0], 6)
        assert forecast.time_col == "xlabel"
        assert forecast.actual_col == cst.ACTUAL_COL
        assert forecast.predicted_col == cst.PREDICTED_COL
        assert forecast.predicted_lower_col == cst.PREDICTED_LOWER_COL
        assert forecast.predicted_upper_col == cst.PREDICTED_UPPER_COL
        assert forecast.null_model_predicted_col == cst.NULL_PREDICTED_COL
        assert forecast.ylabel == "ylabel"
        assert forecast.train_end_date == train_end_date
        assert forecast.test_start_date == test_start_date
        assert forecast.forecast_horizon == 2
        assert forecast.coverage == coverage
        assert forecast.r2_loss_function == mean_absolute_error
        assert "y_true contains 0. MAPE is undefined." in record[
            0].message.args[0]
        assert "y_true contains 0. MedAPE is undefined." in record[
            1].message.args[0]
        assert "denominator contains very small values. sMAPE is likely highly volatile." in record[
            2].message.args[0]
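# NOTE: in the upstream test suite this is presumably registered as a pytest
# fixture (e.g. decorated with `@pytest.fixture(scope="module")`).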
def pipeline_results():
    """Runs forecast_pipeline three times to get
     grid search results"""
    pipeline_results = {}

    data = generate_df_for_tests(freq="1D", periods=20 * 7)
    df = data["df"]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        hyperparameter_grid = [{
            "estimator__strategy": ["quantile"],
            "estimator__quantile": [0.9]
        }, {
            "estimator__strategy": ["mean"]
        }, {
            "estimator__strategy": ["constant"],
            "estimator__constant": [1.0, 2.0]
        }]
        pipeline = Pipeline([("estimator", DummyEstimator())])
        # Tests MAPE `score_func`, list `cv_report_metrics`
        metric = EvaluationMetricEnum.MeanAbsolutePercentError
        pipeline_results["1"] = forecast_pipeline(
            df,
            pipeline=pipeline,
            hyperparameter_grid=hyperparameter_grid,
            n_jobs=-1,
            forecast_horizon=20,
            coverage=None,
            agg_periods=7,
            agg_func=np.sum,
            score_func=metric.name,
            score_func_greater_is_better=metric.get_metric_greater_is_better(),
            cv_report_metrics=[
                EvaluationMetricEnum.MeanAbsoluteError.name,
                EvaluationMetricEnum.MeanSquaredError.name,
                EvaluationMetricEnum.MedianAbsolutePercentError.name,
            ],
            null_model_params=None)

        # Tests FRACTION_OUTSIDE_TOLERANCE `score_func`, all `cv_report_metrics`
        pipeline = Pipeline([("estimator", DummyEstimator())])
        pipeline_results["2"] = forecast_pipeline(
            df,
            pipeline=pipeline,
            hyperparameter_grid=hyperparameter_grid,
            n_jobs=-1,
            forecast_horizon=20,
            coverage=None,
            score_func=FRACTION_OUTSIDE_TOLERANCE,
            score_func_greater_is_better=False,
            cv_report_metrics=CV_REPORT_METRICS_ALL,
            null_model_params=None,
            relative_error_tolerance=0.02)

        # Tests callable `score_func`, greater_is_better=True, no `cv_report_metrics`
        fs1 = pd.DataFrame({
            "name": ["tow", "conti_year"],
            "period": [7.0, 1.0],
            "order": [3, 3],
            "seas_names": ["weekly", None]
        })
        fs2 = pd.DataFrame({
            "name": ["tow"],
            "period": [7.0],
            "order": [3],
            "seas_names": ["weekly"]
        })
        hyperparameter_grid = {
            "estimator__origin_for_time_vars": [2018],
            "estimator__extra_pred_cols": [["ct1"], ["ct2"]],
            "estimator__fit_algorithm_dict": [{
                "fit_algorithm": "linear"
            }],
            "estimator__fs_components_df": [fs1, fs2],
        }
        cv_max_splits = 2
        pipeline_results["3"] = forecast_pipeline(
            df,
            estimator=SilverkiteEstimator(),
            hyperparameter_grid=hyperparameter_grid,
            hyperparameter_budget=4,
            n_jobs=1,
            forecast_horizon=3 * 7,
            test_horizon=2 * 7,
            score_func=mean_absolute_error,  # callable score_func
            score_func_greater_is_better=True,  # not really True, only for the sake of testing
            null_model_params=None,
            cv_horizon=1 * 7,
            cv_expanding_window=True,
            cv_min_train_periods=7 * 7,
            cv_periods_between_splits=7,
            cv_periods_between_train_test=3 * 7,
            cv_max_splits=cv_max_splits)
    return pipeline_results
    def __init__(self):
        super().__init__(estimator=SilverkiteEstimator())
def test_lagged_regressors(daily_data_with_reg, params):
    """Tests a basic model with lagged regressors"""
    train_df = daily_data_with_reg["train_df"]
    test_df = daily_data_with_reg["test_df"][:20]

    # default forecast horizon, no uncertainty
    model = SilverkiteEstimator(
        lagged_regressor_dict=params["lagged_regressor_dict"])
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    assert trained_model["lagged_regressor_dict"] == params["lagged_regressor_dict"]
    pred_cols = trained_model["pred_cols"]
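    # Lagged-regressor naming, as asserted below: `<col>_lagN` is a single
    # N-period lag, `<col>_avglag_a_b_c` averages lags a, b, and c, and
    # `<col>_avglag_a_to_b` averages all lags from a through b.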
    expected_lagged_regression_terms = {
        'regressor1_lag1',
        'regressor1_lag2',
        'regressor1_lag3',
        'regressor1_avglag_7_14_21',
        'regressor1_avglag_8_to_14',
        'regressor2_lag35',
        'regressor2_avglag_35_42_49',
        'regressor2_avglag_30_to_36'
    }
    assert expected_lagged_regression_terms.issubset(pred_cols)

    model.predict(test_df)
    expected_forecast_cols = {"ts", "y"}
    assert expected_forecast_cols.issubset(list(model.forecast.columns))

    # Passes forecast horizon of 10, and uncertainty dict
    model = SilverkiteEstimator(
        uncertainty_dict=params["uncertainty_dict"],
        lagged_regressor_dict=params["lagged_regressor_dict"],
        forecast_horizon=10)
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    pred_cols = trained_model["pred_cols"]
    expected_lagged_regression_terms = {
        'regressor1_lag1',
        'regressor1_lag2',
        'regressor1_lag3',
        'regressor1_avglag_7_14_21',
        'regressor1_avglag_8_to_14',
        'regressor2_lag35',
        'regressor2_avglag_35_42_49',
        'regressor2_avglag_30_to_36'
    }
    assert expected_lagged_regression_terms.issubset(pred_cols)

    model.predict(test_df)
    expected_forecast_cols = {"ts", "y", 'y_quantile_summary', 'err_std',
                              'forecast_lower', 'forecast_upper'}
    assert expected_forecast_cols.issubset(list(model.forecast.columns))
def test_autoreg(daily_data):
    """Runs a basic model with uncertainty intervals
    and checks coverage"""
    uncertainty_dict = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": {
            "conditional_cols": ["dow_hr"],
            "quantiles": [0.025, 0.975],
            "quantile_estimation_method": "normal_fit",
            "sample_size_thresh": 10,
            "small_sample_size_method": "std_quantiles",
            "small_sample_size_quantile": 0.98}}

    model = SilverkiteEstimator(
        uncertainty_dict=uncertainty_dict,
        autoreg_dict="auto")
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"][:20]
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    pred_cols = trained_model["pred_cols"]
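    # With autoreg_dict="auto" and no simulation, the selected lags appear to
    # start at the forecast horizon (30 days by default for daily data, judging
    # by the terms below), so predictions never need unobserved values.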

    expected_autoreg_terms = {
        "y_lag30", "y_lag31", "y_lag32",
        "y_avglag_35_42_49", "y_avglag_30_to_36", "y_avglag_37_to_43"}
    assert expected_autoreg_terms.issubset(pred_cols)

    predictions = model.predict(test_df)
    expected_forecast_cols = {
        "ts", "y", "y_quantile_summary", "err_std", "forecast_lower",
        "forecast_upper"}

    assert expected_forecast_cols.issubset(list(model.forecast.columns))

    actual = test_df[cst.VALUE_COL]
    forecast_lower = predictions[cst.PREDICTED_LOWER_COL]
    forecast_upper = predictions[cst.PREDICTED_UPPER_COL]
    calc_pred_coverage = 100 * (
        (actual <= forecast_upper)
        & (actual >= forecast_lower)
        ).mean()
    assert round(calc_pred_coverage) >= 75, "forecast coverage is too low"

    # Simulation based, default forecast horizon
    model = SilverkiteEstimator(
        uncertainty_dict=uncertainty_dict,
        autoreg_dict="auto",
        simulation_based=True)
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"][:20]
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    pred_cols = trained_model["pred_cols"]

    expected_autoreg_terms = {
        "y_lag1", "y_lag2", "y_lag3", "y_avglag_7_14_21", "y_avglag_1_to_7", "y_avglag_8_to_14"}
    assert expected_autoreg_terms.issubset(pred_cols)

    # Passes forecast horizon of 10
    model = SilverkiteEstimator(
        uncertainty_dict=uncertainty_dict,
        autoreg_dict="auto",
        forecast_horizon=10)
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"][:20]
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    pred_cols = trained_model["pred_cols"]

    expected_autoreg_terms = {
        "y_lag10", "y_lag11", "y_lag12", "y_avglag_14_21_28", "y_avglag_10_to_16", "y_avglag_17_to_23"}
    assert expected_autoreg_terms.issubset(pred_cols)

    # Passes forecast horizon of 10, and simulation-based True
    model = SilverkiteEstimator(
        uncertainty_dict=uncertainty_dict,
        autoreg_dict="auto",
        forecast_horizon=10,
        simulation_based=True)
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"][:20]
    model.fit(
        train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    assert model.forecast is None

    trained_model = model.model_dict
    pred_cols = trained_model["pred_cols"]

    expected_autoreg_terms = {
        "y_lag1", "y_lag2", "y_lag3", "y_avglag_7_14_21", "y_avglag_1_to_7", "y_avglag_8_to_14"}
    assert expected_autoreg_terms.issubset(pred_cols)
def test_plot_components():
    """Tests plot_components.
    Because component plots are implemented in `base_silverkite_estimator.py`, the bulk of
    the testing is done there. This file only tests inheritance and compatibility of the
    trained_model generated by this estimator's fit.
    """
    daily_data = generate_df_with_reg_for_tests(
        freq="D",
        periods=20,
        train_start_date=datetime.datetime(2018, 1, 1),
        conti_year_origin=2018)
    train_df = daily_data.get("train_df").copy()
    params_daily = params_components()
    fit_algorithm = params_daily.pop("fit_algorithm", "linear")
    fit_algorithm_params = params_daily.pop("fit_algorithm_params", None)
    params_daily["fit_algorithm_dict"] = {
        "fit_algorithm": fit_algorithm,
        "fit_algorithm_params": fit_algorithm_params,
    }
    # removing daily seasonality terms
    params_daily["fs_components_df"] = pd.DataFrame({
        "name": ["tow", "ct1"],
        "period": [7.0, 1.0],
        "order": [4, 5],
        "seas_names": ["weekly", "yearly"]})
    model = SilverkiteEstimator(**params_daily)
    with pytest.warns(Warning):
        # suppresses the sklearn warning on the `iid` parameter during the ridge hyperparameter grid search
        model.fit(train_df)

    # Test plot_components
    with pytest.warns(Warning) as record:
        title = "Custom component plot"
        fig = model.plot_components(names=["trend", "YEARLY_SEASONALITY", "DUMMY"], title=title)
        expected_rows = 3
        assert len(fig.data) == expected_rows + 1  # includes changepoints
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "trend", "YEARLY_SEASONALITY"]

        assert fig.layout.xaxis.title["text"] == cst.TIME_COL
        assert fig.layout.xaxis2.title["text"] == cst.TIME_COL
        assert fig.layout.xaxis3.title["text"] == "Time of year"

        assert fig.layout.yaxis.title["text"] == cst.VALUE_COL
        assert fig.layout.yaxis2.title["text"] == "trend"
        assert fig.layout.yaxis3.title["text"] == "yearly"

        assert fig.layout.title["text"] == title
        assert f"The following components have not been specified in the model: " \
               f"{{'DUMMY'}}, plotting the rest." in record[0].message.args[0]

    # Test plot_trend
    title = "Custom trend plot"
    fig = model.plot_trend(title=title)
    expected_rows = 2
    assert len(fig.data) == expected_rows + 1  # includes changepoints
    assert [fig.data[i].name for i in range(expected_rows)] == [cst.VALUE_COL, "trend"]

    assert fig.layout.xaxis.title["text"] == cst.TIME_COL
    assert fig.layout.xaxis2.title["text"] == cst.TIME_COL

    assert fig.layout.yaxis.title["text"] == cst.VALUE_COL
    assert fig.layout.yaxis2.title["text"] == "trend"

    assert fig.layout.title["text"] == title

    # Test plot_seasonalities
    with pytest.warns(Warning):
        # suppresses the warning on seasonalities removed
        title = "Custom seasonality plot"
        fig = model.plot_seasonalities(title=title)
        expected_rows = 3
        assert len(fig.data) == expected_rows
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "WEEKLY_SEASONALITY", "YEARLY_SEASONALITY"]

        assert fig.layout.xaxis.title["text"] == cst.TIME_COL
        assert fig.layout.xaxis2.title["text"] == "Day of week"
        assert fig.layout.xaxis3.title["text"] == "Time of year"

        assert fig.layout.yaxis.title["text"] == cst.VALUE_COL
        assert fig.layout.yaxis2.title["text"] == "weekly"
        assert fig.layout.yaxis3.title["text"] == "yearly"

        assert fig.layout.title["text"] == title
def test_forecast_pipeline_rolling_evaluation_silverkite():
    """Checks the output rolling evaluation with Silverkite template"""
    data = generate_df_with_reg_for_tests(
        freq="1D",
        periods=20 * 7,  # short-term: 20 weeks of data
        remove_extra_cols=True,
        mask_test_actuals=True)
    regressor_cols = ["regressor1", "regressor2", "regressor_categ"]
    keep_cols = [TIME_COL, VALUE_COL] + regressor_cols
    df = data["df"][keep_cols]

    coverage = 0.1
    hyperparameter_grid = {
        "estimator__origin_for_time_vars": [None],  # inferred from training data
        "estimator__fs_components_df": [
            pd.DataFrame({
                "name": ["tow"],
                "period": [7.0],
                "order": [3],
                "seas_names": ["weekly"]
            })
        ],
        # two cases: no growth term and a single growth term
        "estimator__extra_pred_cols": [regressor_cols, regressor_cols + ["ct_sqrt"]],
        "estimator__fit_algorithm_dict": [{
            "fit_algorithm": "linear"
        }]
    }
    pipeline_params = mock_pipeline(
        df=df,
        time_col=TIME_COL,
        value_col=VALUE_COL,
        date_format=None,  # not recommended, but possible to specify
        freq=None,
        regressor_cols=regressor_cols,
        estimator=SilverkiteEstimator(),
        hyperparameter_grid=hyperparameter_grid,
        hyperparameter_budget=1,
        n_jobs=1,
        forecast_horizon=2 * 7,
        coverage=coverage,
        test_horizon=2 * 7,
        periods_between_train_test=2 * 7,
        agg_periods=7,
        agg_func=np.mean,
        score_func=mean_absolute_error,  # callable score_func
        null_model_params=None,
        cv_horizon=1 * 7,
        cv_expanding_window=True,
        cv_min_train_periods=8 * 7,
        cv_periods_between_splits=7,
        cv_periods_between_train_test=3 * 7,
        cv_max_splits=2)
    tscv = RollingTimeSeriesSplit(forecast_horizon=2 * 7,
                                  min_train_periods=10 * 7,
                                  expanding_window=True,
                                  use_most_recent_splits=True,
                                  periods_between_splits=2 * 7,
                                  periods_between_train_test=2 * 7,
                                  max_splits=3)
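    # Each rolling split trains on an expanding window, skips 2 weeks between
    # train and test, then evaluates a 2-week forecast; at most 3 splits are used.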
    rolling_evaluation = forecast_pipeline_rolling_evaluation(
        pipeline_params=pipeline_params, tscv=tscv)

    expected_splits_n = tscv.max_splits
    assert len(rolling_evaluation.keys()) == expected_splits_n
    assert set(rolling_evaluation.keys()) == {"split_0", "split_1", "split_2"}

    time_col = pipeline_params["time_col"]
    for split_num, (train, test) in enumerate(tscv.split(X=df)):
        split_output = rolling_evaluation[f"split_{split_num}"]
        assert round(split_output["runtime_sec"],
                     3) == split_output["runtime_sec"]

        pipeline_result = split_output["pipeline_result"]

        # Checks every split uses all the available data for training
        ts = pipeline_result.timeseries
        train_end_date = df.iloc[train[-1]][time_col]
        assert ts.train_end_date == train_end_date

        assert pipeline_result.backtest is None

        # Checks every split has forecast for train+test periods passed by tscv
        forecast = pipeline_result.forecast
        assert forecast.df.shape[0] == (
            ts.fit_df.shape[0] + tscv.periods_between_train_test + tscv.forecast_horizon)
    def __init__(self,
                 estimator: BaseForecastEstimator = SilverkiteEstimator()):
        super().__init__(estimator=estimator)