Пример #1
0
def test_null_model(X):
    """Checks the null model score and that ``score_func`` reaches the null model.

    A quantile-strategy null model is fitted twice: once with the estimator's
    default score function (compared against ``mean_squared_error``) and once
    with ``mean_absolute_error`` passed explicitly. In both cases the score on
    a constant target of 2.0 must equal the metric evaluated against a
    constant prediction of 9.0.

    NOTE(review): 9.0 is presumably the 0.8 quantile of the ``X`` fixture's
    values -- confirm against the fixture definition.
    """
    def fit_and_check(metric, **estimator_kwargs):
        """Fits an estimator on ``X`` and checks the null model score against ``metric``."""
        estimator = BaseSilverkiteEstimator(
            null_model_params={
                "strategy": "quantile",
                "constant": None,
                "quantile": 0.8
            },
            **estimator_kwargs)
        estimator.fit(X)
        target = np.repeat(2.0, X.shape[0])
        observed = estimator.null_model.score(X, y=target)
        assert observed == metric(target, np.repeat(9.0, X.shape[0]))
        return estimator

    # default score function
    fit_and_check(mean_squared_error)

    # tests if different score function gets propagated to null model
    model = fit_and_check(mean_absolute_error, score_func=mean_absolute_error)
    # checks that `df` is set
    assert_equal(X, model.df)
Пример #2
0
def test_summary(daily_data):
    """Checks that ``summary`` runs without error before and after ``fit``."""
    estimator = BaseSilverkiteEstimator()
    training_data = daily_data["train_df"]

    # First call happens on the unfitted estimator.
    estimator.summary()

    # Second call happens after the base-class `fit`.
    estimator.fit(
        training_data,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    estimator.summary()
Пример #3
0
def test_init(params):
    """Checks that constructor arguments are stored on BaseSilverkiteEstimator.

    Also checks that the attributes this test expects to be populated later
    (``model_dict``, ``pred_cols``, ``feature_cols``, ``df``, ``coef_``) are
    ``None`` right after construction.
    """
    coverage = 0.95
    residual_params = {
        "conditional_cols": ["dow"],
        "quantiles": [0.025, 0.975],
        "quantile_estimation_method": "normal_fit",
        "sample_size_thresh": 5,
        "small_sample_size_method": "std_quantiles",
        "small_sample_size_quantile": 0.98
    }
    uncertainty_dict = {
        "uncertainty_method": "simple_conditional_residuals",
        "params": residual_params
    }
    model = BaseSilverkiteEstimator(
        score_func=mean_squared_error,
        coverage=coverage,
        null_model_params=None,
        uncertainty_dict=uncertainty_dict)

    # constructor arguments are kept as given
    assert model.score_func == mean_squared_error
    assert model.coverage == coverage
    assert model.null_model_params is None
    assert model.uncertainty_dict == uncertainty_dict

    # nothing has been fitted yet
    for attribute in ("model_dict", "pred_cols", "feature_cols", "df", "coef_"):
        assert getattr(model, attribute) is None
Пример #4
0
def test_model_summary(df_pt):
    """Checks that ``summary`` of a fitted model returns a printable object.

    The estimator is fitted the way the other tests in this file emulate a
    subclass: base ``fit``, then ``model_dict`` is set from
    ``SilverkiteForecast.forecast``, then ``finish_fit``. The returned summary
    must not be ``None`` and must be convertible with ``str`` and ``repr``.
    """
    model = BaseSilverkiteEstimator()
    model.fit(
        X=df_pt.iloc[:100],  # speeds up
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)
    params = {"fit_algorithm": "linear", "training_fraction": 0.8}
    silverkite = SilverkiteForecast()
    model.model_dict = silverkite.forecast(
        df=df_pt.iloc[:100],
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        **params)
    model.finish_fit()

    summary = model.summary()
    # Check for `None` before using the object, so a missing summary fails
    # with a clear assertion instead of passing through `None.__str__()`.
    assert summary is not None
    # Built-ins instead of direct dunder calls; `str`/`repr` also raise
    # `TypeError` if the underlying method does not return a string.
    str(summary)
    repr(summary)
Пример #5
0
def test_pred_category(df_pt):
    """Checks the ``pred_category`` property before and after fitting.

    Reading the property on an unfitted estimator must raise
    ``NotFittedError``. After the fit steps, the private ``_pred_category``
    attribute is ``None`` until ``finish_fit`` is called and the property is
    read; the returned mapping is then compared category by category.
    """
    model = BaseSilverkiteEstimator()
    # property is not available without fitting.
    with pytest.raises(
            NotFittedError,
            match="Must fit before getting predictor category."):
        print(model.pred_category)

    model.fit(
        X=df_pt.iloc[:100],  # speeds up
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL)

    # The regressor column is added only after the base `fit` call above.
    df_pt["x"] = np.random.randn(df_pt.shape[0])
    forecaster = SilverkiteForecast()
    model.model_dict = forecaster.forecast(
        df=df_pt.iloc[:100],
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        fit_algorithm="linear",
        training_fraction=0.8,
        extra_pred_cols=["ct1", "x", "x:ct1"])
    # set in subclass initialization
    model.extra_pred_cols = ["ct1", "x", "x:ct1"]

    # _pred_category is None before trying to access pred_category
    assert model._pred_category is None
    model.finish_fit()
    pred_category = model.pred_category
    # _pred_category is updated after trying to access pred_category
    assert model._pred_category is not None

    # sin/cos pairs per Fourier order: 3 daily, 3 weekly, 5 yearly
    expected_seasonality = [
        f"{trig}{order}_{suffix}"
        for suffix, max_order in (("tod_daily", 3), ("tow_weekly", 3), ("toy_yearly", 5))
        for order in range(1, max_order + 1)
        for trig in ("sin", "cos")
    ]
    expected = {
        "intercept": ["Intercept"],
        "time_features": ["ct1", "x:ct1"],
        "event_features": [],
        "trend_features": ["ct1", "x:ct1"],
        "seasonality_features": expected_seasonality,
        "lag_features": [],
        "regressor_features": ["x", "x:ct1"],
        "interaction_features": ["x:ct1"],
    }
    for category, columns in expected.items():
        assert pred_category[category] == columns
Пример #6
0
def test_plot_trend_changepoint_detection(df_pt):
    """Checks ``plot_trend_changepoint_detection`` with default and custom parameters.

    The model is fitted with automatic changepoint detection. The figure title
    and axis titles are checked once with no arguments and once with
    ``dict(trend_change=False)`` passed as the plotting parameters.
    """
    model = BaseSilverkiteEstimator()
    model.fit(X=df_pt, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
    forecaster = SilverkiteForecast()
    model.model_dict = forecaster.forecast(
        df=df_pt,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        changepoints_dict={"method": "auto"})
    model.finish_fit()

    cases = (
        # default parameters
        ((), "Timeseries Plot with detected trend change points"),
        # tests given parameters
        ((dict(trend_change=False),), "Timeseries Plot"),
    )
    for plot_args, expected_title in cases:
        fig = model.plot_trend_changepoint_detection(*plot_args)
        assert fig is not None
        layout = fig.layout
        assert layout.title["text"] == expected_title
        assert layout.yaxis.title["text"] == cst.VALUE_COL
        assert layout.xaxis.title["text"] == "Dates"
Пример #7
0
def test_score_function(daily_data_with_reg):
    """Checks ``score`` without a null model, on data with regressors.

    The score on the test set must match ``mean_squared_error`` computed from
    the ``predict`` output, and be approximately 4.6 (relative tolerance 0.1).
    """
    train_df = daily_data_with_reg["train_df"]
    test_df = daily_data_with_reg["test_df"]
    model = BaseSilverkiteEstimator()

    seasonality_terms = pd.DataFrame({
        "name": ["tod", "tow", "conti_year"],
        "period": [24.0, 7.0, 1.0],
        "order": [3, 3, 5],
        "seas_names": ["daily", "weekly", "yearly"]
    })
    forecast_kwargs = dict(
        origin_for_time_vars=None,
        extra_pred_cols=["ct1", "regressor1", "regressor2"],
        train_test_thresh=None,
        training_fraction=None,
        fit_algorithm="linear",
        fit_algorithm_params=None,
        daily_event_df_dict=None,
        changepoints_dict=None,
        fs_components_df=seasonality_terms,
        autoreg_dict=None,
        min_admissible_value=None,
        max_admissible_value=None,
        uncertainty_dict=None)

    # every subclass `fit` follows these steps
    model.fit(X=train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
    forecaster = SilverkiteForecast()
    model.model_dict = forecaster.forecast(
        df=train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        **forecast_kwargs)
    model.finish_fit()

    actuals = test_df[cst.VALUE_COL]
    score = model.score(test_df, actuals)
    pred_df = model.predict(test_df)
    assert list(pred_df.columns) == [cst.TIME_COL, cst.PREDICTED_COL]
    expected_score = mean_squared_error(
        pred_df[cst.PREDICTED_COL],
        test_df[cst.VALUE_COL])
    assert score == pytest.approx(expected_score)
    assert score == pytest.approx(4.6, rel=1e-1)
Пример #8
0
def test_silverkite_with_components_hourly_data():
    """Tests plot_components, plot_trend and plot_seasonalities with hourly data.

    Four days of hourly data are generated, the estimator is fitted the way a
    subclass would (base ``fit``, set ``model_dict``, ``finish_fit``), and the
    trace names, axis titles and figure title of each plot are checked.
    """
    def axis_titles(figure, axis, count):
        """Returns the title texts of ``axis``, ``axis2``, ..., ``axis<count>``."""
        suffixes = [""] + [str(i) for i in range(2, count + 1)]
        return [
            getattr(figure.layout, f"{axis}{suffix}").title["text"]
            for suffix in suffixes
        ]

    hourly_data = generate_df_with_reg_for_tests(
        freq="H",
        periods=24 * 4,
        train_start_date=datetime.datetime(2018, 1, 1),
        conti_year_origin=2018)
    # Index directly (as the daily test does) so a missing key fails with a
    # clear `KeyError` instead of `AttributeError` on `None`.
    train_df = hourly_data["train_df"].copy()
    params_hourly = params_components()

    # converts into parameters for `forecast_silverkite`
    coverage = params_hourly.pop("coverage")
    model = BaseSilverkiteEstimator(
        coverage=coverage, uncertainty_dict=params_hourly["uncertainty_dict"])
    model.fit(X=train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
    silverkite = SilverkiteForecast()
    model.model_dict = silverkite.forecast(
        df=train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        **params_hourly)
    model.finish_fit()

    # Test plot_components
    with pytest.warns(Warning) as record:
        title = "Custom component plot"
        fig = model.plot_components(
            names=["trend", "DAILY_SEASONALITY", "DUMMY"], title=title)
        expected_rows = 3 + 1  # includes changepoints
        assert len(fig.data) == expected_rows
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "trend", "DAILY_SEASONALITY", "trend change point"]

        assert axis_titles(fig, "xaxis", 3) == [
            cst.TIME_COL, cst.TIME_COL, "Hour of day"]
        assert axis_titles(fig, "yaxis", 3) == [
            cst.VALUE_COL, "trend", "daily"]

        assert fig.layout.title["text"] == title
        # Plain string literal: no interpolation is needed, so no f-string
        # (and no doubled braces) is required.
        expected_warning = (
            "The following components have not been specified in the model: "
            "{'DUMMY'}, plotting the rest.")
        assert expected_warning in record[0].message.args[0]

    # Test plot_trend
    title = "Custom trend plot"
    fig = model.plot_trend(title=title)
    expected_rows = 2
    assert len(fig.data) == expected_rows + 1  # includes changepoints
    assert [fig.data[i].name
            for i in range(expected_rows)] == [cst.VALUE_COL, "trend"]

    assert axis_titles(fig, "xaxis", 2) == [cst.TIME_COL, cst.TIME_COL]
    assert axis_titles(fig, "yaxis", 2) == [cst.VALUE_COL, "trend"]

    assert fig.layout.title["text"] == title

    # Test plot_seasonalities
    with pytest.warns(Warning):
        # suppresses the warning on seasonalities removed
        title = "Custom seasonality plot"
        fig = model.plot_seasonalities(title=title)
        expected_rows = 4
        assert len(fig.data) == expected_rows
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "DAILY_SEASONALITY", "WEEKLY_SEASONALITY", "YEARLY_SEASONALITY"]

        assert axis_titles(fig, "xaxis", 4) == [
            cst.TIME_COL, "Hour of day", "Day of week", "Time of year"]
        assert axis_titles(fig, "yaxis", 4) == [
            cst.VALUE_COL, "daily", "weekly", "yearly"]

        assert fig.layout.title["text"] == title
0
def test_silverkite_with_components_daily_data():
    """Tests plot_components, plot_trend and plot_seasonalities with daily data
    and missing input values.

    Also checks the errors raised when plotting before ``fit``, when none of
    the requested components exist, and when the model is fitted with the
    "rf" algorithm (for which ``coef_`` is ``None``).
    """
    def axis_titles(figure, axis, count):
        """Returns the title texts of ``axis``, ``axis2``, ..., ``axis<count>``."""
        suffixes = [""] + [str(i) for i in range(2, count + 1)]
        return [
            getattr(figure.layout, f"{axis}{suffix}").title["text"]
            for suffix in suffixes
        ]

    daily_data = generate_df_with_reg_for_tests(
        freq="D",
        periods=20,
        train_start_date=datetime.datetime(2018, 1, 1),
        conti_year_origin=2018)
    train_df = daily_data["train_df"].copy()
    train_df.loc[[2, 4, 7], cst.VALUE_COL] = np.nan  # creates missing values

    params_daily = params_components()  # SilverkiteEstimator parameters
    # converts into parameters for `forecast_silverkite`
    coverage = params_daily.pop("coverage")
    # removes daily seasonality terms
    params_daily["fs_components_df"] = pd.DataFrame({
        "name": ["tow", "ct1"],
        "period": [7.0, 1.0],
        "order": [4, 5],
        "seas_names": ["weekly", "yearly"]
    })

    model = BaseSilverkiteEstimator(
        coverage=coverage, uncertainty_dict=params_daily["uncertainty_dict"])

    with pytest.raises(NotFittedError,
                       match="Call `fit` before calling `plot_components`."):
        model.plot_components()

    with pytest.warns(Warning):
        # suppress warnings from conf_interval.py and sklearn
        # a subclass's fit() method will have these steps
        model.fit(X=train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
        silverkite = SilverkiteForecast()
        model.model_dict = silverkite.forecast(
            df=train_df,
            time_col=cst.TIME_COL,
            value_col=cst.VALUE_COL,
            **params_daily)
        model.finish_fit()

    # Tests plot_components
    with pytest.warns(Warning) as record:
        title = "Custom component plot"
        model._set_silverkite_diagnostics_params()
        fig = model.plot_components(
            names=["trend", "YEARLY_SEASONALITY", "DUMMY"], title=title)
        expected_rows = 3
        assert len(fig.data) == expected_rows + 1  # includes changepoints
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "trend", "YEARLY_SEASONALITY"]

        assert axis_titles(fig, "xaxis", 3) == [
            cst.TIME_COL, cst.TIME_COL, "Time of year"]
        assert axis_titles(fig, "yaxis", 3) == [
            cst.VALUE_COL, "trend", "yearly"]

        assert fig.layout.title["text"] == title
        # Plain string literal: no interpolation is needed, so no f-string
        # (and no doubled braces) is required.
        expected_warning = (
            "The following components have not been specified in the model: "
            "{'DUMMY'}, plotting the rest.")
        assert expected_warning in record[0].message.args[0]

    # Missing component error
    with pytest.raises(
            ValueError,
            match="None of the provided components have been specified in the model."):
        model.plot_components(names=["DUMMY"])

    # Tests plot_trend
    title = "Custom trend plot"
    fig = model.plot_trend(title=title)
    expected_rows = 2
    assert len(fig.data) == expected_rows + 1  # includes changepoints
    assert [fig.data[i].name
            for i in range(expected_rows)] == [cst.VALUE_COL, "trend"]

    assert axis_titles(fig, "xaxis", 2) == [cst.TIME_COL, cst.TIME_COL]
    assert axis_titles(fig, "yaxis", 2) == [cst.VALUE_COL, "trend"]

    assert fig.layout.title["text"] == title

    # Tests plot_seasonalities
    with pytest.warns(Warning):
        # suppresses the warning on seasonalities removed
        title = "Custom seasonality plot"
        fig = model.plot_seasonalities(title=title)
        expected_rows = 3
        assert len(fig.data) == expected_rows
        assert [fig.data[i].name for i in range(expected_rows)] == \
               [cst.VALUE_COL, "WEEKLY_SEASONALITY", "YEARLY_SEASONALITY"]

        assert axis_titles(fig, "xaxis", 3) == [
            cst.TIME_COL, "Day of week", "Time of year"]
        assert axis_titles(fig, "yaxis", 3) == [
            cst.VALUE_COL, "weekly", "yearly"]

        assert fig.layout.title["text"] == title

    # Component plot error if `fit_algorithm` is "rf" or "gradient_boosting"
    params_daily["fit_algorithm"] = "rf"
    model = BaseSilverkiteEstimator(
        coverage=coverage, uncertainty_dict=params_daily["uncertainty_dict"])
    with pytest.warns(Warning):
        # suppress warnings from conf_interval.py and sklearn
        # a subclass's fit() method will have these steps
        model.fit(X=train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
        model.model_dict = silverkite.forecast(
            df=train_df,
            time_col=cst.TIME_COL,
            value_col=cst.VALUE_COL,
            **params_daily)
        model.finish_fit()
    assert model.coef_ is None

    # All three component plots must refuse a non-linear fit the same way.
    for plot_method in (model.plot_components,
                        model.plot_trend,
                        model.plot_seasonalities):
        with pytest.raises(
                NotImplementedError,
                match="Component plot has only been implemented for additive linear models."):
            plot_method()
Пример #10
0
def test_set_uncertainty_dict(daily_data):
    """Tests how ``coverage`` and ``uncertainty_dict`` are resolved by ``fit``.

    Four combinations are covered: both provided, only ``coverage``, neither,
    and only ``uncertainty_dict``. After ``fit`` the estimator's
    ``uncertainty_dict`` and ``coverage`` attributes are compared with the
    expected values for each combination.
    """
    train_df = daily_data["train_df"]

    def make_uncertainty_dict(quantiles, sample_size_thresh):
        """Builds a ``simple_conditional_residuals`` dict conditioned on ``dow_hr``."""
        return {
            "uncertainty_method": "simple_conditional_residuals",
            "params": {
                "conditional_cols": ["dow_hr"],
                "quantiles": quantiles,
                "quantile_estimation_method": "normal_fit",
                "sample_size_thresh": sample_size_thresh,
                "small_sample_size_method": "std_quantiles",
                "small_sample_size_quantile": 0.98
            }
        }

    def fitted_model(coverage, uncertainty_dict):
        """Returns an estimator built with the given settings and fitted on ``train_df``."""
        estimator = BaseSilverkiteEstimator(
            coverage=coverage,
            uncertainty_dict=uncertainty_dict)
        estimator.fit(train_df)
        return estimator

    # both provided
    provided = make_uncertainty_dict([0.025, 0.975], 20)
    model = fitted_model(0.95, provided)
    assert_equal(model.uncertainty_dict, provided)
    assert_equal(model.coverage, 0.95)

    # only coverage provided
    model = fitted_model(0.90, None)
    assert_equal(model.uncertainty_dict, make_uncertainty_dict([0.05, 0.95], 5))
    assert_equal(model.coverage, 0.90)

    # both missing
    model = fitted_model(None, None)
    assert_equal(model.uncertainty_dict, None)
    assert_equal(model.coverage, None)

    # only uncertainty provided
    provided = make_uncertainty_dict([0.05, 0.95], 5)
    model = fitted_model(None, provided)
    assert_equal(model.uncertainty_dict, provided)
    assert_equal(model.coverage, 0.90)
0
def test_fit_predict(daily_data):
    """Checks the fit / finish_fit / predict lifecycle with a null model.

    Covers the errors raised when steps are called out of order, the
    attributes set by ``finish_fit``, the prediction cache (including the
    "Returning cached predictions." debug log), and that calling ``fit``
    again clears the cache.
    """
    train_df = daily_data["train_df"]
    test_df = daily_data["test_df"]
    model = BaseSilverkiteEstimator(null_model_params={"strategy": "mean"})
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None

    with pytest.raises(
            NotFittedError,
            match="Call `fit` before calling `predict`."):
        model.predict(train_df)

    # Every subclass `fit` follows these steps
    model.fit(train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
    # Checks that `df` is set, but other variables aren't
    assert_equal(model.df, train_df)
    for unset_attribute in ("pred_cols", "feature_cols", "coef_"):
        assert getattr(model, unset_attribute) is None

    with pytest.raises(
            ValueError,
            match="Must set `self.model_dict` before calling this function."):
        model.finish_fit()

    seasonality_terms = pd.DataFrame({
        "name": ["tod", "tow", "conti_year"],
        "period": [24.0, 7.0, 1.0],
        "order": [3, 3, 5],
        "seas_names": ["daily", "weekly", "yearly"]
    })
    forecast_kwargs = dict(
        origin_for_time_vars=None,
        extra_pred_cols=None,
        train_test_thresh=None,
        training_fraction=None,
        fit_algorithm="linear",
        fit_algorithm_params=None,
        daily_event_df_dict=None,
        changepoints_dict=None,
        fs_components_df=seasonality_terms,
        autoreg_dict=None,
        min_admissible_value=None,
        max_admissible_value=None,
        uncertainty_dict=None)
    forecaster = SilverkiteForecast()
    model.model_dict = forecaster.forecast(
        df=train_df,
        time_col=cst.TIME_COL,
        value_col=cst.VALUE_COL,
        **forecast_kwargs)

    with pytest.raises(
            NotFittedError,
            match="Subclass must call `finish_fit` inside the `fit` method."):
        model.predict(train_df)
    assert model.last_predicted_X_ is not None  # attempted prediction
    assert model.cached_predictions_ is None

    model.finish_fit()
    # Checks that other variables are set
    assert_equal(model.pred_cols, model.model_dict["pred_cols"])
    assert_equal(model.feature_cols, model.model_dict["x_mat"].columns)
    expected_coef = pd.DataFrame(
        model.model_dict["ml_model"].coef_,
        index=model.feature_cols)
    assert_equal(model.coef_, expected_coef)

    def predict_without_cache(df):
        """Predicts on ``df`` and checks the cache is refreshed with nothing logged."""
        with LogCapture(cst.LOGGER_NAME) as log_capture:
            result = model.predict(df)
            assert_equal(model.last_predicted_X_, df)
            assert_equal(model.cached_predictions_, result)
            log_capture.check()  # no log messages (not using cached predictions)
        return result

    # Predicts on a new dataset
    predicted = predict_without_cache(test_df)

    # Uses cached predictions
    with LogCapture(cst.LOGGER_NAME) as log_capture:
        assert_equal(model.predict(test_df), predicted)
        log_capture.check(
            (cst.LOGGER_NAME, "DEBUG", "Returning cached predictions."))

    # Predicts on a different dataset
    predicted = predict_without_cache(train_df)

    # .fit() clears the cached result
    model.fit(train_df, time_col=cst.TIME_COL, value_col=cst.VALUE_COL)
    assert model.last_predicted_X_ is None
    assert model.cached_predictions_ is None