Example #1
def test_prophet_group_subset_predict_raises_and_warns():

    _rows_to_generate = 30
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    model = GroupedProphet().fit(train_df, train.key_columns)

    # Build one valid group tuple from the first row's key values, plus a
    # group that does not exist in the training data.
    key_entries = []
    for v in train_df[["key1", "key0"]].iloc[[0]].to_dict().values():
        key_entries.append(list(v.values())[0])
    groups = [(key_entries[0], key_entries[1]), ("missing", "key")]

    with pytest.raises(
        DivinerException, match="Cannot perform predictions due to submitted"
    ):
        model.predict_groups(groups, _rows_to_generate, "D")

    with pytest.warns(
        UserWarning, match="Specified groups are unable to be predicted due to "
    ):
        model.predict_groups(groups, _rows_to_generate, "D", on_error="warn")

    with pytest.raises(
        DivinerException, match="Groups specified for subset forecasting are not"
    ):
        model.predict_groups(
            ("invalid", "invalid"), _rows_to_generate, "D", on_error="ignore"
        )
Example #2
def test_group_performance_metrics():

    train = data_generator.generate_test_data(2, 4, 1000, "2020-01-01", 1)
    model = GroupedProphet(n_changepoints=10, uncertainty_samples=0).fit(
        train.df, train.key_columns
    )

    cv_results = prophet_cross_validate.group_cross_validation(
        model,
        horizon="30 days",
        period="120 days",
        initial="180 days",
        parallel="threads",
    )

    with pytest.raises(ValueError):
        bad_metrics = ["rmse", "mse", "invalid"]
        prophet_cross_validate.group_performance_metrics(
            cv_results, model, bad_metrics, rolling_window=0.25
        )
    metrics = ["rmse", "mse", "mape"]
    metric_results = prophet_cross_validate.group_performance_metrics(
        cv_results, model, metrics, rolling_window=0.05, monthly=False
    )

    first_result = metric_results[list(metric_results.keys())[0]]

    assert len(set(metric_results.keys())) == 4
    assert set(metrics).issubset(set(first_result.columns))
Example #3
def test_individual_model_cross_validate():

    metrics = ["smape", "mean_squared_error", "mean_absolute_error"]

    train = data_generator.generate_test_data(1, 1, 765, "2019-01-01")

    model = AutoARIMA(max_order=5, out_of_sample_size=30).fit(train.df["y"])
    cross_validator = SlidingWindowForecastCV(window_size=180, step=120, h=90)

    cv_results = _cross_validate_single_model(
        model,
        train.df["y"],
        metrics=metrics,
        cross_validator=cross_validator,
        error_score=np.nan,
        exog=None,
        verbosity=3,
    )

    expected_fields = [f"{met}_mean" for met in metrics] + [
        f"{met}_stddev" for met in metrics
    ]
    for key, value in cv_results.items():
        assert key in expected_fields
        assert value > 0
        if "_stddev" in key:
            assert value < cv_results.get(key.split("_stddev")[0] + "_mean") * 10.0
Example #4
def test_model_raises_if_already_fit():
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    with pytest.raises(
        DivinerException,
        match="The model has already been fit. Create a new instance to fit the model again.",
    ):
        model.fit(train.df, train.key_columns)
Example #5
def test_prophet_extract_params():
    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    params = model.extract_model_params()

    assert len(params) == 6
Example #6
def data():
    test_data = data_generator.generate_test_data(
        column_count=3,
        series_count=SERIES_TEST_COUNT,
        series_size=365 * 5,
        start_dt="2020-01-01",
        days_period=1,
    )
    return test_data
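The data() helper above is evidently meant to be shared across tests; a minimal sketch of how it would typically be wired up as a pytest fixture follows. The @pytest.fixture decorator, the SERIES_TEST_COUNT value, and the consuming test are illustrative assumptions, not part of the original excerpt, and data_generator is assumed to be imported in the test module.

import pytest

SERIES_TEST_COUNT = 2  # assumed value; the real constant is defined elsewhere in the module


@pytest.fixture
def data():
    # Same generation call as the helper above, exposed to tests by parameter name.
    return data_generator.generate_test_data(
        column_count=3,
        series_count=SERIES_TEST_COUNT,
        series_size=365 * 5,
        start_dt="2020-01-01",
        days_period=1,
    )


def test_generated_data_shape(data):
    # pytest injects the fixture's return value as the `data` argument.
    assert not data.df.empty
    assert set(data.key_columns).issubset(data.df.columns)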
Example #7
def test_prophet_default_fit():

    train = data_generator.generate_test_data(4, 2, 1000, "2020-01-01", 1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    first_model = _get_individual_model(model, 0)

    assert len(first_model.history) > 0
    assert (
        len(first_model.params["trend"][0]) == 1000
    )  # one fitted trend value per row in the training series
    assert len(list(model.model.keys())) == 2
Example #8
def test_prophet_forecast_correct_start():

    train = data_generator.generate_test_data(2, 5, 1000, "2020-01-01", 1)
    expected_start_of_forecast = max(train.df["ds"]) + timedelta(days=1)
    model = GroupedProphet().fit(train.df, train.key_columns)
    forecasted_data = model.forecast(10, "D")

    # Check that the earliest forecasted date is 1 day after the last training date.
    min_forecast = min(forecasted_data["ds"])

    assert expected_start_of_forecast == min_forecast
    assert len(forecasted_data) == 50
Example #9
def test_backtesting_cross_validation():
    train = data_generator.generate_test_data(2, 4, 1000, "2020-01-01", 1)
    model = GroupedProphet(n_changepoints=10, uncertainty_samples=0).fit(
        train.df, train.key_columns
    )

    cv_results = model.cross_validate(
        horizon="30 days", period="180 days", initial="365 days", parallel="processes"
    )
    for entries in cv_results.values():
        assert len(entries) == 120
        assert all(row > 0 for row in entries["yhat"])
Example #10
def test_prophet_df_naming_overrides():

    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df
    train_df.rename(columns={"ds": "datetime", "y": "sales"}, inplace=True)

    assert {"datetime", "sales"}.issubset(set(train_df.columns))

    model = GroupedProphet().fit(train_df, train.key_columns, "sales", "datetime")

    params = model.extract_model_params()

    assert len(params) == 1
Example #11
def test_prophet_save_and_load():
    # Tests serialization, deserialization, and utilization of forecasting API from loaded model
    save_path = os.path.join("/tmp/grouped_prophet_test", "model")

    train = data_generator.generate_test_data(2, 2, 1000, "2020-01-01", 1)
    grouped_model = GroupedProphet().fit(train.df, train.key_columns)
    grouped_model.save(save_path)
    loaded_model = GroupedProphet.load(save_path)
    forecasts = loaded_model.forecast(25, "D")

    shutil.rmtree(os.path.dirname(save_path))

    assert len(forecasts) == 50
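A variant of the save/load round trip above that writes to a tempfile-managed directory instead of a hard-coded /tmp path; this is an editorial sketch rather than a test from the source, and it reuses only the GroupedProphet calls already shown.

import os
import tempfile


def test_prophet_save_and_load_tempdir():
    train = data_generator.generate_test_data(2, 2, 1000, "2020-01-01", 1)
    grouped_model = GroupedProphet().fit(train.df, train.key_columns)

    # TemporaryDirectory cleans up after itself, so no manual shutil.rmtree is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        save_path = os.path.join(tmp_dir, "model")
        grouped_model.save(save_path)
        loaded_model = GroupedProphet.load(save_path)
        forecasts = loaded_model.forecast(25, "D")

    assert len(forecasts) == 50  # 2 groups x 25 forecasted rows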
Example #12
def test_prophet_cross_validation_extract():

    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    scores = model.cross_validate_and_score(
        initial="100 days", period="90 days", horizon="15 days", parallel=None
    )

    assert all(scores["rmse"] > 0)
    assert len(scores) == 6
    assert "coverage" not in scores
Example #13
def test_prophet_save_load_override_object():
    """Test to ensure that deserialization updates object properly for all attributes"""

    train1 = data_generator.generate_test_data(3, 2, 1000, "2020-01-01", 1)
    train2 = data_generator.generate_test_data(2, 2, 500, "2021-01-01", 1)

    model1 = GroupedProphet().fit(train1.df, train1.key_columns)
    model2 = GroupedProphet().fit(train2.df, train2.key_columns)

    model1_group_keys = deepcopy(model1._group_key_columns)
    model1_model = deepcopy(model1.model)

    # save model 2
    save_path = os.path.join("/tmp/group_prophet_test", "model2serdetest.gpm")
    model2.save(save_path)

    # use model1 object to load model2
    reloaded = model1.load(save_path)

    assert set(reloaded._group_key_columns) != set(model1_group_keys)
    assert reloaded.model.keys() == model2.model.keys()
    assert reloaded.model.keys() != model1_model.keys()
Example #14
def test_prophet_manual_predict():
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    predict_df = train_df[["key1", "key0", "ds"]][-10:]

    model = GroupedProphet().fit(train_df, train.key_columns)

    prediction = model.predict(predict_df)

    assert len(prediction) == 10

    for _, row in prediction.iterrows():
        assert row["yhat"] > 0
Example #15
def test_prophet_execution_with_kwargs_override_for_pystan():

    train = data_generator.generate_test_data(4, 6, 1000, "2020-01-01", 1)

    default_prophet_uncertainty_samples = Prophet().uncertainty_samples

    model = GroupedProphet(uncertainty_samples=0).fit(
        train.df, train.key_columns, algorithm="LBFGS"
    )

    last_model = _get_individual_model(model, 5)

    assert last_model.uncertainty_samples == 0
    assert default_prophet_uncertainty_samples != last_model.uncertainty_samples
Example #16
def test_manual_performance_metrics_execution():
    train = data_generator.generate_test_data(2, 4, 1000, "2020-01-01", 1)
    model = GroupedProphet(n_changepoints=10, uncertainty_samples=0).fit(
        train.df, train.key_columns
    )

    cv_results = model.cross_validate(
        horizon="30 days", period="180 days", initial="365 days", parallel="processes"
    )
    performance_metrics = model.calculate_performance_metrics(cv_results=cv_results)

    assert len(performance_metrics.keys()) == 4

    metrics = {"mse", "rmse", "mae", "mape", "mdape", "smape"}
    for entries in performance_metrics.values():
        for metric in metrics:
            assert all(row > 0 for row in entries[metric])
Example #17
def test_prophet_cross_validation_extract_custom_scores():

    train = data_generator.generate_test_data(4, 2, 1000, "2020-01-01", 1)

    model = GroupedProphet(uncertainty_samples=0).fit(train.df, train.key_columns)

    scores = model.cross_validate_and_score(
        initial="100 days",
        period="90 days",
        horizon="15 days",
        parallel=None,
        metrics=["rmse", "mape"],
        disable_tqdm=False,
        monthly=True,
    )

    assert all(scores["rmse"] > 0)
    assert len(scores) == 2
    assert "coverage" not in scores
Example #18
def test_prophet_with_bad_group_data():

    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df
    # A group with only a single row, which cannot be fit and should trigger the warning path.
    bad_data = pd.DataFrame(
        {
            "ds": datetime.strptime("2021-01-01", "%Y-%m-%d"),
            "y": -500.3,
            "key1": "bad",
            "key0": "data",
        },
        index=[1000],
    )

    train_df_add = pd.concat([train_df, bad_data])

    with pytest.warns(RuntimeWarning, match="An error occurred while fitting group"):
        model = GroupedProphet().fit(train_df_add, train.key_columns)
    assert ("bad", "data") not in model.model.keys()
Example #19
def test_grouped_model_cross_validate():

    metrics = ["smape", "mean_squared_error", "mean_absolute_error"]
    expected_columns = (
        [f"{met}_mean" for met in metrics]
        + [f"{met}_stddev" for met in metrics]
        + ["grouping_key_columns", "key0"]
    )

    train = data_generator.generate_test_data(1, 2, 765, "2019-01-01")

    grouped_model = GroupedPmdarima(
        model_template=AutoARIMA(max_order=5, out_of_sample_size=30),
    ).fit(train.df, train.key_columns, "y", "ds", silence_warnings=True)
    cross_validator = RollingForecastCV(h=90, step=120, initial=365)
    cv_metrics = grouped_model.cross_validate(train.df, metrics, cross_validator)

    assert len(cv_metrics) == 2
    assert set(cv_metrics.columns).issubset(set(expected_columns))
Example #20
def test_group_cross_validation():

    train = data_generator.generate_test_data(2, 4, 1000, "2020-01-01", 1)
    model = GroupedProphet(n_changepoints=10, uncertainty_samples=0).fit(
        train.df, train.key_columns
    )

    cv_results = prophet_cross_validate.group_cross_validation(
        model,
        horizon="30 days",
        period="120 days",
        initial="180 days",
        parallel="threads",
    )

    first_key = list(cv_results.keys())[0]
    first_result = cv_results[first_key]

    assert len(set(cv_results.keys())) == 4
    assert {"yhat", "y", "ds", "cutoff"}.issubset(set(first_result.columns))
Example #21
def test_prophet_group_subset_predict():

    _rows_to_generate = 30
    train = data_generator.generate_test_data(2, 1, 1000, "2020-01-01", 1)
    train_df = train.df

    model = GroupedProphet().fit(train_df, train.key_columns)

    # Build the single valid group tuple from the first row's key values.
    key_entries = []
    for v in train_df[["key1", "key0"]].iloc[[0]].to_dict().values():
        key_entries.append(list(v.values())[0])
    groups = [tuple(key_entries)]

    group_prediction = model.predict_groups(groups, _rows_to_generate, "D")

    assert len(group_prediction) == _rows_to_generate
    _key1 = group_prediction["key1"].unique()
    assert len(_key1) == 1
    assert _key1[0] == groups[0][0]
    _key0 = group_prediction["key0"].unique()
    assert len(_key0) == 1
    assert _key0[0] == groups[0][1]