示例#1
0
def basic_arima(data):
    """Fit a grouped model that uses a fixed-order ARIMA as its template.

    The template is an ``ARIMA`` with ``order=(2, 0, 3)`` and
    ``out_of_sample_size=30``. ``data`` must expose ``df`` and
    ``key_columns``; the series values are read from column ``"y"`` and the
    timestamps from column ``"ds"``.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    template = ARIMA(order=(2, 0, 3), out_of_sample_size=30)
    grouped = GroupedPmdarima(model_template=template)
    return grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
示例#2
0
def test_grouped_pmdarima_save_load_predict(model):
    """Check that a saved-and-reloaded model reproduces the original forecast.

    A 30-period forecast with confidence intervals is taken from ``model``,
    the model is written to disk and loaded back, and the reloaded model's
    forecast is required to equal the original frame.

    The artifact is written inside a per-test temporary directory instead of
    a fixed ``/tmp`` location, so the test is portable, leaves nothing behind,
    and cannot collide with other tests that save a model concurrently.
    """
    import os
    import tempfile

    forecast = model.predict(30, return_conf_int=True)

    with tempfile.TemporaryDirectory() as tmp_dir:
        save_path = os.path.join(tmp_dir, "test.pmd")
        model.save(save_path)
        loaded = GroupedPmdarima.load(save_path)
        # Predict while the artifact still exists, in case loading is lazy.
        loaded_forecast = loaded.predict(30, return_conf_int=True)

    assert_frame_equal(forecast, loaded_forecast)
示例#3
0
def basic_pmdarima(data):
    """Fit a grouped model that uses ``AutoARIMA`` as its template.

    The template is created with ``out_of_sample_size=30`` and otherwise
    default settings. ``data`` must expose ``df`` and ``key_columns``; the
    series values are read from column ``"y"`` and the timestamps from
    column ``"ds"``.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    template = AutoARIMA(out_of_sample_size=30)
    grouped = GroupedPmdarima(model_template=template)
    return grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
示例#4
0
def test_grouped_model_cross_validate():
    """Cross-validate a grouped AutoARIMA model and check the result's shape.

    The result must contain exactly two rows, and every column must be one of
    the ``<metric>_mean`` / ``<metric>_stddev`` names for the requested
    metrics, ``grouping_key_columns`` or ``key0``.
    """
    metrics = ["smape", "mean_squared_error", "mean_absolute_error"]
    allowed_columns = {
        f"{metric}_{suffix}" for suffix in ("mean", "stddev") for metric in metrics
    } | {"grouping_key_columns", "key0"}

    # NOTE(review): the positional arguments presumably describe the number of
    # key columns / series / rows and the start date -- confirm against
    # data_generator.generate_test_data.
    train = data_generator.generate_test_data(1, 2, 765, "2019-01-01")

    template = AutoARIMA(max_order=5, out_of_sample_size=30)
    grouped_model = GroupedPmdarima(model_template=template).fit(
        train.df, train.key_columns, "y", "ds", silence_warnings=True
    )

    cross_validator = RollingForecastCV(h=90, step=120, initial=365)
    cv_metrics = grouped_model.cross_validate(train.df, metrics, cross_validator)

    assert len(cv_metrics) == 2
    assert set(cv_metrics.columns) <= allowed_columns
示例#5
0
def basic_pipeline(data):
    """Fit a grouped model whose template is a two-step pipeline.

    The pipeline applies ``FourierFeaturizer(k=3, m=7)`` and then
    ``AutoARIMA(out_of_sample_size=60)``. ``data`` must expose ``df`` and
    ``key_columns``; the series values are read from column ``"y"`` and the
    timestamps from column ``"ds"``.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    fourier_step = ("fourier", FourierFeaturizer(k=3, m=7))
    arima_step = ("arima", AutoARIMA(out_of_sample_size=60))
    template = Pipeline(steps=[fourier_step, arima_step])

    grouped = GroupedPmdarima(model_template=template)
    return grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
示例#6
0
def grouped_pmdarima(diviner_data):
    """Fit a grouped AutoARIMA model on ``diviner_data`` with warnings silenced.

    The template is ``AutoARIMA(out_of_sample_size=60, maxiter=30)``.
    ``diviner_data`` must expose ``df`` and ``key_columns``; values come from
    column ``"y"`` and timestamps from column ``"ds"``.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    # Imported locally, as in the original; the module-level imports are not
    # visible from this snippet.
    from pmdarima.arima.auto import AutoARIMA

    grouped = GroupedPmdarima(
        model_template=AutoARIMA(out_of_sample_size=60, maxiter=30)
    )
    fit_options = {
        "df": diviner_data.df,
        "group_key_columns": diviner_data.key_columns,
        "y_col": "y",
        "datetime_col": "ds",
        "silence_warnings": True,
    }
    return grouped.fit(**fit_options)
示例#7
0
def model(data):
    """Fit a grouped model over a single-step AutoARIMA pipeline.

    The template is a ``Pipeline`` whose only step, ``"arima"``, is
    ``AutoARIMA(out_of_sample_size=60, max_order=7)``. ``data`` must expose
    ``df`` and ``key_columns``; values come from column ``"y"`` and
    timestamps from column ``"ds"``. Warnings are silenced during fitting.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    arima_step = AutoARIMA(out_of_sample_size=60, max_order=7)
    template = Pipeline(steps=[("arima", arima_step)])
    return GroupedPmdarima(model_template=template).fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )
示例#8
0
def test_pmdarima_default_arima_fit_attribute_extraction(data):
    """Check the metrics and params extracted from each fitted group model.

    For every group of a grouped AutoARIMA fit:

    * each extracted training metric is positive and is a known metric name,
    * every name in ``_PMDARIMA_MODEL_METRICS`` is present, and
    * the seasonal terms ``P``, ``D``, ``Q`` and ``s`` are all zero.
    """
    template = AutoARIMA(out_of_sample_size=30)
    arima_model = GroupedPmdarima(model_template=template).fit(
        data.df, data.key_columns, "y", "ds"
    )

    for group in arima_model.model.keys():
        fitted = _extract_arima_model(arima_model._extract_individual_model(group))

        group_metrics = _get_arima_training_metrics(fitted)
        for metric_name, metric_value in group_metrics.items():
            assert metric_value > 0
            assert metric_name in _PMDARIMA_MODEL_METRICS
        for expected_metric in _PMDARIMA_MODEL_METRICS:
            assert expected_metric in group_metrics.keys()

        group_params = _get_arima_params(fitted)
        # The template is not a seasonal model, so these must all be zero.
        for seasonal_term in {"P", "D", "Q", "s"}:
            assert group_params[seasonal_term] == 0
示例#9
0
def test_grouped_pmdarima_save_and_load(model):
    """Check that params and metrics survive a save/load round trip.

    The model's parameter and metric frames are captured, the model is saved
    and loaded back, and the reloaded frames must equal the originals.

    The artifact is written inside a per-test temporary directory instead of
    a fixed ``/tmp`` location, so the test is portable, leaves nothing behind,
    and cannot collide with other tests that save a model concurrently.
    """
    import os
    import tempfile

    orig_params = model.get_model_params()
    orig_metrics = model.get_metrics()

    with tempfile.TemporaryDirectory() as tmp_dir:
        save_path = os.path.join(tmp_dir, "test.pmd")
        model.save(save_path)
        loaded = GroupedPmdarima.load(save_path)
        # Read from the loaded model while the artifact still exists, in case
        # loading is lazy.
        loaded_params = loaded.get_model_params()
        loaded_metrics = loaded.get_metrics()

    assert_frame_equal(orig_params, loaded_params)
    assert_frame_equal(orig_metrics, loaded_metrics)
示例#10
0
def test_pmdarima_ndiffs_override_class_args(data):
    """Check that ``ndiffs`` passed to ``fit`` wins over the template's ``d``.

    The template is built with ``d=10``, while the ``ndiffs`` values are
    computed with ``max_d=4``. Every fitted model reporting ``d <= 4``
    therefore indicates the template's own ``d`` was not used.
    """
    analyzer = PmdarimaAnalyzer(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiffs = analyzer.calculate_ndiffs(alpha=0.4, max_d=4)

    base_template = AutoARIMA(d=10, out_of_sample_size=7)
    grouped = GroupedPmdarima(model_template=base_template)
    model = grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiffs,
        silence_warnings=True,
    )

    params = model.get_model_params()
    for _index, fitted_params in params.iterrows():
        assert fitted_params["d"] <= 4
示例#11
0
def pipeline_override_d(data):
    """Fit a grouped AutoARIMA pipeline with precomputed differencing terms.

    The template is a single-step ``Pipeline`` containing
    ``AutoARIMA(out_of_sample_size=30)``. A ``PmdarimaAnalyzer`` over the same
    data supplies ``ndiffs`` (``alpha=0.2``, ``test="kpss"``, ``max_d=7``) and
    ``nsdiffs`` (``m=7``, ``test="ocsb"``, ``max_D=7``), and both are passed
    to ``fit``. Warnings are silenced during fitting.

    Returns whatever ``GroupedPmdarima.fit`` returns.
    """
    template = Pipeline(steps=[("arima", AutoARIMA(out_of_sample_size=30))])

    analyzer = PmdarimaAnalyzer(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiffs = analyzer.calculate_ndiffs(alpha=0.2, test="kpss", max_d=7)
    nsdiffs = analyzer.calculate_nsdiffs(m=7, test="ocsb", max_D=7)

    grouped = GroupedPmdarima(model_template=template)
    return grouped.fit(
        df=data.df,
        group_key_columns=data.key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiffs,
        nsdiffs=nsdiffs,
        silence_warnings=True,
    )
        column_count=3,
        series_count=4,
        series_size=365 * 4,
        start_dt="2019-01-01",
        days_period=1,
    )

    training_data = generated_data.df
    group_key_columns = generated_data.key_columns

    # Build a GroupedPmdarima model by specifying an ARIMA model
    arima_obj = ARIMA(order=(2, 1, 3), out_of_sample_size=60)
    base_arima = GroupedPmdarima(model_template=arima_obj).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )

    # Save to local directory
    save_dir = "/tmp/group_pmdarima/arima.gpmd"
    base_arima.save(save_dir)

    # Load from saved model
    loaded_model = GroupedPmdarima.load(save_dir)

    print("\nARIMA results:\n", "-" * 40)
    get_and_print_model_metrics_params(loaded_model)

    prediction = loaded_model.predict(
    diff_analyzer = PmdarimaAnalyzer(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
    )
    ndiff = diff_analyzer.calculate_ndiffs(
        alpha=0.05,
        test="kpss",
        max_d=4,
    )

    grouped_model = GroupedPmdarima(model_template=pipeline).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        ndiffs=ndiff,
        silence_warnings=True,
    )

    # Save to local directory
    save_dir = "/tmp/group_pmdarima/pipeline_override.gpmd"
    grouped_model.save(save_dir)

    # Load from saved model
    loaded_model = GroupedPmdarima.load(save_dir)

    print("\nAutoARIMA results:\n", "-" * 40)
    get_and_print_model_metrics_params(loaded_model)

    print("\nPredictions:\n", "-" * 40)
示例#14
0
    generated_data = generate_example_data(
        column_count=2,
        series_count=6,
        series_size=365 * 4,
        start_dt="2019-01-01",
        days_period=1,
    )

    training_data = generated_data.df
    group_key_columns = generated_data.key_columns

    arima_obj = ARIMA(order=(2, 1, 3), out_of_sample_size=60)
    base_arima = GroupedPmdarima(model_template=arima_obj).fit(
        df=training_data,
        group_key_columns=group_key_columns,
        y_col="y",
        datetime_col="ds",
        silence_warnings=True,
    )

    # Get a subset of group keys to generate forecasts for
    group_df = training_data.copy()
    group_df["groups"] = list(zip(*[group_df[c] for c in group_key_columns]))
    distinct_groups = group_df["groups"].unique()
    groups_to_predict = list(distinct_groups[:3])

    print("-" * 65)
    print(f"Unique groups that have been modeled: {distinct_groups}")
    print(f"Subset of groups to generate predictions for: {groups_to_predict}")
    print("-" * 65)