示例#1
0
def test_ktrlite_dual_seas(make_daily_data, seasonality_fs_order):
    """Fit and predict in-sample with two seasonal periods (7 and 365.25).

    The model uses the ``stan-map`` estimator with ``n_bootstrap_draws=-1``
    and is scored on the same frame it was fit on. The test expects:

    * a two-column output (``date``, ``prediction``) with one row per
      training row,
    * six entries in ``_posterior_samples``,
    * an in-sample SMAPE no larger than ``SMAPE_TOLERANCE``.
    """
    # Only the training frame is needed; the other fixture items are ignored.
    train_df, _unused_test, _unused_coef = make_daily_data

    output_columns = ['date', 'prediction']
    n_posterior_entries = 6

    model = KTRLite(
        date_col='date',
        response_col='response',
        seasonality=[7, 365.25],
        seasonality_fs_order=seasonality_fs_order,
        n_bootstrap_draws=-1,
        estimator='stan-map',
    )
    model.fit(train_df)
    forecast = model.predict(train_df)

    assert forecast.shape == (train_df.shape[0], len(output_columns))
    assert list(forecast.columns) == output_columns
    assert len(model._posterior_samples) == n_posterior_entries

    # In-sample fit quality check.
    actual = train_df['response'].values
    fitted = forecast['prediction'].values
    assert smape(actual, fitted) <= SMAPE_TOLERANCE
示例#2
0
def test_ktrlite_predict_decompose(make_daily_data):
    """Check the column layout of ``predict(..., decompose=True)``.

    The model is fit on the training frame with two seasonal periods
    (7 and 365.25) and ``n_bootstrap_draws=1e4``, then asked for a decomposed
    prediction on the test frame. The test expects ``prediction``, ``trend``
    and one column per seasonality, each accompanied by ``_5`` and ``_95``
    suffixed columns, with one output row per test row and six entries in
    ``_posterior_samples``.
    """
    # The third fixture item is not needed here.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        seasonality=[7, 365.25],
        seasonality_fs_order=[2, 5],
        estimator='stan-map',
        n_bootstrap_draws=1e4,
    )

    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df, decompose=True)

    expected_columns = [
        'date', 'prediction_5', 'prediction', 'prediction_95', 'trend_5',
        'trend', 'trend_95', 'seasonality_7_5', 'seasonality_7',
        'seasonality_7_95', 'seasonality_365.25_5', 'seasonality_365.25',
        'seasonality_365.25_95',
    ]
    # Derive the row count from the input frame rather than hard-coding the
    # fixture's length, so the assertion keeps tracking the fixture.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 6

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
示例#3
0
def test_ktrlite_level_segments(make_daily_data, level_segments):
    """Check KTRLite output and level knots for a given ``level_segments``.

    The model is fit without seasonality using ``stan-map`` and
    ``n_bootstrap_draws=-1``. The test expects a two-column prediction
    (``date``, ``prediction``) with one row per test row, four entries in
    ``_posterior_samples``, a knot table with ``level_segments + 1`` or
    ``level_segments + 2`` rows, and a levels table with one row per training
    observation as reported by ``get_training_meta()['num_of_obs']``.
    """
    # The third fixture item is not needed here.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        level_segments=level_segments,
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )

    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction']
    # Derive the row count from the input frame rather than hard-coding the
    # fixture's length, so the assertion keeps tracking the fixture.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 4

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters

    knots_df = ktrlite.get_level_knots()
    levels_df = ktrlite.get_levels()
    # Either of two knot counts is accepted for a given number of segments.
    assert knots_df.shape[0] in [level_segments + 1, level_segments + 2]
    assert levels_df.shape[0] == ktrlite.get_training_meta()['num_of_obs']
示例#4
0
def test_ktrlite_level_knot_distance(make_daily_data, level_knot_distance):
    """Check KTRLite prediction layout for a given ``level_knot_distance``.

    The model is fit without seasonality using ``stan-map`` and
    ``n_bootstrap_draws=1e4``. The test expects ``prediction`` together with
    its ``_5`` and ``_95`` suffixed columns, one output row per test row, and
    four entries in ``_posterior_samples``.
    """
    # The third fixture item is not needed here.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        level_knot_distance=level_knot_distance,
        estimator='stan-map',
        n_bootstrap_draws=1e4,
    )

    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction_5', 'prediction', 'prediction_95']
    # Derive the row count from the input frame rather than hard-coding the
    # fixture's length, so the assertion keeps tracking the fixture.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 4

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
示例#5
0
def test_ktrlite_hourly_data(ca_hourly_electricity_data):
    """Fit and predict in-sample on the hourly electricity fixture.

    The ``SDGE`` column is modelled against ``Dates`` with three seasonal
    periods (24, 7 and 365.25; Fourier orders 3, 3 and 5), ``stan-map`` and
    ``n_bootstrap_draws=-1``. The test expects a two-column output
    (``Dates``, ``prediction``) with one row per training row, six entries in
    ``_posterior_samples``, and an in-sample SMAPE no larger than
    ``SMAPE_TOLERANCE``.
    """
    # The fixture yields a (train, test) pair; only the training frame is used.
    train_df, _unused_test = ca_hourly_electricity_data

    output_columns = ['Dates', 'prediction']
    n_posterior_entries = 6

    model = KTRLite(
        date_col='Dates',
        response_col='SDGE',
        seasonality=[24, 7, 365.25],
        seasonality_fs_order=[3, 3, 5],
        n_bootstrap_draws=-1,
        estimator='stan-map',
    )
    model.fit(train_df)
    forecast = model.predict(train_df)

    assert forecast.shape == (train_df.shape[0], len(output_columns))
    assert list(forecast.columns) == output_columns
    assert len(model._posterior_samples) == n_posterior_entries

    # In-sample fit quality check.
    actual = train_df['SDGE'].values
    fitted = forecast['prediction'].values
    assert smape(actual, fitted) <= SMAPE_TOLERANCE
示例#6
0
def test_ktrlite_seas_segments(make_daily_data, seas_segments):
    """Check KTRLite prediction layout for a given ``seasonality_segments``.

    The model is fit with two seasonal periods (7 and 365.25; Fourier orders
    2 and 5), ``level_segments=10``, ``stan-map`` and
    ``n_bootstrap_draws=-1``. The test expects a two-column prediction
    (``date``, ``prediction``) with one row per test row and six entries in
    ``_posterior_samples``.
    """
    # The third fixture item is not needed here.
    train_df, test_df, _ = make_daily_data

    ktrlite = KTRLite(
        response_col='response',
        date_col='date',
        seasonality=[7, 365.25],
        seasonality_fs_order=[2, 5],
        level_segments=10,
        seasonality_segments=seas_segments,
        estimator='stan-map',
        n_bootstrap_draws=-1,
    )

    ktrlite.fit(train_df)
    predict_df = ktrlite.predict(test_df)

    expected_columns = ['date', 'prediction']
    # Derive the row count from the input frame rather than hard-coding the
    # fixture's length, so the assertion keeps tracking the fixture.
    expected_shape = (test_df.shape[0], len(expected_columns))
    expected_num_parameters = 6

    assert predict_df.shape == expected_shape
    assert predict_df.columns.tolist() == expected_columns
    assert len(ktrlite._posterior_samples) == expected_num_parameters
示例#7
0
def test_backtester_ktr_and_missing_val(make_daily_data, missing_flag):
    """Run ``BackTester`` with a KTRLite model, optionally with NaN responses.

    The train and test frames are concatenated into one frame. When
    ``missing_flag`` is truthy, two ``response`` values are set to NaN (row 10
    and the third row from the end). The back-test uses three splits and the
    test expects:

    * split keys ``{0, 1, 2}`` in the predicted frame,
    * a ``(6, 3)`` score table when training metrics are excluded,
    * with missing values: every metric except ``rmsse`` is non-NaN and
      ``rmsse`` is NaN; without missing values: every metric is non-NaN.
    """
    train_df, test_df, _ = make_daily_data
    full_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)

    if missing_flag:
        # One missing value near the end (testing side of the splits) ...
        full_df.loc[full_df.shape[0] - 3, 'response'] = np.nan
        # ... and one near the start (training side).
        full_df.loc[10, 'response'] = np.nan

    ktr = KTRLite(
        date_col='date',
        response_col='response',
        seasonality=[365.25],
        verbose=False,
    )
    backtester = BackTester(
        model=ktr,
        df=full_df,
        n_splits=3,
        incremental_len=100,
        forecast_len=20,
    )
    backtester.fit_predict()

    split_keys = set(backtester.get_predicted_df()['split_key'].tolist())
    assert split_keys == {0, 1, 2}

    score_df = backtester.score(include_training_metrics=False)
    num_testing_metrics = 6
    assert score_df.shape == (num_testing_metrics, 3)

    testing_rows = score_df[~score_df['is_training_metric']]
    metric_values = testing_rows['metric_values']

    if not missing_flag:
        # No NaN in the data: every metric must be a valid number.
        assert not np.isnan(metric_values.values).any()
        return

    # rmsse is the only metric that does not work with null values; all
    # others should still produce valid values.
    is_rmsse = testing_rows['metric_name'] == 'rmsse'
    assert not np.isnan(metric_values[~is_rmsse].values).any()
    assert np.isnan(metric_values[is_rmsse].values).all()