Пример #1
0
def test_extract_datetime_features_with_default_options(
        df_datetime, df_datetime_transformed):
    """Check the output of a ``DatetimeFeatures`` built with no arguments.

    The transformed frame must equal the reference frame restricted to the
    non-datetime variables followed by one column per (datetime variable,
    default feature suffix) pair, in that order.
    """
    result = DatetimeFeatures().fit_transform(df_datetime)

    # Variable-major order: all default suffixes of the first datetime
    # variable, then those of the next one, and so on.
    derived_columns = [
        var + feat for var in vars_dt for feat in feat_names_default
    ]
    expected = df_datetime_transformed[vars_non_dt + derived_columns]

    pd.testing.assert_frame_equal(result, expected)
Пример #2
0
def test_extract_features_from_different_timezones(df_datetime,
                                                   df_datetime_transformed):
    """Check hour extraction from timestamps that carry different UTC offsets.

    With ``utc=True`` the extracted hour must equal the reference hour minus
    each row's offset. With ``utc=False`` fitting must raise a ``ValueError``
    whose message suggests setting ``utc=True``.
    """
    time_zones = [4, -1, 9, -7]
    # Derive the signed suffixes ("+4", "-1", "+9", "-7") from the offsets so
    # the input strings and the expected shift cannot drift apart.
    tz_suffixes = [format(tz, "+d") for tz in time_zones]
    tz_df = pd.DataFrame(
        {"time_obj": df_datetime["time_obj"].add(tz_suffixes)})

    transformer = DatetimeFeatures(variables="time_obj",
                                   features_to_extract=["hour"],
                                   utc=True)
    X = transformer.fit_transform(tz_df)

    pd.testing.assert_frame_equal(
        X,
        df_datetime_transformed[["time_obj_hour"
                                 ]].apply(lambda x: x.subtract(time_zones)),
    )

    exp_err_msg = ("ValueError: variable(s) time_obj "
                   "could not be converted to datetime. Try setting utc=True")
    non_utc_transformer = DatetimeFeatures(variables="time_obj",
                                           features_to_extract=["hour"],
                                           utc=False)
    # Only the call expected to fail sits inside the context manager, and it
    # is not wrapped in ``assert``: asserting on a returned DataFrame would
    # itself raise a ValueError (ambiguous truth value) and hide a regression
    # in which ``fit_transform`` stops raising.
    with pytest.raises(ValueError) as errinfo:
        non_utc_transformer.fit_transform(tz_df)
    assert str(errinfo.value) == exp_err_msg
Пример #3
0
def test_get_feature_names_out(df_datetime, df_datetime_transformed):
    """Check ``get_feature_names_out`` for several transformer configurations.

    For each configuration the names returned without arguments must match
    the columns of the transformed frame, and the names returned for explicit
    ``input_features`` must be the variable name joined with each feature
    suffix. Invalid ``input_features`` must raise ``ValueError``.
    """

    def expected_names(variables, suffixes):
        # Variable-major order: every suffix of the first variable, then
        # every suffix of the next one, and so on.
        return [name + suffix for name in variables for suffix in suffixes]

    one_var = ["date_obj1"]

    # --- default features, all variables ---
    dtf = DatetimeFeatures()
    Xt = dtf.fit_transform(df_datetime)
    assert list(Xt.columns) == dtf.get_feature_names_out()
    names_all_vars = dtf.get_feature_names_out(input_features=vars_dt)
    assert names_all_vars == expected_names(vars_dt, feat_names_default)
    names_one_var = dtf.get_feature_names_out(input_features=["date_obj1"])
    assert names_one_var == expected_names(one_var, feat_names_default)

    # --- default features, a single variable ---
    dtf = DatetimeFeatures(variables="date_obj1")
    Xt = dtf.fit_transform(df_datetime)
    assert list(Xt.columns) == dtf.get_feature_names_out()
    names_one_var = dtf.get_feature_names_out(input_features=["date_obj1"])
    assert names_one_var == expected_names(one_var, feat_names_default)

    # --- every supported feature ---
    dtf = DatetimeFeatures(features_to_extract="all")
    Xt = dtf.fit_transform(df_datetime)
    assert list(Xt.columns) == dtf.get_feature_names_out()
    feat_names_all = [FEATURES_SUFFIXES[feat] for feat in FEATURES_SUPPORTED]
    names_all_vars = dtf.get_feature_names_out(input_features=vars_dt)
    assert names_all_vars == expected_names(vars_dt, feat_names_all)
    names_one_var = dtf.get_feature_names_out(input_features=["date_obj1"])
    assert names_one_var == expected_names(one_var, feat_names_all)

    # --- an explicit list of features ---
    # The expected suffix is the feature name prefixed with an underscore.
    picked_suffixes = ["_" + feat for feat in ["semester", "week"]]
    dtf = DatetimeFeatures(features_to_extract=["semester", "week"])
    Xt = dtf.fit_transform(df_datetime)
    assert list(Xt.columns) == dtf.get_feature_names_out()
    names_all_vars = dtf.get_feature_names_out(input_features=vars_dt)
    assert names_all_vars == expected_names(vars_dt, picked_suffixes)
    names_one_var = dtf.get_feature_names_out(input_features=["date_obj1"])
    assert names_one_var == expected_names(one_var, picked_suffixes)

    # --- features extracted from the index ---
    dtf = DatetimeFeatures(variables="index",
                           features_to_extract=["semester", "week"])
    Xt = dtf.fit_transform(dates_idx_dt)
    # anything other than "index" as input_features is rejected
    with pytest.raises(ValueError):
        dtf.get_feature_names_out(input_features="not_index")
    with pytest.raises(ValueError):
        dtf.get_feature_names_out(input_features=["still", "not", "index"])
    # input_features left as None
    assert list(Xt.columns) == dtf.get_feature_names_out()
    # input_features is "index": names carry no variable prefix
    assert ["semester", "week"
            ] == dtf.get_feature_names_out(input_features="index")

    # --- original variables are kept (drop_original=False) ---
    dtf = DatetimeFeatures(drop_original=False)
    Xt = dtf.fit_transform(df_datetime)
    assert list(Xt.columns) == dtf.get_feature_names_out()
    names_all_vars = dtf.get_feature_names_out(input_features=vars_dt)
    assert names_all_vars == expected_names(vars_dt, feat_names_default)
    names_one_var = dtf.get_feature_names_out(input_features=["date_obj1"])
    assert names_one_var == expected_names(one_var, feat_names_default)

    # a bare string instead of a list of variables must be rejected
    with pytest.raises(ValueError):
        dtf.get_feature_names_out(input_features="date_obj1")

    # a variable the transformer did not extract features from must be
    # rejected
    with pytest.raises(ValueError):
        dtf.get_feature_names_out(input_features=["color"])