def test_error_new_variable_names_not_permitted():
    """Check that unsuitable ``new_variables_names`` are rejected at init.

    Every case pairs a ``func`` argument with a ``new_variables_names`` list
    and expects ``MathFeatures`` to raise ``ValueError`` on construction.
    """
    variables = ["Age", "Name"]
    rejected_cases = [
        # three names passed together with two functions
        (
            ["sum", "mean"],
            ["sum_of_two_vars", "mean_of_two_vars", "another_alias"],
        ),
        # two names passed together with a one-element function list
        (["sum"], ["sum_of_two_vars", "mean_of_two_vars"]),
        # two names passed together with a single function given as a string
        ("sum", ["sum_of_two_vars", "mean_of_two_vars"]),
        # the same name repeated for two functions
        (["sum", "mean"], ["sum_of_two_vars", "sum_of_two_vars"]),
    ]

    for func, names in rejected_cases:
        with pytest.raises(ValueError):
            MathFeatures(
                variables=variables,
                func=func,
                new_variables_names=names,
            )
def test_aggregations_with_strings(df_vartypes):
    """Check the output when the functions are passed as strings.

    ``MathFeatures`` is fitted on "Age" and "Marks" with six string-named
    functions. The transformed frame is compared with an expected frame
    holding the columns "Name", "City", "Age", "Marks" and "dob" followed by
    one ``<func>_Age_Marks`` column per function.
    """
    transformer = MathFeatures(
        variables=["Age", "Marks"],
        func=["sum", "prod", "mean", "std", "max", "min"],
    )
    X = transformer.fit_transform(df_vartypes)

    ref = pd.DataFrame.from_dict(
        {
            "Name": ["tom", "nick", "krish", "jack"],
            "City": ["London", "Manchester", "Liverpool", "Bristol"],
            "Age": [20, 21, 19, 18],
            "Marks": [0.9, 0.8, 0.7, 0.6],
            # "min" is the non-deprecated spelling of the minute alias "T";
            # both produce the same timestamps.
            "dob": pd.date_range("2020-02-24", periods=4, freq="min"),
            "sum_Age_Marks": [20.9, 21.8, 19.7, 18.6],
            "prod_Age_Marks": [
                18.0,
                16.8,
                13.299999999999999,
                10.799999999999999,
            ],
            "mean_Age_Marks": [10.45, 10.9, 9.85, 9.3],
            "std_Age_Marks": [
                13.505739520663058,
                14.28355697996826,
                12.94005409571382,
                12.303657992645928,
            ],
            "max_Age_Marks": [20.0, 21.0, 19.0, 18.0],
            "min_Age_Marks": [0.9, 0.8, 0.7, 0.6],
        }
    )

    # the transformed frame must equal the expected frame
    pd.testing.assert_frame_equal(X, ref)
def test_no_error_when_null_values_in_variable(df_vartypes):
    """Check the output when NaN is present and ``missing_values="ignore"``.

    A copy of the fixture gets NaN in row 1 of "Age". The expected frame
    has, for that row, "sum_Age_Marks" and "mean_Age_Marks" both equal to
    the row's "Marks" value (0.8).
    """
    df_na = df_vartypes.copy()
    df_na.loc[1, "Age"] = np.nan

    transformer = MathFeatures(
        variables=["Age", "Marks"],
        func=["sum", "mean"],
        missing_values="ignore",
    )

    X = transformer.fit_transform(df_na)

    ref = pd.DataFrame.from_dict(
        {
            "Name": ["tom", "nick", "krish", "jack"],
            "City": ["London", "Manchester", "Liverpool", "Bristol"],
            "Age": [20, np.nan, 19, 18],
            "Marks": [0.9, 0.8, 0.7, 0.6],
            # "min" is the non-deprecated spelling of the minute alias "T";
            # both produce the same timestamps.
            "dob": pd.date_range("2020-02-24", periods=4, freq="min"),
            "sum_Age_Marks": [20.9, 0.8, 19.7, 18.6],
            "mean_Age_Marks": [10.45, 0.8, 9.85, 9.3],
        }
    )

    # the transformed frame must equal the expected frame
    pd.testing.assert_frame_equal(X, ref)
def test_variable_names_when_df_cols_are_integers(df_numeric_columns):
    """Check the new column names when the input columns are integers.

    ``MathFeatures`` is fitted on columns ``2`` and ``3`` with six
    string-named functions. The expected frame keeps the integer column
    labels ``0`` to ``4`` and adds string-labelled ``<func>_2_3`` columns.
    """
    transformer = MathFeatures(
        variables=[2, 3],
        func=["sum", "prod", "mean", "std", "max", "min"],
    )

    X = transformer.fit_transform(df_numeric_columns)

    ref = pd.DataFrame.from_dict(
        {
            0: ["tom", "nick", "krish", "jack"],
            1: ["London", "Manchester", "Liverpool", "Bristol"],
            2: [20, 21, 19, 18],
            3: [0.9, 0.8, 0.7, 0.6],
            # "min" is the non-deprecated spelling of the minute alias "T";
            # both produce the same timestamps.
            4: pd.date_range("2020-02-24", periods=4, freq="min"),
            "sum_2_3": [20.9, 21.8, 19.7, 18.6],
            "prod_2_3": [
                18.0,
                16.8,
                13.299999999999999,
                10.799999999999999,
            ],
            "mean_2_3": [10.45, 10.9, 9.85, 9.3],
            "std_2_3": [
                13.505739520663058,
                14.28355697996826,
                12.94005409571382,
                12.303657992645928,
            ],
            "max_2_3": [20.0, 21.0, 19.0, 18.0],
            "min_2_3": [0.9, 0.8, 0.7, 0.6],
        }
    )

    # the transformed frame must equal the expected frame
    pd.testing.assert_frame_equal(X, ref)
def test_get_feature_names_out(_varnames, _drop, df_vartypes):
    """Check ``get_feature_names_out`` against the transformed columns.

    ``_varnames`` is forwarded as ``new_variables_names`` and ``_drop`` as
    ``drop_original``.
    """
    transformer = MathFeatures(
        variables=["Age", "Marks"],
        func=["sum", "mean"],
        new_variables_names=_varnames,
        drop_original=_drop,
    )
    X = transformer.fit_transform(df_vartypes)
    output_columns = list(X.columns)

    # None and False must both yield every column of the transformed frame
    names_for_none = transformer.get_feature_names_out(input_features=None)
    assert output_columns == names_for_none
    names_for_false = transformer.get_feature_names_out(input_features=False)
    assert output_columns == names_for_false

    # True must yield the new feature names: the ones supplied by the caller,
    # or the "<func>_Age_Marks" names when none were supplied
    if _varnames is None:
        expected_new = ["sum_Age_Marks", "mean_Age_Marks"]
    else:
        expected_new = _varnames
    names_for_true = transformer.get_feature_names_out(input_features=True)
    assert expected_new == names_for_true
def test_user_enters_two_operations(df_vartypes):
    """Check the output when ``func`` mixes a string and a callable.

    ``func`` is ``["sum", np.mean]``. The expected frame adds the columns
    "sum_Age_Marks" and "mean_Age_Marks" after the five input columns.
    """
    transformer = MathFeatures(
        variables=["Age", "Marks"],
        func=["sum", np.mean],
    )

    X = transformer.fit_transform(df_vartypes)

    ref = pd.DataFrame.from_dict(
        {
            "Name": ["tom", "nick", "krish", "jack"],
            "City": ["London", "Manchester", "Liverpool", "Bristol"],
            "Age": [20, 21, 19, 18],
            "Marks": [0.9, 0.8, 0.7, 0.6],
            # "min" is the non-deprecated spelling of the minute alias "T";
            # both produce the same timestamps.
            "dob": pd.date_range("2020-02-24", periods=4, freq="min"),
            "sum_Age_Marks": [20.9, 21.8, 19.7, 18.6],
            "mean_Age_Marks": [10.45, 10.9, 9.85, 9.3],
        }
    )

    # the transformed frame must equal the expected frame
    pd.testing.assert_frame_equal(X, ref)
def test_error_when_null_values_in_variable(df_vartypes):
    """Check that NaN is rejected when ``missing_values="raise"``.

    Both ``fit`` and ``transform`` are expected to raise ``ValueError`` when
    given a frame with NaN in "Age".
    """
    # copy of the fixture with one missing value in "Age"
    df_with_nan = df_vartypes.copy()
    df_with_nan.loc[1, "Age"] = np.nan

    settings = {
        "variables": ["Age", "Marks"],
        "func": ["sum", "mean"],
        "missing_values": "raise",
    }
    math_combinator = MathFeatures(**settings)

    # fitting on the frame with NaN must fail
    with pytest.raises(ValueError):
        math_combinator.fit(df_with_nan)

    # after fitting on the unmodified fixture, transforming the frame with
    # NaN must fail as well
    math_combinator.fit(df_vartypes)
    with pytest.raises(ValueError):
        math_combinator.transform(df_with_nan)
def test_get_feature_names_out_raises_error_when_wrong_param(
    _input_features, df_vartypes
):
    """Check that ``get_feature_names_out`` raises for ``_input_features``.

    The transformer is fitted first; the call with
    ``input_features=_input_features`` is then expected to raise
    ``ValueError``.
    """
    fitted = MathFeatures(variables=["Age", "Marks"], func=["sum", "mean"])
    fitted.fit(df_vartypes)

    with pytest.raises(ValueError):
        fitted.get_feature_names_out(input_features=_input_features)
def test_error_if_new_variable_names_not_permitted(_variables):
    """Check that ``_variables`` is rejected as ``new_variables_names``.

    Construction with a single function ("sum") and
    ``new_variables_names=_variables`` is expected to raise ``ValueError``.
    """
    init_kwargs = {
        "variables": ["Age", "Name"],
        "func": ["sum"],
        "new_variables_names": _variables,
    }
    with pytest.raises(ValueError):
        MathFeatures(**init_kwargs)
# Example no. 10
# 0
def test_error_if_func_is_dictionary():
    """Check that a dictionary ``func`` raises ``NotImplementedError``."""
    func_as_dict = {"A": "sum", "B": "mean"}
    with pytest.raises(NotImplementedError):
        MathFeatures(variables=["Age", "Name"], func=func_as_dict)
# Example no. 11
# 0
def test_error_when_variables_not_permitted(_variables):
    """Check that ``_variables`` is rejected as the ``variables`` argument.

    Construction with ``variables=_variables`` is expected to raise
    ``ValueError``.
    """
    functions = ["sum", "mean"]
    with pytest.raises(ValueError):
        MathFeatures(variables=_variables, func=functions)
# Example no. 12
# 0
def test_error_when_required_params_not_entered():
    """Check that omitting ``variables`` or ``func`` raises ``TypeError``."""
    incomplete_kwargs = (
        {"func": ["mean", "sum"]},  # ``variables`` left out
        {"variables": ["vara", "varb"]},  # ``func`` left out
    )
    for kwargs in incomplete_kwargs:
        with pytest.raises(TypeError):
            MathFeatures(**kwargs)
# Example no. 13
# 0
import pytest
from sklearn.utils.estimator_checks import check_estimator

from feature_engine.creation import (
    CyclicalFeatures,
    MathFeatures,
    RelativeFeatures,
    # FIXME: remove in version 1.4
    CombineWithReferenceFeature,
    CyclicalTransformer,
    MathematicalCombination,
)
from tests.estimator_checks.estimator_checks import check_feature_engine_estimator

# Estimator instances passed to the parametrized check below.
_estimators = [
    MathFeatures(
        variables=["0", "1"],
        func="mean",
        missing_values="ignore",
    ),
    RelativeFeatures(
        variables=["0", "1"],
        reference=["0"],
        func=["add"],
        missing_values="ignore",
    ),
    CyclicalFeatures(),
    # FIXME: remove in version 1.4
    MathematicalCombination(variables_to_combine=["0", "1"]),
    CombineWithReferenceFeature(
        variables_to_combine=["0"],
        reference_variables=["1"],
    ),
    CyclicalTransformer(),
]


@pytest.mark.parametrize("estimator", _estimators)
def test_check_estimator_from_sklearn(estimator):