示例#1
0
    def test_fit_not_changing_data(self):
        """Test fit does not change X."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        h.assert_equal_dispatch(
            expected=d.create_df_5(),
            actual=df,
            msg="Check X not changing during fit",
        )
示例#2
0
    def test_super_fit_called(self, mocker):
        """Test that fit calls BaseTransformer.fit."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        expected_call_args = {
            0: {
                "args": (d.create_df_5(), None),
                "kwargs": {}
            }
        }

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "fit", expected_call_args):

            x.fit(df)
示例#3
0
    def test_weight_column_not_in_X_error(self):
        """Test that an exception is raised if weight is not in X."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"], weight="aaaa")

        with pytest.raises(ValueError, match="weight aaaa not in X"):

            x.fit(df)
示例#4
0
    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_5(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_5(),
        ):

            x.transform(df)
示例#5
0
    def test_fit_returns_self(self):
        """Test fit returns self?"""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x_fitted = x.fit(df)

        assert (
            x_fitted is x
        ), "Returned value from GroupRareLevelsTransformer.fit not as expected."
示例#6
0
    def test_non_numeric_column_error(self):
        """Test that transform will raise an error if a column to transform is not numeric."""

        df = d.create_df_5()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]})

        with pytest.raises(
            TypeError, match=r"The following columns are not numeric in X; \['b', 'c'\]"
        ):

            x.transform(df)
    def test_error_with_non_numeric_columns(self):
        """Test an exception is raised if transform is applied to non-numeric columns."""

        df = d.create_df_5()

        x = LogTransformer(columns=["a", "b", "c"])

        with pytest.raises(
                TypeError,
                match=
                r"The following columns are not numeric in X; \['b', 'c'\]"):

            x.transform(df)
示例#8
0
    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mapping_"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)
示例#9
0
    def test_learnt_values_no_weight(self):
        """Test that the impute values learnt during fit, without using a weight, are expected."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        x.fit(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={
                "mapping_": {
                    "b": ["a", np.NaN],
                    "c": ["a", "c", "e"]
                }
            },
            msg="mapping_ attribute",
        )
示例#10
0
    def test_learnt_values_not_modified(self):
        """Test that the mapping_ from fit are not changed in transform."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x2.fit(df)

        x2.transform(df)

        h.assert_equal_dispatch(
            expected=x.mapping_,
            actual=x2.mapping_,
            msg="Non rare levels not changed in transform",
        )
示例#11
0
class TestTransform(object):
    """Tests for the transform method on CrossColumnAddTransformer."""
    def expected_df_1():
        """Expected output from test_expected_output."""

        df = pd.DataFrame({
            "a": [2.1, 3.2, 4.3, 5.4, 6.5, 7.6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_2():
        """Expected output from test_non_specified_values_unchanged."""

        df = pd.DataFrame({
            "a": [2.1, 3.2, 3, 4, 5, 6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_3():
        """Expected output from test_multiple_mappings_expected_output."""

        df = pd.DataFrame({
            "a": [4.1, 5.1, 4.1, 4, 8, 10.2, 7, 8, 9, np.NaN],
            "b": ["a", "a", "a", "d", "e", "f", "g", np.NaN, np.NaN, np.NaN],
            "c": ["a", "a", "c", "c", "e", "e", "f", "g", "h", np.NaN],
        })

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(
            func=CrossColumnAddTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (["adjust_column"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseMappingTransformer.transform."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_1(),
        ):

            x.transform(df)

    def test_adjust_col_not_in_x_error(self):
        """Test that an exception is raised if the adjust_column is not present in the dataframe."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(ValueError, match="variable c is not in X"):

            x.transform(df)

    def test_adjust_col_not_numeric_error(self):
        """Test that an exception is raised if the adjust_column is not numeric."""

        df = d.create_df_2()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(TypeError,
                           match="variable c must have numeric dtype."):

            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1()) +
        h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2()) +
        h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""

        mapping = {"b": {"a": 1.1, "b": 1.2}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_3()) +
        h.index_preserved_params(d.create_df_5(), expected_df_3()),
    )
    def test_multiple_mappings_expected_output(self, df, expected):
        """Test that mappings by multiple columns are both applied in transform"""

        mapping = {"b": {"a": 1.1, "f": 1.2}, "c": {"a": 2, "e": 3}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""

        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        x.transform(df)

        h.assert_equal_dispatch(
            expected=mapping,
            actual=x.mappings,
            msg=
            "CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
        )
示例#12
0
class TestTransform(object):
    """Tests for GroupRareLevelsTransformer.transform()."""
    def expected_df_1():
        """Expected output for test_expected_output_no_weight."""

        df = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, np.NaN]})

        df["b"] = pd.Series([
            "a", "a", "a", "rare", "rare", "rare", "rare", np.NaN, np.NaN,
            np.NaN
        ])

        df["c"] = pd.Series(
            ["a", "a", "c", "c", "e", "e", "rare", "rare", "rare", "rare"],
            dtype=pd.CategoricalDtype(
                categories=["a", "c", "e", "f", "g", "h", "rare"],
                ordered=False),
        )

        return df

    def expected_df_2():
        """Expected output for test_expected_output_weight."""

        df = pd.DataFrame({
            "a": [2, 2, 2, 2, np.NaN, 2, 2, 2, 3, 3],
            "b": ["a", "a", "a", "d", "e", "f", "g", np.NaN, np.NaN, np.NaN],
            "c": ["a", "b", "c", "d", "f", "f", "f", "g", "g", np.NaN],
        })

        df["c"] = df["c"].astype("category")

        df["b"] = pd.Series([
            "a", "a", "a", "rare", "rare", "rare", "rare", np.NaN, np.NaN,
            np.NaN
        ])

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""

        h.test_function_arguments(func=GroupRareLevelsTransformer.transform,
                                  expected_arguments=["self", "X"])

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mapping_"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):

            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_5(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_5(),
        ):

            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mapping_ from fit are not changed in transform."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x2.fit(df)

        x2.transform(df)

        h.assert_equal_dispatch(
            expected=x.mapping_,
            actual=x2.mapping_,
            msg="Non rare levels not changed in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_1()) +
        h.index_preserved_params(d.create_df_5(), expected_df_1()),
    )
    def test_expected_output_no_weight(self, df, expected):
        """Test that the output is expected from transform."""

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mappging dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.NaN], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na(self):
        """test output from a single row transform with np.NaN value remains the same,
        the type is perserved if using existing dataframe, so need to create a new dataframe"""

        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.NaN]})
        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mappging dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.NaN], "c": ["e", "c", "a", np.NaN]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na_category_column(self):
        """test output from a single row transform with np.NaN value remains the same, when column is type category,
        the type is perserved if using existing dataframe, so need to create a new dataframe"""

        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.NaN]})
        one_row_df["c"] = one_row_df["c"].astype("category")

        # add rare as a category in dataframe
        one_row_df["c"].cat.add_categories("rare", inplace=True)

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mappging dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.NaN], "c": ["e", "c", "a", np.NaN]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_6(), expected_df_2()) +
        h.index_preserved_params(d.create_df_6(), expected_df_2()),
    )
    def test_expected_output_weight(self, df, expected):
        """Test that the output is expected from transform, when weights are used."""

        x = GroupRareLevelsTransformer(columns=["b"],
                                       cut_off_percent=0.3,
                                       weight="a")

        # set the mappging dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.NaN]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in GroupRareLevelsTransformer.transform (with weights)",
        )