Exemplo n.º 1
0
    def test_weight_column_not_in_X_error(self):
        """Fitting must raise a ValueError when the weight column is absent from X."""

        transformer = GroupRareLevelsTransformer(
            weight="aaaa",
            columns=["a", "b", "c"],
        )
        data = d.create_df_5()

        # "aaaa" is the weight name given above; the message must mention it
        with pytest.raises(ValueError, match="weight aaaa not in X"):
            transformer.fit(data)
Exemplo n.º 2
0
    def test_fit_returns_self(self):
        """Check that fit hands back the very same transformer instance."""

        transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        fit_result = transformer.fit(d.create_df_5())

        # identity, not equality: fit should return the object it was called on
        failure_message = (
            "Returned value from GroupRareLevelsTransformer.fit not as expected."
        )
        assert fit_result is transformer, failure_message
Exemplo n.º 3
0
    def test_fit_not_changing_data(self):
        """Check that calling fit leaves the input DataFrame untouched."""

        data = d.create_df_5()

        GroupRareLevelsTransformer(columns=["a", "b", "c"]).fit(data)

        # compare against a freshly built copy of the same fixture
        untouched = d.create_df_5()
        h.assert_equal_dispatch(
            expected=untouched,
            actual=data,
            msg="Check X not changing during fit",
        )
Exemplo n.º 4
0
    def test_expected_output_no_weight(self, df, expected):
        """Test that transform gives the expected output for a preset mapping_.

        Parameters
        ----------
        df
            Input passed to transform (presumably supplied by a parametrising
            decorator outside this block - confirm).
        expected
            Frame the transformed output is compared against.
        """

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )
Exemplo n.º 5
0
    def test_cut_off_percent_negative_error(self):
        """A negative cut_off_percent must be rejected with a ValueError on construction."""

        expected_message = "cut_off_percent must be > 0 and < 1"

        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(cut_off_percent=-1.0)
Exemplo n.º 6
0
    def test_record_rare_levels_not_str_error(self):
        """A non-bool record_rare_levels must be rejected with a ValueError on construction."""

        expected_message = "record_rare_levels must be a bool"

        # an int is passed where a bool is required
        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(record_rare_levels=2)
Exemplo n.º 7
0
    def test_cut_off_percent_gt_one_error(self):
        """A cut_off_percent above 1 must be rejected with a ValueError on construction."""

        expected_message = "cut_off_percent must be > 0 and < 1"

        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(cut_off_percent=2.0)
Exemplo n.º 8
0
    def test_cut_off_percent_not_float_error(self):
        """A non-float cut_off_percent must be rejected with a ValueError on construction."""

        expected_message = "cut_off_percent must be a float"

        # a str is passed where a float is required
        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(cut_off_percent="a")
Exemplo n.º 9
0
    def test_weight_not_str_error(self):
        """A supplied weight that is not a str must be rejected with a ValueError."""

        expected_message = "weight should be a single column"

        # an int is passed where a column name is required
        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(weight=2)
Exemplo n.º 10
0
    def test_expected_output_no_weight_single_row_na(self):
        """Test that a single-row frame of null values is unchanged by transform.

        A new one-row DataFrame is created here rather than reusing an existing
        frame because, per the original note, the column type is preserved when
        an existing dataframe is used.
        """

        # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})

        # snapshot the input before transforming so the comparison below stays
        # meaningful even if transform were to modify its argument in place
        expected = one_row_df.copy()

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )
Exemplo n.º 11
0
    def test_super_fit_called(self, mocker):
        """Check that fit delegates to BaseTransformer.fit with (X, None)."""

        transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        data = d.create_df_5()

        # a single call (index 0) is expected, with positional args only
        first_call = {"args": (d.create_df_5(), None), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "fit",
            expected_call_args,
        ):
            transformer.fit(data)
Exemplo n.º 12
0
    def test_learnt_values_weight_2(self):
        """Test that the mapping_ values learnt during fit, using a weight, are expected."""

        df = d.create_df_6()

        # the keyword is `weight` (singular); the previous `weights` spelling
        # meant column "a" was never applied as the weight in this test
        x = GroupRareLevelsTransformer(
            columns=["c"],
            cut_off_percent=0.2,
            weight="a",
        )

        x.fit(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"mapping_": {"c": ["f", "g"]}},
            msg="mapping_ attribute",
        )
Exemplo n.º 13
0
    def test_class_methods(self):
        """Check that GroupRareLevelsTransformer exposes fit and transform methods."""

        transformer = GroupRareLevelsTransformer()

        # the method name doubles as the failure message tag
        for method_name in ("fit", "transform"):
            h.test_object_method(
                obj=transformer,
                expected_method=method_name,
                msg=method_name,
            )
Exemplo n.º 14
0
    def test_learnt_values_no_weight(self):
        """Test that the mapping_ values learnt during fit, without using a weight, are expected."""

        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        x.fit(df)

        # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed
        h.test_object_attributes(
            obj=x,
            expected_attributes={
                "mapping_": {
                    "b": ["a", np.nan],
                    "c": ["a", "c", "e"],
                }
            },
            msg="mapping_ attribute",
        )
Exemplo n.º 15
0
    def test_learnt_values_not_modified(self):
        """Check that transform does not alter the mapping_ learnt in fit."""

        data = d.create_df_5()

        # reference transformer: fitted only, never used to transform
        fitted_only = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        fitted_only.fit(data)

        # second transformer: fitted identically, then used to transform
        fitted_and_used = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        fitted_and_used.fit(data)
        fitted_and_used.transform(data)

        h.assert_equal_dispatch(
            actual=fitted_and_used.mapping_,
            expected=fitted_only.mapping_,
            msg="Non rare levels not changed in transform",
        )
Exemplo n.º 16
0
    def test_super_init_called(self, mocker):
        """Check that construction forwards its keyword arguments to BaseTransformer.__init__."""

        init_kwargs = {"columns": None, "verbose": True, "copy": True}

        # one call (index 0) is expected, carrying keyword arguments only
        expected_call_args = {0: {"args": (), "kwargs": dict(init_kwargs)}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "__init__",
            expected_call_args,
        ):
            GroupRareLevelsTransformer(**init_kwargs)
Exemplo n.º 17
0
    def test_values_passed_in_init_set_to_attribute(self):
        """Check that each init argument is stored on an attribute of the same name."""

        # the same name/value pairs are used for construction and for the check
        init_values = {
            "cut_off_percent": 0.05,
            "weight": "aaa",
            "rare_level_name": "bbb",
            "record_rare_levels": False,
        }

        transformer = GroupRareLevelsTransformer(**init_values)

        h.test_object_attributes(
            obj=transformer,
            expected_attributes=init_values,
            msg="Attributes for GroupRareLevelsTransformer set in init",
        )
Exemplo n.º 18
0
    def test_check_is_fitted_called(self, mocker):
        """Check that transform calls BaseTransformer.check_is_fitted for mapping_."""

        data = d.create_df_5()

        transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        transformer.fit(data)

        # one call (index 0) is expected, with the attribute list as sole argument
        first_call = {"args": (["mapping_"],), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            transformer.transform(data)
Exemplo n.º 19
0
    def test_super_transform_called(self, mocker):
        """Check that transform delegates to BaseTransformer.transform with X."""

        data = d.create_df_5()

        transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])
        transformer.fit(data)

        # one call (index 0) is expected, with the frame as sole positional arg
        first_call = {"args": (d.create_df_5(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        # the patched method is given a fresh frame to return
        patched_transform = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_5(),
        )

        with patched_transform:
            transformer.transform(data)
Exemplo n.º 20
0
    def test_rare_level_name_not_str_error(self):
        """A non-str rare_level_name must be rejected with a ValueError on construction."""

        expected_message = "rare_level_name must be a str"

        # an int is passed where a str is required
        with pytest.raises(ValueError, match=expected_message):
            GroupRareLevelsTransformer(rare_level_name=2)
Exemplo n.º 21
0
    def test_inheritance(self):
        """Check that GroupRareLevelsTransformer inherits from BaseNominalTransformer."""

        parent_class = tubular.nominal.BaseNominalTransformer

        h.assert_inheritance(GroupRareLevelsTransformer(), parent_class)