def test_one_hot_encoder_fit_called(self, mocker):
    """Test that fit calls OneHotEncoder.fit with the expected arguments."""
    # fit should receive only the encoded column, passed by keyword
    expected_keyword_args = {"X": d.create_df_1()[["b"]], "y": None}

    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")

    mocker.patch("sklearn.preprocessing.OneHotEncoder.fit")

    x.fit(df)

    assert (
        sklearn.preprocessing.OneHotEncoder.fit.call_count == 1
    ), f"Not enough calls to OneHotEncoder.fit -\n Expected: 1\n Actual: {sklearn.preprocessing.OneHotEncoder.fit.call_count}"

    call_args = sklearn.preprocessing.OneHotEncoder.fit.call_args_list[0]

    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for OneHotEncoder.fit in OneHotEncodingTransformer.init",
    )

    # the transformer inherits from OneHotEncoder, so the only positional arg is self
    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in OneHotEncoder.fit call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in OneHotEncoder.fit call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_super_fit_call(self, mocker):
    """Test the call to CappingTransformer.fit."""
    capping_fit_spy = mocker.spy(tubular.capping.CappingTransformer, "fit")

    df = d.create_df_9()

    transformer = OutOfRangeNullTransformer(
        quantiles={"a": [0.1, 1], "b": [0.5, None]},
        weights_column="c",
    )

    transformer.fit(df)

    assert (
        capping_fit_spy.call_count == 1
    ), "unexpected number of calls to CappingTransformer.fit"

    # unpack the single recorded call into positional and keyword args
    pos_args, kwargs = capping_fit_spy.call_args_list[0]

    assert pos_args == (
        transformer,
    ), "unexpected positional args in CappingTransformer.fit call"

    h.assert_equal_dispatch(
        expected={"X": d.create_df_9(), "y": None},
        actual=kwargs,
        msg="unexpected kwargs in CappingTransformer.fit call",
    )
def test_mappings_unchanged(self):
    """Test that mappings is unchanged in transform."""
    df = d.create_df_1()

    mapping = {
        "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6},
    }

    transformer = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

    transformer.transform(df)

    # transform must not mutate the mappings supplied at construction
    h.assert_equal_dispatch(
        expected=mapping,
        actual=transformer.mappings,
        msg="CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
    )
def test_mappings_unchanged(self):
    """Test that mappings is unchanged in transform."""
    df = d.create_df_1()

    mapping = {
        "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
        "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
    }

    transformer = BaseMappingTransformer(mappings=mapping)

    transformer.transform(df)

    # transform must not mutate the mappings supplied at construction
    h.assert_equal_dispatch(
        expected=mapping,
        actual=transformer.mappings,
        msg="BaseMappingTransformer.transform has changed self.mappings unexpectedly",
    )
def test_base_nominal_transformer_fit_called(self, mocker):
    """Test that fit calls BaseNominalTransformer.fit with the expected arguments."""
    expected_keyword_args = {"X": d.create_df_1(), "y": None}

    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")

    mocker.patch("tubular.nominal.BaseNominalTransformer.fit")

    x.fit(df)

    assert (
        tubular.nominal.BaseNominalTransformer.fit.call_count == 1
    ), f"Not enough calls to BaseNominalTransformer.fit -\n Expected: 1\n Actual: {tubular.nominal.BaseNominalTransformer.fit.call_count}"

    call_args = tubular.nominal.BaseNominalTransformer.fit.call_args_list[0]

    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for BaseNominalTransformer.fit in OneHotEncodingTransformer.init",
    )

    # the only positional arg in the super().fit call should be self
    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in BaseNominalTransformer.fit call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in BaseNominalTransformer.fit call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_one_hot_encoder_init_called(self, mocker):
    """Test that init calls OneHotEncoder.init.

    Again not using h.assert_function_call for this as it does not handle self
    being passed to OneHotEncoder.init
    """
    expected_keyword_args = {"sparse": False, "handle_unknown": "ignore"}

    mocker.patch("sklearn.preprocessing.OneHotEncoder.__init__")

    x = OneHotEncodingTransformer(
        columns=None, verbose=True, copy=True, separator="x", drop_original=True
    )

    assert (
        sklearn.preprocessing.OneHotEncoder.__init__.call_count == 1
    ), f"Not enough calls to OneHotEncoder.__init__ -\n Expected: 1\n Actual: {sklearn.preprocessing.OneHotEncoder.__init__.call_count}"

    call_args = sklearn.preprocessing.OneHotEncoder.__init__.call_args_list[0]

    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for OneHotEncoder.__init__ in OneHotEncodingTransformer.init",
    )

    # the only positional arg in the super().__init__ call should be self
    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in OneHotEncoder.__init__ call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in OneHotEncoder.__init__ call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_X_returned(self, df, expected):
    """Test that X is returned from transform."""
    mapping = {
        "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
        "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
    }

    transformer = BaseMappingTransformer(mappings=mapping)

    h.assert_equal_dispatch(
        expected=expected,
        actual=transformer.transform(df),
        msg="Check X returned from transform",
    )
def test_scaler_transform_call(self, mocker, scaler, scaler_type_str):
    """Test that the call to the scaler.transform method."""
    df = d.create_df_3()

    transformer = ScalingTransformer(
        columns=["b", "c"], scaler=scaler, scaler_kwargs={"copy": True}
    )

    transformer.fit(df)

    mocked_transform = mocker.patch(
        f"sklearn.preprocessing.{scaler_type_str}.transform",
        return_value=df[["b", "c"]],
    )

    transformer.transform(df)

    assert mocked_transform.call_count == 1, "unexpected number of calls to scaler fit"

    # unpack the single recorded call into positional and keyword args
    pos_args, kwargs = mocked_transform.call_args_list[0]

    h.assert_equal_dispatch(
        expected=(df[["b", "c"]],),
        actual=pos_args,
        msg=f"unexpected positional args in {scaler_type_str} transform call",
    )

    assert kwargs == {}, f"unexpected kwargs in {scaler_type_str} transform call"
def test_output_from_scaler_transform_set_to_columns(
    self, mocker, scaler, scaler_type_str
):
    """Test that the call to the scaler.transform method."""
    df = d.create_df_3()

    transformer = ScalingTransformer(
        columns=["b", "c"], scaler=scaler, scaler_kwargs={"copy": True}
    )

    transformer.fit(df)

    # canned output the mocked scaler.transform will return
    scaler_transform_output = pd.DataFrame(
        {"b": [1, 2, 3, 4, 5, 6, 7], "c": [7, 6, 5, 4, 3, 2, 1]}
    )

    mocker.patch(
        f"sklearn.preprocessing.{scaler_type_str}.transform",
        return_value=scaler_transform_output,
    )

    df_transformed = transformer.transform(df)

    # transform should write the scaler's output back onto the scaled columns
    h.assert_equal_dispatch(
        expected=scaler_transform_output,
        actual=df_transformed[["b", "c"]],
        msg=f"output from {scaler_type_str} transform not assigned to columns",
    )
def test_categories_not_modified(self):
    """Test that the categories from fit are not changed in transform."""
    df_train = d.create_df_1()
    df_test = d.create_df_7()

    # fit two identical transformers; only one of them runs transform
    transformer = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)
    reference = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)

    transformer.fit(df_train)
    reference.fit(df_train)

    transformer.transform(df_test)

    # categories_ on the transformed object should still match the untouched one
    for index in (0, 1):
        h.assert_equal_dispatch(
            expected=list(reference.categories_[index]),
            actual=list(transformer.categories_[index]),
            msg=f"categories_ (index {index}) modified during transform",
        )
def test_super_init_called(self, mocker):
    """Test that init calls BaseNominalTransformer.init.

    Note, not using h.assert_function_call for this as it does not handle self
    being passed to BaseNominalTransformer.init.
    """
    expected_keyword_args = {"columns": None, "verbose": True, "copy": True}

    mocker.patch("tubular.nominal.BaseNominalTransformer.__init__")

    x = OneHotEncodingTransformer(columns=None, verbose=True, copy=True)

    assert (
        tubular.nominal.BaseNominalTransformer.__init__.call_count == 1
    ), f"Not enough calls to BaseNominalTransformer.__init__ -\n Expected: 1\n Actual: {tubular.nominal.BaseNominalTransformer.__init__.call_count}"

    call_args = tubular.nominal.BaseNominalTransformer.__init__.call_args_list[0]

    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for BaseNominalTransformer.__init__ in OneHotEncodingTransformer.init",
    )

    # the only positional arg in the super().__init__ call should be self
    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in BaseNominalTransformer.__init__ call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in BaseNominalTransformer.__init__ call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_value_set_in_transform(self, df, expected):
    """Test that transform sets the value as expected."""
    transformer = SetValueTransformer(columns=["a", "b"], value="a")

    h.assert_equal_dispatch(
        expected=expected,
        actual=transformer.transform(df),
        msg="incorrect value after SetValueTransformer transform",
    )
def test_X_returned(self, df, expected):
    """Test that X is returned from transform."""
    transformer = BaseTransformer(columns="a", copy=True)

    h.assert_equal_dispatch(
        expected=expected,
        actual=transformer.transform(X=df),
        msg="Check X returned from transform",
    )
def test_null_indicator_columns_correct(self, df, expected):
    """Test that the created indicator column is correct - and unrelated columns are unchanged"""
    transformer = NullIndicator(columns=["b", "c"])

    h.assert_equal_dispatch(
        expected=expected,
        actual=transformer.transform(df),
        msg="Check null indicator columns created correctly in transform.",
    )
def test_expected_output_1(self, df, expected):
    """Test that transform is giving the expected output when applied to float column."""
    imputer = BaseImputer()

    # configure the imputer directly instead of fitting it
    imputer.columns = ["a"]
    imputer.impute_values_ = {"a": 7}

    h.assert_equal_dispatch(
        expected=expected,
        actual=imputer.transform(df),
        msg="ArbitraryImputer transform col a",
    )
def test_expected_output_3(self, df, expected):
    """Test that transform is giving the expected output when applied to object and categorical columns."""
    imputer = BaseImputer()

    # configure the imputer directly instead of fitting it
    imputer.columns = ["b", "c"]
    imputer.impute_values_ = {"b": "g", "c": "f"}

    h.assert_equal_dispatch(
        expected=expected,
        actual=imputer.transform(df),
        msg="ArbitraryImputer transform col b, c",
    )
def test_fit_not_changing_data(self):
    """Test fit does not change X."""
    df = d.create_OrdinalEncoderTransformer_test_df()

    transformer = OrdinalEncoderTransformer(response_column="a", columns="b")

    transformer.fit(df)

    # compare against a freshly-built copy of the same fixture
    h.assert_equal_dispatch(
        expected=d.create_OrdinalEncoderTransformer_test_df(),
        actual=df,
        msg="Check X not changing during fit",
    )
def test_fit_not_changing_data(self):
    """Test fit does not change X."""
    df = d.create_df_1()

    transformer = ModeImputer(columns="a")

    transformer.fit(df)

    # compare against a freshly-built copy of the same fixture
    h.assert_equal_dispatch(
        expected=d.create_df_1(),
        actual=df,
        msg="Check X not changing during fit",
    )
def test_fit_not_changing_data(self):
    """Test fit does not change X."""
    df = d.create_df_1()

    transformer = OneHotEncodingTransformer(columns="b")

    transformer.fit(df)

    # compare against a freshly-built copy of the same fixture
    h.assert_equal_dispatch(
        expected=d.create_df_1(),
        actual=df,
        msg="Check X not changing during fit",
    )
def test_columns_set_to_all_columns_when_none(self):
    """Test that X.columns are set to self.columns if self.columns is None when function called."""
    df = d.create_df_1()

    transformer = BaseTransformer(columns=None)

    transformer.columns_set_or_check(X=df)

    h.assert_equal_dispatch(
        expected=list(df.columns.values),
        actual=transformer.columns,
        msg="x.columns set when None",
    )
def test_X_returned(self):
    """Test that the input X is returned from the method."""
    df = d.create_df_2()

    x = ScalingTransformer(columns=["a"], scaler="standard")

    df_returned = x.check_numeric_columns(df)

    h.assert_equal_dispatch(
        expected=df,
        actual=df_returned,
        msg="unexpected object returned from check_numeric_columns",
    )
def test_fit_not_changing_data(self):
    """Test fit does not change X."""
    df = d.create_df_5()

    transformer = GroupRareLevelsTransformer(columns=["a", "b", "c"])

    transformer.fit(df)

    # compare against a freshly-built copy of the same fixture
    h.assert_equal_dispatch(
        expected=d.create_df_5(),
        actual=df,
        msg="Check X not changing during fit",
    )
def test_columns_none_get_cat_columns(self):
    """If self.columns is None then object and categorical columns are set as self.columns."""
    df = d.create_df_4()

    transformer = BaseNominalTransformer()
    transformer.columns = None

    transformer.columns_set_or_check(df)

    h.assert_equal_dispatch(
        expected=["b", "c"],
        actual=transformer.columns,
        msg="nominal columns getting",
    )
def test_fit_not_changing_data(self):
    """Test fit does not change X."""
    df = d.create_df_1()

    transformer = NominalToIntegerTransformer(columns=["a", "b"])

    transformer.fit(df)

    # compare against a freshly-built copy of the same fixture
    h.assert_equal_dispatch(
        expected=d.create_df_1(),
        actual=df,
        msg="Check X not changing during fit",
    )
def test_nulls_imputed_correctly(self, df, expected):
    """Test missing values are filled with the correct values."""
    imputer = MedianImputer(columns=["a", "b", "c"])

    # set the impute values dict directly rather than fitting x on df so test works with helpers
    imputer.impute_values_ = {"a": 3.5, "b": 5.0, "c": -1.5}

    h.assert_equal_dispatch(
        expected=expected,
        actual=imputer.transform(df),
        msg="Check nulls filled correctly in transform",
    )
def test_unseen_categories_encoded_as_all_zeroes(self, df_test, expected):
    """Test OneHotEncodingTransformer.transform encodes unseen categories correctly (all 0s)."""
    # transformer is fit on the whole dataset separately from the input df to work with the decorators
    df_train = d.create_df_7()

    transformer = OneHotEncodingTransformer(columns=["a", "b", "c"], verbose=False)
    transformer.fit(df_train)

    h.assert_equal_dispatch(
        expected=expected,
        actual=transformer.transform(df_test),
        msg="unseen category rows not encoded as 0s",
    )
def test_nulls_imputed_correctly_2(self, df, expected):
    """Test missing values are filled with the correct values - and unrelated columns are not changed."""
    imputer = ModeImputer(columns=["a"])

    # set the impute values dict directly rather than fitting x on df so test works with helpers
    imputer.impute_values_ = {"a": 1.0}

    h.assert_equal_dispatch(
        expected=expected,
        actual=imputer.transform(df),
        msg="Check nulls filled correctly in transform",
    )
def test_mappings_unchanged(self):
    """Test that mappings is unchanged in transform."""
    df = d.create_df_1()

    mapping = {
        "a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"},
    }

    transformer = CrossColumnMappingTransformer(mappings=mapping, adjust_column="b")

    transformer.transform(df)

    # transform must not mutate the mappings supplied at construction
    h.assert_equal_dispatch(
        expected=mapping,
        actual=transformer.mappings,
        msg="CrossColumnMappingTransformer.transform has changed self.mappings unexpectedly",
    )
def test_original_columns_kept_when_specified(self):
    """Test OneHotEncodingTransformer.transform keeps original columns when specified."""
    df = d.create_df_7()

    x = OneHotEncodingTransformer(drop_original=False)

    x.fit(df)

    df_transformed = x.transform(df)

    # with drop_original=False no input column should be missing from the output;
    # sort the set difference so a failure message is deterministic
    h.assert_equal_dispatch(
        expected=[],
        actual=sorted({"a", "b", "c"} - set(df_transformed.columns)),
        msg="original columns not kept",
    )
def test_expected_output_4(self, df, expected):
    """Test that transform is giving the expected output when adding one and not dropping original columns."""
    x1 = LogTransformer(
        columns=["a", "b"], add_1=True, drop=False, suffix="new_col"
    )

    df_transformed = x1.transform(df)

    # drop=False keeps the originals, so the message reflects "not dropping"
    h.assert_equal_dispatch(
        expected=expected,
        actual=df_transformed,
        msg="LogTransformer transform not adding 1 and not dropping original columns",
    )