def test_super_transform_call(self, mocker):
    """Test that transform calls tubular.base.BaseTransformer.transform.

    The expected-call spec lists, for call 0, one positional argument equal
    to a freshly created d.create_df_1() frame and no keyword arguments.
    """
    input_frame = d.create_df_1()

    # Mapping keyed on column "b"; the transformer is told to adjust column "a".
    column_mappings = {
        "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6},
    }
    transformer = CrossColumnAddTransformer(
        mappings=column_mappings,
        adjust_column="a",
    )

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    # Value handed to the helper as the patched method's return value.
    patched_return = d.create_df_1()

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=patched_return,
    ):
        transformer.transform(input_frame)
def test_super_transform_call(self, mocker):
    """Test that BaseMappingTransformer.transform calls BaseTransformer.transform.

    Call 0 is expected to carry a single positional argument equal to a
    freshly created d.create_df_1() frame and no keyword arguments.
    """
    input_frame = d.create_df_1()

    int_to_letter = {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"}
    letter_to_int = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}
    transformer = BaseMappingTransformer(
        mappings={"a": int_to_letter, "b": letter_to_int},
    )

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
    ):
        transformer.transform(input_frame)
def test_one_hot_encoder_fit_called(self, mocker):
    """Test that fit calls OneHotEncoder.fit.

    OneHotEncoder.fit is patched, OneHotEncodingTransformer.fit is run, and
    the recorded call is checked for:

    - exactly one call,
    - keyword arguments X (only column "b" of the input) and y=None,
    - a single positional argument that is the transformer itself.
    """
    expected_keyword_args = {"X": d.create_df_1()[["b"]], "y": None}

    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")

    mocker.patch("sklearn.preprocessing.OneHotEncoder.fit")

    x.fit(df)

    # After patching, this attribute is the mock that recorded the calls.
    patched_fit = sklearn.preprocessing.OneHotEncoder.fit

    # Equality check: the message must cover too many calls as well as too few.
    assert (
        patched_fit.call_count == 1
    ), f"Unexpected number of calls to OneHotEncoder.fit -\n Expected: 1\n Actual: {patched_fit.call_count}"

    call_args = patched_fit.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for OneHotEncoder.fit in OneHotEncodingTransformer.fit",
    )

    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in OneHotEncoder.fit call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in OneHotEncoder.fit call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_base_nominal_transformer_fit_called(self, mocker):
    """Test that fit calls BaseNominalTransformer.fit.

    BaseNominalTransformer.fit is patched, OneHotEncodingTransformer.fit is
    run, and the recorded call is checked for:

    - exactly one call,
    - keyword arguments X (equal to the full input frame) and y=None,
    - a single positional argument that is the transformer itself.
    """
    expected_keyword_args = {"X": d.create_df_1(), "y": None}

    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")

    mocker.patch("tubular.nominal.BaseNominalTransformer.fit")

    x.fit(df)

    # After patching, this attribute is the mock that recorded the calls.
    patched_fit = tubular.nominal.BaseNominalTransformer.fit

    # Equality check: the message must cover too many calls as well as too few.
    assert (
        patched_fit.call_count == 1
    ), f"Unexpected number of calls to BaseNominalTransformer.fit -\n Expected: 1\n Actual: {patched_fit.call_count}"

    call_args = patched_fit.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected=expected_keyword_args,
        actual=call_kwargs,
        msg="kwargs for BaseNominalTransformer.fit in OneHotEncodingTransformer.fit",
    )

    assert (
        len(call_pos_args) == 1
    ), f"Unexpected number of positional args in BaseNominalTransformer.fit call -\n Expected: 1\n Actual: {len(call_pos_args)}"

    assert (
        call_pos_args[0] is x
    ), f"Unexpected positional arg (self) in BaseNominalTransformer.fit call -\n Expected: self\n Actual: {call_pos_args[0]}"
def test_super_transform_called(self, mocker):
    """Test that NullIndicator.transform calls BaseTransformer.transform.

    Call 0 is expected to carry a single positional argument equal to a
    freshly created d.create_df_1() frame and no keyword arguments.
    """
    input_frame = d.create_df_1()
    indicator = NullIndicator(columns="a")

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
    ):
        indicator.transform(input_frame)
def test_fit_not_changing_data(self):
    """Test that ModeImputer.fit does not change X.

    After fitting, the input frame is compared with a freshly created
    d.create_df_1() frame.
    """
    frame = d.create_df_1()

    imputer = ModeImputer(columns="a")
    imputer.fit(frame)

    pristine = d.create_df_1()
    h.assert_equal_dispatch(
        expected=pristine,
        actual=frame,
        msg="Check X not changing during fit",
    )
def test_fit_not_changing_data(self):
    """Test that NominalToIntegerTransformer.fit does not change X.

    After fitting on columns "a" and "b", the input frame is compared with a
    freshly created d.create_df_1() frame.
    """
    frame = d.create_df_1()

    transformer = NominalToIntegerTransformer(columns=["a", "b"])
    transformer.fit(frame)

    pristine = d.create_df_1()
    h.assert_equal_dispatch(
        expected=pristine,
        actual=frame,
        msg="Check X not changing during fit",
    )
def test_fit_not_changing_data(self):
    """Test that OneHotEncodingTransformer.fit does not change X.

    After fitting on column "b", the input frame is compared with a freshly
    created d.create_df_1() frame.
    """
    frame = d.create_df_1()

    encoder = OneHotEncodingTransformer(columns="b")
    encoder.fit(frame)

    pristine = d.create_df_1()
    h.assert_equal_dispatch(
        expected=pristine,
        actual=frame,
        msg="Check X not changing during fit",
    )
def test_mappings_unchanged(self):
    """Test that mappings is unchanged in transform.

    The expected value is a deep copy taken before transform runs. Comparing
    against the ``mapping`` dict itself would be vacuous if the transformer
    keeps a reference to that same object, because an in-place change made
    during transform would then show up on both sides of the comparison.
    """
    import copy

    df = d.create_df_1()

    mapping = {
        "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
        "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
    }

    x = BaseMappingTransformer(mappings=mapping)

    # Independent snapshot taken after construction and before transform,
    # so only changes made by transform itself can cause a failure.
    expected_mappings = copy.deepcopy(mapping)

    x.transform(df)

    h.assert_equal_dispatch(
        expected=expected_mappings,
        actual=x.mappings,
        msg="BaseMappingTransformer.transform has changed self.mappings unexpectedly",
    )
def test_check_is_fitted_call(self, mocker):
    """Test that transform calls check_is_fitted with ["mappings"].

    Call 0 to BaseTransformer.check_is_fitted is expected to carry the single
    positional argument ["mappings"] and no keyword arguments.
    """
    frame = d.create_df_1()

    int_to_letter = {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"}
    letter_to_int = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}

    # The mixin is built without arguments; its attributes are set directly.
    mixin = BaseMappingTransformMixin()
    mixin.columns = ["a", "b"]
    mixin.mappings = {"a": int_to_letter, "b": letter_to_int}

    first_call = {"args": (["mappings"],), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "check_is_fitted",
        expected_call_args,
    ):
        mixin.transform(frame)
def test_categories_not_modified(self):
    """Test that the categories from fit are not changed in transform.

    Two identically configured transformers are fitted on the same frame;
    only the first one then runs transform. Its categories_ entries at
    index 0 and index 1 must still match those of the second transformer.
    """
    train_frame = d.create_df_1()
    test_frame = d.create_df_7()

    transformed = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)
    reference = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)

    transformed.fit(train_frame)
    reference.fit(train_frame)

    # Only `transformed` runs transform; `reference` keeps the fitted state.
    transformed.transform(test_frame)

    reference_first = list(reference.categories_[0])
    actual_first = list(transformed.categories_[0])
    h.assert_equal_dispatch(
        expected=reference_first,
        actual=actual_first,
        msg="categories_ (index 0) modified during transform",
    )

    reference_second = list(reference.categories_[1])
    actual_second = list(transformed.categories_[1])
    h.assert_equal_dispatch(
        expected=reference_second,
        actual=actual_second,
        msg="categories_ (index 1) modified during transform",
    )
def test_exception_raised(self):
    """Test an exception is raised if non-mappable rows are present in X.

    check_mappable_rows is expected to raise a ValueError whose message
    matches the text about nulls being introduced into column b.
    """
    frame = d.create_df_1()

    transformer = BaseNominalTransformer()
    transformer.columns = ["a", "b"]

    shifted_ints = {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}
    # No entry for level "b" here - presumably that level occurs in column
    # "b" of d.create_df_1(); verify against the fixture.
    incomplete_letters = {"a": 1, "c": 2, "d": 3, "e": 4, "f": 5}
    transformer.mappings = {"a": shifted_ints, "b": incomplete_letters}

    expected_message = (
        "nulls would be introduced into column b from levels not present in mapping"
    )
    with pytest.raises(ValueError, match=expected_message):
        transformer.check_mappable_rows(frame)
def test_columns_check_call(self, mocker):
    """Test the first call to BaseTransformer.columns_check during transform.

    The transformer is fitted first; call 0 to columns_check is then expected
    to carry a single positional argument equal to a freshly created
    d.create_df_1() frame and no keyword arguments.
    """
    frame = d.create_df_1()

    encoder = OneHotEncodingTransformer(columns="b")
    encoder.fit(frame)

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "columns_check",
        expected_call_args,
    ):
        encoder.transform(frame)
def test_mappings_unchanged(self):
    """Test that mappings is unchanged in transform.

    The expected value is a deep copy taken before transform runs. Comparing
    against the ``mapping`` dict itself would be vacuous if the transformer
    keeps a reference to that same object, because an in-place change made
    during transform would then show up on both sides of the comparison.
    """
    import copy

    df = d.create_df_1()

    mapping = {"b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6}}

    x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

    # Independent snapshot taken after construction and before transform,
    # so only changes made by transform itself can cause a failure.
    expected_mappings = copy.deepcopy(mapping)

    x.transform(df)

    h.assert_equal_dispatch(
        expected=expected_mappings,
        actual=x.mappings,
        msg="CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
    )
def test_columns_set_or_check_called(self, mocker):
    """Test that fit calls BaseNominalTransformer.columns_set_or_check.

    Call 0 is expected to carry a single positional argument equal to a
    freshly created d.create_df_1() frame and no keyword arguments.
    """
    frame = d.create_df_1()
    encoder = OneHotEncodingTransformer(columns="b")

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.nominal.BaseNominalTransformer,
        "columns_set_or_check",
        expected_call_args,
    ):
        encoder.fit(frame)
def test_base_nominal_transformer_transform_called(self, mocker):
    """Test that BaseNominalTransformer.transform called.

    The transformer is fitted, BaseNominalTransformer.transform is patched to
    return a freshly created d.create_df_1() frame, and the recorded call is
    checked for:

    - exactly one call,
    - no keyword arguments,
    - two positional arguments: the transformer itself and a frame equal to
      a freshly created d.create_df_1() frame.
    """
    df = d.create_df_1()

    x = OneHotEncodingTransformer(columns="b")

    x.fit(df)

    mocker.patch(
        "tubular.nominal.BaseNominalTransformer.transform",
        return_value=d.create_df_1(),
    )

    x.transform(df)

    # After patching, this attribute is the mock that recorded the calls.
    patched_transform = tubular.nominal.BaseNominalTransformer.transform

    # Equality check: the message must cover too many calls as well as too few.
    assert (
        patched_transform.call_count == 1
    ), f"Unexpected number of calls to BaseNominalTransformer.transform -\n Expected: 1\n Actual: {patched_transform.call_count}"

    call_args = patched_transform.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    h.assert_equal_dispatch(
        expected={},
        actual=call_kwargs,
        msg="kwargs for BaseNominalTransformer.transform in OneHotEncodingTransformer.transform",
    )

    expected_pos_args = (x, d.create_df_1())

    assert (
        len(call_pos_args) == 2
    ), f"Unexpected number of positional args in BaseNominalTransformer.transform call -\n Expected: 2\n Actual: {len(call_pos_args)}"

    h.assert_frame_equal_msg(
        expected=expected_pos_args[1],
        actual=call_pos_args[1],
        msg_tag="X positional arg in BaseNominalTransformer.transform call",
    )

    assert (
        expected_pos_args[0] == call_pos_args[0]
    ), "self positional arg in BaseNominalTransformer.transform call"
def test_y_multi_col_df_error(self):
    """Test a ValueError is raised when y is passed as a pd.DataFrame.

    The same d.create_df_1() frame is supplied as both X and y.
    """
    frame = d.create_df_1()
    transformer = BaseTransformer(columns="a")

    with pytest.raises(ValueError):
        transformer.fit(X=frame, y=frame)
def test_fit_returns_self(self):
    """Test that ModeImputer.fit returns the very object it was called on."""
    frame = d.create_df_1()
    imputer = ModeImputer(columns="a")

    returned = imputer.fit(frame)

    assert returned is imputer, "Returned value from ModeImputer.fit not as expected."
def test_columns_not_in_X_error(self):
    """Test a ValueError is raised if self.columns contains a value not in X.

    The transformer is configured with columns "a" and "z" before
    columns_check is called on a d.create_df_1() frame.
    """
    frame = d.create_df_1()
    requested_columns = ["a", "z"]
    transformer = BaseTransformer(columns=requested_columns)

    with pytest.raises(ValueError):
        transformer.columns_check(X=frame)
def test_non_pd_type_error(self):
    """Test a ValueError is raised if y is not a pd.DataFrame or pd.Series.

    Here y is supplied as a plain list of six integers.
    """
    frame = d.create_df_1()
    transformer = BaseTransformer(columns="a")
    list_target = [1, 2, 3, 4, 5, 6]

    with pytest.raises(ValueError):
        transformer.fit(X=frame, y=list_target)
def test_super_transform_call(self, mocker):
    """Test the call to BaseMappingTransformer.transform.

    Call 0 is expected to carry a single positional argument equal to a
    freshly created d.create_df_1() frame and no keyword arguments.
    """
    input_frame = d.create_df_1()

    # Mapping keyed on column "a"; the transformer is told to adjust column "b".
    column_mappings = {
        "a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"},
    }
    transformer = CrossColumnMappingTransformer(
        mappings=column_mappings,
        adjust_column="b",
    )

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    # Value handed to the helper as the patched method's return value.
    patched_return = d.create_df_1()

    with h.assert_function_call(
        mocker,
        tubular.mapping.BaseMappingTransformer,
        "transform",
        expected_call_args,
        return_value=patched_return,
    ):
        transformer.transform(input_frame)
def test_successful_usage(mocker):
    """Test an example of a successful run of h.assert_function_call_count.

    BaseTransformer.fit is run inside the helper's context with an expected
    count of 1 for BaseTransformer.columns_set_or_check.
    """
    frame = d.create_df_1()
    transformer = tubular.base.BaseTransformer(columns="a")

    expected_count = 1
    with h.assert_function_call_count(
        mocker,
        tubular.base.BaseTransformer,
        "columns_set_or_check",
        expected_count,
    ):
        transformer.fit(X=frame)
def test_super_transform_called(self, mocker):
    """Test that NominalToIntegerTransformer.transform calls BaseTransformer.transform.

    The transformer is fitted first; call 0 is then expected to carry a
    single positional argument equal to a freshly created d.create_df_1()
    frame and no keyword arguments.
    """
    input_frame = d.create_df_1()

    transformer = NominalToIntegerTransformer(columns="a")
    transformer.fit(input_frame)

    first_call = {"args": (d.create_df_1(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    # Value handed to the helper as the patched method's return value.
    patched_return = d.create_df_1()

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=patched_return,
    ):
        transformer.transform(input_frame)
def test_super_transform_call(self, mocker):
    """Test the call to BaseMappingTransformMixin.transform.

    A spy is placed on BaseMappingTransformMixin.transform and
    MappingTransformer.transform is run. The spy must record exactly one
    call, with no keyword arguments and with positional arguments
    (transformer, X), where X equals a freshly created d.create_df_1() frame.
    """
    df = d.create_df_1()

    mapping = {
        "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
        "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
    }

    x = MappingTransformer(mappings=mapping)

    spy = mocker.spy(tubular.mapping.BaseMappingTransformMixin, "transform")

    x.transform(df)

    assert (
        spy.call_count == 1
    ), "unexpected number of calls to BaseMappingTransformMixin.transform"

    call_args = spy.call_args_list[0]
    call_pos_args = call_args[0]
    call_kwargs = call_args[1]

    expected_kwargs = {}

    assert (
        call_kwargs == expected_kwargs
    ), "unexpected kwargs in BaseMappingTransformMixin.transform call"

    expected_pos_args = (x, d.create_df_1())

    assert (
        expected_pos_args[0] == call_pos_args[0]
    ), "unexpected 1st positional arg in BaseMappingTransformMixin.transform call"

    h.assert_equal_dispatch(
        expected_pos_args[1],
        call_pos_args[1],
        "unexpected 2nd positional arg in BaseMappingTransformMixin.transform call",
    )
def test_columns_none_error(self):
    """Test a ValueError is raised by columns_check if self.columns is None.

    The precondition (columns really is None after construction) is asserted
    before columns_check is called.
    """
    frame = d.create_df_1()
    transformer = BaseTransformer(columns=None)

    assert (
        transformer.columns is None
    ), f"self.columns should be None but got {transformer.columns}"

    with pytest.raises(ValueError):
        transformer.columns_check(X=frame)
def test_fit_returns_self(self):
    """Test that OneHotEncodingTransformer.fit returns the object it was called on."""
    frame = d.create_df_1()
    encoder = OneHotEncodingTransformer(columns="b")

    returned = encoder.fit(frame)

    assert (
        returned is encoder
    ), "Returned value from OneHotEncodingTransformer.fit not as expected."
def test_adjust_col_not_in_x_error(self):
    """Test a ValueError is raised if adjust_column is not present in X.

    The transformer is built with adjust_column="c" and transform is expected
    to raise with a message matching "variable c is not in X".
    """
    frame = d.create_df_1()

    column_mappings = {
        "a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"},
    }
    transformer = CrossColumnMappingTransformer(
        mappings=column_mappings,
        adjust_column="c",
    )

    expected_message = "variable c is not in X"
    with pytest.raises(ValueError, match=expected_message):
        transformer.transform(frame)
def test_adjust_col_not_in_x_error(self):
    """Test a ValueError is raised if adjust_column is not present in X.

    The transformer is built with adjust_column="c" and transform is expected
    to raise with a message matching "variable c is not in X".
    """
    frame = d.create_df_1()

    column_mappings = {
        "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6},
    }
    transformer = CrossColumnMultiplyTransformer(
        mappings=column_mappings,
        adjust_column="c",
    )

    expected_message = "variable c is not in X"
    with pytest.raises(ValueError, match=expected_message):
        transformer.transform(frame)
def test_fit_returns_self(self):
    """Test that NominalToIntegerTransformer.fit returns the object it was called on."""
    frame = d.create_df_1()
    transformer = NominalToIntegerTransformer(columns=["a", "b"])

    returned = transformer.fit(frame)

    assert (
        returned is transformer
    ), "Returned value from NominalToIntegerTransformer.fit not as expected."
def test_columns_str_error(self):
    """Test a ValueError is raised by columns_check if self.columns is not a list.

    The transformer is built with columns=None and the attribute is then
    overwritten with the string "a" before columns_check is called.
    """
    frame = d.create_df_1()

    transformer = BaseTransformer(columns=None)
    # Overwrite after construction with a plain string.
    transformer.columns = "a"

    with pytest.raises(ValueError):
        transformer.columns_check(X=frame)