class TestTransform:
    """Test suite covering SetValueTransformer.transform."""

    # NOTE: intentionally declared without ``self`` - this helper is invoked
    # directly inside the class body to build the parametrize arguments for
    # test_value_set_in_transform below.
    def expected_df_1():
        """Build the expected output for test_value_set_in_transform."""
        expected = d.create_df_2()
        for column in ("a", "b"):
            expected[column] = "a"
        return expected

    def test_arguments(self):
        """Check the argument names of SetValueTransformer.transform."""
        h.test_function_arguments(
            func=SetValueTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_called(self, mocker):
        """Check that BaseTransformer.transform is called by transform."""
        input_df = d.create_df_7()
        transformer = SetValueTransformer(columns=["a", "b"], value=1)

        # A single call is expected, receiving a frame equal to the input.
        first_call = {"args": (d.create_df_7(),), "kwargs": {}}
        expected_call_args = {0: first_call}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1())
        + h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_value_set_in_transform(self, df, expected):
        """Check that transform writes the configured value into the columns."""
        transformer = SetValueTransformer(columns=["a", "b"], value="a")

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="incorrect value after SetValueTransformer transform",
        )
def test_super_columns_check_called(self, mocker):
    """Check that BaseTransformer.columns_check is called by ArbitraryImputer.transform."""
    input_df = d.create_df_2()
    imputer = ArbitraryImputer(impute_value=-1, columns="a")

    # A single call is expected, receiving a frame equal to the input.
    first_call = {"args": (d.create_df_2(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "columns_check",
        expected_call_args,
    ):
        imputer.transform(input_df)
def test_super_transform_called(self, mocker):
    """Check that BaseImputer.transform is called by ArbitraryImputer.transform."""
    input_df = d.create_df_2()
    imputer = ArbitraryImputer(impute_value=1, columns="a")

    # A single call is expected, receiving a frame equal to the input.
    first_call = {"args": (d.create_df_2(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.imputers.BaseImputer,
        "transform",
        expected_call_args,
    ):
        imputer.transform(input_df)
def test_super_transform_called(self, mocker):
    """Check that BaseTransformer.transform is called by BaseImputer.transform."""
    input_df = d.create_df_2()

    # Set the attributes by hand on a bare BaseImputer, with no columns and
    # no impute values configured.
    imputer = BaseImputer()
    imputer.columns = []
    imputer.impute_values_ = {}

    first_call = {"args": (d.create_df_2(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
    ):
        imputer.transform(input_df)
def test_check_numeric_columns_call(self, mocker):
    """Check that fit calls ScalingTransformer.check_numeric_columns."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a"], scaler="standard")

    # A single call is expected, receiving a frame equal to the input.
    first_call = {"args": (d.create_df_2(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.numeric.ScalingTransformer,
        "check_numeric_columns",
        expected_call_args,
        return_value=d.create_df_2(),
    ):
        scaler.fit(input_df)
def test_super_fit_call(self, mocker):
    """Check that ScalingTransformer.fit calls BaseTransformer.fit."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a"], scaler="standard")

    # The expected positional arguments are the frame followed by None.
    first_call = {"args": (d.create_df_2(), None), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "fit",
        expected_call_args,
    ):
        scaler.fit(input_df)
def expected_df_1():
    """Build the expected output for test_value_set_in_transform.

    Starts from d.create_df_2() and overwrites columns "a" and "b" with the
    constant string "a".
    """
    expected = d.create_df_2()
    for column in ("a", "b"):
        expected[column] = "a"
    return expected
def test_super_transform_called(self, mocker):
    """Check that ScalingTransformer.transform calls BaseTransformer.transform."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a"], scaler="standard")
    # Fit first, before the patched transform call is asserted on.
    scaler.fit(input_df)

    first_call = {"args": (d.create_df_2(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=d.create_df_2(),
    ):
        scaler.transform(input_df)
def test_return_self(self):
    """Check that ScalingTransformer.fit returns the transformer itself."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a"], scaler="standard")

    fitted = scaler.fit(input_df)

    # Identity, not equality: fit must hand back the very same object.
    assert fitted is scaler, (
        "return value from ScalingTransformer.fit not as expected (self)."
    )
def test_adjust_col_not_numeric_error(self):
    """Test that an exception is raised if the adjust_column is not numeric.

    CrossColumnMultiplyTransformer is built with adjust_column="c" and
    transform is expected to raise a TypeError for the frame from
    d.create_df_2().
    """
    df = d.create_df_2()

    mapping = {
        "b": {"a": 1.1, "b": 1.2, "c": 1.3, "d": 1.4, "e": 1.5, "f": 1.6},
    }

    x = CrossColumnMultiplyTransformer(mappings=mapping, adjust_column="c")

    # ``match`` is treated as a regular expression, so the trailing full stop
    # is escaped; unescaped it would match any character.
    with pytest.raises(
        TypeError,
        match=r"variable c must have numeric dtype\.",
    ):
        x.transform(df)
def test_nulls_in_X_error(self):
    """Check that fit raises a ValueError when a column to fit on has nulls."""
    input_df = d.create_df_2()
    encoder = OneHotEncodingTransformer(columns=["b", "c"])

    expected_message = "column b has nulls - replace before proceeding"

    with pytest.raises(ValueError, match=expected_message):
        encoder.fit(input_df)
def test_X_returned(self):
    """Check that check_numeric_columns hands back the X it was given."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a"], scaler="standard")

    returned = scaler.check_numeric_columns(input_df)

    h.assert_equal_dispatch(
        actual=returned,
        expected=input_df,
        msg="unexepcted object returned from check_numeric_columns",
    )
def test_exception_raised(self):
    """Check that a TypeError is raised when X contains non-numeric columns."""
    input_df = d.create_df_2()
    scaler = ScalingTransformer(columns=["a", "b", "c"], scaler="standard")

    # Square brackets are escaped because the pattern is used as a regex.
    expected_pattern = r"The following columns are not numeric in X; \['b', 'c'\]"

    with pytest.raises(TypeError, match=expected_pattern):
        scaler.check_numeric_columns(input_df)
def test_non_numeric_column_error_1(self):
    """Check that transform raises a ValueError when a column to transform has nulls."""
    train_df = d.create_df_1()
    test_df = d.create_df_2()

    # Fit on one frame, then transform a different one.
    encoder = OneHotEncodingTransformer(columns=["b"])
    encoder.fit(train_df)

    expected_message = "column b has nulls - replace before proceeding"

    with pytest.raises(ValueError, match=expected_message):
        encoder.transform(test_df)
def test_impute_values_set(self, mocker):
    """Check that transform fills impute_values_ from impute_value for each column."""
    input_df = d.create_df_2()
    imputer = ArbitraryImputer(impute_value=1, columns=["a", "b", "c"])

    # Replace BaseImputer.transform with a mock so it does not run.
    mocker.patch.object(
        tubular.imputers.BaseImputer,
        "transform",
        return_value=1234,
    )

    imputer.transform(input_df)

    expected_impute_values = dict.fromkeys(("a", "b", "c"), 1)

    assert (
        imputer.impute_values_ == expected_impute_values
    ), "impute_values_ not set with imput_value in transform"
class TestTransform:
    """Tests for BaseImputer.transform."""

    # NOTE: the expected_df_* helpers are intentionally declared without
    # ``self`` - they are invoked directly inside the class body to build the
    # parametrize arguments for the tests below.
    # ``np.nan`` is used rather than the ``np.NaN`` alias, which NumPy 2.0
    # removed.

    def expected_df_1():
        """Expected output of test_expected_output_1."""
        df = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                "b": ["a", "b", "c", "d", "e", "f", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2."""
        df2 = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g"],
                "c": ["a", "b", "c", "d", "e", "f", np.nan],
            }
        )

        df2["c"] = df2["c"].astype("category")

        return df2

    def expected_df_3():
        """Expected output of test_expected_output_3."""
        df3 = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g"],
                "c": ["a", "b", "c", "d", "e", "f", "f"],
            }
        )

        df3["c"] = df3["c"].astype("category")

        return df3

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=BaseImputer.transform,
            expected_arguments=["self", "X"],
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1())
        + h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when applied to float column."""
        x1 = BaseImputer()
        x1.columns = ["a"]
        x1.impute_values_ = {"a": 7}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col a",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_2())
        + h.index_preserved_params(d.create_df_2(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when applied to object column."""
        x1 = BaseImputer()
        x1.columns = ["b"]
        x1.impute_values_ = {"b": "g"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_3())
        + h.index_preserved_params(d.create_df_2(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when applied to object and categorical columns."""
        x1 = BaseImputer()
        x1.columns = ["b", "c"]
        x1.impute_values_ = {"b": "g", "c": "f"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b, c",
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_1()

        # impute_values_ is deliberately left unset on this instance.
        x = BaseImputer()
        x.columns = []

        expected_call_args = {
            0: {"args": (["impute_values_"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_2()

        x = BaseImputer()
        x.columns = []
        x.impute_values_ = {}

        expected_call_args = {
            0: {"args": (d.create_df_2(),), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            x.transform(df)