class TestTransform(object):
    """Tests covering NullIndicator.transform()."""

    def expected_df_1():
        """Build the frame expected by test_null_indicator_columns_correct.

        Takes no ``self``: it is called directly inside the class body to
        produce the values handed to ``pytest.mark.parametrize``.
        """
        indicator_columns = ["b_nulls", "c_nulls"]

        expected = pd.DataFrame(
            {
                "a": [1, 2, np.nan, 4, np.nan, 6],
                "b": [np.nan, 5, 4, 3, 2, 1],
                "c": [3, 2, 1, 4, 5, 6],
                "b_nulls": [1, 0, 0, 0, 0, 0],
                "c_nulls": [0, 0, 0, 0, 0, 0],
            }
        )

        # The indicator columns are cast to int32 after construction.
        expected[indicator_columns] = expected[indicator_columns].astype("int32")

        return expected

    def test_arguments(self):
        """Check the signature of transform is (self, X)."""
        h.test_function_arguments(
            func=NullIndicator.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Check that BaseTransformer.transform is called with the input frame."""
        input_df = d.create_df_1()
        transformer = NullIndicator(columns="a")

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_null_indicator_columns_correct(self, df, expected):
        """Check indicator columns are added for "b" and "c" and other columns are untouched."""
        transformer = NullIndicator(columns=["b", "c"])

        actual = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=actual,
            msg="Check null indicator columns created correctly in transform.",
        )
def test__check_dfs_passed_call():
    """Check row_by_row_params calls _check_dfs_passed exactly once with both frames."""
    first = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=[7, 8, 9])
    second = pd.DataFrame({"a": [2, 3, 4], "b": [5, 6, 7]}, index=[7, 8, 9])

    with mock.patch.object(tubular.testing.helpers, "_check_dfs_passed") as mocked:
        h.row_by_row_params(first, second)

    assert mocked.call_count == 1, "unexpected number of calls to _check_dfs_passed"

    # A recorded call unpacks into its positional and keyword arguments.
    positional, keywords = mocked.call_args_list[0]

    assert keywords == {}, "unexpected kwargs in _check_dfs_passed call"

    # Tuple equality checks element identity first, so this holds when the
    # very same frame objects were forwarded.
    assert positional == (
        first,
        second,
    ), "unexpected positional args in _check_dfs_passed call"
class TestTransform:
    """Tests covering SetValueTransformer.transform."""

    def expected_df_1():
        """Build the frame expected by test_value_set_in_transform.

        Takes no ``self``: it is called directly inside the class body to
        produce the values handed to ``pytest.mark.parametrize``.
        """
        expected = d.create_df_2()

        for column in ("a", "b"):
            expected[column] = "a"

        return expected

    def test_arguments(self):
        """Check the signature of transform is (self, X) with no defaults."""
        h.test_function_arguments(
            func=SetValueTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_called(self, mocker):
        """Check that BaseTransformer.transform is called with the input frame."""
        input_df = d.create_df_7()
        transformer = SetValueTransformer(columns=["a", "b"], value=1)

        expected_call_args = {0: {"args": (d.create_df_7(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1())
        + h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_value_set_in_transform(self, df, expected):
        """Check that transform overwrites columns "a" and "b" with the configured value."""
        transformer = SetValueTransformer(columns=["a", "b"], value="a")

        actual = transformer.transform(df)

        h.assert_equal_dispatch(
            actual=actual,
            expected=expected,
            msg="incorrect value after SetValueTransformer transform",
        )
def test_returned_object():
    """Check row_by_row_params returns one ParameterSet per row plus one for all rows."""
    index_labels = [7, 8, 9]

    # Single-row frames, one per index label, for each of the two inputs.
    left_rows = [
        pd.DataFrame({"a": [a_value], "b": [b_value]}, index=[label])
        for a_value, b_value, label in zip([1, 2, 3], [4, 5, 6], index_labels)
    ]
    right_rows = [
        pd.DataFrame({"c": [c_value], "d": [d_value]}, index=[label])
        for c_value, d_value, label in zip([10, 11, 12], [13, 14, 15], index_labels)
    ]

    left_full = pd.concat(left_rows, axis=0)
    right_full = pd.concat(right_rows, axis=0)

    expected_df_pairs = list(zip(left_rows, right_rows)) + [(left_full, right_full)]
    expected_ids = ["index 7", "index 8", "index 9", "all rows (3)"]

    results = h.row_by_row_params(left_full, right_full)

    assert (
        type(results) is list
    ), "unexpected type for object returned from row_by_row_params"

    assert len(results) == len(
        expected_df_pairs
    ), "unexpected len of object returned from row_by_row_params"

    for i, (result, expected_pair, expected_id) in enumerate(
        zip(results, expected_df_pairs, expected_ids)
    ):
        assert (
            type(result) is ParameterSet
        ), f"unexpected type for {i}th item in returned list"

        h.assert_equal_dispatch(
            result.values,
            expected_pair,
            f"unexpected values for {i}th item in returned list",
        )

        assert result.marks == (), f"unexpected marks for {i}th item in returned list"

        assert (
            result.id == expected_id
        ), f"unexpected id for {i}th item in returned list"
class TestTransform(object):
    """Tests for DateDiffLeapYearTransformer.transform()."""

    # The expected_df_* functions take no ``self``: they are called directly
    # inside the class body to build the pytest.mark.parametrize values.

    def expected_df_1():
        """Expected output for test_expected_output_drop_cols_true."""
        df = pd.DataFrame({"c": [26, 19, 0, 0, 0, -2, -3, 30]})

        return df

    def expected_df_2():
        """Expected output for test_expected_output_drop_cols_false."""
        df = pd.DataFrame(
            {
                "a": [
                    datetime.date(1993, 9, 27),  # day/month greater than
                    datetime.date(2000, 3, 19),  # day/month less than
                    datetime.date(2018, 11, 10),  # same day
                    datetime.date(2018, 10, 10),  # same year day/month greater than
                    datetime.date(2018, 10, 10),  # same year day/month less than
                    datetime.date(2018, 10, 10),  # negative day/month less than
                    datetime.date(2018, 12, 10),  # negative day/month greater than
                    datetime.date(
                        1985, 7, 23
                    ),  # large gap, this is incorrect with timedelta64 solutions
                ],
                "b": [
                    datetime.date(2020, 5, 1),
                    datetime.date(2019, 12, 25),
                    datetime.date(2018, 11, 10),
                    datetime.date(2018, 11, 10),
                    datetime.date(2018, 9, 10),
                    datetime.date(2015, 11, 10),
                    datetime.date(2015, 11, 10),
                    datetime.date(2015, 7, 23),
                ],
                "c": [26, 19, 0, 0, 0, -2, -3, 30],
            }
        )

        return df

    def expected_df_3():
        """Expected output for test_expected_output_nulls."""
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame(
            {
                "a": [np.nan],
                "b": [np.nan],
                "c": [None],
            }
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=DateDiffLeapYearTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_date_test_df()

        x = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=True,
        )

        expected_call_args = {
            0: {"args": (d.create_date_test_df(),), "kwargs": {}}
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_date_test_df(),
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_df(), expected_df_1())
        + h.index_preserved_params(d.create_date_test_df(), expected_df_1()),
    )
    def test_expected_output_drop_cols_true(self, df, expected):
        """Test that the output is expected from transform, when drop_cols is True.

        This tests positive year gaps and negative year gaps.
        """
        x = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=True,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (with drop_cols)",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_df(), expected_df_2())
        + h.index_preserved_params(d.create_date_test_df(), expected_df_2()),
    )
    def test_expected_output_drop_cols_false(self, df, expected):
        """Test that the output is expected from transform, when drop_cols is False.

        This tests positive year gaps and negative year gaps.
        """
        x = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=False,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (without drop_cols)",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_date_test_nulls_df(), expected_df_3())
        + h.index_preserved_params(d.create_date_test_nulls_df(), expected_df_3()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""
        x = DateDiffLeapYearTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="c",
            drop_cols=False,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in DateDiffLeapYearTransformer.transform (nulls)",
        )
class TestTransform(object):
    """Tests for BetweenDatesTransformer.transform"""

    # The expected_df_* functions take no ``self``: they are called directly
    # inside the class body to build the pytest.mark.parametrize values.

    def expected_df_1():
        """Expected output from transform in test_output."""
        df = d.create_is_between_dates_df_1()

        df["d"] = [True, False]

        return df

    def expected_df_2():
        """Expected output from transform in test_output_both_exclusive."""
        df = d.create_is_between_dates_df_2()

        df["e"] = [False, False, True, True, False, False]

        return df

    def expected_df_3():
        """Expected output from transform in test_output_lower_exclusive."""
        df = d.create_is_between_dates_df_2()

        df["e"] = [False, False, True, True, True, False]

        return df

    def expected_df_4():
        """Expected output from transform in test_output_upper_exclusive."""
        df = d.create_is_between_dates_df_2()

        df["e"] = [False, True, True, True, False, False]

        return df

    def expected_df_5():
        """Expected output from transform in test_output_both_inclusive."""
        df = d.create_is_between_dates_df_2()

        df["e"] = [False, True, True, True, True, False]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=BetweenDatesTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_call(self, mocker):
        """Test that call the BaseTransformer.transform() is as expected."""
        df = d.create_is_between_dates_df_1()

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="d",
        )

        expected_call_args = {
            0: {"args": (d.create_is_between_dates_df_1(),), "kwargs": {}}
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_is_between_dates_df_1(),
        ):
            x.transform(df)

    def test_cols_not_datetime(self):
        """Test that an exception is raised if cols not datetime."""
        df = pd.DataFrame(
            {
                "a": [2, 1],
                "b": pd.date_range(start="1/3/2016", end="27/09/2017", periods=2),
                "c": pd.date_range(start="1/2/2016", end="27/04/2017", periods=2),
            }
        )

        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="d",
        )

        with pytest.raises(
            TypeError, match=r"a should be datetime64\[ns\] type but got int64"
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_is_between_dates_df_1(), expected_df_1()),
    )
    def test_output(self, df, expected):
        """Test the output of transform is as expected."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="d",
            lower_inclusive=False,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_2())
        + h.index_preserved_params(d.create_is_between_dates_df_2(), expected_df_2()),
    )
    def test_output_both_exclusive(self, df, expected):
        """Test the output of transform is as expected if both limits are exclusive."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=False,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_3())
        + h.index_preserved_params(d.create_is_between_dates_df_2(), expected_df_3()),
    )
    def test_output_lower_exclusive(self, df, expected):
        """Test the output of transform is as expected if only the lower limit is exclusive."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=False,
            upper_inclusive=True,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_4())
        + h.index_preserved_params(d.create_is_between_dates_df_2(), expected_df_4()),
    )
    def test_output_upper_exclusive(self, df, expected):
        """Test the output of transform is as expected if only the upper limit is exclusive."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_is_between_dates_df_2(), expected_df_5())
        + h.index_preserved_params(d.create_is_between_dates_df_2(), expected_df_5()),
    )
    def test_output_both_inclusive(self, df, expected):
        """Test the output of transform is as expected if both limits are inclusive."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=True,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BetweenDatesTransformer.transform results not as expected",
        )

    def test_warning_message(self):
        """Test a warning is generated if not all the values in column_upper are greater than or equal to column_lower."""
        x = BetweenDatesTransformer(
            column_lower="a",
            column_between="b",
            column_upper="c",
            new_column_name="e",
            lower_inclusive=True,
            upper_inclusive=True,
        )

        df = d.create_is_between_dates_df_2()

        # Single-step .loc assignment. The chained form df["c"][0] = ... is not
        # guaranteed to write through to df, and never does under pandas
        # copy-on-write, so the condition under test would not be set up.
        # NOTE(review): assumes the fixture has a row labelled 0 - confirm.
        df.loc[0, "c"] = datetime.datetime(1989, 3, 1)

        with pytest.warns(Warning, match="not all c are greater than or equal to a"):
            x.transform(df)
class TestTransform(object):
    """Tests for SeriesStrMethodTransformer.transform()."""

    # The expected_df_* functions take no ``self``: they are called directly
    # inside the class body to build the pytest.mark.parametrize values.

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""
        df = d.create_df_7()

        df["b_new"] = df["b"].str.find(sub="a")

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""
        df = d.create_df_7()

        df["b"] = df["b"].str.pad(width=10)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=SeriesStrMethodTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_7()

        x = SeriesStrMethodTransformer(
            new_column_name="cc", pd_method_name="find", columns=["c"]
        )

        expected_call_args = {0: {"args": (d.create_df_7(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_1())
        + h.index_preserved_params(d.create_df_7(), expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""
        x = SeriesStrMethodTransformer(
            new_column_name="b_new",
            pd_method_name="find",
            columns=["b"],
            pd_method_kwargs={"sub": "a"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesStrMethodTransformer.transform with find, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_2())
        + h.index_preserved_params(d.create_df_7(), expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""
        x = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesStrMethodTransformer.transform with pad, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_7(), "b_new", "find", ["b"], {"sub": "a"}),
            (
                d.create_df_7(),
                "c_slice",
                "slice",
                ["c"],
                {"start": 0, "stop": 1, "step": 1},
            ),
            (d.create_df_7(), "b_upper", "upper", ["b"], {}),
        ],
    )
    def test_pandas_method_called(
        self, mocker, df, new_column_name, pd_method_name, columns, pd_method_kwargs
    ):
        """Test that the pandas.Series.str method is called as expected (with kwargs passed) during transform."""
        spy = mocker.spy(pd.Series.str, pd_method_name)

        x = SeriesStrMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # pull out the keyword args of the first recorded call
        call_args = spy.call_args_list[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""
        df = d.create_df_7()

        x = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        # Identically configured transformer that is never transformed; it
        # serves as the reference for the attribute values set in init.
        x2 = SeriesStrMethodTransformer(
            new_column_name="b",
            pd_method_name="pad",
            columns=["b"],
            pd_method_kwargs={"width": 10},
        )

        x.transform(df)

        # Messages name SeriesStrMethodTransformer, the class under test.
        assert (
            x.new_column_name == x2.new_column_name
        ), "new_column_name changed by SeriesStrMethodTransformer.transform"
        assert (
            x.pd_method_name == x2.pd_method_name
        ), "pd_method_name changed by SeriesStrMethodTransformer.transform"
        assert (
            x.columns == x2.columns
        ), "columns changed by SeriesStrMethodTransformer.transform"
        assert (
            x.pd_method_kwargs == x2.pd_method_kwargs
        ), "pd_method_kwargs changed by SeriesStrMethodTransformer.transform"
class TestTransform(object):
    """Tests for SeriesDtMethodTransformer.transform()."""

    # The expected_df_* functions take no ``self``: they are called directly
    # inside the class body to build the pytest.mark.parametrize values.

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""
        df = d.create_datediff_test_df()

        df["a_year"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""
        df = d.create_datediff_test_df()

        df["a"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_3():
        """Expected output of test_expected_output_callable."""
        df = d.create_datediff_test_df()

        df["b_new"] = df["b"].dt.to_period("M")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=SeriesDtMethodTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="a2", pd_method_name="year", column="a"
        )

        expected_call_args = {
            0: {"args": (d.create_datediff_test_df(),), "kwargs": {}}
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""
        x = SeriesDtMethodTransformer(
            new_column_name="a_year",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # The tag names "year", the method this test actually exercises.
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""
        x = SeriesDtMethodTransformer(
            new_column_name="a",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # The tag names "year", the method this test actually exercises.
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with year, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_callable(self, df, expected):
        """Test transform gives expected results, when pd_method_name is a callable."""
        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in SeriesDtMethodTransformer.transform with to_period",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""
        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        # Identically configured transformer that is never transformed; it
        # serves as the reference for the attribute values set in init.
        x2 = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        x.transform(df)

        assert (
            x.new_column_name == x2.new_column_name
        ), "new_column_name changed by SeriesDtMethodTransformer.transform"
        assert (
            x.pd_method_name == x2.pd_method_name
        ), "pd_method_name changed by SeriesDtMethodTransformer.transform"
        assert (
            x.columns == x2.columns
        ), "columns changed by SeriesDtMethodTransformer.transform"
        assert (
            x.pd_method_kwargs == x2.pd_method_kwargs
        ), "pd_method_kwargs changed by SeriesDtMethodTransformer.transform"
def _expected_datediff_df(new_column_name, values):
    """Return the shared "a"/"b" datetime columns plus one expected difference column.

    Parameters
    ----------
    new_column_name : str
        Name of the difference column appended after "a" and "b".

    values : list
        Expected difference values, one per row (8 rows).

    Returns
    -------
    pd.DataFrame
        Frame with columns "a", "b" and ``new_column_name``, in that order.
    """
    return pd.DataFrame(
        {
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            new_column_name: values,
        }
    )


class TestTransform(object):
    """Tests for DateDifferenceTransformer.transform()."""

    # The expected_df_* functions take no ``self``: they are called directly
    # inside the class body to build the pytest.mark.parametrize values. The
    # shared-column builder lives at module level because functions defined
    # in a class body cannot see other class-body names.

    def expected_df_1():
        """Expected output for test_expected_output_units_Y."""
        return _expected_datediff_df(
            "Y",
            [
                26.59340677135105,
                19.76757257798535,
                0.0,
                0.08487511721664373,
                -0.08236536912690427,
                -2.915756882984136,
                -3.082769210410435,
                29.999247075573077,
            ],
        )

    def expected_df_2():
        """Expected output for test_expected_output_units_M."""
        return _expected_datediff_df(
            "M",
            [
                319.12088125621256,
                237.21087093582423,
                0.0,
                1.0185014065997249,
                -0.9883844295228512,
                -34.989082595809634,
                -36.993230524925224,
                359.9909649068769,
            ],
        )

    def expected_df_3():
        """Expected output for test_expected_output_units_D."""
        return _expected_datediff_df(
            "D",
            [
                9713.042372685186,
                7219.957627314815,
                0.0,
                31.0,
                -30.083333333333332,
                -1064.9583333333333,
                -1125.9583333333333,
                10957.0,
            ],
        )

    def expected_df_4():
        """Expected output for test_expected_output_units_h."""
        return _expected_datediff_df(
            "h",
            [
                233113.01694444445,
                173278.98305555555,
                0.0,
                744.0,
                -722.0,
                -25559.0,
                -27023.0,
                262968.0,
            ],
        )

    def expected_df_5():
        """Expected output for test_expected_output_units_m."""
        return _expected_datediff_df(
            "m",
            [
                13986781.016666668,
                10396738.983333332,
                0.0,
                44640.0,
                -43320.0,
                -1533540.0,
                -1621380.0,
                15778080.0,
            ],
        )

    def expected_df_6():
        """Expected output for test_expected_output_units_s."""
        return _expected_datediff_df(
            "s",
            [
                839206861.0,
                623804339.0,
                0.0,
                2678400.0,
                -2599200.0,
                -92012400.0,
                -97282800.0,
                946684800.0,
            ],
        )

    def expected_df_7():
        """Expected output for test_expected_output_nulls."""
        # np.nan rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame(
            {
                "a": [
                    datetime.datetime(1993, 9, 27, 11, 58, 58),
                    np.nan,
                ],
                "b": [
                    np.nan,
                    datetime.datetime(2019, 12, 25, 11, 58, 58),
                ],
                "Y": [
                    np.nan,
                    np.nan,
                ],
            },
            index=[0, 1],
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=DateDifferenceTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_datediff_test_df()

        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        expected_call_args = {
            0: {"args": (d.create_datediff_test_df(),), "kwargs": {}}
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_datediff_test_df(),
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_1()),
    )
    def test_expected_output_units_Y(self, df, expected):
        """Test that the output is expected from transform, when units is Y.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_2()),
    )
    def test_expected_output_units_M(self, df, expected):
        """Test that the output is expected from transform, when units is M.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="M",
            units="M",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_3()),
    )
    def test_expected_output_units_D(self, df, expected):
        """Test that the output is expected from transform, when units is D.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="D",
            units="D",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_4())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_4()),
    )
    def test_expected_output_units_h(self, df, expected):
        """Test that the output is expected from transform, when units is h.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="h",
            units="h",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_5())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_5()),
    )
    def test_expected_output_units_m(self, df, expected):
        """Test that the output is expected from transform, when units is m.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="m",
            units="m",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_6())
        + h.index_preserved_params(d.create_datediff_test_df(), expected_df_6()),
    )
    def test_expected_output_units_s(self, df, expected):
        """Test that the output is expected from transform, when units is s.

        This tests positive and negative gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="s",
            units="s",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_nulls_df(), expected_df_7())
        + h.index_preserved_params(d.create_datediff_test_nulls_df(), expected_df_7()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in DateDifferenceTransformer.transform (nulls)",
        )
class TestTransform(object):
    """Tests for BaseMappingTransformer.transform()."""

    @staticmethod
    def _build_mappings():
        """Return a freshly built mappings dict for columns "a" and "b".

        A new dict is created on every call so no state is shared between
        tests.
        """
        return {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

    def test_arguments(self):
        """Check the signature of transform is (self, X) with no defaults."""
        h.test_function_arguments(
            func=BaseMappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Check BaseTransformer.check_is_fitted is called with ["mappings"]."""
        input_df = d.create_df_1()
        transformer = BaseMappingTransformer(mappings=self._build_mappings())

        call_spec = {0: {"args": (["mappings"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", call_spec
        ):
            transformer.transform(input_df)

    def test_super_transform_call(self, mocker):
        """Check BaseTransformer.transform is called with the input frame."""
        input_df = d.create_df_1()
        transformer = BaseMappingTransformer(mappings=self._build_mappings())

        call_spec = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", call_spec
        ):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1())
        + h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_X_returned(self, df, expected):
        """Check transform hands back a frame equal to the expected one."""
        transformer = BaseMappingTransformer(mappings=self._build_mappings())

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check X returned from transform",
        )

    def test_mappings_unchanged(self):
        """Check the mappings attribute still equals the dict passed in."""
        original_mappings = self._build_mappings()
        transformer = BaseMappingTransformer(mappings=original_mappings)

        transformer.transform(d.create_df_1())

        h.assert_equal_dispatch(
            expected=original_mappings,
            actual=transformer.mappings,
            msg="BaseMappingTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for ToDatetimeTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output.

        Called without an instance from the parametrize decorator below, so
        it deliberately takes no ``self``.
        """
        # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame(
            {
                "a": [1950, 1960, 2000, 2001, np.nan, 2010],
                "b": [1, 2, 3, 4, 5, np.nan],
                "a_Y": [
                    datetime.datetime(1950, 1, 1),
                    datetime.datetime(1960, 1, 1),
                    datetime.datetime(2000, 1, 1),
                    datetime.datetime(2001, 1, 1),
                    pd.NaT,
                    datetime.datetime(2010, 1, 1),
                ],
                "b_m": [
                    datetime.datetime(1900, 1, 1),
                    datetime.datetime(1900, 2, 1),
                    datetime.datetime(1900, 3, 1),
                    datetime.datetime(1900, 4, 1),
                    datetime.datetime(1900, 5, 1),
                    pd.NaT,
                ],
            }
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=ToDatetimeTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform is as expected."""
        df = d.create_datediff_test_df()

        to_dt = ToDatetimeTransformer(column="a", new_column_name="Y")

        expected_call_args = {
            0: {"args": (d.create_datediff_test_df(),), "kwargs": {}}
        }

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_datediff_test_df(),
        ):
            to_dt.transform(df)

    def test_to_datetime_call(self, mocker):
        """Test the call to pd.to_datetime is as expected."""
        df = d.create_to_datetime_test_df()

        to_dt = ToDatetimeTransformer(
            column="a",
            new_column_name="a_Y",
            to_datetime_kwargs={"format": "%Y"},
        )

        expected_call_args = {
            0: {
                "args": (d.create_to_datetime_test_df()["a"],),
                "kwargs": {"format": "%Y"},
            }
        }

        with h.assert_function_call(
            mocker,
            pandas,
            "to_datetime",
            expected_call_args,
            return_value=pd.to_datetime(d.create_to_datetime_test_df()["a"]),
        ):
            to_dt.transform(df)

    def test_output_from_to_datetime_assigned_to_column(self, mocker):
        """Test that the output from pd.to_datetime is assigned to column with name new_column_name."""
        df = d.create_to_datetime_test_df()

        to_dt = ToDatetimeTransformer(
            column="a",
            new_column_name="a_new",
            to_datetime_kwargs={"format": "%Y"},
        )

        # Patch pd.to_datetime with a recognisable dummy output so the test
        # only checks where the result is stored, not how it is computed.
        to_datetime_output = [1, 2, 3, 4, 5, 6]

        mocker.patch("pandas.to_datetime", return_value=to_datetime_output)

        df_transformed = to_dt.transform(df)

        assert (
            df_transformed["a_new"].tolist() == to_datetime_output
        ), "unexpected values assigned to a_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_to_datetime_test_df(), expected_df_1())
        + h.index_preserved_params(d.create_to_datetime_test_df(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test input data is transformed as expected."""
        to_dt_1 = ToDatetimeTransformer(
            column="a",
            new_column_name="a_Y",
            to_datetime_kwargs={"format": "%Y"},
        )

        to_dt_2 = ToDatetimeTransformer(
            column="b",
            new_column_name="b_m",
            to_datetime_kwargs={"format": "%m"},
        )

        # Apply the two transformers in sequence; the second one works on the
        # output of the first.
        df_transformed = to_dt_1.transform(df)
        df_transformed = to_dt_2.transform(df_transformed)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="ToDatetimeTransformer.transform output",
        )
class TestTransform(object):
    """Tests for GroupRareLevelsTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output_no_weight.

        Called without an instance from the parametrize decorator below, so
        it deliberately takes no ``self``.
        """
        # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
        df = pd.DataFrame({"a": [1, 2, 3, 4, 5, 6, 7, 8, 9, np.nan]})

        df["b"] = pd.Series(
            ["a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan, np.nan]
        )

        df["c"] = pd.Series(
            ["a", "a", "c", "c", "e", "e", "rare", "rare", "rare", "rare"],
            dtype=pd.CategoricalDtype(
                categories=["a", "c", "e", "f", "g", "h", "rare"], ordered=False
            ),
        )

        return df

    def expected_df_2():
        """Expected output for test_expected_output_weight.

        Called without an instance from the parametrize decorator below, so
        it deliberately takes no ``self``.
        """
        df = pd.DataFrame(
            {
                "a": [2, 2, 2, 2, np.nan, 2, 2, 2, 3, 3],
                "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
                "c": ["a", "b", "c", "d", "f", "f", "f", "g", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        df["b"] = pd.Series(
            ["a", "a", "a", "rare", "rare", "rare", "rare", np.nan, np.nan, np.nan]
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=GroupRareLevelsTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (["mapping_"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_5()

        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_5(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_5(),
        ):
            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the mapping_ from fit are not changed in transform."""
        df = d.create_df_5()

        # x is only fitted; x2 is fitted and then used to transform, so any
        # difference between the two mapping_ attributes comes from transform.
        x = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = GroupRareLevelsTransformer(columns=["a", "b", "c"])

        x2.fit(df)

        x2.transform(df)

        h.assert_equal_dispatch(
            expected=x.mapping_,
            actual=x2.mapping_,
            msg="Non rare levels not changed in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_1())
        + h.index_preserved_params(d.create_df_5(), expected_df_1()),
    )
    def test_expected_output_no_weight(self, df, expected):
        """Test that the output is expected from transform."""
        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a"]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na(self):
        """Test output from a single row transform with np.nan value remains the same.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    def test_expected_output_no_weight_single_row_na_category_column(self):
        """Test output from a single row transform with np.nan value remains the same, when column is type category.

        The type is preserved if using an existing dataframe, so a new
        dataframe needs to be created.
        """
        one_row_df = pd.DataFrame({"b": [np.nan], "c": [np.nan]})
        one_row_df["c"] = one_row_df["c"].astype("category")

        # add rare as a category in dataframe; the result is assigned back
        # because the inplace argument of add_categories was removed in
        # pandas 2.0
        one_row_df["c"] = one_row_df["c"].cat.add_categories("rare")

        x = GroupRareLevelsTransformer(columns=["b", "c"], cut_off_percent=0.2)

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan], "c": ["e", "c", "a", np.nan]}

        one_row_df_transformed = x.transform(one_row_df)

        h.assert_frame_equal_msg(
            actual=one_row_df_transformed,
            expected=one_row_df,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_6(), expected_df_2())
        + h.index_preserved_params(d.create_df_6(), expected_df_2()),
    )
    def test_expected_output_weight(self, df, expected):
        """Test that the output is expected from transform, when weights are used."""
        x = GroupRareLevelsTransformer(
            columns=["b"], cut_off_percent=0.3, weight="a"
        )

        # set the mapping dict directly rather than fitting x on df so test works with decorators
        x.mapping_ = {"b": ["a", np.nan]}

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in GroupRareLevelsTransformer.transform (with weights)",
        )
class TestTransform(object):
    """Tests for LogTransformer.transform()."""

    # The expected_df_* helpers below are called without an instance from the
    # parametrize decorators, so they deliberately take no ``self``.

    def expected_df_1():
        """Expected output of test_expected_output_1."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        df.drop(columns=["a", "b"], inplace=True)

        return df

    def expected_df_3():
        """Expected output of test_expected_output_3."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"])
        df["b_new_col"] = np.log(df["b"])

        return df

    def expected_df_4():
        """Expected output of test_expected_output_4."""
        df = d.create_df_3()

        df["a_new_col"] = np.log(df["a"] + 1)
        df["b_new_col"] = np.log(df["b"] + 1)

        return df

    def expected_df_5():
        """Expected output of test_expected_output_5."""
        df = d.create_df_4()

        # change of base: log_5(x) = ln(x) / ln(5)
        df["a_new_col"] = np.log(df["a"] + 1) / np.log(5)

        return df

    def expected_df_6():
        """Expected output of test_expected_output_6."""
        df = d.create_df_4()

        # change of base: log_7(x) = ln(x) / ln(7)
        df["a_new_col"] = np.log(df["a"]) / np.log(7)

        df.drop("a", axis=1, inplace=True)

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=LogTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = LogTransformer(columns=["a", "b"])

        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            x.transform(df)

    def test_error_with_non_numeric_columns(self):
        """Test an exception is raised if transform is applied to non-numeric columns."""
        df = d.create_df_5()

        x = LogTransformer(columns=["a", "b", "c"])

        with pytest.raises(
            TypeError,
            match=r"The following columns are not numeric in X; \['b', 'c'\]",
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=False, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform not adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when adding one and dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=True, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform adding 1 and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_3())
        + h.index_preserved_params(d.create_df_3(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when not adding one and not dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=False, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # The message states the configuration actually exercised here
        # (add_1=False, drop=False).
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform not adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_4())
        + h.index_preserved_params(d.create_df_3(), expected_df_4()),
    )
    def test_expected_output_4(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns."""
        x1 = LogTransformer(
            columns=["a", "b"], add_1=True, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # The message states the configuration actually exercised here
        # (add_1=True, drop=False).
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_5())
        + h.index_preserved_params(d.create_df_4(), expected_df_5()),
    )
    def test_expected_output_5(self, df, expected):
        """Test that transform is giving the expected output when adding one and not dropping original columns and using base."""
        x1 = LogTransformer(
            columns=["a"], base=5, add_1=True, drop=False, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # The message states the configuration actually exercised here
        # (base=5, add_1=True, drop=False).
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform using base, adding 1 and not dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_6())
        + h.index_preserved_params(d.create_df_4(), expected_df_6()),
    )
    def test_expected_output_6(self, df, expected):
        """Test that transform is giving the expected output when not adding one and dropping original columns and using base."""
        x1 = LogTransformer(
            columns=["a"], base=7, add_1=False, drop=True, suffix="new_col"
        )

        df_transformed = x1.transform(df)

        # The message states the configuration actually exercised here
        # (base=7, add_1=False, drop=True).
        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="LogTransformer transform should be using base, not adding 1, and dropping original columns",
        )

    @pytest.mark.parametrize(
        "df, columns, add_1, extra_exception_text",
        (
            [pd.DataFrame({"a": [1, 2, 0]}), ["a"], False, ""],
            [
                pd.DataFrame({"a": [1, 2, 0], "b": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"a": [1, 2, -1]}),
                ["a"],
                True,
                r" \(after adding 1\)",
            ],
            [
                pd.DataFrame({"a": [1, 2, -1], "b": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
            [pd.DataFrame({"b": [1, 2, -0.001]}), ["b"], False, ""],
            [
                pd.DataFrame({"b": [1, 2, -0.001], "a": [1, 2, 3]}),
                ["a", "b"],
                False,
                "",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001]}),
                ["b"],
                True,
                r" \(after adding 1\)",
            ],
            [
                pd.DataFrame({"b": [1, 2, -1.001], "a": [1, 2, 3]}),
                ["a", "b"],
                True,
                r" \(after adding 1\)",
            ],
        ),
    )
    def test_negative_values_raise_exception(
        self, df, columns, add_1, extra_exception_text
    ):
        """Test that an exception is raised if zero or negative values (after any add_1 shift) are passed in transform."""
        x = LogTransformer(columns=columns, add_1=add_1, drop=True)

        # extra_exception_text is already regex-escaped in the parametrize
        # cases above, because pytest treats match as a regular expression.
        with pytest.raises(
            ValueError,
            match=f"values less than or equal to 0 in columns{extra_exception_text}, make greater than 0 before using transform",
        ):
            x.transform(df)
class TestTransform:
    """Tests for BaseImputer.transform."""

    # The expected_df_* helpers below are called without an instance from the
    # parametrize decorators, so they deliberately take no ``self``.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.

    def expected_df_1():
        """Expected output of test_expected_output_1."""
        df = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
                "b": ["a", "b", "c", "d", "e", "f", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def expected_df_2():
        """Expected output of test_expected_output_2."""
        df2 = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g"],
                "c": ["a", "b", "c", "d", "e", "f", np.nan],
            }
        )

        df2["c"] = df2["c"].astype("category")

        return df2

    def expected_df_3():
        """Expected output of test_expected_output_3."""
        df3 = pd.DataFrame(
            {
                "a": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g"],
                "c": ["a", "b", "c", "d", "e", "f", "f"],
            }
        )

        df3["c"] = df3["c"].astype("category")

        return df3

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=BaseImputer.transform, expected_arguments=["self", "X"]
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_1())
        + h.index_preserved_params(d.create_df_2(), expected_df_1()),
    )
    def test_expected_output_1(self, df, expected):
        """Test that transform is giving the expected output when applied to float column."""
        # columns and impute_values_ are set by hand, so no fit step is needed
        # before calling transform.
        x1 = BaseImputer()
        x1.columns = ["a"]
        x1.impute_values_ = {"a": 7}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col a",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_2())
        + h.index_preserved_params(d.create_df_2(), expected_df_2()),
    )
    def test_expected_output_2(self, df, expected):
        """Test that transform is giving the expected output when applied to object column."""
        x1 = BaseImputer()
        x1.columns = ["b"]
        x1.impute_values_ = {"b": "g"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_2(), expected_df_3())
        + h.index_preserved_params(d.create_df_2(), expected_df_3()),
    )
    def test_expected_output_3(self, df, expected):
        """Test that transform is giving the expected output when applied to object and categorical columns."""
        x1 = BaseImputer()
        x1.columns = ["b", "c"]
        x1.impute_values_ = {"b": "g", "c": "f"}

        df_transformed = x1.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="BaseImputer transform col b, c",
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_1()

        x = BaseImputer()
        x.columns = []

        expected_call_args = {0: {"args": (["impute_values_"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_2()

        x = BaseImputer()
        x.columns = []
        x.impute_values_ = {}

        expected_call_args = {0: {"args": (d.create_df_2(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            x.transform(df)
class TestTransform(object):
    """Tests for ModeImputer.transform()."""

    # The expected_df_* helpers below are called without an instance from the
    # parametrize decorators, so they deliberately take no ``self``.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.

    def expected_df_1():
        """Expected output for test_nulls_imputed_correctly."""
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
            }
        )

        for col in ["a", "b", "c"]:
            # Assign through df.loc in a single indexing step; the chained
            # df[col].loc[...] form is not guaranteed to write back into df.
            df.loc[df[col].isnull(), col] = df[col].mode()[0]

        return df

    def expected_df_2():
        """Expected output for test_nulls_imputed_correctly_2."""
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
            }
        )

        # Only column "a" is imputed; "b" and "c" keep their nulls.
        df.loc[df["a"].isnull(), "a"] = df["a"].mode()[0]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=ModeImputer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_called(self, mocker):
        """Test that BaseTransformer check_is_fitted called."""
        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {0: {"args": (["impute_values_"],), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_1()

        x = ModeImputer(columns="a")

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_nulls_imputed_correctly(self, df, expected):
        """Test missing values are filled with the correct values."""
        x = ModeImputer(columns=["a", "b", "c"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0, "b": 1.0, "c": -6.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_nulls_imputed_correctly_2(self, df, expected):
        """Test missing values are filled with the correct values - and unrelated columns are not changed."""
        x = ModeImputer(columns=["a"])

        # set the impute values dict directly rather than fitting x on df so test works with helpers
        x.impute_values_ = {"a": 1.0}

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Check nulls filled correctly in transform",
        )

    def test_learnt_values_not_modified(self):
        """Test that the impute_values_ from fit are not changed in transform."""
        df = d.create_df_3()

        # x is only fitted; x2 is fitted and then transforms, so any difference
        # between the two impute_values_ attributes comes from transform.
        x = ModeImputer(columns=["a", "b", "c"])

        x.fit(df)

        x2 = ModeImputer(columns=["a", "b", "c"])

        x2.fit_transform(df)

        h.assert_equal_dispatch(
            expected=x.impute_values_,
            actual=x2.impute_values_,
            msg="Impute values not changed in transform",
        )
class TestTransform(object):
    """Tests for DataFrameMethodTransformer.transform()."""

    # The expected_df_* helpers below are called without an instance from the
    # parametrize decorators, so they deliberately take no ``self``.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.

    def expected_df_1():
        """Expected output of test_expected_output_single_columns_assignment."""
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
                "d": [1.0, 3.0, 5.0, 3.0, 3.0, 3.0, 3.0],
            }
        )

        return df

    def expected_df_2():
        """Expected output of test_expected_output_multi_columns_assignment."""
        df = pd.DataFrame(
            {
                "a": [1, 2, 3, 4, 5, 6, np.nan],
                "b": [1, 2, 3, np.nan, 7, 8, 9],
                "c": [np.nan, 1, 2, 3, -4, -5, -6],
                "d": [0.5, 1.0, 1.5, np.nan, 3.5, 4.0, 4.5],
                "e": [np.nan, 0.5, 1.0, 1.5, -2.0, -2.5, -3.0],
            }
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=DataFrameMethodTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = DataFrameMethodTransformer(
            new_column_name="d", pd_method_name="sum", columns=["b", "c"]
        )

        expected_call_args = {0: {"args": (df.copy(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "transform", expected_call_args
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_single_columns_assignment(self, df, expected):
        """Test a single column output from transform gives expected results."""
        x = DataFrameMethodTransformer(
            new_column_name="d",
            pd_method_name="sum",
            columns=["b", "c"],
            pd_method_kwargs={"axis": 1},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer sum columns b and c",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_2())
        + h.index_preserved_params(d.create_df_3(), expected_df_2()),
    )
    def test_expected_output_multi_columns_assignment(self, df, expected):
        """Test a multiple column output from transform gives expected results."""
        x = DataFrameMethodTransformer(
            new_column_name=["d", "e"],
            pd_method_name="div",
            columns=["b", "c"],
            pd_method_kwargs={"other": 2},
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="DataFrameMethodTransformer divide by 2 columns b and c",
        )

    @pytest.mark.parametrize(
        "df, new_column_name, pd_method_name, columns, pd_method_kwargs",
        [
            (d.create_df_3(), ["d", "e"], "div", ["b", "c"], {"other": 2}),
            (d.create_df_3(), "d", "sum", ["b", "c"], {"axis": 1}),
            (d.create_df_3(), ["d", "e"], "cumprod", ["b", "c"], {"axis": 1}),
            (d.create_df_3(), ["d", "e", "f"], "mod", ["a", "b", "c"], {"other": 2}),
            (d.create_df_3(), ["d", "e", "f"], "le", ["a", "b", "c"], {"other": 0}),
            (d.create_df_3(), ["d", "e"], "abs", ["a", "b"], {}),
        ],
    )
    def test_pandas_method_called(
        self, mocker, df, new_column_name, pd_method_name, columns, pd_method_kwargs
    ):
        """Test that the pandas method is called as expected (with kwargs passed) during transform."""
        # Spy rather than mock, so the real pandas method still runs.
        spy = mocker.spy(pd.DataFrame, pd_method_name)

        x = DataFrameMethodTransformer(
            new_column_name=new_column_name,
            pd_method_name=pd_method_name,
            columns=columns,
            pd_method_kwargs=pd_method_kwargs,
        )

        x.transform(df)

        # pull out positional and keyword args to target the call
        call_args = spy.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        # test keyword are as expected
        h.assert_dict_equal_msg(
            actual=call_kwargs,
            expected=pd_method_kwargs,
            msg_tag=f"""Keyword arg assert for {pd_method_name}""",
        )

        # test positional args are as expected
        h.assert_list_tuple_equal_msg(
            actual=call_pos_args,
            expected=(df[columns],),
            msg_tag=f"""Positional arg assert for {pd_method_name}""",
        )
class TestTransform(object):
    """Tests for BaseTransformer.transform()."""

    def test_arguments(self):
        """Check the signature of transform is (self, X)."""
        h.test_function_arguments(
            func=BaseTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_columns_check_called(self, mocker):
        """Check transform calls self.columns_check with the input frame."""
        input_df = d.create_df_1()
        transformer = BaseTransformer(columns="a")

        call_spec = {0: {"args": (input_df,), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "columns_check", call_spec
        ):
            transformer.transform(X=input_df)

    def test_non_pd_type_error(self):
        """Check a ValueError is raised when X is not a pd.DataFrame."""
        transformer = BaseTransformer(columns="a")
        not_a_dataframe = [1, 2, 3, 4, 5, 6]

        with pytest.raises(ValueError):
            transformer.transform(X=not_a_dataframe)

    def test_df_copy_called(self, mocker):
        """Check pd.DataFrame.copy is called when copy is True."""
        input_df = d.create_df_1()
        transformer = BaseTransformer(columns="a", copy=True)

        call_spec = {0: {"args": (), "kwargs": {}}}

        with h.assert_function_call(
            mocker, pandas.DataFrame, "copy", call_spec, return_value=input_df
        ):
            transformer.transform(X=input_df)

    def test_no_rows_error(self):
        """Check a ValueError is raised when X has no rows."""
        transformer = BaseTransformer(columns="a")
        empty_df = pandas.DataFrame(columns=["a"])

        # re.escape because the expected message contains regex
        # metacharacters (the parentheses around the shape).
        expected_message = re.escape("X has no rows; (0, 1)")

        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(empty_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1())
        + h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_X_returned(self, df, expected):
        """Check transform hands back a frame equal to the expected one."""
        transformer = BaseTransformer(columns="a", copy=True)

        result = transformer.transform(X=df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="Check X returned from transform",
        )
class TestTransform(object):
    """Tests for MappingTransformer.transform()."""

    # The expected_df_* helpers below are called without an instance from the
    # parametrize decorators, so they deliberately take no ``self``.

    def expected_df_1():
        """Build the frame expected by test_expected_output."""
        return pd.DataFrame(
            {
                "a": ["a", "b", "c", "d", "e", "f"],
                "b": [1, 2, 3, 4, 5, 6],
            }
        )

    def expected_df_2():
        """Build the frame expected by test_non_specified_values_unchanged."""
        return pd.DataFrame(
            {
                "a": [5, 6, 7, 4, 5, 6],
                "b": ["z", "y", "x", "d", "e", "f"],
            }
        )

    def test_arguments(self):
        """Check the signature of transform is (self, X) with no defaults."""
        h.test_function_arguments(
            func=MappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_super_transform_call(self, mocker):
        """Check BaseMappingTransformMixin.transform is called once with (self, X)."""
        input_df = d.create_df_1()

        full_mappings = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        transformer = MappingTransformer(mappings=full_mappings)

        # Spy so the real mixin transform still runs while calls are recorded.
        transform_spy = mocker.spy(
            tubular.mapping.BaseMappingTransformMixin, "transform"
        )

        transformer.transform(input_df)

        assert (
            transform_spy.call_count == 1
        ), "unexpected number of calls to BaseMappingTransformMixin.transform"

        first_call = transform_spy.call_args_list[0]
        actual_positional = first_call[0]
        actual_keywords = first_call[1]

        wanted_keywords = {}

        assert (
            actual_keywords == wanted_keywords
        ), "unexpected kwargs in BaseMappingTransformMixin.transform call"

        wanted_positional = (transformer, d.create_df_1())

        assert (
            wanted_positional[0] == actual_positional[0]
        ), "unexpected 1st positional arg in BaseMappingTransformMixin.transform call"

        h.assert_equal_dispatch(
            wanted_positional[1],
            actual_positional[1],
            "unexpected 2ns positional arg in BaseMappingTransformMixin.transform call",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check transform output when every value has a mapping."""
        full_mappings = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        transformer = MappingTransformer(mappings=full_mappings)

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="expected output from mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2())
        + h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Check values without a mapping entry pass through transform untouched."""
        partial_mappings = {
            "a": {1: 5, 2: 6, 3: 7},
            "b": {"a": "z", "b": "y", "c": "x"},
        }

        transformer = MappingTransformer(mappings=partial_mappings)

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="expected output from mapping transformer",
        )

    def test_mappings_unchanged(self):
        """Check transform leaves the mappings attribute as it was."""
        input_df = d.create_df_1()

        full_mappings = {
            "a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"},
            "b": {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6},
        }

        # The expectation wraps each column's mapping in ReturnKeyDict before
        # comparing against the transformer's mappings attribute.
        wanted_mappings = {
            "a": ReturnKeyDict(full_mappings["a"]),
            "b": ReturnKeyDict(full_mappings["b"]),
        }

        transformer = MappingTransformer(mappings=full_mappings)

        transformer.transform(input_df)

        h.assert_equal_dispatch(
            actual=transformer.mappings,
            expected=wanted_mappings,
            msg="MappingTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for CutTransformer.transform()."""

    def expected_df_1():
        """Build the dataframe that test_expected_output should produce.

        Takes no ``self`` because it is invoked in the class body when the
        parametrize arguments are assembled.
        """
        expected = d.create_df_9()
        labels = ["c", "b", "a", "d", "e", "f"]
        expected["d"] = pd.Series(labels, dtype="category")

        return expected

    def test_arguments(self):
        """Check the signature of transform is (self, X)."""
        h.test_function_arguments(
            func=CutTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_call(self, mocker):
        """Check BaseTransformer.transform is called with the input dataframe."""
        input_df = d.create_df_9()
        transformer = CutTransformer(
            column="a", new_column_name="Y", cut_kwargs={"bins": 3}
        )

        expected_call_args = {0: {"args": (d.create_df_9(),), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_9(),
        )
        with call_check:
            transformer.transform(input_df)

    def test_pd_cut_call(self, mocker):
        """Check pd.cut receives the column to cut and the supplied cut_kwargs."""
        input_df = d.create_df_9()

        cut_kwargs = {"bins": 3, "right": False, "precision": 2}
        transformer = CutTransformer(
            column="a",
            new_column_name="a_cut",
            cut_kwargs=cut_kwargs,
        )

        # the expectation gets its own copy of the kwargs, independent of the
        # dict handed to the transformer
        expected_call_args = {
            0: {
                "args": (d.create_df_9()["a"],),
                "kwargs": dict(cut_kwargs),
            }
        }

        call_check = h.assert_function_call(
            mocker,
            pandas,
            "cut",
            expected_call_args,
            return_value=[1, 2, 3, 4, 5, 6],
        )
        with call_check:
            transformer.transform(input_df)

    def test_output_from_cut_assigned_to_column(self, mocker):
        """Check the pd.cut result lands in the column named by new_column_name."""
        input_df = d.create_df_9()
        transformer = CutTransformer(
            column="c", new_column_name="c_new", cut_kwargs={"bins": 2}
        )

        cut_output = [1, 2, 3, 4, 5, 6]
        mocker.patch("pandas.cut", return_value=cut_output)

        result = transformer.transform(input_df)
        assigned_values = result["c_new"].tolist()

        assert (
            assigned_values == cut_output
        ), "unexpected values assigned to c_new column"

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_9(), expected_df_1())
        + h.index_preserved_params(d.create_df_9(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check the transformed dataframe matches the expected dataframe."""
        cut_kwargs = {
            "bins": [0, 1, 2, 3, 4, 5, 6],
            "ordered": False,
            "labels": ["a", "b", "c", "d", "e", "f"],
        }
        transformer = CutTransformer(
            column="c", new_column_name="d", cut_kwargs=cut_kwargs
        )

        result = transformer.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=result,
            msg="CutTransformer.transform output",
        )

    def test_non_numeric_column_error(self):
        """Check a TypeError is raised when the column to cut is not numeric."""
        input_df = d.create_df_8()
        transformer = CutTransformer(column="b", new_column_name="d")

        expected_message = "b should be a numeric dtype but got object"
        with pytest.raises(TypeError, match=expected_message):
            transformer.transform(input_df)
class TestTransform(object):
    """Tests for CappingTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output_min_and_max_combinations.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        # np.nan rather than np.NaN: the NaN alias was removed in NumPy 2.0
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 5, np.nan],
                "b": [1, 2, 3, np.nan, 7, 7, 7],
                "c": [np.nan, 1, 2, 3, 0, 0, 0],
            }
        )

        return df

    def expected_df_2():
        """Expected output for test_non_cap_column_left_untouched.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        # np.nan rather than np.NaN: the NaN alias was removed in NumPy 2.0
        df = pd.DataFrame(
            {
                "a": [2, 2, 3, 4, 5, 6, 7, np.nan],
                "b": ["a", "b", "c", "d", "e", "f", "g", np.nan],
                "c": ["a", "b", "c", "d", "e", "f", "g", np.nan],
            }
        )

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=CappingTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_check_is_fitted_call_count(self, mocker):
        """Test there are 2 calls to BaseTransformer check_is_fitted in transform."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        with h.assert_function_call_count(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", 2
        ):
            x.transform(df)

    def test_check_is_fitted_call_1(self, mocker):
        """Test the args of both calls to BaseTransformer check_is_fitted in transform."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        # keyed by call index: call 0 checks capping_values, call 1 _replacement_values
        expected_call_args = {
            0: {"args": (["capping_values"],), "kwargs": {}},
            1: {"args": (["_replacement_values"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            x.transform(df)

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_df_3()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

        expected_call_args = {0: {"args": (d.create_df_3(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_3(),
        ):
            x.transform(df)

    def test_learnt_values_not_modified(self):
        """Test that the capping_values attribute is not changed in transform."""
        capping_values_dict = {"a": [2, 5], "b": [-1, 8]}

        df = d.create_df_3()

        x = CappingTransformer(capping_values_dict)

        x.transform(df)

        h.test_object_attributes(
            obj=x,
            expected_attributes={"capping_values": capping_values_dict},
            msg="Attributes for CappingTransformer set in init",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_3(), expected_df_1())
        + h.index_preserved_params(d.create_df_3(), expected_df_1()),
    )
    def test_expected_output_min_and_max_combinations(self, df, expected):
        """Test that capping is applied correctly in transform.

        Column a has both a min and max cap, b only a max and c only a min.
        """
        x = CappingTransformer(
            capping_values={"a": [2, 5], "b": [None, 7], "c": [0, None]}
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_4(), expected_df_2())
        + h.index_preserved_params(d.create_df_4(), expected_df_2()),
    )
    def test_non_cap_column_left_untouched(self, df, expected):
        """Test that capping is applied only to specific columns, others remain the same."""
        x = CappingTransformer(capping_values={"a": [2, 10]})

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
        )

    def test_non_numeric_column_error(self):
        """Test that transform will raise an error if a column to transform is not numeric."""
        df = d.create_df_5()

        x = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]})

        # raw string: the brackets in the message are escaped for the regex match
        with pytest.raises(
            TypeError, match=r"The following columns are not numeric in X; \['b', 'c'\]"
        ):
            x.transform(df)

    def test_quantile_not_fit_error(self):
        """Test that transform will raise an error if quantiles are specified in init but fit is not run before calling transform."""
        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        with pytest.raises(
            ValueError,
            match="capping_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):
            x.transform(df)

    def test_replacement_values_dict_not_set_error(self):
        """Test that transform will raise an error if _replacement_values is an empty dict."""
        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        # manually set attribute to get past the capping_values attribute is an empty dict exception
        x.capping_values = {"a": [1, 4]}

        with pytest.raises(
            ValueError,
            match="_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet",
        ):
            x.transform(df)

    def test_attributes_unchanged_from_transform(self):
        """Test that attributes are unchanged after transform is run.

        Two identically configured transformers are fit on the same data; only
        the second runs transform, then their attributes are compared.
        """
        df = d.create_df_9()

        x = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x.fit(df)

        x2 = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

        x2.fit(df)

        x2.transform(df)

        assert (
            x.capping_values == x2.capping_values
        ), "capping_values attribute modified in transform"
        assert (
            x._replacement_values == x2._replacement_values
        ), "_replacement_values attribute modified in transform"
        assert (
            x.weights_column == x2.weights_column
        ), "weights_column attribute modified in transform"
        assert x.quantiles == x2.quantiles, "quantiles attribute modified in transform"
class TestTransform(object):
    """Tests for the transform method on CrossColumnMappingTransformer."""

    def expected_df_1():
        """Build the expected dataframe for test_expected_output.

        Takes no ``self`` because it is invoked in the class body when the
        parametrize arguments are assembled.
        """
        columns = {
            "a": [1, 2, 3, 4, 5, 6],
            "b": ["aa", "bb", "cc", "dd", "ee", "ff"],
        }

        return pd.DataFrame(columns)

    def expected_df_2():
        """Build the expected dataframe for test_non_specified_values_unchanged.

        Takes no ``self`` because it is invoked in the class body when the
        parametrize arguments are assembled.
        """
        columns = {
            "a": [1, 2, 3, 4, 5, 6],
            "b": ["aa", "bb", "cc", "d", "e", "f"],
        }

        return pd.DataFrame(columns)

    def expected_df_3():
        """Build the expected dataframe for test_multiple_mappings_ordered_dict.

        Takes no ``self`` because it is invoked in the class body when the
        parametrize arguments are assembled.
        """
        columns = {
            "a": [4, 2, 2, 1, 3],
            "b": ["x", "z", "y", "x", "x"],
            "c": ["cc", "dd", "bb", "cc", "cc"],
        }

        return pd.DataFrame(columns)

    def test_arguments(self):
        """Check the signature of transform is (self, X) with no defaults."""
        h.test_function_arguments(
            func=CrossColumnMappingTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Check check_is_fitted is called with ["adjust_column"]."""
        input_df = d.create_df_1()

        mapping = {"a": {1: "a", 2: "b", 3: "c", 4: "d", 5: "e", 6: "f"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="b"
        )

        expected_call_args = {0: {"args": (["adjust_column"],), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        )
        with call_check:
            transformer.transform(input_df)

    def test_super_transform_call(self, mocker):
        """Check BaseMappingTransformer.transform is called with the input dataframe."""
        input_df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="b"
        )

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.mapping.BaseMappingTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_1(),
        )
        with call_check:
            transformer.transform(input_df)

    def test_adjust_col_not_in_x_error(self):
        """Check a ValueError is raised when adjust_column is missing from the dataframe."""
        input_df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="c"
        )

        expected_message = "variable c is not in X"
        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check the output when every value of column a has a mapping."""
        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="b"
        )

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2())
        + h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Check rows whose value has no mapping keep their original adjust_column value."""
        mapping = {"a": {1: "aa", 2: "bb", 3: "cc"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="b"
        )

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_3())
        + h.index_preserved_params(d.create_df_7(), expected_df_3()),
    )
    def test_multiple_mappings_ordered_dict(self, df, expected):
        """Check the output when mappings for two columns are supplied as an OrderedDict."""
        mapping = OrderedDict(
            [
                ("a", {1: "aa", 2: "bb"}),
                ("b", {"x": "cc", "z": "dd"}),
            ]
        )
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="c"
        )

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="expected output from cross column mapping transformer",
        )

    def test_mappings_unchanged(self):
        """Check the mappings attribute still equals the supplied mapping after transform."""
        input_df = d.create_df_1()

        mapping = {"a": {1: "aa", 2: "bb", 3: "cc", 4: "dd", 5: "ee", 6: "ff"}}
        transformer = CrossColumnMappingTransformer(
            mappings=mapping, adjust_column="b"
        )

        transformer.transform(input_df)

        h.assert_equal_dispatch(
            expected=mapping,
            actual=transformer.mappings,
            msg="CrossColumnMappingTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for OrdinalEncoderTransformer.transform()."""

    def expected_df_1():
        """Build the expected dataframe for test_expected_output.

        Takes no ``self`` because it is invoked in the class body when the
        parametrize arguments are assembled.
        """
        columns = {
            "a": [1, 2, 3, 4, 5, 6],
            "b": [1, 2, 3, 4, 5, 6],
            "c": ["a", "b", "c", "d", "e", "f"],
            "d": [1, 2, 3, 4, 5, 6],
            "e": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
            "f": [1, 1, 1, 2, 2, 2],
        }
        expected = pd.DataFrame(columns)
        expected["c"] = expected["c"].astype("category")

        return expected

    def test_arguments(self):
        """Check the signature of transform is (self, X)."""
        h.test_function_arguments(
            func=OrdinalEncoderTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Check BaseNominalTransformer.check_mappable_rows is called with the input dataframe."""
        input_df = d.create_OrdinalEncoderTransformer_test_df()

        transformer = OrdinalEncoderTransformer(response_column="a", columns="b")
        transformer.fit(input_df)

        expected_call_args = {0: {"args": (input_df,), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.nominal.BaseNominalTransformer,
            "check_mappable_rows",
            expected_call_args,
        )
        with call_check:
            transformer.transform(input_df)

    def test_super_transform_called(self, mocker):
        """Check BaseMappingTransformMixin.transform is called with the transformer and the dataframe."""
        input_df = d.create_OrdinalEncoderTransformer_test_df()

        transformer = OrdinalEncoderTransformer(response_column="a", columns="b")
        transformer.fit(input_df)

        expected_args = (
            transformer,
            d.create_OrdinalEncoderTransformer_test_df(),
        )
        expected_call_args = {0: {"args": expected_args, "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.mapping.BaseMappingTransformMixin,
            "transform",
            expected_call_args,
            return_value=d.create_OrdinalEncoderTransformer_test_df(),
        )
        with call_check:
            transformer.transform(input_df)

    def test_learnt_values_not_modified(self):
        """Check mappings learnt in fit are the same whether or not transform is run.

        Two identically configured transformers are fit on the same data; only
        the second runs transform, then their mappings are compared.
        """
        input_df = d.create_OrdinalEncoderTransformer_test_df()

        reference = OrdinalEncoderTransformer(response_column="a", columns="b")
        reference.fit(input_df)

        transformed = OrdinalEncoderTransformer(response_column="a", columns="b")
        transformed.fit(input_df)
        transformed.transform(input_df)

        h.assert_equal_dispatch(
            expected=reference.mappings,
            actual=transformed.mappings,
            msg="Mean response values not changed in transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_OrdinalEncoderTransformer_test_df(), expected_df_1())
        + h.index_preserved_params(d.create_OrdinalEncoderTransformer_test_df(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check the transformed dataframe matches the expected dataframe."""
        transformer = OrdinalEncoderTransformer(
            response_column="a", columns=["b", "d", "f"]
        )

        # mappings are assigned directly instead of being learnt by fit, so
        # the test works with the parametrize helpers
        mappings_for_b = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6}
        mappings_for_d = {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6}
        mappings_for_f = {False: 1, True: 2}
        transformer.mappings = {
            "b": mappings_for_b,
            "d": mappings_for_d,
            "f": mappings_for_f,
        }

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="Unexpected values in OrdinalEncoderTransformer.transform",
        )

    def test_nulls_introduced_in_transform_error(self):
        """Check a ValueError is raised when column b holds a level not present at fit."""
        input_df = d.create_OrdinalEncoderTransformer_test_df()

        transformer = OrdinalEncoderTransformer(
            response_column="a", columns=["b", "d", "f"]
        )
        transformer.fit(input_df)

        # overwrite column b after fitting with a single constant value
        input_df["b"] = "z"

        expected_message = (
            "nulls would be introduced into column b from levels not present in mapping"
        )
        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(input_df)
class TestTransform(object):
    """Tests for the transform method on CrossColumnAddTransformer."""

    def expected_df_1():
        """Expected output from test_expected_output.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        df = pd.DataFrame({
            "a": [2.1, 3.2, 4.3, 5.4, 6.5, 7.6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_2():
        """Expected output from test_non_specified_values_unchanged.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        df = pd.DataFrame({
            "a": [2.1, 3.2, 3, 4, 5, 6],
            "b": ["a", "b", "c", "d", "e", "f"]
        })

        return df

    def expected_df_3():
        """Expected output from test_multiple_mappings_expected_output.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        # np.nan rather than np.NaN: the NaN alias was removed in NumPy 2.0
        df = pd.DataFrame({
            "a": [4.1, 5.1, 4.1, 4, 8, 10.2, 7, 8, 9, np.nan],
            "b": ["a", "a", "a", "d", "e", "f", "g", np.nan, np.nan, np.nan],
            "c": ["a", "a", "c", "c", "e", "e", "f", "g", "h", np.nan],
        })

        df["c"] = df["c"].astype("category")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=CrossColumnAddTransformer.transform,
            expected_arguments=["self", "X"],
            expected_default_values=None,
        )

    def test_check_is_fitted_call(self, mocker):
        """Test the call to check_is_fitted."""
        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (["adjust_column"], ), "kwargs": {}}}

        with h.assert_function_call(mocker, tubular.base.BaseTransformer,
                                    "check_is_fitted", expected_call_args):
            x.transform(df)

    def test_super_transform_call(self, mocker):
        """Test the call to BaseTransformer.transform."""
        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        expected_call_args = {0: {"args": (d.create_df_1(), ), "kwargs": {}}}

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_df_1(),
        ):
            x.transform(df)

    def test_adjust_col_not_in_x_error(self):
        """Test that an exception is raised if the adjust_column is not present in the dataframe."""
        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(ValueError, match="variable c is not in X"):
            x.transform(df)

    def test_adjust_col_not_numeric_error(self):
        """Test that an exception is raised if the adjust_column is not numeric."""
        df = d.create_df_2()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="c")

        with pytest.raises(TypeError,
                           match="variable c must have numeric dtype."):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1()) +
        h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Test that transform is giving the expected output."""
        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_2()) +
        h.index_preserved_params(d.create_df_1(), expected_df_2()),
    )
    def test_non_specified_values_unchanged(self, df, expected):
        """Test that values not specified in mappings are left unchanged in transform."""
        mapping = {"b": {"a": 1.1, "b": 1.2}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_5(), expected_df_3()) +
        h.index_preserved_params(d.create_df_5(), expected_df_3()),
    )
    def test_multiple_mappings_expected_output(self, df, expected):
        """Test that mappings by multiple columns are both applied in transform"""
        mapping = {"b": {"a": 1.1, "f": 1.2}, "c": {"a": 2, "e": 3}}

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag="expected output from cross column add transformer",
        )

    def test_mappings_unchanged(self):
        """Test that mappings is unchanged in transform."""
        df = d.create_df_1()

        mapping = {
            "b": {
                "a": 1.1,
                "b": 1.2,
                "c": 1.3,
                "d": 1.4,
                "e": 1.5,
                "f": 1.6
            }
        }

        x = CrossColumnAddTransformer(mappings=mapping, adjust_column="a")

        x.transform(df)

        h.assert_equal_dispatch(
            expected=mapping,
            actual=x.mappings,
            msg=
            "CrossColumnAddTransformer.transform has changed self.mappings unexpectedly",
        )
class TestTransform(object):
    """Tests for NominalToIntegerTransformer.transform()."""

    def expected_df_1():
        """Build the expected dataframe for test_expected_output.

        Each column has its values replaced by their position in that column's
        unique() output. Takes no ``self`` because it is invoked in the class
        body when the parametrize arguments are assembled.
        """
        expected = pd.DataFrame({
            "a": [1, 2, 3, 4, 5, 6],
            "b": ["a", "b", "c", "d", "e", "f"],
        })

        for column in ["a", "b"]:
            level_codes = {
                level: code
                for code, level in enumerate(expected[column].unique())
            }
            expected[column] = expected[column].replace(level_codes)

        return expected

    def test_arguments(self):
        """Check the signature of transform is (self, X)."""
        h.test_function_arguments(
            func=NominalToIntegerTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Check BaseTransformer.check_is_fitted is called with ["mappings"]."""
        input_df = d.create_df_1()

        transformer = NominalToIntegerTransformer(columns=["a", "b"])
        transformer.fit(input_df)

        expected_call_args = {0: {"args": (["mappings"],), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        )
        with call_check:
            transformer.transform(input_df)

    def test_super_transform_called(self, mocker):
        """Check BaseTransformer.transform is called with the input dataframe."""
        input_df = d.create_df_1()

        transformer = NominalToIntegerTransformer(columns="a")
        transformer.fit(input_df)

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=d.create_df_1(),
        )
        with call_check:
            transformer.transform(input_df)

    def test_learnt_values_not_modified(self):
        """Check mappings after fit equal mappings after fit_transform on the same data."""
        input_df = d.create_df_1()

        fit_only = NominalToIntegerTransformer(columns=["a", "b"])
        fit_only.fit(input_df)

        fit_and_transformed = NominalToIntegerTransformer(columns=["a", "b"])
        fit_and_transformed.fit_transform(input_df)

        h.assert_equal_dispatch(
            expected=fit_only.mappings,
            actual=fit_and_transformed.mappings,
            msg="Impute values not changed in transform",
        )

    def test_non_mappable_rows_raises_error(self):
        """Check a ValueError is raised when column a is changed after fit."""
        input_df = d.create_df_1()

        transformer = NominalToIntegerTransformer(columns=["a", "b"])
        transformer.fit(input_df)

        # shift column a after fitting
        input_df["a"] = input_df["a"] + 1

        expected_message = (
            "nulls would be introduced into column a from levels not present in mapping"
        )
        with pytest.raises(ValueError, match=expected_message):
            transformer.transform(input_df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), expected_df_1())
        + h.index_preserved_params(d.create_df_1(), expected_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check the transformed dataframe matches the expected dataframe."""
        transformer = NominalToIntegerTransformer(columns=["a", "b"])

        # mappings are assigned directly instead of being learnt by fit, so
        # the test works with the parametrize helpers
        mappings_for_a = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5}
        mappings_for_b = {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5}
        transformer.mappings = {"a": mappings_for_a, "b": mappings_for_b}

        result = transformer.transform(df)

        h.assert_frame_equal_msg(
            actual=result,
            expected=expected,
            msg_tag="Unexpected values in NominalToIntegerTransformer.transform",
        )
class TestInverseTransform(object):
    """Tests for NominalToIntegerTransformer.inverse_transform()."""

    def test_arguments(self):
        """Check the signature of inverse_transform is (self, X)."""
        h.test_function_arguments(
            func=NominalToIntegerTransformer.inverse_transform,
            expected_arguments=["self", "X"],
        )

    def test_check_is_fitted_called(self, mocker):
        """Check BaseTransformer.check_is_fitted is called with ["mappings"] in inverse_transform."""
        input_df = d.create_df_1()

        transformer = NominalToIntegerTransformer(columns=["a", "b"])
        transformer.fit(input_df)

        # transform runs before the call check is set up, so only the
        # inverse_transform call happens inside it
        encoded = transformer.transform(input_df)

        expected_call_args = {0: {"args": (["mappings"],), "kwargs": {}}}

        call_check = h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "check_is_fitted",
            expected_call_args,
        )
        with call_check:
            transformer.inverse_transform(encoded)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_df_1(), d.create_df_1())
        + h.index_preserved_params(d.create_df_1(), d.create_df_1()),
    )
    def test_expected_output(self, df, expected):
        """Check transform followed by inverse_transform gives back the original dataframe."""
        transformer = NominalToIntegerTransformer(columns=["a", "b"])

        # mappings are assigned directly instead of being learnt by fit, so
        # the test works with the parametrize helpers
        mappings_for_a = {1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5}
        mappings_for_b = {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5}
        transformer.mappings = {"a": mappings_for_a, "b": mappings_for_b}

        encoded = transformer.transform(df)
        round_tripped = transformer.inverse_transform(encoded)

        h.assert_frame_equal_msg(
            actual=round_tripped,
            expected=expected,
            msg_tag="transform reverse does not get back to original",
        )

    def test_non_mappable_rows_raises_error(self):
        """Check a ValueError is raised when column b is changed before inverse_transform."""
        transformer = NominalToIntegerTransformer(columns=["a", "b"])

        input_df = d.create_df_1()
        transformer.fit(input_df)

        encoded = transformer.transform(input_df)

        # shift the encoded values of column b before inverting
        encoded["b"] = encoded["b"] + 1

        expected_message = (
            "nulls introduced from levels not present in mapping for column: b"
        )
        with pytest.raises(ValueError, match=expected_message):
            transformer.inverse_transform(encoded)

    def test_learnt_values_not_modified(self):
        """Check mappings learnt in fit are the same whether or not inverse_transform is run.

        Two identically configured transformers are fit on the same data; only
        the second runs transform and inverse_transform, then their mappings
        are compared.
        """
        input_df = d.create_df_1()

        reference = NominalToIntegerTransformer(columns=["a", "b"])
        reference.fit(input_df)

        exercised = NominalToIntegerTransformer(columns=["a", "b"])
        exercised.fit(input_df)

        encoded = exercised.transform(input_df)
        exercised.inverse_transform(encoded)

        h.assert_equal_dispatch(
            expected=reference.mappings,
            actual=exercised.mappings,
            msg="Impute values not changed in inverse_transform",
        )
class TestTransform(object):
    """Tests for OneHotEncodingTransformer.transform()."""

    def expected_df_1():
        """Expected output for test_expected_output.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        df = pd.DataFrame(
            {
                "a": [4, 2, 2, 1, 3],
                "b": ["x", "z", "y", "x", "x"],
                "c": ["c", "a", "a", "c", "b"],
            }
        )

        df["c"] = df["c"].astype("category")

        df["b_x"] = [1.0, 0.0, 0.0, 1.0, 1.0]
        df["b_y"] = [0.0, 0.0, 1.0, 0.0, 0.0]
        df["b_z"] = [0.0, 1.0, 0.0, 0.0, 0.0]

        return df

    def expected_df_2():
        """Expected output for test_unseen_categories_encoded_as_all_zeroes.

        Takes no ``self``: it is called directly in the class body to build the
        parametrize arguments.
        """
        df = pd.DataFrame(
            {
                "a": [1, 5, 2, 3, 3],
                "b": ["w", "w", "z", "y", "x"],
                "c": ["a", "a", "c", "b", "a"],
            },
            index=[10, 15, 200, 251, 59],
        )

        df["c"] = df["c"].astype("category")

        df["a_1"] = [1.0, 0.0, 0.0, 0.0, 0.0]
        df["a_2"] = [0.0, 0.0, 1.0, 0.0, 0.0]
        df["a_3"] = [0.0, 0.0, 0.0, 1.0, 1.0]
        df["a_4"] = [0.0, 0.0, 0.0, 0.0, 0.0]
        df["b_x"] = [0.0, 0.0, 0.0, 0.0, 1.0]
        df["b_y"] = [0.0, 0.0, 0.0, 1.0, 0.0]
        df["b_z"] = [0.0, 0.0, 1.0, 0.0, 0.0]
        df["c_a"] = [1.0, 1.0, 0.0, 0.0, 1.0]
        df["c_b"] = [0.0, 0.0, 0.0, 1.0, 0.0]
        df["c_c"] = [0.0, 0.0, 1.0, 0.0, 0.0]

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=OneHotEncodingTransformer.transform, expected_arguments=["self", "X"]
        )

    def test_columns_check_call(self, mocker):
        """Test the first call to BaseTransformer columns_check."""
        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        expected_call_args = {0: {"args": (d.create_df_1(),), "kwargs": {}}}

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "columns_check", expected_call_args
        ):
            x.transform(df)

    def test_check_is_fitted_first_call(self, mocker):
        """Test the calls to BaseTransformer check_is_fitted."""
        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        # keyed by call index: call 0 checks separator, call 1 drop_original
        expected_call_args = {
            0: {"args": (["separator"],), "kwargs": {}},
            1: {"args": (["drop_original"],), "kwargs": {}},
        }

        with h.assert_function_call(
            mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
        ):
            x.transform(df)

    def test_non_numeric_column_error_1(self):
        """Test that transform will raise an error if a column to transform has nulls."""
        df_train = d.create_df_1()
        df_test = d.create_df_2()

        x = OneHotEncodingTransformer(columns=["b"])

        x.fit(df_train)

        with pytest.raises(
            ValueError, match="column b has nulls - replace before proceeding"
        ):
            x.transform(df_test)

    def test_base_nominal_transformer_transform_called(self, mocker):
        """Test that BaseNominalTransformer.transform called.

        Checks there is exactly one call, with no kwargs, and with positional
        args equal to the transformer itself and the input dataframe.
        """
        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        mocker.patch(
            "tubular.nominal.BaseNominalTransformer.transform",
            return_value=d.create_df_1(),
        )

        x.transform(df)

        assert (
            tubular.nominal.BaseNominalTransformer.transform.call_count == 1
        ), f"Not enough calls to BaseNominalTransformer.transform -\n Expected: 1\n Actual: {tubular.nominal.BaseNominalTransformer.transform.call_count}"

        call_args = tubular.nominal.BaseNominalTransformer.transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for BaseNominalTransformer.transform in OneHotEncodingTransformer.init",
        )

        expected_pos_args = (x, d.create_df_1())

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in BaseNominalTransformer.transform call -\n Expected: 2\n Actual: {len(call_pos_args)}"

        h.assert_frame_equal_msg(
            expected=expected_pos_args[1],
            actual=call_pos_args[1],
            msg_tag="X positional arg in BaseNominalTransformer.transform call",
        )

        assert (
            expected_pos_args[0] == call_pos_args[0]
        ), "self positional arg in BaseNominalTransformer.transform call"

    def test_one_hot_encoder_transform_called(self, mocker):
        """Test that OneHotEncoder.transform called.

        Checks there is exactly one call, with no kwargs, and with positional
        args being the transformer itself and the single-column frame for b.
        """
        df = d.create_df_1()

        x = OneHotEncodingTransformer(columns="b")

        x.fit(df)

        mocker.patch("sklearn.preprocessing.OneHotEncoder.transform")

        x.transform(df)

        assert (
            sklearn.preprocessing.OneHotEncoder.transform.call_count == 1
        ), f"Not enough calls to OneHotEncoder.transform -\n Expected: 1\n Actual: {sklearn.preprocessing.OneHotEncoder.transform.call_count}"

        call_args = sklearn.preprocessing.OneHotEncoder.transform.call_args_list[0]
        call_pos_args = call_args[0]
        call_kwargs = call_args[1]

        h.assert_equal_dispatch(
            expected={},
            actual=call_kwargs,
            msg="kwargs for OneHotEncodingTransformer.transform in BaseTransformer.init",
        )

        assert (
            len(call_pos_args) == 2
        ), f"Unexpected number of positional args in OneHotEncodingTransformer.transform call -\n Expected: 2\n Actual: {len(call_pos_args)}"

        assert (
            call_pos_args[0] is x
        ), f"Unexpected positional arg (self, index 1) in OneHotEncodingTransformer.transform call -\n Expected: self\n Actual: {call_pos_args[0]}"

        h.assert_frame_equal_msg(
            expected=d.create_df_1()[["b"]],
            actual=call_pos_args[1],
            msg_tag="X positional arg in OneHotEncodingTransformer.transform call",
        )

    @pytest.mark.parametrize(
        "df_test, expected",
        h.row_by_row_params(d.create_df_7(), expected_df_1())
        + h.index_preserved_params(d.create_df_7(), expected_df_1()),
    )
    def test_expected_output(self, df_test, expected):
        """Test that OneHotEncodingTransformer.transform encodes the feature correctly.

        Also tests that OneHotEncodingTransformer.transform does not modify unrelated columns.
        """
        # transformer is fit on the whole dataset separately from the input df to work with the decorators
        df_train = d.create_df_7()
        x = OneHotEncodingTransformer(columns="b")
        x.fit(df_train)

        df_transformed = x.transform(df_test)

        h.assert_frame_equal_msg(
            expected=expected,
            actual=df_transformed,
            msg_tag="Unspecified columns changed in transform",
        )

    def test_categories_not_modified(self):
        """Test that the categories from fit are not changed in transform.

        Two identically configured transformers are fit on the same data; only
        the first runs transform, then their categories_ are compared.
        """
        df_train = d.create_df_1()
        df_test = d.create_df_7()

        x = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)
        x2 = OneHotEncodingTransformer(columns=["a", "b"], verbose=False)

        x.fit(df_train)
        x2.fit(df_train)

        x.transform(df_test)

        h.assert_equal_dispatch(
            expected=list(x2.categories_[0]),
            actual=list(x.categories_[0]),
            msg="categories_ (index 0) modified during transform",
        )

        h.assert_equal_dispatch(
            expected=list(x2.categories_[1]),
            actual=list(x.categories_[1]),
            msg="categories_ (index 1) modified during transform",
        )

    def test_renaming_feature_works_as_expected(self):
        """Test OneHotEncodingTransformer.transform() is renaming features correctly."""
        df = d.create_df_7()
        df = df[["b", "c"]]

        x = OneHotEncodingTransformer(
            columns=["b", "c"], separator="|", drop_original=True
        )

        x.fit(df)

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=["b|x", "b|y", "b|z", "c|a", "c|b", "c|c"],
            actual=list(df_transformed.columns.values),
            msg="renaming columns feature in OneHotEncodingTransformer.transform",
        )

    def test_warning_generated_by_unseen_categories(self):
        """Test OneHotEncodingTransformer.transform triggers a warning for unseen categories."""
        df_train = d.create_df_7()
        df_test = d.create_df_8()

        x = OneHotEncodingTransformer(verbose=True)

        x.fit(df_train)

        with pytest.warns(Warning):
            x.transform(df_test)

    @pytest.mark.parametrize(
        "df_test, expected",
        h.row_by_row_params(d.create_df_8(), expected_df_2())
        + h.index_preserved_params(d.create_df_8(), expected_df_2()),
    )
    def test_unseen_categories_encoded_as_all_zeroes(self, df_test, expected):
        """Test OneHotEncodingTransformer.transform encodes unseen categories correctly (all 0s)."""
        # transformer is fit on the whole dataset separately from the input df to work with the decorators
        df_train = d.create_df_7()
        x = OneHotEncodingTransformer(columns=["a", "b", "c"], verbose=False)
        x.fit(df_train)

        df_transformed = x.transform(df_test)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="unseen category rows not encoded as 0s",
        )

    def test_original_columns_dropped_when_specified(self):
        """Test OneHotEncodingTransformer.transform drops original columns when specified."""
        df = d.create_df_7()

        x = OneHotEncodingTransformer(columns=["a", "b", "c"], drop_original=True)

        x.fit(df)

        df_transformed = x.transform(df)

        # input columns that no longer appear in the output; the loop variable
        # is named col so it does not shadow the transformer x
        h.assert_equal_dispatch(
            expected=["a", "b", "c"],
            actual=[
                col
                for col in df.columns.values
                if col not in df_transformed.columns.values
            ],
            msg="original columns not dropped",
        )

    def test_original_columns_kept_when_specified(self):
        """Test OneHotEncodingTransformer.transform keeps original columns when specified."""
        df = d.create_df_7()

        x = OneHotEncodingTransformer(drop_original=False)

        x.fit(df)

        df_transformed = x.transform(df)

        # original columns missing from the output; expected to be none
        h.assert_equal_dispatch(
            expected=[],
            actual=list({"a", "b", "c"} - set(df_transformed.columns)),
            msg="original columns not kept",
        )