def test_super_transform_called(self, mocker):
    """Check that DateDifferenceTransformer.transform calls BaseTransformer.transform.

    The parent transform is expected to be called once, positionally, with
    a frame equal to the test input and with no keyword arguments.
    """
    input_df = d.create_datediff_test_df()

    transformer = DateDifferenceTransformer(
        column_lower="a",
        column_upper="b",
        new_column_name="Y",
        units="Y",
        copy=True,
        verbose=False,
    )

    # Expected arguments of call number 0; built from a fresh frame rather
    # than reusing ``input_df``.
    first_call = {"args": (d.create_datediff_test_df(), ), "kwargs": {}}
    expected_call_args = {0: first_call}

    # Value handed to the helper as ``return_value`` (presumably what the
    # patched parent transform returns -- confirm against the helper).
    mocked_return = d.create_datediff_test_df()

    with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=mocked_return,
    ):
        transformer.transform(input_df)
def test_attributes_unchanged_by_transform(self):
    """Check that transform leaves the attributes set in init untouched.

    Two identically configured transformers are built; only the first is
    used to transform, after which its attributes are compared with those
    of the unused one.
    """

    def build_transformer():
        # Every call builds its own ``pd_method_kwargs`` dict, so the two
        # instances never share mutable state.
        return SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

    df = d.create_datediff_test_df()

    used = build_transformer()
    untouched = build_transformer()

    used.transform(df)

    checked_attributes = (
        "new_column_name",
        "pd_method_name",
        "columns",
        "pd_method_kwargs",
    )

    for attribute in checked_attributes:
        assert getattr(used, attribute) == getattr(untouched, attribute), (
            attribute + " changed by SeriesDtMethodTransformer.transform")
def expected_df_3():
    """Build the expected frame for test_expected_output_callable.

    Starts from the datediff test frame and adds a ``b_new`` column holding
    column ``b`` converted with ``.dt.to_period("M")``.
    """
    expected = d.create_datediff_test_df()

    monthly_periods = expected["b"].dt.to_period("M")
    expected["b_new"] = monthly_periods

    return expected
def expected_df_2():
    """Build the expected frame for test_expected_output_overwrite.

    Starts from the datediff test frame and replaces column ``a`` with a
    list of hard-coded integer years.
    """
    years = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

    expected = d.create_datediff_test_df()
    expected["a"] = years

    return expected
def test_super_transform_called(self, mocker):
    """Check that SeriesDtMethodTransformer.transform calls BaseTransformer.transform.

    The parent transform is expected to be called once, positionally, with
    a frame equal to the test input and with no keyword arguments.
    """
    input_df = d.create_datediff_test_df()

    transformer = SeriesDtMethodTransformer(
        new_column_name="a2",
        pd_method_name="year",
        column="a",
    )

    # Expected arguments of call number 0; built from a fresh frame rather
    # than reusing ``input_df``.
    first_call = {"args": (d.create_datediff_test_df(), ), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
    ):
        transformer.transform(input_df)
def test_super_transform_call(self, mocker):
    """Check the arguments ToDatetimeTransformer.transform passes to BaseTransformer.transform.

    The parent transform is expected to be called once, positionally, with
    a frame equal to the test input and with no keyword arguments.
    """
    input_df = d.create_datediff_test_df()

    transformer = ToDatetimeTransformer(column="a", new_column_name="Y")

    # Expected arguments of call number 0; built from a fresh frame rather
    # than reusing ``input_df``.
    first_call = {"args": (d.create_datediff_test_df(), ), "kwargs": {}}
    expected_call_args = {0: first_call}

    # Value handed to the helper as ``return_value`` (presumably what the
    # patched parent transform returns -- confirm against the helper).
    mocked_return = d.create_datediff_test_df()

    with h.assert_function_call(
            mocker,
            tubular.base.BaseTransformer,
            "transform",
            expected_call_args,
            return_value=mocked_return,
    ):
        to_dt = transformer
        to_dt.transform(input_df)
class TestTransform(object):
    """Tests for DateDifferenceTransformer.transform()."""

    # NOTE: the ``expected_df_*`` functions below take no ``self``. They are
    # called directly in the class body, inside the ``parametrize``
    # decorators, to build expected frames while the class is being defined.

    def expected_df_1():
        """Expected output for test_expected_output_units_Y."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "Y": [
                26.59340677135105,
                19.76757257798535,
                0.0,
                0.08487511721664373,
                -0.08236536912690427,
                -2.915756882984136,
                -3.082769210410435,
                29.999247075573077,
            ],
        })

        return df

    def expected_df_2():
        """Expected output for test_expected_output_units_M."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "M": [
                319.12088125621256,
                237.21087093582423,
                0.0,
                1.0185014065997249,
                -0.9883844295228512,
                -34.989082595809634,
                -36.993230524925224,
                359.9909649068769,
            ],
        })

        return df

    def expected_df_3():
        """Expected output for test_expected_output_units_D."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "D": [
                9713.042372685186,
                7219.957627314815,
                0.0,
                31.0,
                -30.083333333333332,
                -1064.9583333333333,
                -1125.9583333333333,
                10957.0,
            ],
        })

        return df

    def expected_df_4():
        """Expected output for test_expected_output_units_h."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "h": [
                233113.01694444445,
                173278.98305555555,
                0.0,
                744.0,
                -722.0,
                -25559.0,
                -27023.0,
                262968.0,
            ],
        })

        return df

    def expected_df_5():
        """Expected output for test_expected_output_units_m."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "m": [
                13986781.016666668,
                10396738.983333332,
                0.0,
                44640.0,
                -43320.0,
                -1533540.0,
                -1621380.0,
                15778080.0,
            ],
        })

        return df

    def expected_df_6():
        """Expected output for test_expected_output_units_s."""
        df = pd.DataFrame({
            "a": [
                datetime.datetime(1993, 9, 27, 11, 58, 58),
                datetime.datetime(2000, 3, 19, 12, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 11, 59, 59),
                datetime.datetime(2018, 10, 10, 10, 59, 59),
                datetime.datetime(2018, 12, 10, 11, 59, 59),
                datetime.datetime(1985, 7, 23, 11, 59, 59),
            ],
            "b": [
                datetime.datetime(2020, 5, 1, 12, 59, 59),
                datetime.datetime(2019, 12, 25, 11, 58, 58),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 11, 10, 11, 59, 59),
                datetime.datetime(2018, 9, 10, 9, 59, 59),
                datetime.datetime(2015, 11, 10, 11, 59, 59),
                datetime.datetime(2015, 11, 10, 12, 59, 59),
                datetime.datetime(2015, 7, 23, 11, 59, 59),
            ],
            "s": [
                839206861.0,
                623804339.0,
                0.0,
                2678400.0,
                -2599200.0,
                -92012400.0,
                -97282800.0,
                946684800.0,
            ],
        })

        return df

    def expected_df_7():
        """Expected output for test_expected_output_nulls."""
        # ``np.nan`` is used rather than the ``np.NaN`` alias, which was
        # removed in NumPy 2.0.
        df = pd.DataFrame(
            {
                "a": [
                    datetime.datetime(1993, 9, 27, 11, 58, 58),
                    np.nan,
                ],
                "b": [
                    np.nan,
                    datetime.datetime(2019, 12, 25, 11, 58, 58),
                ],
                "Y": [
                    np.nan,
                    np.nan,
                ],
            },
            index=[0, 1],
        )

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=DateDifferenceTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_datediff_test_df()

        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        # Expected arguments of call number 0: one positional frame equal to
        # the test input, no keyword arguments.
        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
                return_value=d.create_datediff_test_df(),
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_1()),
    )
    def test_expected_output_units_Y(self, df, expected):
        """Test that the output is expected from transform, when units is Y.

        This tests positive year gaps and negative year gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_2()),
    )
    def test_expected_output_units_M(self, df, expected):
        """Test that the output is expected from transform, when units is M.

        This tests positive month gaps and negative month gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="M",
            units="M",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_3()),
    )
    def test_expected_output_units_D(self, df, expected):
        """Test that the output is expected from transform, when units is D.

        This tests positive day gaps and negative day gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="D",
            units="D",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_4()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_4()),
    )
    def test_expected_output_units_h(self, df, expected):
        """Test that the output is expected from transform, when units is h.

        This tests positive hour gaps and negative hour gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="h",
            units="h",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_5()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_5()),
    )
    def test_expected_output_units_m(self, df, expected):
        """Test that the output is expected from transform, when units is m.

        This tests positive minute gaps and negative minute gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="m",
            units="m",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_6()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_6()),
    )
    def test_expected_output_units_s(self, df, expected):
        """Test that the output is expected from transform, when units is s.

        This tests positive second gaps and negative second gaps.
        """
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="s",
            units="s",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_equal_dispatch(
            expected=expected,
            actual=df_transformed,
            msg="Unexpected values in DateDifferenceTransformer.transform",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_nulls_df(),
                            expected_df_7()) +
        h.index_preserved_params(d.create_datediff_test_nulls_df(),
                                 expected_df_7()),
    )
    def test_expected_output_nulls(self, df, expected):
        """Test that the output is expected from transform, when columns are nulls."""
        x = DateDifferenceTransformer(
            column_lower="a",
            column_upper="b",
            new_column_name="Y",
            units="Y",
            copy=True,
            verbose=False,
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in DateDifferenceTransformer.transform (nulls)",
        )
class TestTransform(object):
    """Tests for SeriesDtMethodTransformer.transform()."""

    # NOTE: the ``expected_df_*`` functions below take no ``self``. They are
    # called directly in the class body, inside the ``parametrize``
    # decorators, to build expected frames while the class is being defined.

    def expected_df_1():
        """Expected output of test_expected_output_no_overwrite."""
        df = d.create_datediff_test_df()

        df["a_year"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_2():
        """Expected output of test_expected_output_overwrite."""
        df = d.create_datediff_test_df()

        df["a"] = [1993, 2000, 2018, 2018, 2018, 2018, 2018, 1985]

        return df

    def expected_df_3():
        """Expected output of test_expected_output_callable."""
        df = d.create_datediff_test_df()

        df["b_new"] = df["b"].dt.to_period("M")

        return df

    def test_arguments(self):
        """Test that transform has expected arguments."""
        h.test_function_arguments(
            func=SeriesDtMethodTransformer.transform,
            expected_arguments=["self", "X"],
        )

    def test_super_transform_called(self, mocker):
        """Test that BaseTransformer.transform called."""
        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="a2",
            pd_method_name="year",
            column="a",
        )

        # Expected arguments of call number 0: one positional frame equal to
        # the test input, no keyword arguments.
        expected_call_args = {
            0: {
                "args": (d.create_datediff_test_df(), ),
                "kwargs": {}
            }
        }

        with h.assert_function_call(
                mocker,
                tubular.base.BaseTransformer,
                "transform",
                expected_call_args,
        ):
            x.transform(df)

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_1()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_1()),
    )
    def test_expected_output_no_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when not overwriting the original column."""
        x = SeriesDtMethodTransformer(
            new_column_name="a_year",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # The message names the dt method actually under test ("year").
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, not overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_2()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_2()),
    )
    def test_expected_output_overwrite(self, df, expected):
        """Test a single column output from transform gives expected results, when overwriting the original column."""
        x = SeriesDtMethodTransformer(
            new_column_name="a",
            pd_method_name="year",
            column="a",
            pd_method_kwargs={},
        )

        df_transformed = x.transform(df)

        # The message names the dt method actually under test ("year").
        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with year, overwriting original column",
        )

    @pytest.mark.parametrize(
        "df, expected",
        h.row_by_row_params(d.create_datediff_test_df(), expected_df_3()) +
        h.index_preserved_params(d.create_datediff_test_df(),
                                 expected_df_3()),
    )
    def test_expected_output_callable(self, df, expected):
        """Test transform gives expected results, when pd_method_name is a callable.

        Uses ``to_period`` with ``pd_method_kwargs={"freq": "M"}``.
        """
        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        df_transformed = x.transform(df)

        h.assert_frame_equal_msg(
            actual=df_transformed,
            expected=expected,
            msg_tag=
            "Unexpected values in SeriesDtMethodTransformer.transform with to_period",
        )

    def test_attributes_unchanged_by_transform(self):
        """Test that attributes set in init are unchanged by the transform method."""
        df = d.create_datediff_test_df()

        x = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        # Identically configured reference instance that is never used to
        # transform; built from its own dict literal so no state is shared.
        x2 = SeriesDtMethodTransformer(
            new_column_name="b_new",
            pd_method_name="to_period",
            column="b",
            pd_method_kwargs={"freq": "M"},
        )

        x.transform(df)

        assert (
            x.new_column_name == x2.new_column_name
        ), "new_column_name changed by SeriesDtMethodTransformer.transform"

        assert (
            x.pd_method_name == x2.pd_method_name
        ), "pd_method_name changed by SeriesDtMethodTransformer.transform"

        assert (
            x.columns == x2.columns
        ), "columns changed by SeriesDtMethodTransformer.transform"

        assert (
            x.pd_method_kwargs == x2.pd_method_kwargs
        ), "pd_method_kwargs changed by SeriesDtMethodTransformer.transform"