def test_attributes_unchanged_from_transform(self):
    """Check that calling transform leaves the transformer's attributes as fit left them.

    Two transformers are built and fit identically; only the second one is
    then run through transform, and its attributes are compared to the first.
    """
    frame = d.create_df_9()

    def _fitted_transformer():
        # A fresh quantiles dict per transformer so the two never share state.
        transformer = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
        transformer.fit(frame)
        return transformer

    reference = _fitted_transformer()
    transformed = _fitted_transformer()
    transformed.transform(frame)

    checks = (
        ("capping_values", "capping_values attribute modified in transform"),
        (
            "_replacement_values",
            "_replacement_values attribute modified in transform",
        ),
        ("weights_column", "weights_column attribute modified in transform"),
        ("quantiles", "quantiles attribute modified in transform"),
    )
    for attribute, failure_message in checks:
        assert getattr(reference, attribute) == getattr(
            transformed, attribute
        ), failure_message
def test_check_is_fitted_call_count(self, mocker):
    """Check that transform calls BaseTransformer.check_is_fitted exactly twice."""
    frame = d.create_df_3()
    transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})
    expected_number_of_calls = 2

    call_counter = h.assert_function_call_count(
        mocker,
        tubular.base.BaseTransformer,
        "check_is_fitted",
        expected_number_of_calls,
    )
    with call_counter:
        transformer.transform(frame)
def test_non_numeric_column_error(self):
    """Check that transform raises a TypeError when a column to cap is not numeric."""
    frame = d.create_df_5()
    transformer = CappingTransformer(
        capping_values={"a": [2, 5], "b": [-1, 8], "c": [-1, 8]}
    )
    # Brackets are escaped because pytest treats `match` as a regular expression.
    expected_message = r"The following columns are not numeric in X; \['b', 'c'\]"

    with pytest.raises(TypeError, match=expected_message):
        transformer.transform(frame)
def test_quantile_not_fit_error(self):
    """Check that transform raises a ValueError when quantiles were given in init but fit was never run."""
    frame = d.create_df_9()
    unfitted = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})
    expected_message = "capping_values attribute is an empty dict - perhaps the fit method has not been run yet"

    with pytest.raises(ValueError, match=expected_message):
        unfitted.transform(frame)
def test_replacement_values_dict_not_set_error(self):
    """Check that transform raises a ValueError when _replacement_values is an empty dict."""
    frame = d.create_df_9()
    transformer = CappingTransformer(quantiles={"a": [0.2, 1], "b": [0, 1]})

    # Populate capping_values by hand so the earlier "capping_values attribute
    # is an empty dict" exception is not the one that fires.
    transformer.capping_values = {"a": [1, 4]}

    expected_message = "_replacement_values attribute is an empty dict - perhaps the fit method has not been run yet"
    with pytest.raises(ValueError, match=expected_message):
        transformer.transform(frame)
def test_learnt_values_not_modified(self):
    """Test that the capping values given in init are not changed in transform.

    The expected values are built as an object separate from the dict handed
    to the transformer. If both were the same object, an in-place mutation
    made during transform would alter the expectation as well and the
    comparison could never fail.
    """
    df = d.create_df_3()

    x = CappingTransformer({"a": [2, 5], "b": [-1, 8]})

    x.transform(df)

    # Independent literal: shares no dict or list with the transformer's input.
    expected_capping_values = {"a": [2, 5], "b": [-1, 8]}

    h.test_object_attributes(
        obj=x,
        expected_attributes={"capping_values": expected_capping_values},
        msg="Attributes for CappingTransformer set in init",
    )
def test_check_is_fitted_call_1(self, mocker):
    """Check the arguments passed to BaseTransformer.check_is_fitted in transform.

    Expectations are supplied for entries 0 and 1: the first with
    ["capping_values"] and the second with ["_replacement_values"].
    """
    frame = d.create_df_3()
    transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

    checked_attributes = ["capping_values", "_replacement_values"]
    expected_call_args = {
        position: {"args": ([attribute],), "kwargs": {}}
        for position, attribute in enumerate(checked_attributes)
    }

    call_checker = h.assert_function_call(
        mocker, tubular.base.BaseTransformer, "check_is_fitted", expected_call_args
    )
    with call_checker:
        transformer.transform(frame)
def test_super_transform_called(self, mocker):
    """Check that transform calls BaseTransformer.transform with the input frame."""
    frame = d.create_df_3()
    transformer = CappingTransformer(capping_values={"a": [2, 5], "b": [-1, 8]})

    # Fresh frames are created for the expected argument and for the mocked
    # return value rather than reusing the frame handed to transform.
    first_call = {"args": (d.create_df_3(),), "kwargs": {}}
    expected_call_args = {0: first_call}

    with h.assert_function_call(
        mocker,
        tubular.base.BaseTransformer,
        "transform",
        expected_call_args,
        return_value=d.create_df_3(),
    ):
        transformer.transform(frame)
def test_non_cap_column_left_untouched(self, df, expected):
    """Check that only the configured column is capped and the others are unchanged.

    `df` and `expected` are supplied from outside this method (presumably by
    a parametrize decorator or fixtures that are not visible here).
    """
    transformer = CappingTransformer(capping_values={"a": [2, 10]})

    h.assert_frame_equal_msg(
        actual=transformer.transform(df),
        expected=expected,
        msg_tag="Unexpected values in CappingTransformer.transform, with columns meant to not be transformed",
    )
def test_expected_output_min_and_max_combinations(self, df, expected):
    """Check the output of transform for several combinations of capping bounds.

    Column "a" is given two bounds, while "b" and "c" each have one of their
    two bounds set to None. `df` and `expected` are supplied from outside this
    method (presumably by a parametrize decorator or fixtures that are not
    visible here).
    """
    bounds_by_column = {"a": [2, 5], "b": [None, 7], "c": [0, None]}
    transformer = CappingTransformer(capping_values=bounds_by_column)

    result = transformer.transform(df)

    h.assert_frame_equal_msg(
        actual=result,
        expected=expected,
        msg_tag="Unexpected values in CappingTransformer.transform",
    )