示例#1
0
    def it_can_apply_replace_strings(self, request, columns, derived_columns,
                                     expected_new_columns, expected_inplace):
        """Check ``_apply`` yields a new Dataset and calls ``str.replace``.

        The CSV loader and ``pandas.Series.str.replace`` are both patched, so
        only the wiring is verified: the result is a distinct ``Dataset``, it
        contains every column in ``expected_new_columns``, and the patched
        replace is called once per input column with ``pat``/``repl`` keywords.
        ``expected_inplace`` is supplied by the parametrization but is not
        asserted on here.
        """
        source_df = DataFrameMock.df_generic(sample_size=100)
        # Patch the loader so building the Dataset never touches the disk.
        csv_loader_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        csv_loader_.return_value = source_df
        dataset = Dataset(data_file="fake/path0")
        str_replace_ = function_mock(request, "pandas.Series.str.replace")
        str_replace_.return_value = pd.Series([0] * 100)
        feat_op = fop.ReplaceSubstrings(
            columns=columns,
            derived_columns=derived_columns,
            replacement_map={"a": "b"},
        )

        result = feat_op._apply(dataset)

        assert result is not None
        assert result is not dataset
        assert isinstance(result, Dataset)
        missing = [
            col for col in expected_new_columns
            if col not in result.data.columns
        ]
        assert not missing
        csv_loader_.assert_called_once_with("fake/path0")
        recorded_calls = str_replace_.call_args_list
        assert len(recorded_calls) == len(columns)
        first_args, first_kwargs = recorded_calls[0]
        # The first positional argument is the object ``replace`` was invoked
        # on; slicing it with ``[:]`` gives the values to compare.
        pd.testing.assert_series_equal(first_args[0][:], source_df[columns[0]])
        assert first_kwargs == {"pat": "a", "repl": "b"}
示例#2
0
def test_replace_substrings(csv, columns, derived_columns, expected_csv):
    """Apply ``ReplaceSubstrings`` to a CSV dataset and compare to a fixture.

    The operation replaces ``"r"`` with ``"c"`` on ``columns``; the resulting
    frame must equal the expectation loaded from ``expected_csv``.
    """
    source = Dataset(data_file=csv)
    expected_df = load_expectation(expected_csv, type_="csv")
    feat_op = fop.ReplaceSubstrings(
        columns=columns,
        derived_columns=derived_columns,
        replacement_map={"r": "c"},
    )

    result = feat_op(source)

    actual_df = result.data
    pd.testing.assert_frame_equal(actual_df, expected_df)
示例#3
0
    def it_construct_from_args(self, request):
        """Check the constructor forwards its keyword arguments to ``__init__``."""
        init_ = initializer_mock(request, fop.ReplaceSubstrings)
        ctor_kwargs = {
            "columns": ["col0"],
            "derived_columns": ["col1"],
            "replacement_map": {"a": "b"},
        }

        feat_op = fop.ReplaceSubstrings(**ctor_kwargs)

        # ``ANY`` stands in for the instance passed as ``self``.
        init_.assert_called_once_with(ANY, **ctor_kwargs)
        assert isinstance(feat_op, fop.ReplaceSubstrings)
示例#4
0
    def it_knows_if_equal(self, other, expected_equal):
        """Check ``==`` against ``other`` returns the expected plain ``bool``."""
        reference_map = {"a": "b", "c": "d"}
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map=reference_map,
        )

        equal = feat_op == other

        # The comparison must yield an actual bool, not merely a truthy value.
        assert type(equal) is bool
        assert equal == expected_equal
示例#5
0
    def it_knows_how_to_validate_replacement_map(self, request,
                                                 replacement_map):
        """Check an invalid ``replacement_map`` is rejected with ``TypeError``.

        ``__init__`` is mocked out so the instance can be built with an
        invalid map and the validator exercised directly.
        """
        initializer_mock(request, fop.ReplaceSubstrings)
        feat_op = fop.ReplaceSubstrings(
            columns=["col0"],
            derived_columns=["col1"],
            replacement_map=replacement_map,
        )
        expected_message = (
            "replacement_map must be a non-empty dict mapping string keys to string "
            "values")

        with pytest.raises(TypeError) as err:
            feat_op._validate_replacement_map(replacement_map)

        raised = err.value
        assert isinstance(raised, TypeError)
        assert expected_message == str(raised)
示例#6
0
    def it_knows_its_str(self):
        """Check ``str()`` of the operation renders the expected multi-line text."""
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={"a": "b", "c": "d"},
        )
        # One literal per rendered line; adjacent literals are concatenated.
        expected = (
            "ReplaceSubstrings("
            "\n\tcolumns=['exam_num_col_0'],"
            "\n\treplacement_map={'a': 'b', 'c': 'd'},"
            "\n\tderived_columns=['replaced_exam_num_col_0'],"
            "\n)"
        )

        rendered = str(feat_op)

        assert type(rendered) is str
        assert rendered == expected
示例#7
0
    def and_it_validates_its_arguments(self, request):
        """Check construction calls each validator once with its argument."""
        op_cls = fop.ReplaceSubstrings
        validate_columns_ = method_mock(
            request, op_cls, "_validate_single_element_columns")
        validate_derived_columns_ = method_mock(
            request, op_cls, "_validate_single_element_derived_columns")
        validate_replacement_map_ = method_mock(
            request, op_cls, "_validate_replacement_map")

        feat_op = op_cls(
            columns=["col0"],
            derived_columns=["col1"],
            replacement_map={"a": "b"},
        )

        # Each mocked validator records the instance as its first argument.
        validate_columns_.assert_called_once_with(feat_op, ["col0"])
        validate_derived_columns_.assert_called_once_with(feat_op, ["col1"])
        validate_replacement_map_.assert_called_once_with(feat_op, {"a": "b"})
示例#8
0
    def it_can_replace_with_template_call(self, request):
        """Check calling the operation delegates to ``_apply`` and tracks history.

        With ``_apply`` and ``Dataset.track_history`` mocked, calling the
        operation must pass the input dataset to ``_apply``, call
        ``track_history`` on the returned dataset with the operation, and
        return exactly the object ``_apply`` produced.
        """
        apply_ = method_mock(request, fop.ReplaceSubstrings, "_apply")
        track_history_ = method_mock(request, Dataset, "track_history")
        source_df = DataFrameMock.df_generic(sample_size=100)
        # Patch the loader so both Dataset instances are built without a file.
        csv_loader_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        csv_loader_.return_value = source_df
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        apply_.return_value = dataset_out
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["exam_str_col_0"],
            replacement_map={"a": "b"},
        )

        result = feat_op(dataset_in)

        apply_.assert_called_once_with(feat_op, dataset_in)
        track_history_.assert_called_once_with(result, feat_op)
        assert result is dataset_out
示例#9
0
class DescribeReplaceSubstrings:
    """Unit tests for ``fop.ReplaceSubstrings``.

    Covers construction, argument validation, ``_apply``, the call template,
    equality and the string representation.
    """

    def it_construct_from_args(self, request):
        """Check the constructor forwards its keyword arguments to ``__init__``."""
        init_ = initializer_mock(request, fop.ReplaceSubstrings)
        ctor_kwargs = {
            "columns": ["col0"],
            "derived_columns": ["col1"],
            "replacement_map": {"a": "b"},
        }

        feat_op = fop.ReplaceSubstrings(**ctor_kwargs)

        # ``ANY`` stands in for the instance passed as ``self``.
        init_.assert_called_once_with(ANY, **ctor_kwargs)
        assert isinstance(feat_op, fop.ReplaceSubstrings)

    def and_it_validates_its_arguments(self, request):
        """Check construction calls each validator once with its argument."""
        op_cls = fop.ReplaceSubstrings
        validate_columns_ = method_mock(
            request, op_cls, "_validate_single_element_columns")
        validate_derived_columns_ = method_mock(
            request, op_cls, "_validate_single_element_derived_columns")
        validate_replacement_map_ = method_mock(
            request, op_cls, "_validate_replacement_map")

        feat_op = op_cls(
            columns=["col0"],
            derived_columns=["col1"],
            replacement_map={"a": "b"},
        )

        # Each mocked validator records the instance as its first argument.
        validate_columns_.assert_called_once_with(feat_op, ["col0"])
        validate_derived_columns_.assert_called_once_with(feat_op, ["col1"])
        validate_replacement_map_.assert_called_once_with(feat_op, {"a": "b"})

    @pytest.mark.parametrize(
        "replacement_map",
        [
            [],
            {},
            {"a": 1},
            {1: "a"},
        ],
    )
    def it_knows_how_to_validate_replacement_map(self, request,
                                                 replacement_map):
        """Check an invalid ``replacement_map`` is rejected with ``TypeError``.

        ``__init__`` is mocked out so the instance can be built with an
        invalid map and the validator exercised directly.
        """
        initializer_mock(request, fop.ReplaceSubstrings)
        feat_op = fop.ReplaceSubstrings(
            columns=["col0"],
            derived_columns=["col1"],
            replacement_map=replacement_map,
        )
        expected_message = (
            "replacement_map must be a non-empty dict mapping string keys to string "
            "values")

        with pytest.raises(TypeError) as err:
            feat_op._validate_replacement_map(replacement_map)

        raised = err.value
        assert isinstance(raised, TypeError)
        assert expected_message == str(raised)

    @pytest.mark.parametrize(
        "columns, derived_columns, expected_new_columns, expected_inplace",
        [
            (["exam_str_col_0"], ["col1"], ["col1"], False),
            (["exam_str_col_0"], None, [], True),
        ],
    )
    def it_can_apply_replace_strings(self, request, columns, derived_columns,
                                     expected_new_columns, expected_inplace):
        """Check ``_apply`` yields a new Dataset and calls ``str.replace``.

        The CSV loader and ``pandas.Series.str.replace`` are both patched, so
        only the wiring is verified: the result is a distinct ``Dataset``, it
        contains every column in ``expected_new_columns``, and the patched
        replace is called once per input column with ``pat``/``repl`` keywords.
        ``expected_inplace`` is supplied by the parametrization but is not
        asserted on here.
        """
        source_df = DataFrameMock.df_generic(sample_size=100)
        # Patch the loader so building the Dataset never touches the disk.
        csv_loader_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        csv_loader_.return_value = source_df
        dataset = Dataset(data_file="fake/path0")
        str_replace_ = function_mock(request, "pandas.Series.str.replace")
        str_replace_.return_value = pd.Series([0] * 100)
        feat_op = fop.ReplaceSubstrings(
            columns=columns,
            derived_columns=derived_columns,
            replacement_map={"a": "b"},
        )

        result = feat_op._apply(dataset)

        assert result is not None
        assert result is not dataset
        assert isinstance(result, Dataset)
        missing = [
            col for col in expected_new_columns
            if col not in result.data.columns
        ]
        assert not missing
        csv_loader_.assert_called_once_with("fake/path0")
        recorded_calls = str_replace_.call_args_list
        assert len(recorded_calls) == len(columns)
        first_args, first_kwargs = recorded_calls[0]
        # The first positional argument is the object ``replace`` was invoked
        # on; slicing it with ``[:]`` gives the values to compare.
        pd.testing.assert_series_equal(first_args[0][:], source_df[columns[0]])
        assert first_kwargs == {"pat": "a", "repl": "b"}

    def it_can_replace_with_template_call(self, request):
        """Check calling the operation delegates to ``_apply`` and tracks history.

        With ``_apply`` and ``Dataset.track_history`` mocked, calling the
        operation must pass the input dataset to ``_apply``, call
        ``track_history`` on the returned dataset with the operation, and
        return exactly the object ``_apply`` produced.
        """
        apply_ = method_mock(request, fop.ReplaceSubstrings, "_apply")
        track_history_ = method_mock(request, Dataset, "track_history")
        source_df = DataFrameMock.df_generic(sample_size=100)
        # Patch the loader so both Dataset instances are built without a file.
        csv_loader_ = function_mock(request, "trousse.dataset.get_df_from_csv")
        csv_loader_.return_value = source_df
        dataset_in = Dataset(data_file="fake/path0")
        dataset_out = Dataset(data_file="fake/path0")
        apply_.return_value = dataset_out
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["exam_str_col_0"],
            replacement_map={"a": "b"},
        )

        result = feat_op(dataset_in)

        apply_.assert_called_once_with(feat_op, dataset_in)
        track_history_.assert_called_once_with(result, feat_op)
        assert result is dataset_out

    @pytest.mark.parametrize(
        "other, expected_equal",
        [
            # Identical arguments.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"a": "b", "c": "d"},
                ),
                True,
            ),
            # Same replacement map, different key order.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"c": "d", "a": "b"},
                ),
                True,
            ),
            # Different columns.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_1"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"a": "b", "c": "d"},
                ),
                False,
            ),
            # Different derived columns.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_1"],
                    replacement_map={"a": "b", "c": "d"},
                ),
                False,
            ),
            # Replacement map with one entry fewer.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"a": "b"},
                ),
                False,
            ),
            # Replacement map with a different single entry.
            (
                fop.ReplaceSubstrings(
                    columns=["exam_num_col_0"],
                    derived_columns=["replaced_exam_num_col_0"],
                    replacement_map={"c": "b"},
                ),
                False,
            ),
            # An object of an unrelated type.
            ({}, False),
        ],
    )
    def it_knows_if_equal(self, other, expected_equal):
        """Check ``==`` against ``other`` returns the expected plain ``bool``."""
        reference_map = {"a": "b", "c": "d"}
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map=reference_map,
        )

        equal = feat_op == other

        # The comparison must yield an actual bool, not merely a truthy value.
        assert type(equal) is bool
        assert equal == expected_equal

    def it_knows_its_str(self):
        """Check ``str()`` of the operation renders the expected multi-line text."""
        feat_op = fop.ReplaceSubstrings(
            columns=["exam_num_col_0"],
            derived_columns=["replaced_exam_num_col_0"],
            replacement_map={"a": "b", "c": "d"},
        )
        # One literal per rendered line; adjacent literals are concatenated.
        expected = (
            "ReplaceSubstrings("
            "\n\tcolumns=['exam_num_col_0'],"
            "\n\treplacement_map={'a': 'b', 'c': 'd'},"
            "\n\tderived_columns=['replaced_exam_num_col_0'],"
            "\n)"
        )

        rendered = str(feat_op)

        assert type(rendered) is str
        assert rendered == expected