Пример #1
0
    def test_returns_empty_list_for_unique_columns(self):
        """Two distinctly named columns must produce no duplicate reports."""
        raw_csv = (
            "aaa,bbb\n"
            "1,1\n"
            "2,2\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # Confirm the parsed header first so a failure below points at the
        # checker rather than at CSV parsing.
        parsed_header = list(frame.columns)
        assert parsed_header == ["aaa", "bbb"]

        findings = check_duplicate_columns(frame, "file")
        assert findings == []
Пример #2
0
    def test_returns_empty_list_if_column_end_with_number_but_is_not_duplicate(
        self,
    ):
        """A header that merely ends in ".1" is not reported as a duplicate."""
        raw_csv = (
            "taxon taxon,taxon taxon f.1\n"
            "1,1\n"
            "2,2\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # Both headers must come through unchanged; the second one ends in
        # ".1" in the file itself, it is not a renamed copy of the first.
        parsed_header = list(frame.columns)
        assert parsed_header == ["taxon taxon", "taxon taxon f.1"]

        findings = check_duplicate_columns(frame, "file")
        assert findings == []
Пример #3
0
    def test_returns_False_if_space_columns_have_similiar_names_but_different_values(
        self,
    ):
        """A trailing-space twin with different data is reported, same_value False."""
        raw_csv = (
            "a,a \n"
            "1,3\n"
            "2,4\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # The trailing space is kept in the parsed header, so the two names
        # are distinct strings at this point.
        assert list(frame.columns) == ["a", "a "]

        report = {"filename": "file", "bad_column": "a ", "same_value": False}
        assert check_duplicate_columns(frame, "file") == [report]
Пример #4
0
    def test_returns_True_if_space_columns_have_similiar_names_and_same_values(
        self,
    ):
        """A trailing-space twin with identical data is reported, same_value True."""
        raw_csv = (
            "a,a \n"
            "1,1\n"
            "2,2\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # The trailing space is kept in the parsed header, so the two names
        # are distinct strings at this point.
        assert list(frame.columns) == ["a", "a "]

        report = {"filename": "file", "bad_column": "a ", "same_value": True}
        assert check_duplicate_columns(frame, "file") == [report]
Пример #5
0
    def test_returns_False_if_duplicate_columns_have_different_values(self):
        """Repeated headers whose data differs from the first are reported, same_value False."""
        raw_csv = (
            "a,a,a\n"
            "1,3,3\n"
            "2,4,4\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # The repeated "a" headers are expected to arrive renamed with
        # numeric suffixes.
        assert list(frame.columns) == ["a", "a.1", "a.2"]

        # One report per renamed column, in column order.
        expected = [
            {"filename": "file", "bad_column": renamed, "same_value": False}
            for renamed in ("a.1", "a.2")
        ]
        findings = check_duplicate_columns(frame, "file")
        assert findings == expected
Пример #6
0
    def test_returns_True_if_duplicate_columns_have_same_value(self):
        """Repeated headers whose data matches the first are reported, same_value True."""
        raw_csv = (
            "a,a,a\n"
            "1,1,1\n"
            "2,2,2\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # The repeated "a" headers are expected to arrive renamed with
        # numeric suffixes.
        assert list(frame.columns) == ["a", "a.1", "a.2"]

        # One report per renamed column, in column order.
        expected = [
            {"filename": "file", "bad_column": renamed, "same_value": True}
            for renamed in ("a.1", "a.2")
        ]
        findings = check_duplicate_columns(frame, "file")
        assert findings == expected
Пример #7
0
    def test_returns_empty_list_if_space_columns_have_differnt_names(self):
        """A trailing space on an otherwise different name is not a duplicate."""
        raw_csv = (
            "a,b \n"
            "1,1\n"
            "2,2\n"
        )
        frame = pd.read_csv(StringIO(raw_csv))

        # "b " keeps its trailing space in the parsed header.
        parsed_header = list(frame.columns)
        assert parsed_header == ["a", "b "]

        findings = check_duplicate_columns(frame, "file")
        assert findings == []