Example #1
def test_duplicate_folders():
    checker = FileHashChecker()

    assert checker.directories is not None
    assert checker.directories.empty

    checker.iterate("test_data/files2/")
    folder_paths = checker.directories.index.to_list()

    assert pathlib.Path("test_data/files2/A") in folder_paths
    assert pathlib.Path("test_data/files2/Subfolder") in folder_paths
    assert pathlib.Path("test_data/files2/Subfolder/A") in folder_paths
    assert len(checker.directories) == 8

    assert (checker.df.loc[pathlib.Path("test_data/files2/Subfolder"),
                           "number_no_dir_files"] == 1
            ), "Should only count the actual files"
    assert (checker.directories.loc[pathlib.Path("test_data/files2/Subfolder"),
                                    "number_files"] == 3
            ), "Should contain 2 folders and 1 file"

    assert (checker.directories.loc[pathlib.Path("test_data/files2/A"),
                                    "number_no_dir_files"] == 3)
    assert (checker.directories.loc[pathlib.Path("test_data/files2/A"),
                                    "number_files"] == 3)

    assert (len(checker.directories[checker.directories.number_no_dir_files !=
                                    checker.directories.number_files]) == 1
            ), "Only 1 path with subfolders"
Example #2
def test_duplicate_directories():
    checker = FileHashChecker()
    checker.iterate("test_data/files2/")

    assert set(checker.duplicate_directories.index.to_list()) == {
        pathlib.Path("test_data/files2/A"),
        pathlib.Path("test_data/files2/A_copy"),
        pathlib.Path("test_data/files2/Subfolder/A"),
    }
Example #3
def main():
    args = parse_arguments()

    print_sauber()
    print_usage_if_no_args(args)

    checker = FileHashChecker()
    checker.iterate(pathlib.Path(args.path), debug=args.debug)

    handle_duplicate_arguments(args, checker)
    handle_find_arguments(args, checker)
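As with most command-line entry points, main() is presumably hooked up to module execution with the standard guard, which the excerpt does not show:

if __name__ == "__main__":
    main()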
Example #4
def test_export_import_data():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")
    checker.export_data("test_data/data.csv")
    assert len(checker.files) == 21

    checker2 = FileHashChecker()
    assert len(checker2.files) == 0

    checker2.import_data("test_data/data.csv")
    assert len(checker2.files) == len(checker.files)
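The .loc lookups in the other examples suggest the data is held in pandas DataFrames indexed by pathlib.Path. Under that assumption, a CSV round-trip loses the Path type and the index has to be rebuilt on import; the following is a minimal sketch of that idea, not the project's actual export_data/import_data code:

import io
import pathlib

import pandas as pd

df = pd.DataFrame({"hash": ["abc"]},
                  index=[pathlib.Path("test_data/files/x.txt")])
buffer = io.StringIO()
df.to_csv(buffer)
buffer.seek(0)
restored = pd.read_csv(buffer, index_col=0)
# read_csv gives plain strings, so the index is mapped back to Path objects.
restored.index = restored.index.map(pathlib.Path)
assert list(restored.index) == list(df.index)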
Example #5
def test_duplicate_documents():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")
    filenames = set(checker.duplicate_documents.name.unique())
    assert filenames == {
        "document (original).pdf",
        "document (copy).pdf",
        "document (recreated).pdf",
        "lorem_ipsum_1000.txt",
    }

    assert "document (slightly different).pdf" not in filenames
    assert len(checker.duplicate_documents) == 6
Example #6
def test_duplicate_music():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")

    assert (pathlib.Path("test_data/files/base/mp3/Right_Here_Beside_You.mp3")
            in checker.duplicate_music.index)
    assert (pathlib.Path(
        "test_data/files/duplicates/mp3/Right_Here_Beside_You.mp3")
            in checker.duplicate_music.index)
    assert (pathlib.Path(
        "test_data/files/partial duplicates/music/Right Here Beside You.mp3")
            in checker.duplicate_music.index)
    assert len(checker.duplicate_music.index) == 3, "Only 3 songs in test data"
Example #7
def test_find():
    checker = FileHashChecker()
    checker.iterate("test_data/files")

    music = checker.find_music
    assert music.suffix.unique().tolist() == [".mp3"]

    videos = checker.find_videos
    assert videos.empty

    documents = checker.find_documents
    assert sorted(documents.suffix.unique()) == [".pdf", ".txt"]

    images = checker.find_images
    assert images.suffix.unique().tolist() == [".jpg"]
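The find_music, find_videos, find_documents and find_images properties evidently group files by extension. A sketch of suffix-based filtering over a DataFrame with a suffix column; the extension set below is an illustrative assumption, not the project's actual category definition:

import pandas as pd

MUSIC_SUFFIXES = {".mp3", ".flac", ".ogg"}  # illustrative, not the real list

def find_by_suffix(files: pd.DataFrame, suffixes: set) -> pd.DataFrame:
    """Return the rows whose suffix column falls into the given category."""
    return files[files["suffix"].isin(suffixes)]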
Example #8
def test_directory_hashes():
    checker = FileHashChecker()
    checker.iterate("test_data/files2/")

    assert (checker.df.loc[pathlib.Path("test_data/files2/A"),
                           "hash"] == "a8f963f199e83660cd8ff21ac94d440a")
    assert (checker.df.loc[pathlib.Path("test_data/files2/Subfolder/A"),
                           "hash"] ==
            checker.df.loc[pathlib.Path("test_data/files2/A"), "hash"]
            ), "Exact directory copies should have the same hash"

    assert (checker.df.loc[pathlib.Path("test_data/files2/Subfolder/Empty"),
                           "hash"] == "d41d8cd98f00b204e9800998ecf8427e"
            ), "Empty directories should have empty hash"

    assert not pandas.isnull(
        checker.df.loc[pathlib.Path("test_data/files2/Subfolder"), "hash"])
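The expected value for the empty directory, d41d8cd98f00b204e9800998ecf8427e, is the MD5 digest of empty input, which suggests (the snippets themselves do not show it) that directory hashes are MD5-based and that an empty directory hashes like empty content:

import hashlib

assert hashlib.md5(b"").hexdigest() == "d41d8cd98f00b204e9800998ecf8427e"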
Example #9
def test_duplicates():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")

    expected_duplicates = [
        "test_data/files/base/jpeg/clouds.jpg",
        "test_data/files/base/jpeg/curved_road.jpg",
        "test_data/files/base/pdf/document (original).pdf",
        "test_data/files/base/txt/lorem_ipsum_1000.txt",
        "test_data/files/base/mp3/Right_Here_Beside_You.mp3",
        "test_data/files/duplicates/a/very/deep/path/document (copy).pdf",
        "test_data/files/duplicates/jpeg/clouds.jpg",
        "test_data/files/duplicates/jpeg/curved_road.jpg",
        "test_data/files/duplicates/pdf/document (original).pdf",
        "test_data/files/duplicates/pdf/document (recreated).pdf",
        "test_data/files/duplicates/txt/lorem_ipsum_1000.txt",
        "test_data/files/duplicates/mp3/Right_Here_Beside_You.mp3",
        "test_data/files/partial duplicates/jpeg/clouds.jpg",
        "test_data/files/partial duplicates/jpeg/curved_road.jpg",
        "test_data/files/partial duplicates/a new folder/unique_file",
        "test_data/files/partial duplicates/music/Right Here Beside You.mp3",
        "test_data/files/base/empty_file",
    ]

    expected_no_duplicates = [
        "test_data/files/base/jpeg/asphalt.jpg",
        "test_data/files/partial duplicates/jpeg/roses.jpg",
        "test_data/files/duplicates/pdf/document (slightly different).pdf",
        "test_data/files/partial duplicates/txt/lorem_ipsum_999.txt",
    ]

    expected_duplicates = {pathlib.Path(p) for p in expected_duplicates}
    expected_no_duplicates = {pathlib.Path(p) for p in expected_no_duplicates}

    for file in expected_no_duplicates:
        assert file.exists()

    actual_duplicates = set(checker.duplicate_files.index.to_list())
    assert actual_duplicates == expected_duplicates
    assert actual_duplicates - expected_no_duplicates == actual_duplicates
Example #10
def test_duplicate_images():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")

    assert len(checker.duplicate_images) == 2 * 3
Example #11
def test_duplicate_videos():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")

    assert len(checker.duplicate_videos) == 0
Example #12
def test_debug_messages():
    checker = FileHashChecker()
    checker.iterate("test_data/files2", debug=True)
    checker.iterate("test_data/files2/Empty", debug=True)
Example #13
def test_no_files():
    checker = FileHashChecker()
    checker.iterate("test_data/files2/Empty")
Example #14
def test_no_folders():
    checker = FileHashChecker()
    checker.iterate("test_data/files2/A")
Example #15
def test_iterate():
    checker = FileHashChecker()
    checker.iterate("test_data/files/")
    assert len(checker.files) == 21
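All of the snippets above are excerpts and rely on module-level imports roughly like the following; the import path for FileHashChecker is a guess, since the examples do not show it:

import pathlib

import pandas

from file_hash_checker import FileHashChecker  # hypothetical module path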