Exemplo n.º 1
0
def test_walk(folder, name):
    """Walk one test data folder and compare its index against expected files."""
    root = tests_data_path / folder

    # run
    listing, tree, forbidden = aw.walk(root)

    # optionally dump the computed index, for debugging only
    if debug:
        asd.save_json_index(root, listing, tree, forbidden,
                            start_path=tests_data_path)

    # load expected results
    expected_dir = root / '.alfeios_expected'
    expected_listing = asd.load_json_listing(expected_dir / 'listing.json',
                                             start_path=tests_data_path)
    expected_tree = asd.load_json_tree(expected_dir / 'tree.json',
                                       start_path=tests_data_path)

    # mtime changes when the test itself runs, so normalize it everywhere
    listing = reset_listing_mtime(listing)
    tree = reset_tree_mtime(tree)
    expected_listing = reset_listing_mtime(expected_listing)
    expected_tree = reset_tree_mtime(expected_tree)

    # verify
    assert listing == expected_listing
    assert tree == expected_tree
    assert forbidden == {}
Exemplo n.º 2
0
def index(path, exclusion=None):
    """

    - Index all file and directory contents in a root directory
      including the inside of zip, tar, gztar, bztar and xztar compressed files
    - Contents are identified by their hash-code, path-type (file or directory)
      and size
    - It saves three files in the root directory:
       - A listing.json file that is a dictionary: content -> list of paths
       - A tree.json file that is a dictionary: path -> content
         (the listing.json dual)
       - A forbidden.json file that lists paths with no access
    - In case of no write access to the root directory, the output files are
      saved in a temp directory of the filesystem with a unique identifier

    Args:
        path (str or pathlib.Path): path to the root directory
        exclusion (set of str): set of directories and files not to consider
    """

    # normalize to a pathlib.Path so str input works too
    path = pathlib.Path(path)
    if path.is_dir():
        # walk the tree (with a progress bar) and persist the three indexes
        listing, tree, forbidden = _walk_with_progressbar(path,
                                                          exclusion=exclusion)
        asd.save_json_index(path, listing, tree, forbidden)
    else:
        # not a directory: report on stderr and abort without raising
        print(colorama.Fore.RED + 'This is not a valid path - exiting',
              file=sys.stderr)
        return
Exemplo n.º 3
0
def test_missing_not_fully_included():
    """Folder8 is not fully contained in Folder0: expect a non-empty diff."""
    root = tests_data_path / 'Folder0'

    # run
    listing8, tree8, forbidden8 = aw.walk(tests_data_path / 'Folder8')
    listing0, tree0, forbidden0 = aw.walk(root)
    missing_listing = aw.get_missing(listing8, listing0)

    # optionally dump the result, for debugging only
    if debug:
        asd.save_json_index(root,
                            missing_listing,
                            start_path=tests_data_path,
                            prefix='missing_not_fully_included_')

    # load expected results
    expected_missing_listing = asd.load_json_listing(
        root / '.alfeios_expected' / 'listing_missing_from_Folder8.json',
        start_path=tests_data_path)

    # mtime changes when the test itself runs, so normalize it on both sides
    missing_listing = reset_listing_mtime(missing_listing)
    expected_missing_listing = reset_listing_mtime(expected_missing_listing)

    # verify
    assert missing_listing == expected_missing_listing
Exemplo n.º 4
0
def test_duplicate_with_zip():
    """Detect duplicates across the whole test data tree, zips included."""
    # run
    listing, tree, forbidden = aw.walk(tests_data_path)
    duplicate_listing, size_gain = aw.get_duplicate(listing)

    # optionally dump the result, for debugging only
    if debug:
        asd.save_json_index(tests_data_path,
                            duplicate_listing,
                            start_path=tests_data_path,
                            prefix='duplicate_with_zip_')

    # verify
    # checking only that the root directory content of 4 folders are equal
    # should be enough thanks to the Merkle tree property of alfeios listing
    root_content = ('4f8c48630a797715e8b86466e0218aa1', 'DIR', 3598557)
    root_pointers = duplicate_listing[root_content]
    # drop mtime (updated by the test run itself) and keep only the paths
    root_directories = {pointer[0] for pointer in root_pointers}
    expected_directories = {
        tests_data_path / name
        for name in ('Folder0', 'FolderZipFile',
                     'FolderZipFolder', 'FolderZipNested')
    }
    assert root_directories == expected_directories
Exemplo n.º 5
0
def test_duplicate():
    """Compare duplicate detection on Folder0/Folder3 with expected results."""
    root = tests_data_path / 'Folder0' / 'Folder3'

    # run
    listing, tree, forbidden = aw.walk(root)
    duplicate_listing, size_gain = aw.get_duplicate(listing)

    # optionally dump the result, for debugging only
    if debug:
        asd.save_json_index(root,
                            duplicate_listing,
                            start_path=tests_data_path,
                            prefix='duplicate_')

    # load expected results
    expected_duplicate_listing = asd.load_json_listing(
        root / '.alfeios_expected' / 'duplicate_listing.json',
        start_path=tests_data_path)

    # mtime changes when the test itself runs, so normalize it on both sides
    duplicate_listing = reset_listing_mtime(duplicate_listing)
    expected_duplicate_listing = reset_listing_mtime(
        expected_duplicate_listing)

    # verify
    assert duplicate_listing == expected_duplicate_listing
    assert size_gain == 367645
Exemplo n.º 6
0
def test_walk_with_exclusions():
    """Walk Folder0 with an exclusion set and compare with the expected index."""
    root = tests_data_path / 'Folder0'
    excluded = {'Folder3', 'Folder4_1', 'file3.txt', 'groundhog.png'}

    # run
    listing, tree, forbidden = aw.walk(root, exclusion=excluded)

    # optionally dump the computed index, for debugging only
    if debug:
        asd.save_json_index(root, listing, tree, forbidden,
                            start_path=tests_data_path,
                            prefix='with_exclusions_')

    # load expected results
    expected_dir = root / '.alfeios_expected'
    expected_listing = asd.load_json_listing(
        expected_dir / 'listing_with_exclusions.json',
        start_path=tests_data_path)
    expected_tree = asd.load_json_tree(
        expected_dir / 'tree_with_exclusions.json',
        start_path=tests_data_path)

    # mtime changes when the test itself runs, so normalize it everywhere
    listing = reset_listing_mtime(listing)
    tree = reset_tree_mtime(tree)
    expected_listing = reset_listing_mtime(expected_listing)
    expected_tree = reset_tree_mtime(expected_tree)

    # verify
    assert listing == expected_listing
    assert tree == expected_tree
    assert forbidden == {}
Exemplo n.º 7
0
def duplicate(path, exclusion=None, save_index=False):
    """

    - List every duplicated file and directory in a root directory
    - Save the result as a duplicate_listing.json file in the root directory
      and print the potential space gain
    - Passing a listing.json file as positional argument instead of a root
      directory deserializes the listing from the json file rather than
      regenerating it, which is significantly quicker but of course less up
      to date
    - Can save the listing.json, tree.json and forbidden.json files in the
      root directory
    - In case of no write access to the root directory, the output files are
      saved in a temp directory of the filesystem with a unique identifier

    Args:
        path (str or pathlib.Path): path to the root directory to parse or the
                                    listing.json file to deserialize
        exclusion (set of str): set of directories and files not to consider
        save_index (bool): flag to save the listing.json, tree.json and
                           forbidden.json files in the root directory
                           default is False
    """

    path = pathlib.Path(path)
    is_serialized_listing = (path.is_file()
                             and path.name.endswith('listing.json'))
    if not is_serialized_listing and not path.is_dir():
        # neither a saved listing nor a directory: report and abort
        print(colorama.Fore.RED + 'This is not a valid path - exiting',
              file=sys.stderr)
        return

    if is_serialized_listing:
        listing = asd.load_json_listing(path)
        # the listing.json presumably lives in <root>/.alfeios/ - hence
        # going up two levels to recover the root directory
        directory_path = path.parent.parent
    else:
        directory_path = path
        listing, tree, forbidden = _walk_with_progressbar(path,
                                                          exclusion=exclusion)
        if save_index:
            asd.save_json_index(directory_path, listing, tree, forbidden)

    duplicate_listing, size_gain = aw.get_duplicate(listing)
    if not duplicate_listing:
        print(colorama.Fore.GREEN +
              'Congratulations there is no duplicate here')
        return

    tag = asd.save_json_index(directory_path,
                              duplicate_listing,
                              prefix='duplicate_')
    result_path = directory_path / '.alfeios' / (tag + 'listing.json')
    print(colorama.Fore.GREEN +
          f'You can gain {at.natural_size(size_gain)} '
          f'space by going through {str(result_path)}')
Exemplo n.º 8
0
def test_missing_fully_included():
    """Folder3 lives inside Folder0, so nothing should be reported missing."""
    root = tests_data_path / 'Folder0'

    # run
    listing3, tree3, forbidden3 = aw.walk(root / 'Folder3')
    listing0, tree0, forbidden0 = aw.walk(root)
    missing_listing = aw.get_missing(listing3, listing0)

    # optionally dump the result, for debugging only
    if debug:
        asd.save_json_index(root,
                            missing_listing,
                            start_path=tests_data_path,
                            prefix='missing_fully_included_')

    # verify
    assert missing_listing == {}
Exemplo n.º 9
0
def missing(old_path, new_path, exclusion=None, save_index=False):
    """

    - List every file and directory present in an old root directory and
      absent from a new one
    - Save the result as a missing_listing.json file in the new root
      directory and print the number of missing files
    - Passing a listing.json file as positional argument instead of a root
      directory deserializes the corresponding listing from the json file
      rather than regenerating it, which is significantly quicker but of
      course less up to date
    - Can save the listing.json, tree.json and forbidden.json files in the 2
      root directories
    - In case of no write access to the new root directory, the output files
      are saved in a temp directory of the filesystem with a unique identifier

    Args:
        old_path (str or pathlib.Path): path to the old root directory to parse
                                        or the listing.json file to deserialize
        new_path (str or pathlib.Path): path to the new root directory to parse
                                        or the listing.json file to deserialize
        exclusion (set of str): set of directories and files not to consider
        save_index (bool): flag to save the listing.json, tree.json
                           and forbidden.json files in the 2 root directories
                           default is False
    """

    old_path = pathlib.Path(old_path)
    old_is_listing = (old_path.is_file()
                      and old_path.name.endswith('listing.json'))
    if old_is_listing:
        old_listing = asd.load_json_listing(old_path)
    elif old_path.is_dir():
        old_listing, old_tree, old_forbidden = _walk_with_progressbar(
            old_path, exclusion=exclusion)
        old_directory_path = old_path  # todo understand if necessary ?
        if save_index:
            asd.save_json_index(old_directory_path, old_listing, old_tree,
                                old_forbidden)
    else:
        print(colorama.Fore.RED + 'Old is not a valid path - exiting',
              file=sys.stderr)
        return

    new_path = pathlib.Path(new_path)
    new_is_listing = (new_path.is_file()
                      and new_path.name.endswith('listing.json'))
    if new_is_listing:
        new_listing = asd.load_json_listing(new_path)
        # the listing.json presumably lives in <root>/.alfeios/ - hence
        # going up two levels to recover the root directory
        new_directory_path = new_path.parent.parent
    elif new_path.is_dir():
        new_listing, new_tree, new_forbidden = _walk_with_progressbar(
            new_path, exclusion=exclusion)
        new_directory_path = new_path
        if save_index:
            asd.save_json_index(new_directory_path, new_listing, new_tree,
                                new_forbidden)
    else:
        print(colorama.Fore.RED + 'New is not a valid path - exiting',
              file=sys.stderr)
        return

    missing_listing = aw.get_missing(old_listing, new_listing)
    if not missing_listing:
        print(colorama.Fore.GREEN +
              'Congratulations Old content is totally included in New')
        return

    tag = asd.save_json_index(new_directory_path,
                              missing_listing,
                              prefix='missing_')
    result_path = new_directory_path / '.alfeios' / (tag + 'listing.json')
    print(colorama.Fore.GREEN +
          f'There are {len(missing_listing)} Old files missing in New'
          f' - please go through {str(result_path)}')