def test_walk(folder, name):
    """Walk one test-data folder and check listing/tree against the JSON fixtures.

    `name` is unused in the body; presumably a parametrize id — TODO confirm.
    """
    root = tests_data_path / folder

    # run
    listing, tree, forbidden = aw.walk(root)

    # for logging purpose only
    if debug:
        asd.save_json_index(root, listing, tree, forbidden,
                            start_path=tests_data_path)

    # load expected
    expected_dir = root / '.alfeios_expected'
    expected_listing = asd.load_json_listing(expected_dir / 'listing.json',
                                             start_path=tests_data_path)
    expected_tree = asd.load_json_tree(expected_dir / 'tree.json',
                                       start_path=tests_data_path)

    # reset mtime for everybody as it is updated with the test itself
    listing = reset_listing_mtime(listing)
    expected_listing = reset_listing_mtime(expected_listing)
    tree = reset_tree_mtime(tree)
    expected_tree = reset_tree_mtime(expected_tree)

    # verify
    assert listing == expected_listing
    assert tree == expected_tree
    assert forbidden == {}
def index(path, exclusion=None):
    """Index all file and directory contents under a root directory.

    - Recurses into zip, tar, gztar, bztar and xztar compressed files
    - Contents are identified by their hash-code, path-type (file or
      directory) and size
    - Saves three files in the root directory:
        - listing.json: dictionary content -> list of paths
        - tree.json: dictionary path -> content (the listing.json dual)
        - forbidden.json: paths with no access
    - With no write access to the root directory, the output files land in a
      uniquely-tagged temp directory of the filesystem

    Args:
        path (str or pathlib.Path): path to the root directory
        exclusion (set of str): set of directories and files not to consider
    """
    path = pathlib.Path(path)
    # guard clause: refuse anything that is not an existing directory
    if not path.is_dir():
        print(colorama.Fore.RED + 'This is not a valid path - exiting',
              file=sys.stderr)
        return
    listing, tree, forbidden = _walk_with_progressbar(path,
                                                      exclusion=exclusion)
    asd.save_json_index(path, listing, tree, forbidden)
def test_missing_not_fully_included():
    """Folder8 content is not fully contained in Folder0: expect a non-empty diff."""
    root = tests_data_path / 'Folder0'

    # run
    listing8, _, _ = aw.walk(tests_data_path / 'Folder8')
    listing0, _, _ = aw.walk(root)
    missing_listing = aw.get_missing(listing8, listing0)

    # for logging purpose only
    if debug:
        asd.save_json_index(root, missing_listing,
                            start_path=tests_data_path,
                            prefix='missing_not_fully_included_')

    # load expected
    expected_missing_listing = asd.load_json_listing(
        root / '.alfeios_expected' / 'listing_missing_from_Folder8.json',
        start_path=tests_data_path)

    # reset mtime for everybody as it is updated with the test itself
    missing_listing = reset_listing_mtime(missing_listing)
    expected_missing_listing = reset_listing_mtime(expected_missing_listing)

    # verify
    assert missing_listing == expected_missing_listing
def test_duplicate_with_zip():
    """The four root folders (plain + three zipped variants) must deduplicate as equal."""
    # run
    listing, tree, forbidden = aw.walk(tests_data_path)
    duplicate_listing, size_gain = aw.get_duplicate(listing)

    # for logging purpose only
    if debug:
        asd.save_json_index(tests_data_path, duplicate_listing,
                            start_path=tests_data_path,
                            prefix='duplicate_with_zip_')

    # verify
    # checking only that the root directory contents of the 4 folders are
    # equal should be enough thanks to the Merkle tree property of the
    # alfeios listing
    root_content = ('4f8c48630a797715e8b86466e0218aa1', 'DIR', 3598557)
    root_pointers = duplicate_listing[root_content]

    # drop mtime for everybody as it is updated with the test itself
    root_directories = {pointer_path for pointer_path, _ in root_pointers}
    assert root_directories == {tests_data_path / 'Folder0',
                                tests_data_path / 'FolderZipFile',
                                tests_data_path / 'FolderZipFolder',
                                tests_data_path / 'FolderZipNested'}
def test_duplicate():
    """Duplicate detection inside Folder0/Folder3 must match the saved fixture."""
    root = tests_data_path / 'Folder0' / 'Folder3'

    # run
    listing, _, _ = aw.walk(root)
    duplicate_listing, size_gain = aw.get_duplicate(listing)

    # for logging purpose only
    if debug:
        asd.save_json_index(root, duplicate_listing,
                            start_path=tests_data_path,
                            prefix='duplicate_')

    # load expected
    expected_duplicate_listing = asd.load_json_listing(
        root / '.alfeios_expected' / 'duplicate_listing.json',
        start_path=tests_data_path)

    # reset mtime for everybody as it is updated with the test itself
    duplicate_listing = reset_listing_mtime(duplicate_listing)
    expected_duplicate_listing = reset_listing_mtime(
        expected_duplicate_listing)

    # verify
    assert duplicate_listing == expected_duplicate_listing
    assert size_gain == 367645
def test_walk_with_exclusions():
    """Walking Folder0 with exclusions must match the dedicated fixtures."""
    root = tests_data_path / 'Folder0'
    exclusion = {'Folder3', 'Folder4_1', 'file3.txt', 'groundhog.png'}

    # run
    listing, tree, forbidden = aw.walk(root, exclusion=exclusion)

    # for logging purpose only
    if debug:
        asd.save_json_index(root, listing, tree, forbidden,
                            start_path=tests_data_path,
                            prefix='with_exclusions_')

    # load expected
    expected_dir = root / '.alfeios_expected'
    expected_listing = asd.load_json_listing(
        expected_dir / 'listing_with_exclusions.json',
        start_path=tests_data_path)
    expected_tree = asd.load_json_tree(
        expected_dir / 'tree_with_exclusions.json',
        start_path=tests_data_path)

    # reset mtime for everybody as it is updated with the test itself
    listing = reset_listing_mtime(listing)
    expected_listing = reset_listing_mtime(expected_listing)
    tree = reset_tree_mtime(tree)
    expected_tree = reset_tree_mtime(expected_tree)

    # verify
    assert listing == expected_listing
    assert tree == expected_tree
    assert forbidden == {}
def duplicate(path, exclusion=None, save_index=False):
    """List all duplicated files and directories in a root directory.

    - Saves the result as a duplicate_listing.json file in the root directory
    - Prints the potential space gain
    - If a listing.json file is passed as positional argument instead of a
      root directory, the listing is deserialized from the json file instead
      of being generated: significantly quicker but possibly stale
    - Can additionally save the listing.json, tree.json and forbidden.json
      files in the root directory
    - With no write access to the root directory, the output files land in a
      uniquely-tagged temp directory of the filesystem

    Args:
        path (str or pathlib.Path): path to the root directory to parse
            or the listing.json file to deserialize
        exclusion (set of str): set of directories and files not to consider
        save_index (bool): flag to save the listing.json, tree.json and
            forbidden.json files in the root directory, default False
    """
    path = pathlib.Path(path)
    if path.is_file() and path.name.endswith('listing.json'):
        # deserialize a previously saved listing; the file is expected at
        # <root>/.alfeios/listing.json, hence the double parent
        listing = asd.load_json_listing(path)
        directory_path = path.parent.parent
    elif path.is_dir():
        listing, tree, forbidden = _walk_with_progressbar(
            path, exclusion=exclusion)
        directory_path = path
        if save_index:
            asd.save_json_index(directory_path, listing, tree, forbidden)
    else:
        print(colorama.Fore.RED + 'This is not a valid path - exiting',
              file=sys.stderr)
        return

    duplicate_listing, size_gain = aw.get_duplicate(listing)
    if not duplicate_listing:
        print(colorama.Fore.GREEN +
              'Congratulations there is no duplicate here')
        return
    tag = asd.save_json_index(directory_path, duplicate_listing,
                              prefix='duplicate_')
    result_path = directory_path / '.alfeios' / (tag + 'listing.json')
    print(colorama.Fore.GREEN +
          f'You can gain {at.natural_size(size_gain)} '
          f'space by going through {str(result_path)}')
def test_missing_fully_included():
    """Folder3 lives inside Folder0, so nothing from it can be missing."""
    root = tests_data_path / 'Folder0'

    # run
    listing3, _, _ = aw.walk(root / 'Folder3')
    listing0, _, _ = aw.walk(root)
    missing_listing = aw.get_missing(listing3, listing0)

    # for logging purpose only
    if debug:
        asd.save_json_index(root, missing_listing,
                            start_path=tests_data_path,
                            prefix='missing_fully_included_')

    # verify
    assert missing_listing == {}
def missing(old_path, new_path, exclusion=None, save_index=False):
    """List files/directories present in an old root directory and missing in a new one.

    - Saves the result as a missing_listing.json file in the new root
      directory
    - Prints the number of missing files
    - If a listing.json file is passed as positional argument instead of a
      root directory, the corresponding listing is deserialized from the json
      file instead of being generated: significantly quicker but possibly
      stale
    - Can additionally save the listing.json, tree.json and forbidden.json
      files in the 2 root directories
    - With no write access to the new root directory, the output files land
      in a uniquely-tagged temp directory of the filesystem

    Args:
        old_path (str or pathlib.Path): path to the old root directory to
            parse or the listing.json file to deserialize
        new_path (str or pathlib.Path): path to the new root directory to
            parse or the listing.json file to deserialize
        exclusion (set of str): set of directories and files not to consider
        save_index (bool): flag to save the listing.json, tree.json and
            forbidden.json files in the 2 root directories, default False
    """

    def _load_or_walk(root, label):
        # Return (listing, directory_path) for root, or None when invalid.
        # label ('Old'/'New') only flavors the error message.
        root = pathlib.Path(root)
        if root.is_file() and root.name.endswith('listing.json'):
            # listing.json is expected at <root>/.alfeios/listing.json
            return asd.load_json_listing(root), root.parent.parent
        if root.is_dir():
            listing, tree, forbidden = _walk_with_progressbar(
                root, exclusion=exclusion)
            if save_index:
                asd.save_json_index(root, listing, tree, forbidden)
            return listing, root
        print(colorama.Fore.RED + label + ' is not a valid path - exiting',
              file=sys.stderr)
        return None

    old = _load_or_walk(old_path, 'Old')
    if old is None:
        return
    # NOTE(review): the old directory path is never used downstream
    # (original carried a "todo understand if necessary ?" about it)
    old_listing, _ = old

    new = _load_or_walk(new_path, 'New')
    if new is None:
        return
    new_listing, new_directory_path = new

    missing_listing = aw.get_missing(old_listing, new_listing)
    if not missing_listing:
        print(colorama.Fore.GREEN +
              'Congratulations Old content is totally included in New')
        return
    tag = asd.save_json_index(new_directory_path, missing_listing,
                              prefix='missing_')
    result_path = new_directory_path / '.alfeios' / (tag + 'listing.json')
    print(colorama.Fore.GREEN +
          f'There are {len(missing_listing)} Old files missing in New'
          f' - please go through {str(result_path)}')