def verify_backup(rootdir, mdata, num_threads=None):
    """Compare the tree under rootdir against previously recorded metadata.

    Arguments:
    rootdir -- root of the tree to verify; handed to scan_backup,
        hashdeep.compute_digests and the metadata getters.
    mdata -- recorded metadata; its `files`, `symlinks` and `directories`
        collections are iterated and every entry is keyed by its `name`
        attribute.
    num_threads -- forwarded unchanged to hashdeep.compute_digests.

    Returns VerificationResult(changed, missing, unexpected, scan_errors):
    changed -- sorted paths present in both views whose current metadata
        does not pass lenient_match against the recorded entry.
    missing -- sorted paths that were recorded but not found by the scan.
    unexpected -- sorted list of the scan's ignored entries plus the paths
        found by the scan that were never recorded.
    scan_errors -- the `errors` attribute of the scan result, as is.
    """
    # First, we get all the relevant paths
    scan_result = scan_backup(rootdir)

    # Build a dictionary with all original metadata
    original_metadata = {}
    for entry in itertools.chain(mdata.files, mdata.symlinks,
                                 mdata.directories):
        original_metadata[entry.name] = entry

    # Build a dictionary with all current metadata
    digest_map = hashdeep.compute_digests(rootdir, scan_result.files,
                                          num_threads)
    uid_map = utils.get_uid_name_map()
    gid_map = utils.get_gid_name_map()

    current_metadata = {}
    for f in scan_result.files:
        current_metadata[f] = metadata.get_file_metadata(
            rootdir, f, digest_map, uid_map, gid_map)
    for s in scan_result.symlinks:
        current_metadata[s] = metadata.get_symlink_metadata(
            rootdir, s, uid_map, gid_map)
    for d in scan_result.directories:
        current_metadata[d] = metadata.get_directory_metadata(
            rootdir, d, uid_map, gid_map)

    # Find missing and unexpected files. The keys of current_metadata are
    # exactly the scanned files, symlinks and directories.
    all_current_paths = set(current_metadata)
    all_original_paths = set(original_metadata)

    missing = sorted(all_original_paths - all_current_paths)

    # Build a new list instead of extending/sorting scan_result.ignored in
    # place, so the scan result object is left untouched.
    unexpected = sorted(itertools.chain(
        scan_result.ignored, all_current_paths - all_original_paths))

    # Find changed files. Sorted so the report does not depend on set
    # iteration order and is consistent with `missing` and `unexpected`.
    changed = sorted(
        p for p in all_current_paths & all_original_paths
        if not lenient_match(current_metadata[p], original_metadata[p]))

    # Return the final result
    return VerificationResult(changed, missing, unexpected,
                              scan_result.errors)
def get_default_metadata(rootdir, path, uid_map=None, gid_map=None):
    """Return a dictionary with the default metadata fields for `path`.

    The path is turned into a native path with
    utils.build_native_path(rootdir, path) and inspected with os.lstat
    (not os.stat).

    Arguments:
    rootdir, path -- passed to utils.build_native_path to locate the entry.
    uid_map -- mapping from numeric uid to user name; when None it is
        obtained from utils.get_uid_name_map().
    gid_map -- mapping from numeric gid to group name; when None it is
        obtained from utils.get_gid_name_map().

    The result has the keys 'name', 'mtime', 'user', 'group' and
    'permissions'. Ids that are absent from the maps are reported as their
    decimal string.
    """
    uid_map = utils.get_uid_name_map() if uid_map is None else uid_map
    gid_map = utils.get_gid_name_map() if gid_map is None else gid_map

    st = os.lstat(utils.build_native_path(rootdir, path))
    uid, gid = st.st_uid, st.st_gid

    # TODO: use strings instead of UNIX epoch seconds
    # TODO: fields we are currently not using: st_ino, st_dev, st_nlink
    #       - is this intentional?
    # NOTE(review): 'permissions' formats the whole st_mode, which also
    # carries the file-type bits, so the string is longer than four octal
    # digits for ordinary entries -- confirm this is the intended format
    # before changing it, since stored metadata is compared against it.
    return {
        'name': os.path.basename(path),
        'mtime': st.st_mtime,
        'user': uid_map.get(uid, str(uid)),
        'group': gid_map.get(gid, str(gid)),
        'permissions': '%04o' % st.st_mode,
    }
new_node = get_symlink_node(rootdir, linkname, uid_map, gid_map) dir_node.children[basename] = new_node # Step 3: insert the files into the directory tree for filename in files: dirname, basename = os.path.split(filename) # Navigate to the right part in the directory tree dir_node = find_directory_in_tree(root_node, dirname) # Insert the file node new_node = get_file_node(rootdir, filename, digest_map, uid_map, gid_map) dir_node.children[basename] = new_node return root_node if __name__ == "__main__": import collections import pattern import StringIO rootdir = '/home/madars/Documents/projects/go-backup' patterns_file = StringIO.StringIO("+ /\n") patterns = pattern.parse_pattern_file(patterns_file) res = pattern.assemble_paths(rootdir, patterns) digest_map = collections.defaultdict(lambda : 'demo') uid_map = utils.get_uid_name_map() gid_map = utils.get_gid_name_map() tree = get_metadata_tree(rootdir, res.filenames, res.symlinks, res.directories, digest_map, uid_map, gid_map) import json print json.dumps(tree, indent=2)
def test_get_uid_name_map_consistency():
    """Check utils.get_uid_name_map() against the pwd database.

    Every (uid, name) pair in the map must round-trip in both directions:
    looking up the uid yields the name, and looking up the name yields the
    uid.
    """
    # TODO: maybe mock out in future?
    uid_name_map = utils.get_uid_name_map()
    # items() instead of iteritems(): the latter exists only on Python 2,
    # while items() iterates the same pairs on both Python 2 and 3.
    for uid, name in uid_name_map.items():
        assert pwd.getpwuid(uid).pw_name == name
        assert pwd.getpwnam(name).pw_uid == uid