def compare(old_dirname: str, new_dirname: str, list_identical: bool, list_changed: bool, list_vanished: bool, list_added: bool, list_all: bool) -> None:
    """
    Summarise changes from OLD_DIRNAME to NEW_DIRNAME.

    Both directories must previously have been scanned (by the 'info'
    command or otherwise). The names OLD and NEW assume that you're
    comparing two snapshots of "the same" collection of files, for
    example two backups on different dates.
    """
    old_dir = DirInfo.load(old_dirname)
    new_dir = DirInfo.load(new_dirname)

    identical = set()
    changed = set()
    vanished = set()
    for old_file in old_dir:
        rel_str = old_file._rel_str
        # EAFP: a file with no counterpart in NEW has vanished.
        try:
            new_file = new_dir.get_relative(rel_str)
        except KeyError:
            vanished.add(rel_str)
            continue
        # Only FileInfo records carry a hash; anything else can't be compared.
        if not isinstance(old_file, FileInfo):
            raise Exception(f'No hash for {old_file.fullpath}')
        if not isinstance(new_file, FileInfo):
            raise Exception(f'No hash for {new_file.fullpath}')
        if old_file.hash == new_file.hash:
            identical.add(rel_str)
        else:
            changed.add(rel_str)

    # Anything in NEW that has no counterpart in OLD was added.
    old_names = {entry._rel_str for entry in old_dir}
    new_names = {entry._rel_str for entry in new_dir}
    added = new_names - old_names

    print('old:', old_dir.file_count)
    print('new:', new_dir.file_count)
    print('identical:', len(identical))
    print('changed: ', len(changed))
    print('vanished:', len(vanished))
    print('added:', len(added))

    # Optional per-category listings, each indented by one space per line.
    sections = (
        (list_identical, '\nidentical files:\n ', identical),
        (list_changed, '\nchanged files:\n ', changed),
        (list_vanished, '\nvanished files:\n ', vanished),
        (list_added, '\nadded files:\n ', added),
    )
    for wanted, header, names in sections:
        if wanted or list_all:
            print(header, end='')
            print('\n '.join(sorted(names)))
def test_serialisation(tmp_path):
    """
    Test failure modes of DirInfo serialisation.

    Success is tested in check_everything; here we only exercise the
    error paths: bad JSON payloads, a mismatched base path, and a
    missing data file.
    """
    subdir = (tmp_path / 'sub')
    jsonfile = (tmp_path / 'sub.dirinfo.json')
    (subdir / 'dir').mkdir(parents=True)
    dirinfo = DirInfo(subdir)
    # save() apparently returns the path it wrote to, alongside the
    # directory being described rather than inside it.
    assert dirinfo.save() == os.fspath(jsonfile)
    # Not exactly a requirement, but for the tests to work we need this.
    assert jsonfile.exists()
    # If this fails, then testing that the bad cases fail is kind of pointless.
    assert DirInfo.load(subdir).base == os.fspath(subdir)
    # Make sure the encoder isn't accidentally used for something it can't handle.
    with pytest.raises(TypeError):
        json.dumps(object(), cls=Encoder)

    # Make sure bad json file contents are rejected: overwrite the data
    # file with `jsondata` and check that load() then raises ValueError.
    def bad_jsonfile(jsondata):
        with open(jsonfile, 'w', encoding='utf8') as outfile:
            json.dump(jsondata, outfile)
        with pytest.raises(ValueError):
            DirInfo.load(subdir)

    bad_jsonfile({'foo': 'bar'})  # wrong schema
    bad_jsonfile(None)            # not even an object

    # If the serialised base doesn't match the actual location, then something
    # is wrong and we should refuse to load it. Re-save first, since the
    # bad_jsonfile calls above clobbered the file.
    assert dirinfo.save() == os.fspath(jsonfile)
    with open(jsonfile, 'r', encoding='utf8') as infile:
        jsondata = json.load(infile)
    jsondata['base'] += 'X'
    bad_jsonfile(jsondata)

    # If there's no data then load() fails, but cached() succeeds.
    jsonfile.unlink()
    with pytest.raises(FileNotFoundError):
        DirInfo.load(subdir)
    # NOTE(review): compared against the Path here, but against
    # os.fspath(subdir) after load() above — presumably cached() keeps the
    # argument as given while load() normalises; confirm that asymmetry
    # is intended.
    assert DirInfo.cached(subdir).base == subdir
def check_everything(file_size, subdir, files, dupes, info, no_empty=False, fast=False, is_copy=False):
    """
    Assert that `info` accurately describes the files on disk.

    `files` maps relative path string to expected content (bytes); `dupes`
    is the expected tuple of sets of duplicate paths. With `no_empty`,
    zero-byte files are expected to be unhashed; with `fast`, only files
    whose size matches `file_size` (the duplicated size) are expected to
    be hashed. Unless `is_copy`, the info is also saved, reloaded, and
    the clone re-checked recursively.
    """
    def hash_expected(actual_size):
        # Negation of the scanner's "skip hashing" rules.
        if no_empty and actual_size == 0:
            return False
        if fast and actual_size != file_size:
            return False
        return True

    assert info.file_count == len(files) > 0
    for name, content in files.items():
        entry = info.get_relative(name)
        assert entry.basepath == subdir
        assert isinstance(entry.relpath, Path)
        assert os.fspath(entry.relpath) == name
        assert entry.size == len(content)
        if hash_expected(entry.size):
            assert entry.hash == hashlib.sha256(content).digest()
        else:
            assert not hasattr(entry, 'hash'), name
        # 10 seconds is arbitrary, but it *shouldn't* be that slow.
        assert entry.when >= datetime.now(tz=timezone.utc) - timedelta(seconds=10)
    assert sorted(files.keys()) == sorted(item._rel_str for item in info)

    # Must notice that two files have the same hash.
    groups = tuple(info.dupe_groups())
    assert len(groups) == len(dupes)
    found = tuple(
        {os.fspath(member.relpath) for member in group}
        for group in groups)
    assert found == dupes

    if not is_copy:
        # Round-trip through serialisation and verify the clone the same way.
        info.save()
        clone = DirInfo.load(subdir)
        check_everything(file_size, subdir, files, dupes, clone, no_empty, fast, is_copy=True)
def bad_jsonfile(jsondata):
    """
    Overwrite the data file with `jsondata`, then check that loading it
    is rejected with ValueError.

    NOTE(review): relies on `jsonfile` and `subdir` from the enclosing
    scope — confirm both are defined wherever this is used.
    """
    with open(jsonfile, 'w', encoding='utf8') as fh:
        json.dump(jsondata, fh)
    with pytest.raises(ValueError):
        DirInfo.load(subdir)