def test_recurse_files(tmp_path):
    """
    recurse_files() yields the files found in a directory and in the
    directories beneath it.
    """
    with tempfiles(tmp_path) as path:
        start = path.parent
        chnl = file.recurse_files(start)
        assert isinstance(chnl, Channel)
        found = set(chnl)
        assert path in found

        # The directory the walk started from is not itself reported.
        assert start not in found
        # Starting the walk at `path` itself produces nothing at all.
        from_path = set(file.recurse_files(path))
        assert from_path == set()

        everything = set(file.recurse_files(tmp_path))

        # Only files are reported, never directories.
        assert start not in everything
        # 'deep' is exempt from the plain is_file() check (presumably its
        # path is too long for it); it is still covered by the longname()
        # check on the next line.
        assert all(p.name == 'deep' or p.is_file() for p in everything)
        assert all(file.longname(p).is_file() for p in everything)

        # Names beginning with a dot are reported too.
        assert tmp_path / '.hidden' in everything

        # Long paths are walked as well.
        assert any(p.name == 'deep' for p in everything), everything  # pragma: no branch

        # Testing containment directly on the channel (no set()) may stop
        # consuming the iterator part-way through. Proving that the worker
        # thread really exited early would take more files, and the Channel
        # tests already look into that, so here we only check that the
        # proper form of the code works.
        with file.recurse_files(tmp_path).cancel_context() as live_chnl:
            assert path in live_chnl
def test_string_param(tmp_path):
    """
    Both recurse_files() and recurse_filestats() may be given their starting
    point as a string.
    """
    with tempfiles(tmp_path):
        as_text = str(tmp_path)
        # Each walker must report the same set of results whether it is
        # handed tmp_path itself or its string form.
        for walker in (file.recurse_files, file.recurse_filestats):
            from_path = set(walker(tmp_path))
            from_text = set(walker(as_text))
            assert from_path == from_text
def test_channel_param(tmp_path):
    """
    Both recurse_files() and recurse_filestats() take an optional `channel`
    keyword argument.
    """
    with tempfiles(tmp_path):
        # Supplying our own QueueChannel must not change what is reported.
        for walker in (file.recurse_files, file.recurse_filestats):
            with_default = set(walker(tmp_path))
            with_supplied = set(walker(tmp_path, channel=QueueChannel()))
            assert with_default == with_supplied
def test_hasher(count, tmp_hashfiles):
    """
    A hasher is a crew of threads: we feed it file names and it hands back
    the hash of each file's contents.
    """
    requests, responses = file.hasher(count)

    filenames = tuple(file.recurse_files(tmp_hashfiles))
    assert len(filenames) == 20

    # Submit every file name, then close the request side.
    requests.put_many(filenames).end()
    results = tuple(responses)

    # One response per request, covering exactly the files we submitted.
    assert len(results) == len(filenames)
    hashed_names = {entry[0] for entry in results}
    assert hashed_names == set(filenames)

    # Each file in the fixture is named after the hex form of its own digest.
    for filename, digest in results:
        hex_digest = binascii.hexlify(digest).decode('ascii')
        assert hex_digest == filename.name
def test_recurse_filestats(tmp_path):
    """
    recurse_filestats() is like recurse_files(), but returns more data than
    just the path of each file it finds.
    """
    # Single source of truth for the size every temporary file is given.
    expected_size = 27

    with tempfiles(tmp_path, file_size=expected_size):
        chnl = file.recurse_filestats(tmp_path)
        assert isinstance(chnl, Channel)
        all_stats = set(chnl)

        # Check the same inclusion/exclusion as recurse_files(). The loop
        # variable is not called `file`, so it cannot be mistaken for the
        # `file` module used on the right-hand side.
        reported_paths = {entry.fullpath for entry in all_stats}
        assert reported_paths == set(file.recurse_files(tmp_path))

        hidden_path = tmp_path / '.hidden'
        matches = [entry for entry in all_stats if entry.fullpath == hidden_path]
        assert len(matches) == 1
        hidden = matches[0]

        # Check we have all the stats we want
        assert hidden.fullpath == hidden_path
        assert hidden.basepath == tmp_path
        assert hidden.relpath == Path('.hidden')
        assert hidden.size == expected_size

        # Every file was created with the same size; list offenders on failure.
        wrong_size = [
            (entry, entry.size)
            for entry in all_stats
            if entry.size != expected_size
        ]
        assert not wrong_size, wrong_size