def test_no_files() -> None:
    '''
    Inputs that match nothing are accepted and produce an empty tuple
    (they work, but should result in a warning).
    '''
    # todo test these for warnings?
    for nothing in ('', [], 'bad*glob'):
        assert get_files(nothing) == ()
def test_explicit_glob() -> None:
    '''
    A glob passed alongside the directory restricts which files are returned.
    '''
    for fixture in (
        '/tmp/hpi_test/file_3.zip',
        '/tmp/hpi_test/file_2.zip',
        '/tmp/hpi_test/ignoreme',
        '/tmp/hpi_test/file.zip',
    ):
        create(fixture)

    # todo walrus operator would be great here...
    expected = (
        Path('/tmp/hpi_test/file_2.zip'),
        Path('/tmp/hpi_test/file_3.zip'),
    )

    # the glob can be given positionally...
    positional = get_files('/tmp/hpi_test', 'file_*.zip')
    assert positional == expected

    # ...and the named argument should work too
    by_keyword = get_files('/tmp/hpi_test', glob='file_*.zip')
    assert by_keyword == expected
def activity() -> Iterator[Activity]:
    """Yield activity events from every export, skipping repeated event ids.

    Each path returned by ``get_files(config.export_path)`` is opened with
    ``match_structure`` against ``EXPECTED_DISCORD_STRUCTURE``; the
    ``activity`` subdirectory of every matched root is handed to
    ``parse_activity``. An event is yielded only the first time its
    ``event_id`` is encountered.
    """
    seen_ids: Set[str] = set()
    for export in get_files(config.export_path):
        with match_structure(
            export, expected=EXPECTED_DISCORD_STRUCTURE
        ) as discord_export:
            activity_dirs = [root / "activity" for root in discord_export]
            for activity_dir in activity_dirs:
                for event in parse_activity(activity_dir):
                    if event.event_id not in seen_ids:
                        yield event
                        seen_ids.add(event.event_id)
def test_single_file() -> None:
    '''
    A path to a regular file is returned as is.
    '''
    # Exception if it doesn't exist
    with pytest.raises(Exception):
        get_files('/tmp/hpi_test/file.ext')

    create('/tmp/hpi_test/file.ext')

    # Couple of things:
    # 1. Return type is a tuple, it's friendlier for hashing/caching
    # 2. It always returns pathlib.Path instead of plain strings
    single = get_files('/tmp/hpi_test/file.ext')
    assert single == (Path('/tmp/hpi_test/file.ext'),)

    if windows:
        # windows doesn't have bashrc.. ugh
        return

    # if the path starts with ~, we expand it
    from_home = get_files('~/.bashrc')
    assert from_home == (Path('~').expanduser() / '.bashrc',)
def _get_files(x, *args, **kwargs):
    """Call ``my.core.common.get_files`` with ``/tmp`` paths redirected to ``TMP``.

    ``x`` may be a ``str``, a ``Path`` or an iterable of those; every ``/tmp``
    is rewritten to ``TMP`` before the real call. The results are rewritten
    back to ``/tmp`` and returned as a tuple of ``Path`` so assertions can
    compare against fixed paths. Extra positional and keyword arguments are
    forwarded untouched.
    """
    import my.core.common as C

    def to_real(item):
        if isinstance(item, str):
            return item.replace('/tmp', TMP)
        if isinstance(item, Path):
            assert item.parts[:2] == (os.sep, 'tmp')  # meh
            return Path(TMP) / Path(*item.parts[2:])
        # iterable?
        return [to_real(member) for member in item]

    found = C.get_files(to_real(x), *args, **kwargs)
    # hack back for asserts..
    return tuple(Path(str(hit).replace(TMP, '/tmp')) for hit in found)
def messages() -> Iterator[Message]:
    """Yield messages from every export, skipping repeated message ids.

    Each path returned by ``get_files(config.export_path)`` is opened with
    ``match_structure`` against ``EXPECTED_DISCORD_STRUCTURE``; the
    ``messages`` subdirectory of every matched root is handed to
    ``parse_messages``. Each parsed message is re-wrapped in a ``Message``
    whose content has been passed through ``_remove_link_suppression``, and
    is yielded only the first time its ``message_id`` is encountered.
    """
    seen_ids: Set[int] = set()
    for export in get_files(config.export_path):
        with match_structure(
            export, expected=EXPECTED_DISCORD_STRUCTURE
        ) as discord_export:
            message_dirs = [root / "messages" for root in discord_export]
            for message_dir in message_dirs:
                for raw in parse_messages(message_dir):
                    if raw.message_id not in seen_ids:
                        yield Message(
                            message_id=raw.message_id,
                            timestamp=raw.timestamp,
                            channel=raw.channel,
                            content=_remove_link_suppression(raw.content),
                            attachments=raw.attachments,
                        )
                        seen_ids.add(raw.message_id)
def test_implicit_glob() -> None:
    '''
    An asterisk inside the path itself triggers globbing as well.
    '''
    # todo hopefully that makes sense? dunno why would anyone actually rely on asterisks in names..
    # this is very convenient in configs, so people don't have to use some special types
    for fixture in (
        '/tmp/hpi_test/123/',
        '/tmp/hpi_test/123/dummy',
        '/tmp/hpi_test/123/file.zip',
        '/tmp/hpi_test/456/',
        '/tmp/hpi_test/456/dummy',
        '/tmp/hpi_test/456/file.zip',
    ):
        create(fixture)

    matched = get_files(['/tmp/hpi_test/*/*.zip'])
    expected = (
        Path('/tmp/hpi_test/123/file.zip'),
        Path('/tmp/hpi_test/456/file.zip'),
    )
    assert matched == expected
def test_multiple_files() -> None:
    '''
    Passing a directory (or several directories) flattens their contents.
    '''
    # TODO not sure about this... should really allow extra dirs,
    # e.g. a nested '/tmp/hpi_test/dir1/whatever/'
    for fixture in (
        '/tmp/hpi_test/dir1/',
        '/tmp/hpi_test/dir1/zzz',
        '/tmp/hpi_test/dir1/yyy',
        '/tmp/hpi_test/dir2/',
        '/tmp/hpi_test/dir2/mmm',
        '/tmp/hpi_test/dir2/nnn',
        '/tmp/hpi_test/dir3/',
        '/tmp/hpi_test/dir3/ttt',
    ):
        create(fixture)

    requested = [
        Path('/tmp/hpi_test/dir3'),  # it takes in Path as well as str
        '/tmp/hpi_test/dir1',
    ]
    flattened = get_files(requested)

    # the paths are always returned in sorted order (unless you pass sort=False)
    assert flattened == (
        Path('/tmp/hpi_test/dir1/yyy'),
        Path('/tmp/hpi_test/dir1/zzz'),
        Path('/tmp/hpi_test/dir3/ttt'),
    )
def inputs() -> Sequence[Tuple[datetime, Path]]:  # type: ignore[misc]
    """Yield a ``(timestamp, path)`` pair for each file under ``config.export_path``.

    The timestamp is parsed from the file's stem using the format
    ``%Y%m%dT%H%M%SZ``. Note that, despite the ``Sequence`` annotation, this
    is a generator function.
    """
    for backup in get_files(config.export_path):
        yield (datetime.strptime(backup.stem, "%Y%m%dT%H%M%SZ"), backup)
def inputs() -> List[Path]:
    """Return the files ``get_files`` finds for ``config.takeout_path`` as a list."""
    found = get_files(config.takeout_path)
    return list(found)
def inputs() -> Sequence[Path]:
    """Return the files ``get_files`` finds for ``config.takeout_path``."""
    takeout_files = get_files(config.takeout_path)
    return takeout_files
def inputs() -> Sequence[Path]:
    """Return the files under ``config.export_path`` selected by the glob ``*.gpx``."""
    gpx_files = get_files(config.export_path, glob="*.gpx")
    return gpx_files
def inputs() -> Sequence[Path]:
    """Return the files under ``config.gdpr_dir`` selected by the glob ``*.csv``."""
    csv_files = get_files(config.gdpr_dir, glob="*.csv")
    return csv_files
def inputs() -> Sequence[Path]:
    """Return the files ``get_files`` finds for ``config.export_path``."""
    export_files = get_files(config.export_path)
    return export_files
def _cachew_depends_on() -> List[str]:
    """Return the string form of every file found for ``config.export_path``.

    NOTE(review): judging by the name, this is presumably used as a cache
    dependency key, so the result changes when the set of files does; confirm
    against the caller.
    """
    return list(map(str, get_files(config.export_path)))