def test_walk(ctx, topdown):
    """Check that `bf.walk` lists a small two-file tree in the expected order.

    `ctx` is a context-manager factory yielding a temporary path; the tree is
    built in that path's parent directory.  `topdown` is forwarded to
    `bf.walk`, and the expected listing is reversed when it is falsy.
    """
    payload = b"meow!"
    with ctx() as path:
        root = bf.dirname(path)

        # Build the tree:  <root>/a  and  <root>/c/d/b
        with bf.BlobFile(bf.join(root, "a"), "wb") as writer:
            writer.write(payload)
        bf.makedirs(bf.join(root, "c/d"))
        with bf.BlobFile(bf.join(root, "c/d/b"), "wb") as writer:
            writer.write(payload)

        expected = [
            (root, ["c"], ["a"]),
            (bf.join(root, "c"), ["d"], []),
            (bf.join(root, "c", "d"), [], ["b"]),
        ]
        if not topdown:
            # Bottom-up traversal is expected to yield the same entries reversed.
            expected.reverse()

        listing = list(bf.walk(root, topdown=topdown))
        assert listing == expected

        # Walk again through the converted form of the same directory path and
        # compare against the very same expected listing.
        converted_root = _convert_https_to_az(root)
        converted_listing = list(bf.walk(converted_root, topdown=topdown))
        assert converted_listing == expected
def parallel_copy_recursive(src_dir, dst_dir, max_workers=16, overwrite=False):
    """Similar to `gsutil -m cp -r $local_dir/'*' $remote_dir/`

    Walks `src_dir` with `bf.walk` and copies every file found to the same
    relative location under `dst_dir`, running the `bf.copy` calls on a
    thread pool.

    Args:
        src_dir: directory to copy from, with or without a trailing path
            separator.
        dst_dir: directory to copy into.
        max_workers: number of threads in the pool.
        overwrite: forwarded unchanged to `bf.copy`.

    Raises:
        AssertionError: if `bf.walk` yields a root that does not start with
            `src_dir`.
        Exception: the exception raised by the first failed copy, in
            submission order, re-raised by `future.result()`.
    """
    # Number of leading characters to drop from each walked root to obtain its
    # path relative to src_dir.  The separator that follows src_dir inside
    # `root` must be skipped too, but only when src_dir does not already end
    # with one; otherwise the first character of the sub-directory name would
    # be cut off (e.g. "data/" + "data/sub" -> "ub").
    prefix_len = len(src_dir)
    if not src_dir.endswith(("/", os.sep)):
        prefix_len += 1

    futures = []
    # NOTE: if we use ProcessPoolExecutor, this can't be used within pytorch workers
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        for root, _, filenames in bf.walk(src_dir):
            assert root.startswith(src_dir)
            # Empty for the top-level root, "sub/dir" for nested ones.
            relative_root = root[prefix_len:]
            for filename in filenames:
                src_file = bf.join(root, filename)
                dst_file = bf.join(dst_dir, relative_root, filename)
                print("copying", src_file, dst_file)
                future = executor.submit(
                    bf.copy, src_file, dst_file, overwrite=overwrite
                )
                futures.append(future)
        # Surface any exception raised inside a worker thread.
        for future in futures:
            future.result()
def test_invalid_paths(base_path):
    """Run the `bf` operations against paths that are expected not to exist.

    For each suffix appended to `base_path`, the test checks which operations
    raise (and with which exception type) and which complete without error.
    Paths ending in "/" are expected to fail with `IsADirectoryError` for
    file-level operations; all others with `FileNotFoundError`.
    """
    wildcard_patterns = ("*", "**")
    for suffix in ("", "/", "//", "/invalid.file", "/invalid/dir/"):
        path = base_path + suffix
        print(path)
        expected_error = (
            IsADirectoryError if path.endswith("/") else FileNotFoundError
        )

        # Globbing the bare path must not raise.
        list(bf.glob(path))
        for pattern in wildcard_patterns:
            if suffix == "":
                # With no suffix the wildcard is appended directly to
                # base_path; an error is tolerated only with this message.
                try:
                    list(bf.glob(path + pattern))
                except bf.Error as err:
                    assert "Wildcards cannot be used" in err.message
            else:
                list(bf.glob(path + pattern))

        with pytest.raises(FileNotFoundError):
            list(bf.listdir(path))
        assert not bf.exists(path)
        assert not bf.isdir(path)
        with pytest.raises(expected_error):
            bf.remove(path)

        if suffix in ("", "/"):
            # rmdir may either succeed or fail with this specific message.
            try:
                bf.rmdir(path)
            except bf.Error as err:
                assert "Cannot delete bucket" in err.message
        else:
            bf.rmdir(path)

        with pytest.raises(NotADirectoryError):
            bf.rmtree(path)
        with pytest.raises(FileNotFoundError):
            bf.stat(path)

        if base_path == AZURE_INVALID_CONTAINER_NO_ACCOUNT:
            with pytest.raises(bf.Error):
                bf.get_url(path)
        else:
            bf.get_url(path)

        with pytest.raises(FileNotFoundError):
            bf.md5(path)
        with pytest.raises(bf.Error):
            bf.makedirs(path)
        # Walking the path must not raise.
        list(bf.walk(path))

        with tempfile.TemporaryDirectory() as scratch_dir:
            local_path = os.path.join(scratch_dir, "test.txt")
            # Copy from the invalid path first (local file does not exist yet).
            with pytest.raises(expected_error):
                bf.copy(path, local_path)
            with open(local_path, "w") as local_file:
                local_file.write("meow")
            # Then copy a real local file to the invalid path.
            with pytest.raises(expected_error):
                bf.copy(local_path, path)

        for streaming in (False, True):
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "rb", streaming=streaming) as reader:
                    reader.read()
            with pytest.raises(expected_error):
                with bf.BlobFile(path, "wb", streaming=streaming) as writer:
                    writer.write(b"meow")
def assert_listing_equal(path, desired):
    """Assert that `bf.walk(path)` yields the entries in `desired`, in any order.

    Both listings are sorted before comparison; on failure the assertion
    message shows the unsorted actual listing and `desired`.
    """
    actual = list(bf.walk(path))
    # ordering of os walk is weird, only compare sorted order
    actual_sorted = sorted(actual)
    desired_sorted = sorted(desired)
    assert actual_sorted == desired_sorted, f"{actual} != {desired}"