示例#1
0
async def test_rmtree(any_dir):
    """Exercise ``bbb.rmtree`` on a file, on a populated tree and on a missing path.

    ``any_dir`` is a fixture-supplied directory path; the first assertion
    requires it to start out empty.
    """
    assert [p async for p in bbb.listdir(any_dir)] == []

    N = 5
    # asyncio.gather is used instead of asyncio.wait: passing bare coroutines
    # to asyncio.wait was deprecated in Python 3.8 and removed in 3.11, and
    # gather re-raises a failed file creation instead of leaving it unobserved.
    await asyncio.gather(
        *(helpers.unsafe_create_file(any_dir / "alpha" / str(i))
          for i in range(N)),
        *(helpers.unsafe_create_file(any_dir / "alpha" / "beta" / str(i))
          for i in range(N)),
    )

    # rmtree on a regular file must be rejected
    with pytest.raises(NotADirectoryError):
        async with bbb.BoostExecutor(N) as e:
            await bbb.rmtree(any_dir / "alpha" / "0", e)

    async with bbb.BoostExecutor(N) as e:
        # alpha holds N files plus the "beta" entry; the whole tree has 2 * N files
        assert len([p async for p in bbb.listdir(any_dir / "alpha")]) == N + 1
        assert len([p async for p in bbb.listtree(any_dir / "alpha")]) == 2 * N
        await bbb.rmtree(any_dir / "alpha", e)

    # removing the tree a second time must report that it is gone
    with pytest.raises(FileNotFoundError):
        async with bbb.BoostExecutor(N) as e:
            await bbb.rmtree(any_dir / "alpha", e)

    with pytest.raises(FileNotFoundError):
        [p async for p in bbb.listdir(any_dir / "alpha")]
    assert [p async for p in bbb.listdir(any_dir)] == []
    assert [p async for p in bbb.listtree(any_dir)] == []
示例#2
0
async def test_map_ordered_identity():
    """Check that ``map_ordered`` over ``identity`` yields the inputs in order.

    Two variants are covered: awaiting the collection inside the executor
    context, and only scheduling it as a task before the context exits.
    """
    count = 20
    expected = list(range(count))

    awaited = []
    async with bbb.BoostExecutor(count // 2) as executor:
        mapped = executor.map_ordered(identity, iter(range(count)))
        await collect(mapped, awaited)
    assert awaited == expected

    scheduled = []
    async with bbb.BoostExecutor(count // 2) as executor:
        mapped = executor.map_ordered(identity, iter(range(count)))
        # fire-and-forget: the assertion below requires the results to be
        # complete by the time the executor context has exited
        asyncio.create_task(collect(mapped, scheduled))
    assert scheduled == expected
示例#3
0
async def test_google_chunking():
    """Writing a stream of three 3-byte chunks to a Google path must raise.

    The expected failure is a ``ValueError`` whose message matches
    "chunked incorrectly".
    """
    with helpers.tmp_google_dir() as google_dir:
        async with bbb.BoostExecutor(10) as executor:
            chunks = [b"abc", b"def", b"ghi"]
            with pytest.raises(ValueError, match="chunked incorrectly"):
                await bbb.write.write_stream(
                    google_dir / "alpha", iter(chunks), executor
                )
0
async def test_map_eager_async_iterator():
    """Check how ``map_ordered`` starts work fed by an ``EagerAsyncIterator``.

    The mapped function records each input it is called with and then blocks
    on a shared future, so ``started`` shows how many calls the executor has
    begun at each checkpoint.
    """
    N = 30

    async def iterator() -> AsyncIterator[int]:
        for i in range(N):
            yield i

    loop = asyncio.get_event_loop()
    # shared gate: identity_wait calls block on this until it is resolved
    future = loop.create_future()
    # inputs for which identity_wait has begun running, in call order
    started = []

    async def identity_wait(x: int) -> int:
        started.append(x)
        # once the gate has been resolved, later calls return immediately
        if not future.done():
            await future
        return x

    results = []
    async with bbb.BoostExecutor(N // 3) as e:
        it = e.map_ordered(identity_wait,
                           bbb.boost.EagerAsyncIterator(iterator()))
        asyncio.create_task(collect(it, results))
        # nothing has run yet because control has not been yielded
        assert started == []
        await pause()
        assert started == [0]
        # BoostExecutor currently sleeps for a minimum of 0.01 seconds if the underlying async
        # iterator is not ready
        await asyncio.sleep(0.02)
        # with the gate still closed, exactly N // 3 calls have been started
        assert started == list(range(N // 3))
        future.set_result(None)
        await asyncio.sleep(0.02)
        # with the gate open, every input has been started
        assert started == list(range(N))
    assert results == list(range(N))
示例#5
0
async def test_map_ordered_single():
    """Step through ``map_ordered`` on a ``BoostExecutor`` with concurrency 1.

    Work is driven manually: the mapped function comes from
    ``get_futures_fn(futures)`` and the test resolves ``futures[i]`` itself.
    NOTE(review): the assertions imply ``futures`` holds one entry per
    in-flight call and that the entry disappears once the call finishes;
    confirm against ``get_futures_fn``.
    """
    futures = {}
    async with bbb.BoostExecutor(1) as e:
        # peeks at the semaphore's private counter, which is 0 for a concurrency of 1
        assert e.semaphore._value == 0  # type: ignore
        it = e.map_ordered(get_futures_fn(futures), iter([0, 1])).__aiter__()
        assert not futures
        await pause()
        # nothing is started until a result is actually requested
        assert e.semaphore._value == 0  # type: ignore
        assert set(futures) == set()

        # requesting the first result is what starts work on input 0
        next_task = asyncio.create_task(it.__anext__())
        await pause()
        assert set(futures) == {0}

        assert not next_task.done()
        futures[0].set_result(None)
        # resolving the future is not observed until control is yielded
        assert not next_task.done()
        await pause()
        # one might expect a task to be scheduled here, since we have one unused concurrency and
        # boostedblob is generally eager. however, in the single concurrency case, the executor
        # doesn't run to avoid deadlock. TODO(shantanu): consider changing this
        assert set(futures) == set()
        assert next_task.done()
        assert (await next_task) == 0

        # the second input is likewise only started once its result is requested
        next_task = asyncio.create_task(it.__anext__())
        await pause()
        assert not next_task.done()
        futures[1].set_result(None)
        assert (await next_task) == 1
        assert not futures
示例#6
0
async def cp(
    srcs: List[str], dst: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY
) -> None:
    """Copy every path (or glob) in ``srcs`` to ``dst``.

    Args:
        srcs: Source paths; entries recognised by ``is_glob`` are expanded.
        dst: Destination path. It must be directory-like when more than one
            source is given or when a source is a glob.
        quiet: When true, do not print the path of each copied file.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.

    Raises:
        NotADirectoryError: If several sources or a glob are copied to a
            destination that is not directory-like.
    """
    dst_obj = bbb.BasePath.from_str(dst)
    dst_is_dirlike = dst_obj.is_directory_like() or await bbb.isdir(dst_obj)

    async with bbb.BoostExecutor(concurrency) as executor:
        if not dst_is_dirlike and len(srcs) > 1:
            raise NotADirectoryError(dst_obj)

        async def copy_wrapper(src: str) -> None:
            src_obj = bbb.BasePath.from_str(src)

            if not is_glob(src):
                # single file: copy into the directory, or onto dst itself
                if dst_is_dirlike:
                    target = dst_obj / src_obj.name
                else:
                    target = dst_obj
                await bbb.copyfile(src_obj, target, executor, overwrite=True)
                if not quiet:
                    print(src_obj)
                return

            # glob: matches can only be copied into a directory
            if not dst_is_dirlike:
                raise NotADirectoryError(dst_obj)
            async for copied in bbb.copying.copyglob_iterator(src_obj, dst_obj, executor):
                if not quiet:
                    print(copied)

        await bbb.boost.consume(executor.map_unordered(copy_wrapper, iter(srcs)))
示例#7
0
async def test_map_ordered():
    """Check that ``map_ordered`` yields results in input order.

    Runs four inputs through an executor with concurrency 2 and resolves the
    per-input futures by hand, out of order, to show that a finished later
    result is held back until the earlier one is done.
    NOTE(review): ``futures`` is assumed to be populated per in-flight call by
    ``get_futures_fn``; confirm against that helper.
    """
    futures = {}
    results = []
    async with bbb.BoostExecutor(2) as e:
        # peeks at the semaphore's private counter, which is 1 for a concurrency of 2
        assert e.semaphore._value == 1  # type: ignore
        it = e.map_ordered(get_futures_fn(futures), iter(range(4)))
        asyncio.create_task(collect(it, results))
        await pause()
        assert e.semaphore._value == 0  # type: ignore
        # two inputs are in flight at once
        assert set(futures) == {0, 1}

        # finishing input 1 first must not produce any output yet
        futures[1].set_result(None)
        await pause()
        assert results == []

        # once input 0 finishes, both results are released in order
        futures[0].set_result(None)
        await pause()
        assert results == [0, 1]
        assert set(futures) == {2, 3}

        futures[2].set_result(None)
        await pause()
        assert results == [0, 1, 2]

        futures[3].set_result(None)
        await pause()
        assert results == [0, 1, 2, 3]
示例#8
0
async def test_copytree(any_dir, other_any_dir):
    """Copy a nested tree with ``bbb.copytree`` and compare both listings.

    ``any_dir`` is populated with ten files spread over several nested
    directories, copied into ``other_any_dir``, and the sorted relative
    listings of source and destination must then be equal.
    """
    # asyncio.gather is used instead of asyncio.wait: passing bare coroutines
    # to asyncio.wait was deprecated in Python 3.8 and removed in 3.11, and
    # gather re-raises a failed file creation instead of leaving it unobserved.
    await asyncio.gather(
        helpers.unsafe_create_file(any_dir / "f1"),
        helpers.unsafe_create_file(any_dir / "f2"),
        helpers.unsafe_create_file(any_dir / "f3"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f4"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f5"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f6"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f7"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "gamma" /
                                   "f8"),
        helpers.unsafe_create_file(any_dir / "delta" / "f9"),
        helpers.unsafe_create_file(any_dir / "delta" / "epsilon" / "f10"),
    )

    async with bbb.BoostExecutor(100) as e:
        # NOTE(review): on Python < 3.8 the local destination directory is
        # removed first, presumably because copytree cannot copy into an
        # existing local directory there; confirm against bbb.copytree.
        if sys.version_info < (3, 8) and isinstance(other_any_dir, LocalPath):
            os.rmdir(other_any_dir)
        await bbb.copytree(any_dir, other_any_dir, e)

    async def _listtree(d, base):
        """Return the sorted paths under ``d``, each relative to ``base``."""
        return sorted([p.relative_to(base) async for p in bbb.listtree(d)])

    assert await _listtree(any_dir,
                           any_dir) == await _listtree(other_any_dir,
                                                       other_any_dir)
示例#9
0
async def test_copy(any_dir, other_any_dir):
    """Copy a multi-chunk file and a small file and verify their contents.

    The medium file is 16 chunks long and is copied with ``chunk_size``
    configured to ``MIN_CHUNK_SIZE``. The small file is copied with its size
    passed explicitly via ``size=``.
    """
    # local import so this block does not rely on the module-level import list
    import os

    MIN_CHUNK_SIZE = 256 * 1024

    # os.urandom replaces reading /dev/random directly: that device does not
    # exist on Windows and can block on older Linux kernels when the entropy
    # pool runs low, which would stall the test on a 4 MiB read.
    contents_medium = os.urandom(16 * MIN_CHUNK_SIZE)
    helpers.create_file(any_dir / "original_medium", contents_medium)

    contents_known_small = b"abcdefgh"
    helpers.create_file(any_dir / "original_small", contents_known_small)

    async with bbb.BoostExecutor(100) as e:
        with bbb.globals.configure(chunk_size=MIN_CHUNK_SIZE):
            await bbb.copyfile(any_dir / "original_medium",
                               other_any_dir / "copied_medium", e)
            with blobfile.BlobFile(str(other_any_dir / "copied_medium"),
                                   "rb") as f:
                assert f.read() == contents_medium

        await bbb.copyfile(
            any_dir / "original_small",
            other_any_dir / "copied_small",
            e,
            size=len(contents_known_small),
        )
        with blobfile.BlobFile(str(other_any_dir / "copied_small"), "rb") as f:
            assert f.read() == contents_known_small
示例#10
0
async def test_read_write(any_dir):
    """Round-trip an empty stream through ``write_stream`` and ``read_stream``.

    Reading the written file back must not yield a single chunk.
    """
    async with bbb.BoostExecutor(10) as executor:
        # write an empty stream, then read it back
        no_chunks = iter([])
        await bbb.write.write_stream(any_dir / "empty", no_chunks, executor)
        read_back = await bbb.read.read_stream(any_dir / "empty", executor)
        async for _chunk in bbb.boost.iter_underlying(read_back):
            # any yielded chunk means the file was not empty
            raise AssertionError
示例#11
0
async def cptree(
    src: str, dst: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY
) -> None:
    """Copy the tree at ``src`` to ``dst``.

    Args:
        src: Source path, parsed with ``bbb.BasePath.from_str``.
        dst: Destination, handed to ``copytree_iterator`` as given.
        quiet: When true, do not print each path yielded by the copy.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    source = bbb.BasePath.from_str(src)
    async with bbb.BoostExecutor(concurrency) as executor:
        copied_paths = bbb.copying.copytree_iterator(source, dst, executor)
        async for copied in copied_paths:
            if quiet:
                continue
            print(copied)
示例#12
0
async def test_boost_executor_shutdown():
    """Check that no extra tasks remain after an executor context exits.

    Maps are started but never consumed; once each context has exited, the
    only task left on the loop must be this test itself.
    """

    def live_task_names():
        # names of the coroutines behind every task currently on the loop
        return {get_coro(task).__name__ for task in asyncio.all_tasks()}

    only_this_test = {"test_boost_executor_shutdown"}

    async with bbb.BoostExecutor(1) as executor:
        executor.map_ordered(asyncio.sleep, iter([0]))

    async with bbb.BoostExecutor(4) as executor:
        delays = (random.random() * 0.1 for _ in range(10))
        executor.map_ordered(asyncio.sleep, delays)
    assert live_task_names() == only_this_test

    async with bbb.BoostExecutor(4) as executor:
        delays = (random.random() * 0.1 for _ in range(10))
        executor.map_unordered(asyncio.sleep, delays)
    assert live_task_names() == only_this_test
示例#13
0
async def test_map_unordered_random_sleep():
    """``map_unordered`` must return every input when tasks finish at random times.

    Completion order is not asserted, only the set of results.
    """
    N = 20

    async def random_sleep(value):
        # sleep for up to 0.3 seconds, then echo the input back
        delay = random.random() * 0.3
        await asyncio.sleep(delay)
        return value

    collected = []
    async with bbb.BoostExecutor(N // 2) as executor:
        mapped = executor.map_unordered(random_sleep, iter(range(N)))
        await collect(mapped, collected)
    assert sorted(collected) == list(range(N))
示例#14
0
async def test_composition_nested_unordered():
    """Run ``map_unordered`` jobs that each start an inner ``map_unordered``.

    Every outer job ``n`` returns a list of ``n`` items, so the sorted result
    lengths must be ``0 .. N - 1``.
    """
    N = 10
    gathered = []
    async with bbb.BoostExecutor(3) as executor:

        async def work_spawner(n):
            await pause()
            # nested use of the same executor from inside one of its own jobs
            inner = executor.map_unordered(identity, iter(range(n)))
            return [item async for item in inner]

        outer = executor.map_unordered(work_spawner, iter(range(N)))
        asyncio.create_task(collect(outer, gathered))
    lengths = sorted(len(batch) for batch in gathered)
    assert lengths == list(range(N))
示例#15
0
async def test_boost_executor_exception():
    """An exception raised inside the executor context must propagate.

    Inside the context a second task named ``run`` exists next to the test;
    after the exception and one pause, only the test's own task may remain.
    """

    def live_task_names():
        # names of the coroutines behind every task currently on the loop
        return {get_coro(task).__name__ for task in asyncio.all_tasks()}

    with pytest.raises(ValueError):
        async with bbb.BoostExecutor(10):
            expected_inside = {"test_boost_executor_exception", "run"}
            assert live_task_names() == expected_inside
            assert len(asyncio.all_tasks()) > 1
            raise ValueError

    await pause()
    assert live_task_names() == {"test_boost_executor_exception"}
示例#16
0
async def test_map_unordered_many_reversed():
    """``map_unordered`` must yield results in completion order.

    All ``N`` jobs are in flight at once (concurrency ``2 * N``) and are
    completed from last to first, so the results come back reversed.
    """
    N = 500
    pending = {}
    completed = []
    async with bbb.BoostExecutor(N * 2) as executor:
        mapped = executor.map_unordered(get_futures_fn(pending), iter(range(N)))
        asyncio.create_task(collect(mapped, completed))
        # take a couple dozen pauses to get everything scheduled
        while N - 1 not in pending:
            await pause()
        descending = list(range(N - 1, -1, -1))
        for index in descending:
            pending[index].set_result(None)
            await pause()
        assert completed == descending
示例#17
0
async def edit(path: str) -> None:
    """Open ``path`` in the user's editor and upload it again if it changed.

    The file is copied into a temporary directory, the editor is run on that
    local copy, and the copy is written back over ``path`` only when
    ``bbb.stat`` of the local file differs before and after editing.

    Args:
        path: Path of the file to edit, parsed with ``bbb.BasePath.from_str``.

    Raises:
        subprocess.CalledProcessError: If the editor exits with a non-zero status.
    """
    # local import so this block does not rely on the module-level import list
    import shlex

    # $EDITOR is conventionally a command line and may carry arguments
    # ("emacs -nw", "code --wait"), so it is split shell-style instead of being
    # used as one executable name. An unset or blank value falls back to "vi".
    # An editor path that contains spaces must be quoted in $EDITOR.
    editor_cmd = shlex.split(os.environ.get("EDITOR", "")) or ["vi"]

    with tempfile.TemporaryDirectory() as tmpdir:
        path_obj = bbb.BasePath.from_str(path)
        local = bbb.LocalPath(tmpdir) / path_obj.name
        async with bbb.BoostExecutor(DEFAULT_CONCURRENCY) as executor:
            await bbb.copyfile(path_obj, local, executor)
            pre_stat = await bbb.stat(local)
            subprocess.check_call([*editor_cmd, local])
            post_stat = await bbb.stat(local)
            if pre_stat != post_stat:
                await bbb.copyfile(local, path_obj, executor, overwrite=True)
                print(f"Updated {path_obj}")
            else:
                print("File unmodified, skipping reupload...")
示例#18
0
async def test_map_unordered_many_low_concurrency():
    """``map_unordered`` with concurrency 10 must not stall behind one slow job.

    Input 0 is left unfinished while inputs 1..N-1 are completed one at a
    time; each completion must add exactly one result, and input 0 arrives
    last.
    NOTE(review): this relies on ``futures[i]`` already existing whenever the
    loop reaches ``i``, i.e. on the executor starting new work as earlier
    jobs finish; confirm against ``get_futures_fn`` and the executor.
    """
    N = 500
    futures = {}
    results = []
    async with bbb.BoostExecutor(10) as e:
        it = e.map_unordered(get_futures_fn(futures), iter(range(N)))
        asyncio.create_task(collect(it, results))
        await pause()
        for i in range(1, N):
            futures[i].set_result(None)
            await pause()
            # one new result per completed job, even though input 0 is still pending
            assert len(results) == i
        futures[0].set_result(None)
        await pause()
        assert results == list(range(1, N)) + [0]
示例#19
0
async def rm(paths: List[str], quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Remove every path (or glob) in ``paths``.

    Args:
        paths: Paths to remove; entries recognised by ``is_glob`` are removed
            through ``bbb.delete.glob_remove``.
        quiet: When true, do not print each removed path.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    async with bbb.BoostExecutor(concurrency) as executor:

        async def remove_wrapper(path: str) -> None:
            target = bbb.BasePath.from_str(path)

            if not is_glob(path):
                # plain path: remove it directly and report the path itself
                await bbb.remove(target)
                if not quiet:
                    print(target)
                return

            # glob: report every path yielded by the glob removal
            async for removed in bbb.delete.glob_remove(target, executor):
                if not quiet:
                    print(removed)

        await bbb.boost.consume(executor.map_unordered(remove_wrapper, iter(paths)))
示例#20
0
async def rmtree(path: str, quiet: bool = False, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Remove the tree at ``path``, or everything matching it if it is a glob.

    Args:
        path: Path or glob to remove.
        quiet: When true, do not print removed paths. Nothing is printed for
            paths that are neither globs nor ``bbb.CloudPath`` instances.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    path_obj = bbb.BasePath.from_str(path)
    async with bbb.BoostExecutor(concurrency) as executor:
        if is_glob(path):
            # this fails if the glob matches a directory, which is somewhat
            # against the spirit of rmtree; recursive wildcards are probably
            # the safest way to support that
            removed_paths = bbb.delete.glob_remove(path_obj, executor)
        elif isinstance(path_obj, bbb.CloudPath):
            removed_paths = bbb.delete.rmtree_iterator(path_obj, executor)
        else:
            # no per-path iterator is used here, so nothing is printed
            await bbb.rmtree(path_obj, executor)
            return

        async for removed in removed_paths:
            if not quiet:
                print(removed)
示例#21
0
async def sync(
    src: str,
    dst: str,
    delete: bool = False,
    quiet: bool = False,
    concurrency: int = DEFAULT_CONCURRENCY,
) -> None:
    """Sync the directory ``src`` to ``dst`` using ``bbb.sync``.

    Args:
        src: Source directory.
        dst: Destination path.
        delete: Forwarded to ``bbb.sync`` as ``delete=``.
        quiet: When true, do not print each path yielded by the sync.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.

    Raises:
        ValueError: If ``src`` is neither directory-like nor an existing directory.
    """
    src_obj = bbb.BasePath.from_str(src)
    dst_obj = bbb.BasePath.from_str(dst)

    # the isdir lookup is only awaited when the path is not directory-like
    if not src_obj.is_directory_like() and not await bbb.isdir(src_obj):
        raise ValueError(f"{src_obj} is not a directory")

    async with bbb.BoostExecutor(concurrency) as executor:
        synced_paths = bbb.sync(src_obj, dst_obj, executor, delete=delete)
        async for synced in synced_paths:
            if quiet:
                continue
            print(synced)
示例#22
0
async def test_composition_ordered_ordered():
    """Chain one ``map_ordered`` into another on the same executor.

    Pending futures of the inner and outer stage are completed in a random
    interleaving; the final results must still come out in input order.
    NOTE(review): the loop terminates only if finished calls are removed from
    the futures dicts; confirm against ``get_futures_fn``.
    """
    N = 500
    inner_futures = {}
    outer_futures = {}
    results = []
    async with bbb.BoostExecutor(N // 5) as e:
        inner_it = e.map_ordered(get_futures_fn(inner_futures), iter(range(N)))
        # the outer stage consumes the inner stage's output as its input
        outer_it = e.map_ordered(get_futures_fn(outer_futures), inner_it)
        asyncio.create_task(collect(outer_it, results))
        await pause()

        while outer_futures or inner_futures:
            # pick one of the stages that currently has pending work...
            futures = random.choice(
                [fs for fs in (outer_futures, inner_futures) if fs])
            # ...and complete the first pending future in that stage's dict
            futures[next(iter(futures))].set_result(None)
            await pause()
        assert results == list(range(N))
示例#23
0
async def test_sync(any_dir, other_any_dir):
    """Sync a tree, modify the source, then check the planned actions and re-sync.

    After the first sync both listings must match. The source is then changed
    (one file removed, one rewritten with the same size, one rewritten with a
    different size). ``sync_action_iterator`` must report exactly those three
    actions, and a second sync with ``delete=True`` must make the listings
    match again.
    """
    # asyncio.gather is used instead of asyncio.wait: passing bare coroutines
    # to asyncio.wait was deprecated in Python 3.8 and removed in 3.11, and
    # gather re-raises a failed setup step instead of leaving it unobserved.
    await asyncio.gather(
        helpers.unsafe_create_file(any_dir / "f1", b"samesize"),
        helpers.unsafe_create_file(any_dir / "f2"),
        helpers.unsafe_create_file(any_dir / "f3"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f4"),
        helpers.unsafe_create_file(any_dir / "alpha" / "f5"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f6"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "f7"),
        helpers.unsafe_create_file(any_dir / "alpha" / "beta" / "gamma" /
                                   "f8"),
        helpers.unsafe_create_file(any_dir / "delta" / "f9", b"samesize"),
        helpers.unsafe_create_file(any_dir / "delta" / "epsilon" / "f10"),
    )

    async def _listtree(d, base):
        """Return the sorted paths under ``d``, each relative to ``base``."""
        return sorted([p.relative_to(base) async for p in bbb.listtree(d)])

    async with bbb.BoostExecutor(100) as e:
        # sleep since if we run sync too soon, we run into limits of mtime accuracy and end up
        # syncing more than what we need...
        await asyncio.sleep(1)
        await bbb.boost.consume(bbb.sync(any_dir, other_any_dir, e))
        assert await _listtree(any_dir, any_dir) == await _listtree(
            other_any_dir, other_any_dir)

        # change the source: drop f2, rewrite f1 with 8 different bytes and
        # rewrite delta/f9 with 13 bytes
        await asyncio.gather(
            cast(Any, bbb.remove(any_dir / "f2")),
            helpers.unsafe_create_file(any_dir / "f1", b"sizesame"),
            helpers.unsafe_create_file(any_dir / "delta" / "f9",
                                       b"differentsize"),
        )

        actions = sorted(await bbb.syncing.sync_action_iterator(
            any_dir, other_any_dir),
                         key=lambda x: x.relpath)
        # the second CopyAction argument equals the byte length of the new contents
        assert actions == [
            bbb.syncing.CopyAction("delta/f9", 13),
            bbb.syncing.CopyAction("f1", 8),
            bbb.syncing.DeleteAction("f2"),
        ]
        await bbb.boost.consume(
            bbb.sync(any_dir, other_any_dir, e, delete=True))
        assert await _listtree(any_dir, any_dir) == await _listtree(
            other_any_dir, other_any_dir)
示例#24
0
async def test_map_multiple():
    """Run one unordered and two ordered maps concurrently on one executor.

    All three must produce the same values; the unordered output is sorted
    before the comparison.
    """
    N = 20
    unordered = []
    ordered_a = []
    ordered_b = []
    async with bbb.BoostExecutor(N // 2) as executor:
        unordered_task = asyncio.create_task(
            collect(executor.map_unordered(identity, iter(range(N))), unordered)
        )
        ordered_a_task = asyncio.create_task(
            collect(executor.map_ordered(identity, iter(range(N))), ordered_a)
        )
        ordered_b_task = asyncio.create_task(
            collect(executor.map_ordered(identity, iter(range(N))), ordered_b)
        )

        await asyncio.gather(unordered_task, ordered_a_task, ordered_b_task)
        assert sorted(unordered) == ordered_a == ordered_b
示例#25
0
async def test_map_unordered_many_random():
    """``map_unordered`` must return every input when jobs finish in random order.

    All ``N`` jobs are in flight at once (concurrency ``2 * N``) and are
    completed in shuffled order, yielding control only some of the time.
    """
    N = 500
    pending = {}
    collected = []
    async with bbb.BoostExecutor(N * 2) as executor:
        mapped = executor.map_unordered(get_futures_fn(pending), iter(range(N)))
        collector = asyncio.create_task(collect(mapped, collected))
        # take a couple dozen pauses to get everything scheduled
        while N - 1 not in pending:
            await pause()
        completion_order = list(reversed(range(N)))
        random.shuffle(completion_order)
        for index in completion_order:
            pending[index].set_result(None)
            # yield to the event loop after roughly 30% of the completions
            if random.random() < 0.3:
                await pause()
        await collector
        assert sorted(collected) == list(range(N))
示例#26
0
async def test_copyglob():
    """Copy the files matching ``f*`` between two temporary Azure directories.

    Only ``f1`` and ``f2`` may be reported as copied and only they may appear
    in the destination; ``g3`` must be left out.
    """
    with helpers.tmp_azure_dir() as dir1, helpers.tmp_azure_dir() as dir2:
        # asyncio.gather is used instead of asyncio.wait: passing bare
        # coroutines to asyncio.wait was deprecated in Python 3.8 and removed
        # in 3.11, and gather re-raises a failed file creation instead of
        # leaving it unobserved.
        await asyncio.gather(
            helpers.unsafe_create_file(dir1 / "f1"),
            helpers.unsafe_create_file(dir1 / "f2"),
            helpers.unsafe_create_file(dir1 / "g3"),
        )

        async with bbb.BoostExecutor(100) as e:
            copied = [
                p async for p in bbb.copying.copyglob_iterator(
                    dir1 / "f*", dir2, e)
            ]
            # the iterator yields source paths, so they are relative to dir1
            assert sorted([p.relative_to(dir1)
                           for p in copied]) == ["f1", "f2"]
            contents = sorted(
                [p.relative_to(dir2) async for p in bbb.listtree(dir2)])
            assert contents == ["f1", "f2"]
示例#27
0
async def test_map_ordered_many_low_concurrency():
    """``map_ordered`` must emit nothing while the first input is unfinished.

    Inputs 1..N-1 are completed while input 0 stays pending, so no result may
    be produced. Once input 0 completes, all ``N`` results must arrive in
    input order.
    NOTE(review): pre-creating resolved futures only works if
    ``get_futures_fn`` reuses an entry that already exists in ``futures``;
    confirm against that helper.
    """
    N = 500
    futures = {}
    results = []
    loop = asyncio.get_event_loop()
    async with bbb.BoostExecutor(N // 50) as e:
        it = e.map_ordered(get_futures_fn(futures), iter(range(N)))
        asyncio.create_task(collect(it, results))
        await pause()
        for i in range(1, N):
            # create the future if it doesn't exist, due to backpressure
            if i not in futures:
                # only inputs beyond N // 25 are expected to be unscheduled
                assert i > N // 25
                futures[i] = loop.create_future()
            futures[i].set_result(None)
            await pause()
        # ordered output is blocked behind the still-pending input 0
        assert results == []
        futures[0].set_result(None)
        await asyncio.sleep(0.1)  # wait for backpressure to subside
        assert results == list(range(N))
示例#28
0
async def test_map_eager_async_iterator_slow():
    """``map_ordered`` over an ``EagerAsyncIterator`` whose source is slow.

    The source iterator only yields item ``i`` after ``futures[i]`` has been
    resolved; the test resolves them one by one and expects all ``N`` results
    in order once the executor context has exited.
    """
    N = 30

    loop = asyncio.get_event_loop()
    # one gate per item: the iterator waits on futures[i] before yielding i
    futures = [loop.create_future() for _ in range(N)]

    async def iterator() -> AsyncIterator[int]:
        for i in range(N):
            await futures[i]
            yield i

    results = []
    async with bbb.BoostExecutor(N) as e:
        it = e.map_ordered(identity, bbb.boost.EagerAsyncIterator(iterator()))
        asyncio.create_task(collect(it, results))
        await pause()

        # release the items one at a time, yielding control after each
        for i in range(N):
            futures[i].set_result(None)
            await pause()
    assert results == list(range(N))
示例#29
0
async def test_composition_ordered_unordered():
    """Feed a ``map_unordered`` stage into a ``map_ordered`` stage.

    The inner (unordered) jobs are completed from last to first. Each
    completion must hand exactly that item to the outer stage, and the final
    results must follow the inner completion order, i.e. reversed input order.
    NOTE(review): the ``set(outer_futures)`` assertions assume finished calls
    are removed from the dict; confirm against ``get_futures_fn``.
    """
    N = 500
    inner_futures = {}
    outer_futures = {}
    results = []
    async with bbb.BoostExecutor(N * 2) as e:
        inner_it = e.map_unordered(get_futures_fn(inner_futures),
                                   iter(range(N)))
        # the outer stage preserves the order in which the inner stage yields
        outer_it = e.map_ordered(get_futures_fn(outer_futures), inner_it)
        asyncio.create_task(collect(outer_it, results))
        while not N - 1 in inner_futures:
            await pause(
            )  # take a couple dozen pauses to get everything scheduled
        for i in reversed(range(N)):
            # the previous item's outer future was resolved without a pause,
            # so it may still be the only entry registered here
            if outer_futures:
                assert set(outer_futures) == {i + 1}
            inner_futures[i].set_result(None)
            await pause()
            # the item just released by the inner stage is now the only outer job
            assert set(outer_futures) == {i}
            outer_futures[i].set_result(None)
        await pause()
        assert results == list(reversed(range(N)))
示例#30
0
async def cat(path: str, concurrency: int = DEFAULT_CONCURRENCY) -> None:
    """Stream the contents of ``path`` to standard output as raw bytes.

    Args:
        path: Path of the file to read.
        concurrency: Concurrency passed to ``bbb.BoostExecutor``.
    """
    async with bbb.BoostExecutor(concurrency) as executor:
        event_loop = asyncio.get_event_loop()
        byte_stream = await bbb.read.read_stream(path, executor)
        async for chunk in bbb.boost.iter_underlying(byte_stream):
            # the stdout write runs in the loop's default executor rather than inline
            await event_loop.run_in_executor(None, sys.stdout.buffer.write, chunk)