示例#1
0
    async def test_mutableasyncchain(self):
        """A twice-extended MutableAsyncChain must yield 0 first, then 1..9."""
        chain = MutableAsyncChain(
            self.g1(),
            as_async_generator(range(3, 7)),
        )
        # Append the remaining sources one after another, keeping their order.
        for make_source in (self.g2, self.g3):
            chain.extend(make_source())

        # Pull the head item by hand, then drain whatever is left.
        head = await chain.__anext__()
        self.assertEqual(head, 0)
        tail = await collect_asyncgen(chain)
        self.assertEqual(tail, list(range(1, 10)))
示例#2
0
 def get_async_iterable(length):
     """Wrap ``range(length)`` with ``as_async_generator`` and return it.

     Stands in for a simple callback whose results arrive with no delay
     between them.
     """
     numbers = range(length)
     return as_async_generator(numbers)
示例#3
0
 async def test_as_async_generator(self):
     """Iterating ``as_async_generator(range(42))`` must give 0..41 in order."""
     expected = list(range(42))
     collected = [item async for item in as_async_generator(range(42))]
     self.assertEqual(collected, expected)
示例#4
0
 async def test_collect_asyncgen(self):
     """``collect_asyncgen`` must gather every generated item into a list."""
     source = as_async_generator(range(42))
     self.assertEqual(await collect_asyncgen(source), list(range(42)))
示例#5
0
async def _async_chain(
        *iterables: Union[Iterable, AsyncIterable]) -> AsyncGenerator:
    """Yield every item of each given iterable, one iterable after another.

    Each argument is passed through ``as_async_generator`` before it is
    iterated; per the annotation, both sync and async iterables are accepted.
    """
    for source in iterables:
        wrapped = as_async_generator(source)
        async for item in wrapped:
            yield item
示例#6
0
    def _process_spider_output(self,
                               response: Response,
                               spider: Spider,
                               result: Union[Iterable, AsyncIterable],
                               start_index: int = 0) -> Deferred:
        """Pass ``result`` through the ``process_spider_output`` methods.

        The methods are read from ``self.methods['process_spider_output']``,
        beginning at ``start_index``. Each method is called with
        ``response``, ``result`` and ``spider`` keyword arguments, and its
        return value becomes the ``result`` handed to the next method. Before
        each call the current result is converted between sync and async
        form when the method requires it (see the inline comments).

        NOTE(review): the body uses ``yield`` and ``return <value>``, so this
        is a generator function. The ``Deferred`` return annotation suggests
        it is wrapped by a decorator such as Twisted's ``inlineCallbacks``
        that is not visible in this excerpt -- confirm.

        :param response: passed unchanged to every method and helper.
        :param spider: passed unchanged to every method and helper.
        :param result: sync or async iterable to feed to the first method.
        :param start_index: index of the first method to run; also used to
            number the methods so that ``method_index + 1`` points at the
            method following the current one.
        :return: when a method raises and ``_process_spider_exception``
            returns something other than a ``Failure``, that value;
            otherwise a ``MutableAsyncChain`` (last result async) or a
            ``MutableChain`` (last result sync) built from the final result
            and the ``recovered`` chain.
        :raises Exception: the exception raised inside the ``try`` block is
            re-raised when ``_process_spider_exception`` returns a
            ``Failure``.
        :raises _InvalidOutput: when a method returns a value for which
            ``_isiterable`` is false.
        """
        # items in this iterable do not need to go through the process_spider_output
        # chain, they went through it already from the process_spider_exception method
        recovered: Union[MutableChain, MutableAsyncChain]
        # Tracks whether the most recent ``result`` is an async iterable; it
        # is refreshed after every method call at the bottom of the loop.
        last_result_is_async = isinstance(result, AsyncIterable)
        # Start ``recovered`` with the same sync/async flavour as ``result``.
        if last_result_is_async:
            recovered = MutableAsyncChain()
        else:
            recovered = MutableChain()

        # There are three cases for the middleware: def foo, async def foo, def foo + async def foo_async.
        # 1. def foo. Sync iterables are passed as is, async ones are downgraded.
        # 2. async def foo. Sync iterables are upgraded, async ones are passed as is.
        # 3. def foo + async def foo_async. Iterables are passed to the respective method.
        # Storing methods and method tuples in the same list is weird but we should be able to roll this back
        # when we drop this compatibility feature.

        # Skip the first ``start_index`` entries; ``enumerate`` is started at
        # the same offset so ``method_index`` is the position in the full list.
        method_list = islice(self.methods['process_spider_output'],
                             start_index, None)
        for method_index, method_pair in enumerate(method_list,
                                                   start=start_index):
            # A ``None`` entry is skipped -- presumably a middleware that
            # does not define this hook; verify against where the list is built.
            if method_pair is None:
                continue
            need_upgrade = need_downgrade = False
            if isinstance(method_pair, tuple):
                # This tuple handling is only needed until _async compatibility methods are removed.
                # Case 3: pick the variant matching the current result, so
                # no conversion is needed.
                method_sync, method_async = method_pair
                method = method_async if last_result_is_async else method_sync
            else:
                method = method_pair
                # Cases 1 and 2: a single method, so the result may have to
                # be converted to the flavour the method expects.
                if not last_result_is_async and isasyncgenfunction(method):
                    need_upgrade = True
                elif last_result_is_async and not isasyncgenfunction(method):
                    need_downgrade = True
            try:
                if need_upgrade:
                    # Iterable -> AsyncIterable
                    result = as_async_generator(result)
                elif need_downgrade:
                    # Log the downgrade warning only once per ``self``,
                    # guarded by the ``downgrade_warning_done`` flag.
                    if not self.downgrade_warning_done:
                        logger.warning(
                            f"Async iterable passed to {method.__qualname__} "
                            f"was downgraded to a non-async one")
                        self.downgrade_warning_done = True
                    assert isinstance(result, AsyncIterable)
                    # AsyncIterable -> Iterable
                    # The deferred is yielded; the value sent back into the
                    # generator replaces ``result``.
                    result = yield deferred_from_coro(collect_asyncgen(result))
                    # Keep ``recovered`` in the same (sync) flavour as the
                    # downgraded result by collecting it as well.
                    if isinstance(recovered, AsyncIterable):
                        recovered_collected = yield deferred_from_coro(
                            collect_asyncgen(recovered))
                        recovered = MutableChain(recovered_collected)
                # might fail directly if the output value is not a generator
                result = method(response=response,
                                result=result,
                                spider=spider)
            except Exception as ex:
                # Hand the failure to the exception chain, starting at the
                # method after the one that failed.
                exception_result = self._process_spider_exception(
                    response, spider, Failure(ex), method_index + 1)
                # A ``Failure`` coming back means the exception was not
                # handled: re-raise the original exception.
                if isinstance(exception_result, Failure):
                    raise
                return exception_result
            if _isiterable(result):
                # ``recovered`` is handed over together with the index of the
                # next method; see ``_evaluate_iterable`` for how they are used.
                result = self._evaluate_iterable(response, spider, result,
                                                 method_index + 1, recovered)
            else:
                # Not iterable: build a message that tells a plain coroutine
                # (``async def`` without ``yield``) apart from any other value.
                if iscoroutine(result):
                    result.close()  # Silence warning about not awaiting
                    msg = (f"{method.__qualname__} must be an asynchronous "
                           f"generator (i.e. use yield)")
                else:
                    msg = (
                        f"{method.__qualname__} must return an iterable, got "
                        f"{type(result)}")
                # Raised outside the ``try`` above, so it is not routed
                # through ``_process_spider_exception`` here.
                raise _InvalidOutput(msg)
            last_result_is_async = isinstance(result, AsyncIterable)

        # Combine the final result with the recovered items, using the chain
        # type that matches the flavour of the last result.
        if last_result_is_async:
            return MutableAsyncChain(result, recovered)
        else:
            return MutableChain(result, recovered)  # type: ignore[arg-type]