Пример #1
0
async def test_wait_if_too_many_ready_tasks():
    """
    With ``max_tasks=1``, adding a second task must block until the first one
    has been both finished and collected through ``ready_tasks()``.
    """
    preparer = OrderedTaskPreparation(
        OnePrereq,
        identity,
        lambda x: x - 1,
        max_tasks=1,
    )
    preparer.set_finished_dependency(3)
    preparer.register_tasks((4, ))

    # Task 4 is not finished yet, so adding task 5 hangs until the wait times out
    with pytest.raises(asyncio.TimeoutError):
        await wait(preparer.wait_add_tasks((5, )))

    preparer.finish_prereq(OnePrereq.ONE, (4, ))

    # Task 4 is finished but has not been picked up yet, so adding still hangs
    with pytest.raises(asyncio.TimeoutError):
        await wait(preparer.wait_add_tasks((5, )))

    first_batch = await wait(preparer.ready_tasks())
    assert first_batch == (4, )

    # Task 4 was picked up, so there is room to add task 5 now
    await wait(preparer.wait_add_tasks((5, )))

    # Finish task 5 and pick it up as well
    preparer.finish_prereq(OnePrereq.ONE, (5, ))
    second_batch = await wait(preparer.ready_tasks())
    assert second_batch == (5, )
async def test_random_pruning(ignore_duplicates, recomplete_idx, batch_size,
                              task_series, prune_depth):
    """
    Register ``task_series`` in batches of ``batch_size`` on a preparer that
    prunes at ``prune_depth``, collecting ready tasks whenever there are any.

    The test only checks that no unexpected exception escapes: a
    ``DuplicateTasks`` error is tolerated when duplicates are not being
    ignored, and re-raised when they are supposed to be ignored.
    """
    preparer = OrderedTaskPreparation(
        NoPrerequisites,
        identity,
        lambda x: x - 1,
        accept_dangling_tasks=True,
        max_depth=prune_depth,
    )
    preparer.set_finished_dependency(task_series[0])

    batches = partition_all(batch_size, task_series)
    for batch_idx, batch in enumerate(batches):
        # when duplicates are not ignored, collapse repeats within the batch up front
        to_register = batch if ignore_duplicates else set(batch)

        if batch_idx == recomplete_idx:
            # mark the parent of this batch's first task as finished, unless it is still tracked
            parent_of_first = batch[0] - 1
            if parent_of_first not in preparer._tasks:
                preparer.set_finished_dependency(parent_of_first)

        try:
            preparer.register_tasks(to_register, ignore_duplicates=ignore_duplicates)
        except DuplicateTasks:
            if not ignore_duplicates:
                # tolerated in this mode: skip the rest of this batch
                continue
            # the error must never surface when duplicates are being ignored
            raise

        if preparer.has_ready_tasks():
            await wait(preparer.ready_tasks())
Пример #3
0
async def test_simplest_path():
    """
    Follow one task with two prerequisites from registration to collection,
    checking the preparer's counters after every step.
    """
    preparer = OrderedTaskPreparation(TwoPrereqs, identity, lambda x: x - 1)
    preparer.set_finished_dependency(3)

    def assert_counts(unready, ready, total):
        # compare all three counters against the expected values
        assert preparer.num_unready() == unready
        assert preparer.num_ready() == ready
        assert preparer.num_tasks() == total

    # the finished dependency itself is not counted as a task
    assert_counts(0, 0, 0)

    preparer.register_tasks((4, ))
    assert_counts(1, 0, 1)

    # one of two prerequisites done: the task is still unready
    preparer.finish_prereq(TwoPrereqs.PREREQ1, (4, ))
    assert_counts(1, 0, 1)

    # both prerequisites done: the task moves to ready
    preparer.finish_prereq(TwoPrereqs.PREREQ2, (4, ))
    assert_counts(0, 1, 1)

    collected = await wait(preparer.ready_tasks())

    # once collected, the task is no longer counted at all
    assert_counts(0, 0, 0)

    assert collected == (4, )
Пример #4
0
async def test_no_prereq_tasks():
    """Tasks without prerequisites are ready as soon as they are registered in order."""
    preparer = OrderedTaskPreparation(NoPrerequisites, identity, lambda x: x - 1)
    preparer.set_finished_dependency(1)
    preparer.register_tasks((2, 3))

    # nothing has to be finished explicitly: in-order tasks are ready right away
    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (2, 3)
Пример #5
0
async def test_simplest_path():
    """A single task becomes ready once both of its prerequisites are finished."""
    preparer = OrderedTaskPreparation(TwoPrereqs, identity, lambda x: x - 1)
    preparer.set_finished_dependency(3)
    preparer.register_tasks((4, ))

    # complete the prerequisites one after the other
    for prereq in (TwoPrereqs.Prereq1, TwoPrereqs.Prereq2):
        preparer.finish_prereq(prereq, (4, ))

    collected = await wait(preparer.ready_tasks())
    assert collected == (4, )
Пример #6
0
async def test_wait_forever():
    """
    With no tasks registered, waiting on ``ready_tasks()`` must never complete.

    The test passes when the ``wait`` helper gives up with
    ``asyncio.TimeoutError`` and fails if any batch is returned.
    """
    ti = OrderedTaskPreparation(OnePrereq, identity, lambda x: x - 1)
    try:
        finished = await wait(ti.ready_tasks())
    except asyncio.TimeoutError:
        pass
    else:
        # raise explicitly: ``assert False`` is stripped when running with ``python -O``
        raise AssertionError(f"No steps should complete, but got {finished!r}")
Пример #7
0
async def test_register_out_of_order():
    """
    Dangling tasks (4, 5) must not become ready until the tasks linking them
    to the finished dependency (2, 3) are registered and finished too.
    """
    ti = OrderedTaskPreparation(OnePrereq, identity, lambda x: x - 1, accept_dangling_tasks=True)
    ti.set_finished_dependency(1)
    ti.register_tasks((4, 5))
    ti.finish_prereq(OnePrereq.one, (4, 5))

    # 4 and 5 are finished, but tasks 2 and 3 have not been registered yet
    try:
        finished = await wait(ti.ready_tasks())
    except asyncio.TimeoutError:
        pass
    else:
        # raise explicitly: ``assert False`` is stripped when running with ``python -O``
        raise AssertionError(f"No steps should be ready, but got {finished!r}")

    # filling the gap releases the whole chain, in dependency order
    ti.register_tasks((2, 3))
    ti.finish_prereq(OnePrereq.one, (2, 3))
    finished = await wait(ti.ready_tasks())
    assert finished == (2, 3, 4, 5)
Пример #8
0
async def test_two_steps_simultaneous_complete():
    """Two tasks finished before any collection are returned together in one batch."""
    preparer = OrderedTaskPreparation(OnePrereq, identity, lambda x: x - 1)
    preparer.set_finished_dependency(3)
    preparer.register_tasks((4, 5))

    # finish each task with its own call, before asking for ready tasks
    for task in (4, 5):
        preparer.finish_prereq(OnePrereq.one, (task, ))

    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (4, 5)
Пример #9
0
async def test_pruning_consecutive_finished_deps():
    """
    Two consecutive finished dependencies (3, 4) are both tracked at first;
    after two batches have been collected with ``max_depth=2``, 3 is pruned
    while 4 is still tracked.
    """
    preparer = OrderedTaskPreparation(NoPrerequisites, identity, lambda x: x - 1, max_depth=2)
    for finished_dep in (3, 4):
        preparer.set_finished_dependency(finished_dep)
    preparer.register_tasks((5, 6))

    for finished_dep in (3, 4):
        assert finished_dep in preparer._tasks

    # collect the ready tasks through 6, then signal that they are done being
    # processed by collecting the next batch (7): that is what triggers pruning
    first_batch = await wait(preparer.ready_tasks())
    assert first_batch == (5, 6)
    preparer.register_tasks((7, ))
    second_batch = await wait(preparer.ready_tasks())
    assert second_batch == (7, )

    assert 3 not in preparer._tasks
    assert 4 in preparer._tasks
Пример #10
0
async def test_ignore_duplicates():
    """Re-registering known tasks with ``ignore_duplicates=True`` neither fails nor repeats them."""
    preparer = OrderedTaskPreparation(NoPrerequisites, identity, lambda x: x - 1)
    preparer.set_finished_dependency(1)
    preparer.register_tasks((2, ))

    # first repeat: task 2 is skipped and only 3 is new
    # second repeat: both tasks are already known, so the call changes nothing
    for _ in range(2):
        preparer.register_tasks((2, 3), ignore_duplicates=True)

    # without prerequisites, in-order tasks are ready right away, each exactly once
    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (2, 3)
Пример #11
0
async def test_register_out_of_order():
    """
    Dangling tasks (4, 5) stay unready until the tasks linking them to the
    finished dependency (2, 3) are registered and finished too.
    """
    preparer = OrderedTaskPreparation(
        OnePrereq,
        identity,
        lambda x: x - 1,
        accept_dangling_tasks=True,
    )
    preparer.set_finished_dependency(1)

    later_tasks = (4, 5)
    preparer.register_tasks(later_tasks)
    preparer.finish_prereq(OnePrereq.one, later_tasks)

    # 4 and 5 are finished, but tasks 2 and 3 have not been registered yet
    await assert_nothing_ready(preparer)

    earlier_tasks = (2, 3)
    preparer.register_tasks(earlier_tasks)
    preparer.finish_prereq(OnePrereq.one, earlier_tasks)

    # the whole chain comes out together, in dependency order
    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (2, 3, 4, 5)
Пример #12
0
async def test_wait_to_prune_until_yielded():
    """
    Old tasks must not be pruned until the batch after them has been yielded
    by ``ready_tasks()``, even when they are deeper than ``max_depth``.
    """
    preparer = OrderedTaskPreparation(NoPrerequisites, identity, lambda x: x - 1, max_depth=2)
    preparer.set_finished_dependency(-1)
    preparer.register_tasks(range(10))

    # nothing has been pruned yet, so a duplicate with a known parent is accepted
    preparer.register_tasks((3, ), ignore_duplicates=True)
    first_batch = await wait(preparer.ready_tasks())
    assert first_batch == tuple(range(10))

    # the old tasks are STILL kept: collecting the *next* batch is what signals
    # that the previous batch has been fully processed
    preparer.register_tasks((10, ))
    second_batch = await wait(preparer.ready_tasks())
    assert second_batch == (10, )

    # by now the old tasks are pruned, so the parent of task 3 is unknown
    with pytest.raises(MissingDependency):
        preparer.register_tasks((3, ), ignore_duplicates=True)
Пример #13
0
async def test_pruning():
    """
    With ``max_depth=2``, registering a task whose dependency is still within
    the kept history succeeds, while one whose dependency has been pruned
    raises ``MissingDependency``.
    """
    def mod10_parent(task):
        # a task depends on (task mod 10) - 1, so 4 and 14 both depend on task 3
        return (task % 10) - 1

    preparer = OrderedTaskPreparation(OnePrereq, identity, mod10_parent, max_depth=2)
    preparer.set_finished_dependency(3)
    preparer.register_tasks((4, 5, 6, 7, 8))
    preparer.finish_prereq(OnePrereq.one, (4, 5, 6))

    # collect the ready tasks through 6, then mark them as processed by
    # collecting the next batch of ready tasks (7): that triggers pruning
    first_batch = await wait(preparer.ready_tasks())
    assert first_batch == (4, 5, 6)
    preparer.finish_prereq(OnePrereq.one, (7, ))
    second_batch = await wait(preparer.ready_tasks())
    assert second_batch == (7, )

    # depending up to two back in history is fine:
    # 16 depends on 5, and 15 depends on 4
    for still_known in (16, 15):
        preparer.register_tasks((still_known, ))

    # depending three back in history fails, because 3 has been pruned
    with pytest.raises(MissingDependency):
        # 14 depends on 3
        preparer.register_tasks((14, ))

    # same idea, but after pruning tasks that were not the starting tasks:
    # prune from the head at 7 by completing the task *after* 7
    preparer.finish_prereq(OnePrereq.one, (8, ))
    third_batch = await wait(preparer.ready_tasks())
    assert third_batch == (8, )

    # 26 depends on 5 and 27 depends on 6: both still known
    for still_known in (26, 27):
        preparer.register_tasks((still_known, ))

    # 25 depends on 4, which is pruned by now
    with pytest.raises(MissingDependency):
        preparer.register_tasks((25, ))
Пример #14
0
async def test_no_prereq_tasks_out_of_order():
    """
    Even without prerequisites, dangling tasks (4, 5) are not ready until the
    tasks linking them to the finished dependency (2, 3) are registered.
    """
    ti = OrderedTaskPreparation(
        NoPrerequisites,
        identity,
        lambda x: x - 1,
        accept_dangling_tasks=True,
    )
    ti.set_finished_dependency(1)
    ti.register_tasks((4, 5))

    # tasks 2 and 3 are missing, so nothing may be ready yet
    try:
        finished = await wait(ti.ready_tasks())
    except asyncio.TimeoutError:
        pass
    else:
        # raise explicitly: ``assert False`` is stripped when running with ``python -O``
        raise AssertionError(f"No steps should be ready, but got {finished!r}")

    ti.register_tasks((2, 3))

    # with no prerequisites, tasks are *immediately* finished, as long as they are in order
    finished = await wait(ti.ready_tasks())
    assert finished == (2, 3, 4, 5)
Пример #15
0
async def test_finished_dependency_midstream():
    """
    A new finished dependency can be declared after tasks have already been
    completed, which allows starting a discontinuous series of tasks.
    """
    preparer = OrderedTaskPreparation(TwoPrereqs, identity, lambda x: x - 1)

    def finish_all_prereqs(task):
        # complete both prerequisites of a single task, in declaration order
        for prereq in (TwoPrereqs.Prereq1, TwoPrereqs.Prereq2):
            preparer.finish_prereq(prereq, (task, ))

    preparer.set_finished_dependency(3)
    preparer.register_tasks((4, ))
    finish_all_prereqs(4)
    first_batch = await wait(preparer.ready_tasks())
    assert first_batch == (4, )

    # task 6 is discontinuous: its dependency (5) is unknown at this point
    with pytest.raises(MissingDependency):
        preparer.register_tasks((6, ))

    # once 5 is declared finished, task 6 can be registered and completed
    preparer.set_finished_dependency(5)
    preparer.register_tasks((6, ))
    finish_all_prereqs(6)
    second_batch = await wait(preparer.ready_tasks())
    assert second_batch == (6, )
Пример #16
0
async def test_no_prereq_tasks_out_of_order():
    """
    Even without prerequisites, dangling tasks (4, 5) are not ready until the
    tasks linking them to the finished dependency (2, 3) are registered.
    """
    preparer = OrderedTaskPreparation(
        NoPrerequisites,
        identity,
        lambda x: x - 1,
        accept_dangling_tasks=True,
    )
    preparer.set_finished_dependency(1)

    # tasks 2 and 3 are missing, so 4 and 5 cannot be ready yet
    preparer.register_tasks((4, 5))
    await assert_nothing_ready(preparer)

    preparer.register_tasks((2, 3))

    # with the gap filled, all four tasks are ready at once and in order
    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (2, 3, 4, 5)
Пример #17
0
async def test_finish_different_entry_at_same_step():
    """Two tasks that share the same dependency can both be finished and collected."""
    def previous_even_number(num):
        # largest even number strictly below ``num``
        predecessor = num - 1
        return predecessor - (predecessor % 2)

    preparer = OrderedTaskPreparation(OnePrereq, identity, previous_even_number)
    preparer.set_finished_dependency(2)
    preparer.register_tasks((3, 4))

    # 3 and 4 both depend on 2; finish each with its own call
    for task in (3, 4):
        preparer.finish_prereq(OnePrereq.one, (task, ))

    ready_batch = await wait(preparer.ready_tasks())
    assert ready_batch == (3, 4)
Пример #18
0
async def test_return_original_entry():
    """``ready_tasks()`` returns the original task objects, not their extracted ids."""
    def task_id(task):
        # for no particular reason, the id is 3 less than the task number
        return task - 3

    def dependency_id(task):
        # id of the task one before: (task - 1) - 3
        return task - 4

    preparer = OrderedTaskPreparation(OnePrereq, task_id, dependency_id)

    # task 2 translates to id -1
    preparer.set_finished_dependency(2)

    preparer.register_tasks((3, 4))

    # task 3 translates to id 0, task 4 translates to id 1
    for task in (3, 4):
        preparer.finish_prereq(OnePrereq.one, (task, ))

    returned = await wait(preparer.ready_tasks())

    # the tasks themselves must come back, not their ids
    assert returned == (3, 4)
Пример #19
0
class BaseHeaderChainSyncer(BaseService, HeaderSyncerAPI, Generic[TChainPeer]):
    """
    Generate a skeleton header, then use all peers to fill in the headers
    returned by the skeleton syncer.

    Headers from the skeleton syncer and from the gap-filling "meat" syncer are
    registered with a stitcher (an ``OrderedTaskPreparation`` keyed on header
    hash, depending on parent hash). Consumers read the ordered result through
    :meth:`new_sync_headers`.

    Subclasses must provide :attr:`tip_monitor_class`.
    """
    # Service that downloads the headers in the gaps between skeleton segments
    _meat: HeaderMeatSyncer[TChainPeer]

    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncHeaderDB,
                 peer_pool: BaseChainPeerPool,
                 token: CancelToken = None) -> None:
        """
        :param chain: chain object, used to validate headers and passed on to the sub-syncers
        :param db: header database, used to look up parent headers and the canonical head
        :param peer_pool: pool of peers handed to the tip monitor and the meat syncer
        :param token: optional cancel token, forwarded to the base service
        """
        super().__init__(token)
        self._db = db
        self._chain = chain
        self._peer_pool = peer_pool
        self._tip_monitor = self.tip_monitor_class(peer_pool,
                                                   token=self.cancel_token)
        # head hash of the peer used in the most recently ended skeleton sync (None before that)
        self._last_target_header_hash: Hash32 = None
        # the skeleton syncer that is currently running, or None when no skeleton sync is active
        self._skeleton: SkeletonSyncer[TChainPeer] = None

        # Track if there is capacity for syncing more headers
        self._buffer_capacity = asyncio.Event()

        self._reset_buffer()

    def _reset_buffer(self) -> None:
        """
        Create a fresh stitcher and a fresh meat syncer that feeds it, then
        flag the buffer as having capacity.
        """
        # stitch together headers as they come in
        self._stitcher = OrderedTaskPreparation(
            # we don't have to do any prep work on the headers, just linearize them, so empty enum
            OrderedTaskPreparation.NoPrerequisites,
            id_extractor=attrgetter('hash'),
            # make sure that a header is not returned in new_sync_headers until its parent has been
            # returned
            dependency_extractor=attrgetter('parent_hash'),
            # headers will come in out of order
            accept_dangling_tasks=True,
        )
        # When downloading the headers into the gaps left by the syncer, they must be linearized
        # by the stitcher
        self._meat = HeaderMeatSyncer(
            self._chain,
            self._peer_pool,
            self._stitcher,
            self.cancel_token,
        )

        # Queue has reset, so always start with capacity
        self._buffer_capacity.set()

    async def new_sync_headers(
            self,
            max_batch_size: int = None
    ) -> AsyncIterator[Tuple[BlockHeader, ...]]:
        """
        Yield tuples of headers as the stitcher reports them ready, for as long
        as this service is operational.

        Every batch taken from the stitcher is split at the index returned by
        ``first_nonconsecutive_header``, so one stitcher batch may be yielded
        as several tuples. (Presumably each yielded tuple is a run of
        consecutive headers; the helper is defined elsewhere -- confirm.)

        As a side effect, ``self._buffer_capacity`` is cleared while the
        consumer is behind and set again once the stitcher has no more ready
        tasks.

        :param max_batch_size: passed through to ``self._stitcher.ready_tasks()``
        """

        while self.is_operational:
            headers = await self.wait(
                self._stitcher.ready_tasks(max_batch_size))
            if self._stitcher.has_ready_tasks():
                # Even after clearing out a big batch, there is no available capacity, so
                # pause any coroutines that might wait for capacity
                self._buffer_capacity.clear()

            while headers:
                # peel one batch off the front of ``headers`` and keep the remainder for later
                split_idx = first_nonconsecutive_header(headers)
                consecutive_batch, headers = headers[:split_idx], headers[
                    split_idx:]
                if headers:
                    # Note lack of capacity if the headers are non-consecutive
                    self._buffer_capacity.clear()
                yield consecutive_batch

            if not self._stitcher.has_ready_tasks():
                # There is available capacity, let any waiting coroutines continue
                self._buffer_capacity.set()

    def get_target_header_hash(self) -> Hash32:
        """
        Return the hash of the header that the sync is heading for.

        While a skeleton sync is running, this is the head hash of the
        skeleton's peer; otherwise it is the head hash recorded when the last
        skeleton sync ended.

        :raises ValidationError: if no skeleton sync is running and none has ended yet
        """
        if not self._is_syncing_skeleton and self._last_target_header_hash is None:
            raise ValidationError(
                "Cannot check the target hash before the first sync has started"
            )
        elif self._is_syncing_skeleton:
            return self._skeleton.peer.head_hash
        else:
            return self._last_target_header_hash

    @property
    @abstractmethod
    def tip_monitor_class(self) -> Type[BaseChainTipMonitor]:
        """
        The tip monitor class to instantiate in ``__init__``; it is called with
        the peer pool and a ``token`` keyword argument.
        """
        pass

    async def _run(self) -> None:
        """
        Start the tip monitor and the meat syncer as daemons, then keep
        building skeletons until :meth:`_build_skeleton` returns.
        """
        self.run_daemon(self._tip_monitor)
        self.run_daemon(self._meat)
        await self.wait(self._build_skeleton())

    async def _build_skeleton(self) -> None:
        """
        Find best peer to build a skeleton, and build it immediately

        Peers that are not ahead of the local chain are skipped with a log
        message.
        """
        # iterator yields the peer with the highest TD in our pool
        async for peer in self._tip_monitor.wait_tip_info():
            try:
                await self._validate_peer_is_ahead(peer)
            except _PeerBehind:
                self.logger.info(
                    "At or behind peer %s, skipping skeleton sync", peer)
            else:
                async with self._get_skeleton_syncer(peer) as syncer:
                    await self._full_skeleton_sync(syncer)

    @asynccontextmanager
    async def _get_skeleton_syncer(
            self,
            peer: TChainPeer) -> AsyncIterator[SkeletonSyncer[TChainPeer]]:
        """
        Context manager that runs a ``SkeletonSyncer`` against ``peer`` as a
        child service for the duration of the ``async with`` body.

        On exit, the peer's head hash is recorded as the last target header
        hash and ``self._skeleton`` is reset to None. An ``OperationCancelled``
        raised inside the body is swallowed; on a clean exit the skeleton
        syncer is cancelled if it is still operational.

        :raises ValidationError: if another skeleton sync is already in progress
        """
        if self._is_syncing_skeleton:
            raise ValidationError(
                "Cannot sync skeleton headers from two peers at the same time")

        self._skeleton = SkeletonSyncer(
            self._chain,
            self._db,
            peer,
            self.cancel_token,
        )
        self.run_child_service(self._skeleton)
        # do not hand the syncer to the caller before it has signalled that it started
        await self._skeleton.events.started.wait()
        try:
            yield self._skeleton
        except OperationCancelled:
            pass
        else:
            # the body ended without an exception: stop the skeleton syncer if it is still running
            if self._skeleton.is_operational:
                self._skeleton.cancel_nowait()
        finally:
            self.logger.debug("Skeleton sync with %s ended", peer)
            self._last_target_header_hash = peer.head_hash
            self._skeleton = None

    @property
    def _is_syncing_skeleton(self) -> bool:
        """True while a skeleton syncer is set on ``self._skeleton``."""
        return self._skeleton is not None

    async def _full_skeleton_sync(
            self, skeleton_syncer: SkeletonSyncer[TChainPeer]) -> None:
        """
        Consume every segment produced by ``skeleton_syncer``: register it
        with the stitcher and arrange for the gap before it to be filled.

        The parent of the very first header is loaded from the database and
        marked as a finished dependency of the stitcher. For each later
        segment, the gap to the previous segment is either validated directly
        (gap of 0) or scheduled on the meat syncer. After each segment, this
        waits until ``self._buffer_capacity`` is set.

        :raises ValidationError: if a gap is negative or larger than ``MAX_HEADERS_FETCH``
        """
        skeleton_generator = skeleton_syncer.next_skeleton_segment()
        try:
            first_segment = await skeleton_generator.__anext__()
        except StopAsyncIteration:
            self.logger.debug(
                "Skeleton %s was cancelled before first header was returned",
                skeleton_syncer.peer,
            )
            return

        self.logger.debug(
            "Skeleton syncer asserts that parent (%s) of the first header (%s) is already present",
            humanize_hash(first_segment[0].parent_hash),
            first_segment[0],
        )
        first_parent = await self._db.coro_get_block_header_by_hash(
            first_segment[0].parent_hash)
        try:
            self._stitcher.set_finished_dependency(first_parent)
        except DuplicateTasks:
            # the first header of this segment was already registered: no problem, carry on
            pass

        self._stitcher.register_tasks(first_segment, ignore_duplicates=True)

        previous_segment = first_segment
        async for segment in self.wait_iter(skeleton_generator):
            self._stitcher.register_tasks(segment, ignore_duplicates=True)

            # number of headers missing between the end of the previous segment and this one
            gap_length = segment[0].block_number - previous_segment[
                -1].block_number - 1
            if gap_length > MAX_HEADERS_FETCH:
                raise ValidationError(
                    f"Header skeleton gap of {gap_length} > {MAX_HEADERS_FETCH}"
                )
            elif gap_length == 0:
                # no need to fill in when there is no gap, just verify against previous header
                await self.wait(
                    self._chain.coro_validate_chain(
                        previous_segment[-1],
                        segment,
                        SEAL_CHECK_RANDOM_SAMPLE_RATE,
                    ))
            elif gap_length < 0:
                raise ValidationError(
                    f"Invalid headers: {gap_length} gap from {previous_segment} to {segment}"
                )
            else:
                # if the header filler is overloaded, this will pause
                await self.wait(
                    self._meat.schedule_segment(
                        previous_segment[-1],
                        gap_length,
                        skeleton_syncer.peer,
                    ))
            previous_segment = segment

            # Don't race ahead if the consumer is lagging
            await self._buffer_capacity.wait()

    async def _validate_peer_is_ahead(self, peer: BaseChainPeer) -> None:
        """
        Compare the total difficulty announced by ``peer`` with the score of
        the local canonical head.

        :raises _PeerBehind: if the peer's TD is not higher than the local one
        """
        head = await self.wait(self._db.coro_get_canonical_head())
        head_td = await self.wait(self._db.coro_get_score(head.hash))
        if peer.head_td <= head_td:
            self.logger.info(
                "Head TD (%d) announced by %s not higher than ours (%d), not syncing",
                peer.head_td, peer, head_td)
            raise _PeerBehind(
                f"{peer} is behind us, not a valid target for sync")
        else:
            self.logger.debug(
                "%s announced Head TD %d, which is higher than ours (%d), starting sync",
                peer, peer.head_td, head_td)
            pass
Пример #20
0
class RegularChainBodySyncer(BaseBodyChainSyncer):
    """
    Sync with the Ethereum network by fetching block headers/bodies and importing them.

    Here, the run() method will execute the sync loop forever, until our CancelToken is triggered.

    Headers come from the given header syncer. Each header is tracked in an
    ``OrderedTaskPreparation`` so that a block is only imported after its body
    has been stored and its parent has been imported.
    """
    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 token: CancelToken = None) -> None:
        """
        :param chain: chain used to pick the VM class and to import blocks
        :param db: chain database, used to read the canonical head
        :param peer_pool: pool of peers, forwarded to the base class
        :param header_syncer: source of new headers, read via ``new_sync_headers()``
        :param token: optional cancel token, forwarded to the base class
        """
        super().__init__(chain, db, peer_pool, token)

        self._header_syncer = header_syncer

        # track when block bodies are downloaded, so that blocks can be imported
        self._block_import_tracker = OrderedTaskPreparation(
            BlockImportPrereqs,
            id_extractor=attrgetter('hash'),
            # make sure that a block is not imported until the parent block is imported
            dependency_extractor=attrgetter('parent_hash'),
        )

    async def _run(self) -> None:
        """
        Seed the import tracker with the current canonical head, start the
        three daemon tasks of this syncer, then hand over to the base class.
        """
        head = await self.wait(self.db.coro_get_canonical_head())
        # the canonical head is already imported, so blocks building on it can become ready
        self._block_import_tracker.set_finished_dependency(head)
        self.run_daemon_task(self._launch_prerequisite_tasks())
        self.run_daemon_task(self._assign_body_download_to_peers())
        self.run_daemon_task(self._import_ready_blocks())
        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        """Register ``peer`` with the base class and queue it up for body downloads."""
        # when a new peer is added to the pool, add it to the idle peer list
        super().register_peer(peer)
        self._body_peers.put_nowait(cast(ETHPeer, peer))

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Watch for new headers to be added to the queue, and add the prerequisite
        tasks (downloading block bodies) as they become available.
        """
        async for headers in self.wait_iter(
                self._header_syncer.new_sync_headers()):
            self._block_import_tracker.register_tasks(headers)

            # only queue body downloads for headers that are not queued already
            new_headers = tuple(h for h in headers
                                if h not in self._block_body_tasks)

            # if the output queue gets full, hang until there is room
            await self.wait(self._block_body_tasks.add(new_headers))

    def _mark_body_download_complete(
            self, batch_id: int, completed_headers: Tuple[BlockHeader,
                                                          ...]) -> None:
        """
        Let the base class record the completed batch, then mark the
        ``StoreBlockBodies`` prerequisite as finished for those headers.

        :param batch_id: id of the completed download batch, forwarded to the base class
        :param completed_headers: headers whose bodies have been downloaded
        """
        super()._mark_body_download_complete(batch_id, completed_headers)
        self._block_import_tracker.finish_prereq(
            BlockImportPrereqs.StoreBlockBodies,
            completed_headers,
        )

    async def _import_ready_blocks(self) -> None:
        """
        Wait for block bodies to be downloaded, then import the blocks.

        Loops for as long as the service is operational and logs a summary
        after every imported batch.
        """
        while self.is_operational:
            # started before waiting, so the logged time includes the wait for ready headers
            timer = Timer()

            # wait for block bodies to become ready for execution
            completed_headers = await self.wait(
                self._block_import_tracker.ready_tasks())

            await self._import_blocks(completed_headers)

            head = await self.wait(self.db.coro_get_canonical_head())
            self.logger.info(
                "Synced chain segment with %d blocks in %.2f seconds, new head: %s",
                len(completed_headers),
                timer.elapsed,
                head,
            )

    async def _import_blocks(self, headers: Tuple[BlockHeader, ...]) -> None:
        """
        Import the blocks for the corresponding headers

        Each block is assembled from its header plus the pending body (or an
        empty body when ``_is_body_empty(header)`` is true), imported with
        validation enabled, and the outcome is logged.

        :param headers: headers that have the block bodies downloaded
        :raises Exception: if the import result matches none of the handled cases
        """
        for header in headers:
            vm_class = self.chain.get_vm_class(header)
            block_class = vm_class.get_block_class()

            if _is_body_empty(header):
                transactions: List[BaseTransaction] = []
                uncles: List[BlockHeader] = []
            else:
                # the body is removed from the pending store as it is consumed
                body = self._pending_bodies.pop(header)
                tx_class = block_class.get_transaction_class()
                transactions = [
                    tx_class.from_base_transaction(tx)
                    for tx in body.transactions
                ]
                uncles = body.uncles

            block = block_class(header, transactions, uncles)
            timer = Timer()
            _, new_canonical_blocks, old_canonical_blocks = await self.wait(
                self.chain.coro_import_block(block, perform_validation=True))

            if new_canonical_blocks == (block, ):
                # simple import of a single new block.
                self.logger.info("Imported block %d (%d txs) in %.2f seconds",
                                 block.number, len(transactions),
                                 timer.elapsed)
            elif not new_canonical_blocks:
                # imported block from a fork.
                self.logger.info(
                    "Imported non-canonical block %d (%d txs) in %.2f seconds",
                    block.number, len(transactions), timer.elapsed)
            elif old_canonical_blocks:
                # some canonical blocks were replaced by the newly imported chain
                self.logger.info(
                    "Chain Reorganization: Imported block %d (%d txs) in %.2f "
                    "seconds, %d blocks discarded and %d new canonical blocks added",
                    block.number,
                    len(transactions),
                    timer.elapsed,
                    len(old_canonical_blocks),
                    len(new_canonical_blocks),
                )
            else:
                # several new canonical blocks without any being replaced: not expected to happen
                raise Exception("Invariant: unreachable code path")
Пример #21
0
class FastChainBodySyncer(BaseBodyChainSyncer):
    """
    Sync with the Ethereum network by fetching block headers/bodies and storing them in our DB.

    Here, the run() method returns as soon as we complete a sync with the peer that announced the
    highest TD, at which point we must run the StateDownloader to fetch the state for our chain
    head.
    """
    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 token: CancelToken = None) -> None:
        """
        Set up the receipt-download and block-persist bookkeeping for fast sync.

        :param chain: forwarded to the base class
        :param db: forwarded to the base class
        :param peer_pool: forwarded to the base class
        :param header_syncer: source of the headers to sync, and of the target header hash
        :param token: forwarded to the base class
        """
        super().__init__(chain, db, peer_pool, token)

        self._header_syncer = header_syncer

        # Flipped to True by _mark_complete() once the sync goal is reached
        self.is_complete = False

        # Idle peers line up here, ordered by how quickly they answer receipt requests
        self._receipt_peers: WaitingPeers[ETHPeer] = WaitingPeers(commands.Receipts)

        # Pending receipt downloads:
        # - the buffer holds an arbitrary several requests-worth of headers
        # - tasks are ordered by block number, so that lower blocks are fetched first
        self._receipt_tasks = TaskQueue(
            MAX_RECEIPTS_FETCH * REQUEST_BUFFER_MULTIPLIER,
            attrgetter('block_number'),
        )

        # A block becomes ready to persist when its bodies and receipts are both collected.
        # Depending on the parent hash keeps a block from being persisted before its parent.
        self._block_persist_tracker = OrderedTaskPreparation(
            BlockPersistPrereqs,
            id_extractor=attrgetter('hash'),
            dependency_extractor=attrgetter('parent_hash'),
        )

    async def _run(self) -> None:
        """
        Seed the trackers with the canonical head, start the background daemon tasks,
        then hand control to the base class's _run().
        """
        canonical_head = await self.wait(self.db.coro_get_canonical_head())
        self.tracker = ChainSyncPerformanceTracker(canonical_head)

        # The canonical head is already persisted, so its children may be persisted right away
        self._block_persist_tracker.set_finished_dependency(canonical_head)

        # Each coroutine is created right before it is handed over, in this exact order
        daemon_factories = (
            self._launch_prerequisite_tasks,
            self._assign_receipt_download_to_peers,
            self._assign_body_download_to_peers,
            self._persist_ready_blocks,
            self._display_stats,
        )
        for make_daemon in daemon_factories:
            self.run_daemon_task(make_daemon())

        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register a peer that joined the pool, and queue it as idle for both
        body downloads and receipt downloads.
        """
        super().register_peer(peer)

        # cast() only informs the type checker; the very same peer object is queued
        eth_peer = cast(ETHPeer, peer)
        for idle_peers in (self._body_peers, self._receipt_peers):
            idle_peers.put_nowait(eth_peer)

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Watch for new headers to be added to the queue, and add the prerequisite
        tasks as they become available.

        For every batch of headers produced by the header syncer:

        1. register the headers with the block-persist tracker, treating the
           batch as a fork if the first header's parent is not a known dependency
        2. queue the headers for body download and for receipt download,
           skipping any header that is already queued

        If the parent of a fork's first header cannot be loaded from the database,
        the lookup's exception is deliberately left to propagate.
        """
        async for headers in self.wait_iter(
                self._header_syncer.new_sync_headers()):
            try:
                # We might end up with duplicates that can be safely ignored.
                # Likely scenario: switched which peer downloads headers, and the new peer isn't
                # aware of some of the in-progress headers
                self._block_persist_tracker.register_tasks(
                    headers, ignore_duplicates=True)
            except MissingDependency:
                # The parent of this header is not registered as a dependency yet.
                # Some reasons this might happen, in rough descending order of likelihood:
                #   - a normal fork: the canonical head isn't the parent of the first header synced
                #   - a bug: the DB has inconsistent state, say saved headers but not block bodies
                #   - a bug: headers were queued out of order in new_sync_headers

                # If the parent header doesn't exist yet, this is a legit bug instead of a fork,
                # let the HeaderNotFound exception bubble up
                parent_header = await self.wait(
                    self.db.coro_get_block_header_by_hash(
                        headers[0].parent_hash))

                # Only needed for the log line below. Routed through self.wait() like every
                # other database call in this method, so that it honors cancellation.
                canonical_head = await self.wait(self.db.coro_get_canonical_head())

                # This appears to be a fork, since the parent header is persisted,
                self.logger.info(
                    "Fork found while starting fast sync. Canonical head was %s, but the next "
                    "header %s, has parent %s. Importing fork in case it's the longest chain.",
                    canonical_head,
                    headers[0],
                    parent_header,
                )
                # Set first header's parent as finished
                self._block_persist_tracker.set_finished_dependency(
                    parent_header)
                # Re-register the header tasks, which will now succeed. Duplicates must be
                # tolerated here for the same reason as in the first attempt above.
                self._block_persist_tracker.register_tasks(
                    headers, ignore_duplicates=True)

            # Sometimes duplicates are added to the queue, when switching from one sync to another.
            # We can simply ignore them.
            new_body_tasks = tuple(h for h in headers
                                   if h not in self._block_body_tasks)
            new_receipt_tasks = tuple(h for h in headers
                                      if h not in self._receipt_tasks)

            # if any one of the output queues gets full, hang until there is room
            await self.wait(
                asyncio.gather(
                    self._block_body_tasks.add(new_body_tasks),
                    self._receipt_tasks.add(new_receipt_tasks),
                ))

    async def _display_stats(self) -> None:
        """
        Every 5 seconds while the service is operational, log the state of the
        download queues (at debug level) and the sync progress (at info level).
        """
        while self.is_operational:
            await self.sleep(5)
            self.logger.debug(
                "(in progress, queued, max size) of bodies, receipts: %r",
                [(q.num_in_progress(), len(q), q._maxsize) for q in (
                    self._block_body_tasks,
                    self._receipt_tasks,
                )],
            )

            stats = self.tracker.report()

            # Use a timezone-aware "now": calling .timestamp() on the naive value returned by
            # utcnow() interprets it as local time, which skews the head age by the UTC offset
            # of the host.
            utcnow = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
            head_age = utcnow - stats.latest_head.timestamp
            self.logger.info(
                ("blks=%-4d  "
                 "txs=%-5d  "
                 "bps=%-3d  "
                 "tps=%-4d  "
                 "elapsed=%0.1f  "
                 "head=#%d %s  "
                 "age=%s"),
                stats.num_blocks,
                stats.num_transactions,
                stats.blocks_per_second,
                stats.transactions_per_second,
                stats.elapsed,
                stats.latest_head.block_number,
                humanize_hash(stats.latest_head.hash),
                humanize_elapsed(head_age),
            )

    async def _persist_ready_blocks(self) -> None:
        """
        Persist each block once its prerequisites (body and receipt downloads) are done.
        Blocks are persisted in order, so a block's parent is always persisted first.

        After each batch, check whether the header syncer's target hash was among the
        persisted headers. If it was, mark fast sync as complete and stop looping.
        """
        while self.is_operational:
            # The tracker only hands out headers whose prerequisites are all complete, in an
            # order that guarantees each header's parent was handed out before it.
            ready_headers = await self.wait(self._block_persist_tracker.ready_tasks())
            await self.wait(self._persist_blocks(ready_headers))

            sync_target = self._header_syncer.get_target_header_hash()
            reached_target = any(header.hash == sync_target for header in ready_headers)
            if not reached_target:
                continue

            # The target was just persisted: this service has nothing left to do
            self._mark_complete()
            break

    def _mark_complete(self) -> None:
        """
        Record that the fast sync reached its goal, then trigger cancellation of
        this service.
        """
        # Set the flag first, so that it already reads True by the time cancellation starts
        self.is_complete = True
        self.cancel_nowait()

    async def _persist_blocks(self, headers: Tuple[BlockHeader, ...]) -> None:
        """
        Build the block for each header and write it straight to the database.

        :param headers: headers for which block bodies and receipts have been downloaded
        """
        for header in headers:
            block_class = self.chain.get_vm_class(header).get_block_class()

            # Start from an empty body, and fill it in only when the header says there is one
            transactions: List[BaseTransaction] = []
            uncles: List[BlockHeader] = []
            if not _is_body_empty(header):
                # The body is consumed here: it is removed from the pending bodies
                pending_body = self._pending_bodies.pop(header)
                uncles = pending_body.uncles

                # The transaction data was already persisted in _block_body_bundle_processing,
                # but the transactions must be part of the block so that they get added to
                # the hash->txn lookup
                tx_class = block_class.get_transaction_class()
                transactions = [
                    tx_class.from_base_transaction(base_tx)
                    for base_tx in pending_body.transactions
                ]

                # Only non-empty bodies are reported to the performance tracker
                self.tracker.record_transactions(len(transactions))

            await self.wait(
                self.db.coro_persist_block(block_class(header, transactions, uncles)))
            self.tracker.set_latest_head(header)

    async def _assign_receipt_download_to_peers(self) -> None:
        """
        While the service is operational, keep pairing the fastest idle peer with the
        next batch of headers that still need receipts.
        """
        while self.is_operational:
            # Wait for the fastest of the peers that are not busy downloading receipts
            fastest_peer = await self.wait(self._receipt_peers.get_fastest())

            # Wait for up to MAX_RECEIPTS_FETCH headers that need receipts; the task queue
            # hands out the lowest block numbers first
            next_batch = await self.wait(self._receipt_tasks.get(MAX_RECEIPTS_FETCH))
            batch_id, headers = next_batch

            # Schedule the download on the peer and immediately look for the next pairing
            download = self._run_receipt_download_batch(fastest_peer, batch_id, headers)
            fastest_peer.run_task(download)

    def _mark_body_download_complete(
            self,
            batch_id: int,
            completed_headers: Tuple[BlockHeader, ...]) -> None:
        """
        Let the base class close out the body download batch, then record that the
        block bodies for these headers are stored.

        :param batch_id: identifier of the body download batch, passed on to the base class
        :param completed_headers: headers whose bodies were collected in this batch
        """
        super()._mark_body_download_complete(batch_id, completed_headers)

        # One of the prerequisites that the persist tracker waits on for each header
        bodies_stored = BlockPersistPrereqs.StoreBlockBodies
        self._block_persist_tracker.finish_prereq(bodies_stored, completed_headers)

    async def _run_receipt_download_batch(
            self, peer: ETHPeer, batch_id: int, headers: Tuple[BlockHeader,
                                                               ...]) -> None:
        """
        Given a single batch retrieved from self._receipt_tasks, get as many of the receipt bundles
        as possible, and mark them as complete.

        :param peer: the peer to download the receipts from
        :param batch_id: identifier of the batch, as handed out by self._receipt_tasks
        :param headers: the headers in the batch, whose receipts should be downloaded

        Whatever happens, the batch is always completed in self._receipt_tasks, with the
        headers that were actually collected (possibly none). A peer that fails with a
        BaseP2PError is not put back in line for more receipt downloads.
        """
        # If there is an exception during _process_receipts, prepare to mark the task as finished
        # with no headers collected:
        completed_headers: Tuple[BlockHeader, ...] = tuple()
        try:
            completed_headers = await peer.wait(
                self._process_receipts(peer, headers))

            self._block_persist_tracker.finish_prereq(
                BlockPersistPrereqs.StoreReceipts,
                completed_headers,
            )
        except BaseP2PError as exc:
            self.logger.info(
                "Unexpected p2p error while downloading receipt from peer: %s",
                exc)
            self.logger.debug(
                "Problem downloading receipt from peer, dropping...",
                exc_info=True)
        else:
            if completed_headers:
                # peer completed successfully, so have it get back in line for processing
                self._receipt_peers.put_nowait(peer)
            else:
                # peer returned no results, wait a while before trying again
                delay = EMPTY_PEER_RESPONSE_PENALTY
                self.logger.debug(
                    "Pausing %s for %.1fs, for sending 0 receipts", peer,
                    delay)
                self.call_later(delay, self._receipt_peers.put_nowait, peer)
        finally:
            # Runs on success and on failure alike, so the batch never stays "in progress"
            self._receipt_tasks.complete(batch_id, completed_headers)

    async def _block_body_bundle_processing(
            self,
            bundles: Tuple[BlockBodyBundle, ...]) -> None:
        """
        Write the trie data of every block body bundle directly to the database,
        one bundle at a time. That shortcut is what makes fast sync... fast.
        """
        for bundle in bundles:
            # Only the trie data dict, nested in the middle element, is needed here
            _, (_, trie_data_dict), _ = bundle
            await self.wait(self.db.coro_persist_trie_data_dict(trie_data_dict))

    async def _process_receipts(
            self, peer: ETHPeer,
            all_headers: Tuple[BlockHeader, ...]) -> Tuple[BlockHeader, ...]:
        """
        Downloads and persists the receipts for the given set of block headers.
        Some receipts may be trivial, having a blank root hash, and will not be requested.

        If the peer returns nothing, or returns receipts that fail validation (in which
        case the peer is also disconnected), only the trivial headers are returned.

        :param peer: to issue the receipt request to
        :param all_headers: attempt to get receipts for as many of these headers as possible
        :return: the headers for receipts that were successfully downloaded (or were trivial)
        """
        # Post-Byzantium blocks may have identical receipt roots (e.g. when they have the same
        # number of transactions and all succeed/failed: ropsten blocks 2503212 and 2503284),
        # so we do this to avoid requesting the same receipts multiple times.

        # combine headers with the same receipt root, so we can mark them as completed, later
        receipt_root_to_headers = groupby(attrgetter('receipt_root'),
                                          all_headers)

        # Ignore headers that have an empty receipt root
        trivial_headers = tuple(
            receipt_root_to_headers.pop(BLANK_ROOT_HASH, tuple()))

        # pick one of the headers for each missing receipt root
        unique_headers_needed = tuple(
            first(headers)
            for headers in receipt_root_to_headers.values())

        if not unique_headers_needed:
            return trivial_headers

        receipt_bundles = await self._request_receipts(peer,
                                                       unique_headers_needed)

        if not receipt_bundles:
            return trivial_headers

        try:
            await self._validate_receipts(unique_headers_needed,
                                          receipt_bundles)
        except ValidationError as err:
            self.logger.info(
                "Disconnecting from %s: sent invalid receipt: %s",
                peer,
                err,
            )
            await peer.disconnect(DisconnectReason.bad_protocol)
            return trivial_headers

        # process all of the returned receipts, storing their trie data
        # dicts in the database
        _, trie_roots_and_data_dicts = zip(*receipt_bundles)
        receipt_roots, trie_data_dicts = zip(*trie_roots_and_data_dicts)
        for trie_data in trie_data_dicts:
            await self.wait(self.db.coro_persist_trie_data_dict(trie_data))

        # Identify which headers have the receipt roots that are now complete.
        # A set makes each membership check constant-time, instead of scanning a tuple
        # of all the returned roots once per requested root.
        returned_roots = set(receipt_roots)
        completed_header_groups = tuple(
            headers for root, headers in receipt_root_to_headers.items()
            if root in returned_roots)
        newly_completed_headers = tuple(concat(completed_header_groups))

        self.logger.debug(
            "Got receipts for %d/%d headers from %s, with %d trivial headers",
            len(newly_completed_headers),
            len(all_headers) - len(trivial_headers),
            peer,
            len(trivial_headers),
        )
        return newly_completed_headers + trivial_headers

    async def _validate_receipts(
            self,
            headers: Tuple[BlockHeader, ...],
            receipt_bundles: Tuple[ReceiptBundle, ...]) -> None:
        """
        Validate every returned receipt against the header that it was requested for.

        Headers and bundles are matched up by receipt root. Headers that
        _is_receipts_empty() reports as empty, bundles with a blank root, and headers
        whose receipts were not returned are all left out of validation.

        :param headers: the headers that receipts were requested for
        :param receipt_bundles: the bundles that the peer returned
        """
        header_by_root = {}
        for header in headers:
            if not _is_receipts_empty(header):
                header_by_root[header.receipt_root] = header

        receipts_by_root = {}
        for receipts, (receipt_root, _) in receipt_bundles:
            if receipt_root != BLANK_ROOT_HASH:
                receipts_by_root[receipt_root] = receipts

        for receipt_root, header in header_by_root.items():
            # A root missing from the response means the peer did not return this receipt
            # group, so there is nothing to validate for it
            if receipt_root in receipts_by_root:
                for receipt in receipts_by_root[receipt_root]:
                    await self.chain.coro_validate_receipt(receipt, header)

    async def _request_receipts(
            self,
            peer: ETHPeer,
            batch: Tuple[BlockHeader, ...]) -> Tuple[ReceiptBundle, ...]:
        """
        Ask the given peer for the receipts of a batch of headers.

        Timeouts, cancellations and lost peer connections are logged and answered with
        an empty tuple. Any other exception is logged and re-raised.

        :param peer: the peer to request the receipts from
        :param batch: the headers to request receipts for
        :return: the receipt bundles from the peer, or an empty tuple if there were none
        """
        self.logger.debug("Requesting receipts for %d headers from %s", len(batch), peer)
        try:
            receipt_bundles = await peer.requests.get_receipts(batch)
        except TimeoutError:
            self.logger.debug(
                "Timed out requesting receipts for %d headers from %s",
                len(batch),
                peer,
            )
            return tuple()
        except CancelledError:
            self.logger.debug("Pending receipts call to %r future cancelled", peer)
            return tuple()
        except OperationCancelled:
            self.logger.debug2("Pending receipts call to %r operation cancelled", peer)
            return tuple()
        except PeerConnectionLost:
            self.logger.debug(
                "Peer went away, cancelling the receipts request and moving on..."
            )
            return tuple()
        except Exception:
            self.logger.exception("Unknown error when getting receipts")
            raise
        else:
            # Any falsy response is normalized to an empty tuple
            return receipt_bundles or tuple()
Пример #22
0
class RegularChainBodySyncer(BaseBodyChainSyncer):
    """
    Sync with the Ethereum network by fetching block headers/bodies and importing them.

    Here, the run() method will execute the sync loop forever, until our CancelToken is triggered.
    """

    def __init__(self,
                 chain: AsyncChainAPI,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 block_importer: BaseBlockImporter,
                 token: CancelToken = None) -> None:
        """
        Set up the bookkeeping for downloading block bodies and importing the blocks.

        :param chain: forwarded to the base class
        :param db: forwarded to the base class
        :param peer_pool: forwarded to the base class
        :param header_syncer: forwarded to the base class
        :param block_importer: previews and imports the blocks that this syncer assembles
        :param token: forwarded to the base class
        """
        super().__init__(chain, db, peer_pool, header_syncer, token)

        self._block_importer = block_importer

        # A block becomes ready for import once its body is downloaded. Depending on the
        # parent hash keeps a block from being imported before its parent. Twice the import
        # queue size worth of data is kept around, to avoid problems.
        self._block_import_tracker = OrderedTaskPreparation(
            BlockImportPrereqs,
            id_extractor=attrgetter('hash'),
            dependency_extractor=attrgetter('parent_hash'),
            max_depth=BLOCK_IMPORT_QUEUE_SIZE * 2,
        )

        # Set when the first headers arrive; several loops wait on it before starting
        self._got_first_header = asyncio.Event()

        # Throttle for the block import logs: about one log per 3 seconds (rate 0.33),
        # bursting up to 5 logs after a lag
        self._import_log_limiter = TokenBucket(0.33, 5)

        # Short queue of blocks that are downloaded and ready to be imported
        self._import_queue: 'asyncio.Queue[BlockAPI]' = asyncio.Queue(BLOCK_IMPORT_QUEUE_SIZE)

        # Held by the import loop while it is busy importing, released while it waits for blocks
        self._import_active = asyncio.Lock()

    async def _run(self) -> None:
        """
        Seed the import tracker with the canonical head, start the background daemon
        tasks, then hand control to the base class's _run().
        """
        canonical_head = await self.wait(self.db.coro_get_canonical_head())

        # The canonical head is already imported, so its children may be imported right away
        self._block_import_tracker.set_finished_dependency(canonical_head)

        # Each coroutine is created right before it is handed over, in this exact order
        daemon_factories = (
            self._launch_prerequisite_tasks,
            self._assign_body_download_to_peers,
            self._import_ready_blocks,
            self._preview_ready_blocks,
            self._display_stats,
        )
        for make_daemon in daemon_factories:
            self.run_daemon_task(make_daemon())

        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register a peer that joined the pool, and queue it as idle for body downloads.
        """
        super().register_peer(peer)

        # cast() only informs the type checker; the very same peer object is queued
        idle_peer = cast(ETHPeer, peer)
        self._body_peers.put_nowait(idle_peer)

    async def _should_skip_header(self, header: BlockHeaderAPI) -> bool:
        """
        Decide whether importing this header can be skipped.

        The syncing of a header is treated as complete when its state root exists in
        the database. This is a deliberately relaxed check, which favors speed over
        slow precision.

        :return: True if the header's state root is already in the database
        """
        state_root_is_known = await self.db.coro_exists(header.state_root)
        return state_root_is_known

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Consume batches of headers as they become available, and queue up their
        prerequisite tasks (downloading the block bodies).
        """
        header_batches = self._sync_from_headers(
            self._block_import_tracker,
            self._should_skip_header,
        )
        async for headers in header_batches:
            # Switching from one sync to another sometimes queues the same header twice,
            # so drop everything that is already in the body download queue
            body_tasks = self._block_body_tasks
            unqueued_headers = tuple(
                header for header in headers if header not in body_tasks
            )

            # When the body download queue is full, this blocks until there is room
            await self.wait(body_tasks.add(unqueued_headers))

    def _mark_body_download_complete(
            self,
            batch_id: int,
            completed_headers: Sequence[BlockHeaderAPI]) -> None:
        """
        Let the base class close out the body download batch, then record that the
        block bodies for these headers are stored.

        :param batch_id: identifier of the body download batch, passed on to the base class
        :param completed_headers: headers whose bodies were collected in this batch
        """
        super()._mark_body_download_complete(batch_id, completed_headers)

        # The prerequisite that the import tracker waits on for each header
        bodies_stored = BlockImportPrereqs.STORE_BLOCK_BODIES
        self._block_import_tracker.finish_prereq(bodies_stored, completed_headers)

    async def _preview_ready_blocks(self) -> None:
        """
        Wait for block bodies to be downloaded, then compile the blocks and
        preview them to the importer.

        It's important to do this in a separate step from importing so that
        previewing can get ahead of import by a few blocks.

        Each iteration of the loop handles a single header: it updates the
        self._db_buffer_capacity flag, queues the compiled block for import, and
        only then previews the block's transactions to the importer.
        """
        # Nothing can be ready before the first headers have arrived
        await self.wait(self._got_first_header.wait())
        while self.is_operational:
            # This tracker waits for all prerequisites to be complete, and returns headers in
            # order, so that each header's parent is already persisted.
            # NOTE(review): the argument 1 presumably caps the batch at a single header, which
            # is what the [0] lookup below relies on -- confirm against ready_tasks()
            get_ready_coro = self._block_import_tracker.ready_tasks(1)
            completed_headers = await self.wait(get_ready_coro)

            if self._block_import_tracker.has_ready_tasks():
                # Even after clearing out a big batch, there is no available capacity, so
                # pause any coroutines that might wait for capacity
                self._db_buffer_capacity.clear()
            else:
                # There is available capacity, let any waiting coroutines continue
                self._db_buffer_capacity.set()

            header = completed_headers[0]
            # For a header with a non-empty body, this removes the body from the pending bodies
            block = self._header_to_block(header)

            # Put block in short queue for import, wait here if queue is full
            await self.wait(self._import_queue.put(block))

            # Load the state root of the parent header
            try:
                parent_state_root = self._block_hash_to_state_root[header.parent_hash]
            except KeyError:
                # For the very first header that we load, we have to look up the parent's
                # state from the database:
                # NOTE(review): unlike the awaits above, this lookup is not wrapped in self.wait()
                parent = await self.chain.coro_get_block_header_by_hash(header.parent_hash)
                parent_state_root = parent.state_root

            # Emit block for preview
            #   - look up the addresses referenced by the transaction (eg~ sender and recipient)
            #   - execute the block ahead of time to start collecting any missing state
            #   - store the header (for future evm execution that might look up old block hashes)
            # NOTE(review): this call is not wrapped in self.wait() either
            await self._block_importer.preview_transactions(
                header,
                block.transactions,
                parent_state_root,
            )

    async def _import_ready_blocks(self) -> None:
        """
        Take blocks off the import queue, one at a time, and import them.

        self._import_active is held for as long as there are blocks to import, and
        released whenever the queue runs empty. Each time importing resumes after such
        a pause, the time spent waiting for the next block is logged.
        """
        await self.wait(self._got_first_header.wait())

        # Start timing right away. The timer must exist even when the very first pass
        # through the loop finds blocks already queued: in that case the "queue is empty"
        # branch below is skipped, while the lock is not held yet, so the wait time is
        # still logged.
        waiting_for_next_block = Timer()

        while self.is_operational:
            if self._import_queue.empty():
                # Nothing to import: flag the importer as idle and restart the wait timer
                if self._import_active.locked():
                    self._import_active.release()
                waiting_for_next_block = Timer()

            block = await self.wait(self._import_queue.get())
            if not self._import_active.locked():
                # Importing (re)starts here, so report how long the wait for a body was
                self.logger.info(
                    "Waited %.1fs for %s body",
                    waiting_for_next_block.elapsed,
                    block.header,
                )
                await self._import_active.acquire()

            await self._import_block(block)

    async def _import_block(self, block: BlockAPI) -> None:
        """
        Import a single block through the block importer, and log the outcome.

        Three outcomes are told apart: a plain import of one new canonical block, an
        import of a non-canonical (fork) block, and a chain reorganization.

        :param block: the block to import
        :raises Exception: if the import result matches none of the known outcomes
        """
        timer = Timer()
        import_result = await self.wait(self._block_importer.import_block(block))
        _, new_canonical_blocks, old_canonical_blocks = import_result

        # How far is the imported block's header behind the current time?
        humanized_lag = humanize_seconds(time.time() - block.header.timestamp)

        if new_canonical_blocks == (block,):
            # Plain import of a single new block. These logs are rate limited: once the
            # limiter runs dry, they are demoted from info to debug.
            if self._import_log_limiter.can_take(1):
                log_fn = self.logger.info
                self._import_log_limiter.take_nowait(1)
            else:
                log_fn = self.logger.debug
            log_fn(
                "Imported block %d (%d txs) in %.2f seconds, with %s lag",
                block.number,
                len(block.transactions),
                timer.elapsed,
                humanized_lag,
            )
            return

        if not new_canonical_blocks:
            # The block belongs to a fork
            self.logger.info(
                "Imported non-canonical block %d (%d txs) in %.2f seconds, with %s lag",
                block.number,
                len(block.transactions),
                timer.elapsed,
                humanized_lag,
            )
            return

        if not old_canonical_blocks:
            # Several new canonical blocks, but none discarded: not an expected outcome
            raise Exception("Invariant: unreachable code path")

        self.logger.info(
            "Chain Reorganization: Imported block %d (%d txs) in %.2f seconds, "
            "%d blocks discarded and %d new canonical blocks added, with %s lag",
            block.number,
            len(block.transactions),
            timer.elapsed,
            len(old_canonical_blocks),
            len(new_canonical_blocks),
            humanized_lag,
        )

    def _header_to_block(self, header: BlockHeaderAPI) -> BlockAPI:
        """
        Turn a header that was queued up for sync into its full block.

        This may only be called after the body is marked as fully downloaded, as tracked
        by self._block_import_tracker. For a header with a non-empty body, the body is
        removed from the pending bodies.

        :param header: the header to build the block for
        :return: a block of the class that the header's VM uses
        """
        block_class = self.chain.get_vm_class(header).get_block_class()

        if _is_body_empty(header):
            # No transactions and no uncles: nothing was downloaded for this header
            return block_class(header, [], [])

        downloaded_body = self._pending_bodies.pop(header)
        tx_class = block_class.get_transaction_class()
        transactions = [
            tx_class.from_base_transaction(base_tx)
            for base_tx in downloaded_body.transactions
        ]
        return block_class(header, transactions, downloaded_body.uncles)

    async def _display_stats(self) -> None:
        """
        Once the first header has arrived, log the state of the body download queue,
        the write capacity flag and the import lock, every 5 seconds.
        """
        self.logger.debug("Regular sync waiting for first header to arrive")
        await self.wait(self._got_first_header.wait())
        self.logger.debug("Regular sync first header arrived")

        while self.is_operational:
            await self.sleep(5)

            # Only one queue is tracked here; it is still reported as a one-element list
            body_tasks = self._block_body_tasks
            queue_stats = [
                (body_tasks.num_in_progress(), len(body_tasks), body_tasks._maxsize),
            ]
            self.logger.debug(
                "(progress, queued, max) of bodies, receipts: %r. Write capacity? %s Importing? %s",
                queue_stats,
                self._db_buffer_capacity.is_set(),
                self._import_active.locked(),
            )
Пример #23
0
class RegularChainBodySyncer(BaseBodyChainSyncer):
    """
    Regular sync: fetch block bodies for incoming headers and import the blocks.

    Headers are received from the header syncer, their bodies are downloaded
    from peers, and each block is handed to the block importer once its body is
    available and its parent has been handled.

    Here, the run() method will execute the sync loop forever, until our CancelToken is triggered.
    """

    def __init__(self,
                 chain: BaseAsyncChain,
                 db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 header_syncer: HeaderSyncerAPI,
                 block_importer: BaseBlockImporter,
                 token: CancelToken = None) -> None:
        """
        :param chain: chain used to look up the VM (and so the block class) per header
        :param db: async chain database, queried for the canonical head and state roots
        :param peer_pool: forwarded to the base class
        :param header_syncer: forwarded to the base class
        :param block_importer: object whose import_block() performs the actual import
        :param token: optional cancel token, forwarded to the base class
        """
        super().__init__(chain, db, peer_pool, header_syncer, token)

        self._block_importer = block_importer

        # Flag for whether any headers have been received yet; the import loop
        # and the stats loop both wait on it before doing any work.
        self._got_first_header = asyncio.Event()

        # Tracks when block bodies are downloaded, so that blocks can be imported.
        # Tasks are identified by block hash, and depend on parent_hash so that a
        # block is not imported until its parent block is imported.
        self._block_import_tracker = OrderedTaskPreparation(
            BlockImportPrereqs,
            id_extractor=attrgetter('hash'),
            dependency_extractor=attrgetter('parent_hash'),
        )

    async def _run(self) -> None:
        """
        Seed the import tracker with the canonical head, start the background
        loops as daemon tasks, then hand over to the base class's _run().
        """
        canonical_head = await self.wait(self.db.coro_get_canonical_head())
        self._block_import_tracker.set_finished_dependency(canonical_head)

        background_loops = (
            self._launch_prerequisite_tasks,
            self._assign_body_download_to_peers,
            self._import_ready_blocks,
            self._display_stats,
        )
        for start_loop in background_loops:
            self.run_daemon_task(start_loop())

        await super()._run()

    def register_peer(self, peer: BasePeer) -> None:
        """
        Register a peer with the base class and add it to the idle body-peer queue.
        """
        super().register_peer(peer)

        # A peer newly added to the pool starts out idle
        eth_peer = cast(ETHPeer, peer)
        self._body_peers.put_nowait(eth_peer)

    async def _should_skip_header(self, header: BlockHeader) -> bool:
        """
        Decide whether importing this header can be skipped because it already
        appears to be synced.

        The check is intentionally loose, favoring speed over precision: the
        header counts as synced when its state root exists in the database.

        :return: the result of self.db.coro_exists() for header.state_root
        """
        state_root_known = await self.db.coro_exists(header.state_root)
        return state_root_known

    async def _launch_prerequisite_tasks(self) -> None:
        """
        Consume batches of new headers as they arrive and queue up their
        prerequisite work, i.e. downloading the block bodies.
        """
        header_batches = self._sync_from_headers(
            self._block_import_tracker,
            self._should_skip_header,
        )

        async for header_batch in header_batches:
            # Duplicates can show up in the queue when switching from one sync
            # to another; headers that are already queued are simply dropped.
            not_yet_queued = tuple(
                header
                for header in header_batch
                if header not in self._block_body_tasks
            )

            # This hangs until there is room if the body task queue is full
            await self.wait(self._block_body_tasks.add(not_yet_queued))

    def _mark_body_download_complete(
            self,
            batch_id: int,
            completed_headers: Tuple[BlockHeader, ...]) -> None:
        """
        Let the base class record the finished download, then mark the
        StoreBlockBodies prerequisite as done for the same headers.
        """
        super()._mark_body_download_complete(batch_id, completed_headers)

        import_tracker = self._block_import_tracker
        import_tracker.finish_prereq(
            BlockImportPrereqs.StoreBlockBodies,
            completed_headers,
        )

    async def _import_ready_blocks(self) -> None:
        """
        Loop forever: wait for blocks whose bodies have been downloaded, import
        them, and log the resulting chain head.
        """
        await self.wait(self._got_first_header.wait())

        while self.is_operational:
            # Started before waiting, so the logged duration includes the wait
            segment_timer = Timer()

            # Blocks until some block bodies are ready for execution
            ready_headers = await self.wait(
                self._block_import_tracker.ready_tasks())

            more_blocks_waiting = self._block_import_tracker.has_ready_tasks()
            if not more_blocks_waiting:
                # There is available capacity, so let any waiting coroutines continue
                self._db_buffer_capacity.set()
            else:
                # Even after taking out a big batch there is no spare capacity,
                # so pause any coroutines that might wait for capacity
                self._db_buffer_capacity.clear()

            await self._import_blocks(ready_headers)

            new_head = await self.wait(self.db.coro_get_canonical_head())
            self.logger.info(
                "Synced chain segment with %d blocks in %.2f seconds, new head: %s",
                len(ready_headers),
                segment_timer.elapsed,
                new_head,
            )

    async def _import_blocks(self, headers: Tuple[BlockHeader, ...]) -> None:
        """
        Import the blocks belonging to the given headers, one at a time, and
        log the outcome of each import.

        :param headers: headers that have the block bodies downloaded
        :raise Exception: if the importer reports new canonical blocks that are
            neither exactly this block nor accompanied by discarded blocks
        """
        for block in self._headers_to_blocks(headers):
            import_timer = Timer()
            import_result = await self.wait(
                self._block_importer.import_block(block))
            _, new_canonical_blocks, old_canonical_blocks = import_result

            if new_canonical_blocks == (block, ):
                # The plain case: this one block extended the canonical chain
                self.logger.info(
                    "Imported block %d (%d txs) in %.2f seconds",
                    block.number,
                    len(block.transactions),
                    import_timer.elapsed,
                )
                continue

            if not new_canonical_blocks:
                # The block belongs to a fork and changed nothing canonical
                self.logger.info(
                    "Imported non-canonical block %d (%d txs) in %.2f seconds",
                    block.number,
                    len(block.transactions),
                    import_timer.elapsed,
                )
                continue

            if not old_canonical_blocks:
                # Several new canonical blocks without any being replaced
                raise Exception("Invariant: unreachable code path")

            self.logger.info(
                "Chain Reorganization: Imported block %d (%d txs) in %.2f "
                "seconds, %d blocks discarded and %d new canonical blocks added",
                block.number,
                len(block.transactions),
                import_timer.elapsed,
                len(old_canonical_blocks),
                len(new_canonical_blocks),
            )

    @to_tuple
    def _headers_to_blocks(
            self, headers: Iterable[BlockHeader]) -> Iterable[BaseBlock]:
        """
        Build a full block for every header, in order.

        Bodies are removed from self._pending_bodies as they are used; headers
        for which _is_body_empty() is true get no transactions and no uncles.

        :param headers: headers to convert
        :return: the assembled blocks, collected by the to_tuple decorator
        """
        for header in headers:
            block_class = self.chain.get_vm_class(header).get_block_class()

            if _is_body_empty(header):
                block = block_class(header, [], [])
            else:
                pending_body = self._pending_bodies.pop(header)
                transaction_class = block_class.get_transaction_class()
                # Re-wrap each transaction in this block class's transaction class
                converted_transactions = list(
                    map(
                        transaction_class.from_base_transaction,
                        pending_body.transactions,
                    )
                )
                block = block_class(
                    header, converted_transactions, pending_body.uncles)

            yield block

    async def _display_stats(self) -> None:
        """
        Emit periodic debug-level statistics about the sync.

        Waits for self._got_first_header to be set, then, for as long as the
        service is operational, calls self.sleep(5) and logs a snapshot of the
        body-download queue and the write-capacity flag.
        """
        self.logger.debug("Regular sync waiting for first header to arrive")
        await self.wait(self._got_first_header.wait())
        self.logger.debug("Regular sync first header arrived")

        while self.is_operational:
            await self.sleep(5)

            # Only the body queue is reported, as a one-element list of tuples
            body_queue = self._block_body_tasks
            queue_snapshot = [
                (body_queue.num_in_progress(), len(body_queue), body_queue._maxsize),
            ]

            if self._db_buffer_capacity.is_set():
                write_capacity = "yes"
            else:
                write_capacity = "no"

            self.logger.debug(
                "(in progress, queued, max size) of bodies, receipts: %r. Write capacity? %s",
                queue_snapshot,
                write_capacity,
            )