Example #1
File: chain.py  Project: Elnaril/trinity
class BeamSyncer(Service):
    """
    Organizes several moving parts to coordinate beam sync. Roughly:

        - Sync *only* headers until you have caught up with a peer, i.e. the launchpoint
        - Launch a service responsible for serving event bus requests for missing state data
        - When you catch up with a peer, start downloading transactions needed to execute a block
        - At the launchpoint, switch to full block imports, with a custom importer

    This syncer relies on a separately orchestrated beam sync component, which:

        - listens for DoStatelessBlockImport events
        - emits events when data is missing, like CollectMissingAccount
        - emits StatelessBlockImportDone when the block import is completed in the DB

    There is an option, currently only used for testing, to force beam sync at a particular
    block number (rather than trigger it when catching up with a peer).
    """
    def __init__(
            self,
            chain: AsyncChainAPI,
            db: AtomicDatabaseAPI,
            chain_db: BaseAsyncChainDB,
            peer_pool: ETHPeerPool,
            event_bus: EndpointAPI,
            metrics_registry: MetricsRegistry,
            checkpoint: Checkpoint = None,
            force_beam_block_number: BlockNumber = None,
            enable_backfill: bool = True,
            enable_state_backfill: bool = True) -> None:
        self.logger = get_logger('trinity.sync.beam.chain.BeamSyncer')

        self.metrics_registry = metrics_registry
        self._body_for_header_exists = body_for_header_exists(chain_db, chain)

        if checkpoint is None:
            self._launch_strategy: SyncLaunchStrategyAPI = FromGenesisLaunchStrategy(chain_db)
        else:
            self._launch_strategy = FromCheckpointLaunchStrategy(
                chain_db,
                chain,
                checkpoint,
                peer_pool,
            )

        self._header_syncer = ETHHeaderChainSyncer(
            chain,
            chain_db,
            peer_pool,
            self._launch_strategy,
        )
        self._header_persister = HeaderOnlyPersist(
            self._header_syncer,
            chain_db,
            force_beam_block_number,
            self._launch_strategy,
        )

        self._backfiller = BeamStateBackfill(db, peer_pool)

        if enable_state_backfill:
            self._queen_queue: QueenTrackerAPI = self._backfiller
        else:
            self._queen_queue = QueeningQueue(peer_pool)

        self._state_downloader = BeamDownloader(
            db,
            peer_pool,
            self._queen_queue,
            event_bus,
        )
        self._data_hunter = MissingDataEventHandler(
            self._state_downloader,
            event_bus,
            self.metrics_registry,
        )

        self._block_importer = BeamBlockImporter(
            chain,
            db,
            self._state_downloader,
            self._backfiller,
            event_bus,
            self.metrics_registry,
        )
        self._launchpoint_header_syncer = HeaderLaunchpointSyncer(self._header_syncer)
        self._body_syncer = RegularChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._launchpoint_header_syncer,
            self._block_importer,
        )

        self._manual_header_syncer = ManualHeaderSyncer()
        self._fast_syncer = RigorousFastChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._manual_header_syncer,
        )

        self._header_backfill = SequentialHeaderChainGapSyncer(chain, chain_db, peer_pool)
        self._block_backfill = BodyChainGapSyncer(chain, chain_db, peer_pool)

        self._chain = chain
        self._enable_backfill = enable_backfill
        self._enable_state_backfill = enable_state_backfill

    async def run(self) -> None:

        try:
            await self._launch_strategy.fulfill_prerequisites()
        except asyncio.TimeoutError as exc:
            self.logger.exception(
                "Timed out while trying to fulfill prerequisites of "
                f"sync launch strategy: {exc} from {self._launch_strategy}"
            )
            self.manager.cancel()
            return

        self.manager.run_daemon_child_service(self._block_importer)
        self.manager.run_daemon_child_service(self._header_syncer)

        # Kick off the body syncer early (it blocks on the launchpoint header syncer anyway).
        # It needs to start early because we want to "re-run" the header at the tip,
        # and the body syncer refuses to accept the current canonical header tip
        # as a header to process.
        self.manager.run_daemon_child_service(self._body_syncer)

        # Launch the state syncer endpoint early
        self.manager.run_daemon_child_service(self._data_hunter)

        # Only persist headers at start
        async with background_asyncio_service(self._header_persister) as manager:
            await manager.wait_finished()
        # When the header persister exits, we have caught up

        # We want to trigger beam sync on the last block received,
        # not wait for the next one to be broadcast
        final_headers = self._header_persister.get_final_headers()

        # First, download block bodies for the previous six blocks, for uncle validation
        await self._download_blocks(final_headers[0])

        # Now, tell the MissingDataEventHandler about the minimum acceptable block number for
        # data requests. This helps during pivots to quickly reject requests from old block imports
        self._data_hunter.minimum_beam_block_number = min(
            header.block_number for header in final_headers
        )

        # Now let the beam sync importer kick in
        self._launchpoint_header_syncer.set_launchpoint_headers(final_headers)

        # We wait until beam sync has launched before starting backfill, because
        #   they both request block bodies, but beam sync needs them urgently.
        if self._enable_backfill:
            # There's no chance to introduce new gaps after this point. Therefore we can run this
            # until it has filled all gaps and let it finish.
            self.manager.run_child_service(self._header_backfill)

            # In contrast, block gap fill needs to run indefinitely because of beam sync pivoting.
            self.manager.run_daemon_child_service(self._block_backfill)

            # Now we can check the lag (presumably ~0) and start backfill
            self.manager.run_daemon_task(self._monitor_historical_backfill)

        # Will start the state background service or the basic queen queue
        self.manager.run_child_service(self._queen_queue)

        # TODO wait until first header with a body comes in?...
        # Start state downloader service
        self.manager.run_daemon_child_service(self._state_downloader)

        # run sync until cancelled
        await self.manager.wait_finished()

    def get_block_count_lag(self) -> int:
        """
        :return: the difference in block number between the currently importing block and
            the latest known block
        """
        return self._body_syncer.get_block_count_lag()

    async def _download_blocks(self, before_header: BlockHeaderAPI) -> None:
        """
        When importing a block, we need to validate uncles against the previous
        six blocks, so download those bodies and persist them to the database.
        """
        parents_needed = FULL_BLOCKS_NEEDED_TO_START_BEAM

        self.logger.info(
            "Downloading %d block bodies for uncle validation, before %s",
            parents_needed,
            before_header,
        )

        # select the recent ancestors to sync block bodies for
        parent_headers = tuple(reversed([
            header async for header
            in self._get_ancestors(parents_needed, header=before_header)
        ]))

        # identify starting tip and headers with possible uncle conflicts for validation
        if len(parent_headers) < parents_needed:
            self.logger.info(
                "Collecting %d blocks to genesis for uncle validation",
                len(parent_headers),
            )
            sync_from_tip = await self._chain.coro_get_canonical_block_header_by_number(
                BlockNumber(0)
            )
            uncle_conflict_headers = parent_headers
        else:
            sync_from_tip = parent_headers[0]
            uncle_conflict_headers = parent_headers[1:]

        # check if we already have the blocks for the uncle conflict headers
        if await self._all_verification_bodies_present(uncle_conflict_headers):
            self.logger.debug("All needed block bodies are already available")
        else:
            # tell the header syncer to emit those headers
            self._manual_header_syncer.emit(uncle_conflict_headers)

            # tell the fast syncer which tip to start from
            self._fast_syncer.set_starting_tip(sync_from_tip)

            # run the fast syncer (which downloads block bodies and then exits)
            self.logger.info("Getting recent block data for uncle validation")
            async with background_asyncio_service(self._fast_syncer) as manager:
                await manager.wait_finished()

        # When this completes, we have all the uncles needed to validate
        self.logger.info("Have all data needed for Beam validation, continuing...")

    async def _get_ancestors(self,
                             limit: int,
                             header: BlockHeaderAPI) -> AsyncIterator[BlockHeaderAPI]:
        """
        Return `limit` number of ancestor headers from the specified header.
        """
        headers_returned = 0
        while header.parent_hash != GENESIS_PARENT_HASH and headers_returned < limit:
            parent = await self._chain.coro_get_block_header_by_hash(header.parent_hash)
            yield parent
            headers_returned += 1
            header = parent

    async def _all_verification_bodies_present(
            self,
            headers_with_potential_conflicts: Iterable[BlockHeaderAPI]) -> bool:
        """
        Return whether block bodies are already stored for all of the given headers.
        """
        for header in headers_with_potential_conflicts:
            if not await self._body_for_header_exists(header):
                return False
        return True

    async def _monitor_historical_backfill(self) -> None:
        """
        Periodically compare the block import lag against the pause/resume
        thresholds, pausing historical backfill while beam sync is far behind
        and resuming it once imports catch back up.
        """
        while self.manager.is_running:
            await asyncio.sleep(PREDICTED_BLOCK_TIME)
            if self._block_backfill.get_manager().is_cancelled:
                return
            else:
                lag = self.get_block_count_lag()
                if lag >= PAUSE_BACKFILL_AT_LAG and not self._block_backfill.is_paused:
                    self.logger.debug(
                        "Pausing historical header/block sync because we lag %s blocks",
                        lag,
                    )
                    self._block_backfill.pause()
                    self._header_backfill.pause()
                elif lag <= RESUME_BACKFILL_AT_LAG and self._block_backfill.is_paused:
                    self.logger.debug(
                        "Resuming historical header/block sync because we lag %s blocks",
                        lag,
                    )
                    self._block_backfill.resume()
                    self._header_backfill.resume()
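
For context, here is a minimal sketch of how this Service-based BeamSyncer might be driven. It assumes the same background_asyncio_service helper the class itself uses above, plus hypothetical, already-constructed chain, db, chain_db, peer_pool, event_bus, and metrics_registry objects; the import path for background_asyncio_service is an assumption.

# Assumed import path; BeamSyncer runs its own children with this same helper.
from async_service import background_asyncio_service


async def run_beam_sync(chain, db, chain_db, peer_pool, event_bus, metrics_registry):
    # Default configuration: beam sync triggers when we catch up with a peer
    # (no checkpoint, no forced beam block number), with backfill enabled.
    syncer = BeamSyncer(
        chain,
        db,
        chain_db,
        peer_pool,
        event_bus,
        metrics_registry,
    )
    # Run the syncer until it finishes or is cancelled, mirroring how
    # BeamSyncer drives its own child services in run().
    async with background_asyncio_service(syncer) as manager:
        await manager.wait_finished()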
Example #2
class BeamSyncer(BaseService):
    """
    Organizes several moving parts to coordinate beam sync. Roughly:

        - Sync *only* headers until you have caught up with a peer, i.e. the checkpoint
        - Launch a service responsible for serving event bus requests for missing state data
        - When you catch up with a peer, start downloading transactions needed to execute a block
        - At the checkpoint, switch to full block imports, with a custom importer

    This syncer relies on a separately orchestrated beam sync component, which:

        - listens for DoStatelessBlockImport events
        - emits events when data is missing, like CollectMissingAccount
        - emits StatelessBlockImportDone when the block import is completed in the DB

    There is an option, currently only used for testing, to force beam sync at a particular
    block number (rather than trigger it when catching up with a peer).
    """
    def __init__(self,
                 chain: AsyncChainAPI,
                 db: AtomicDatabaseAPI,
                 chain_db: BaseAsyncChainDB,
                 peer_pool: ETHPeerPool,
                 event_bus: EndpointAPI,
                 checkpoint: Checkpoint = None,
                 force_beam_block_number: BlockNumber = None,
                 token: CancelToken = None) -> None:
        super().__init__(token=token)

        if checkpoint is None:
            self._launch_strategy: SyncLaunchStrategyAPI = FromGenesisLaunchStrategy(
                chain_db, chain)
        else:
            self._launch_strategy = FromCheckpointLaunchStrategy(
                chain_db,
                chain,
                checkpoint,
                peer_pool,
            )

        self._header_syncer = ETHHeaderChainSyncer(chain, chain_db, peer_pool,
                                                   self._launch_strategy,
                                                   self.cancel_token)
        self._header_persister = HeaderOnlyPersist(
            self._header_syncer,
            chain_db,
            force_beam_block_number,
            self._launch_strategy,
            self.cancel_token,
        )

        self._backfiller = BeamStateBackfill(db,
                                             peer_pool,
                                             token=self.cancel_token)

        self._state_downloader = BeamDownloader(
            db,
            peer_pool,
            self._backfiller,
            event_bus,
            self.cancel_token,
        )
        self._data_hunter = MissingDataEventHandler(
            self._state_downloader,
            event_bus,
            token=self.cancel_token,
        )

        self._block_importer = BeamBlockImporter(
            chain,
            db,
            self._state_downloader,
            self._backfiller,
            event_bus,
            self.cancel_token,
        )
        self._checkpoint_header_syncer = HeaderCheckpointSyncer(
            self._header_syncer)
        self._body_syncer = RegularChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._checkpoint_header_syncer,
            self._block_importer,
            self.cancel_token,
        )

        self._manual_header_syncer = ManualHeaderSyncer()
        self._fast_syncer = RigorousFastChainBodySyncer(
            chain,
            chain_db,
            peer_pool,
            self._manual_header_syncer,
            self.cancel_token,
        )

        self._chain = chain

    async def _run(self) -> None:

        try:
            await self.wait(self._launch_strategy.fulfill_prerequisites())
        except asyncio.TimeoutError as exc:
            self.logger.exception(
                "Timed out while trying to fulfill prerequisites of "
                f"sync launch strategy: {exc} from {self._launch_strategy}")
            self.cancel_nowait()
            return

        self.run_daemon(self._header_syncer)

        # Kick off the body syncer early (it blocks on the checkpoint header syncer anyway).
        # It needs to start early because we want to "re-run" the header at the tip,
        # and the body syncer refuses to accept the current canonical header tip
        # as a header to process.
        self.run_daemon(self._body_syncer)

        # Launch the state syncer endpoint early
        self.run_daemon(self._data_hunter)

        # Only persist headers at start
        await self.wait(self._header_persister.run())
        # When the header persister exits, we have caught up

        # We want to trigger beam sync on the last block received,
        # not wait for the next one to be broadcast
        final_headers = self._header_persister.get_final_headers()

        # First, download block bodies for the previous six blocks, for uncle validation
        await self.wait(self._download_blocks(final_headers[0]))

        # Now, tell the MissingDataEventHandler about the minimum acceptable block number for
        # data requests. This helps during pivots to quickly reject requests from old block imports
        self._data_hunter.minimum_beam_block_number = min(
            header.block_number for header in final_headers)

        # Now let the beam sync importer kick in
        self._checkpoint_header_syncer.set_checkpoint_headers(final_headers)

        # TODO wait until first header with a body comes in?...
        # Start state downloader service
        self.run_daemon(self._state_downloader)

        # Start state background service
        self.run_daemon(self._backfiller)

        # run sync until cancelled
        await self.cancellation()

    def get_block_count_lag(self) -> int:
        """
        :return: the difference in block number between the currently importing block and
            the latest known block
        """
        return self._body_syncer.get_block_count_lag()

    async def _download_blocks(self, before_header: BlockHeader) -> None:
        """
        When importing a block, we need to validate uncles against the previous
        six blocks, so download those bodies and persist them to the database.
        """
        parents_needed = FULL_BLOCKS_NEEDED_TO_START_BEAM

        self.logger.info(
            "Downloading %d block bodies for uncle validation, before %s",
            parents_needed,
            before_header,
        )

        # select the recent ancestors to sync block bodies for
        parent_headers = tuple(
            reversed([
                header
                async for header in self._get_ancestors(parents_needed,
                                                        header=before_header)
            ]))

        # identify starting tip and headers with possible uncle conflicts for validation
        if len(parent_headers) < parents_needed:
            self.logger.info(
                "Collecting %d blocks to genesis for uncle validation",
                len(parent_headers),
            )
            sync_from_tip = await self._chain.coro_get_canonical_block_header_by_number(
                BlockNumber(0))
            uncle_conflict_headers = parent_headers
        else:
            sync_from_tip = parent_headers[0]
            uncle_conflict_headers = parent_headers[1:]

        # check if we already have the blocks for the uncle conflict headers
        if await self._all_verification_bodies_present(uncle_conflict_headers):
            self.logger.debug("All needed block bodies are already available")
        else:
            # tell the header syncer to emit those headers
            self._manual_header_syncer.emit(uncle_conflict_headers)

            # tell the fast syncer which tip to start from
            self._fast_syncer.set_starting_tip(sync_from_tip)

            # run the fast syncer (which downloads block bodies and then exits)
            self.logger.info("Getting recent block data for uncle validation")
            await self._fast_syncer.run()

        # When this completes, we have all the uncles needed to validate
        self.logger.info(
            "Have all data needed for Beam validation, continuing...")

    async def _get_ancestors(
            self, limit: int,
            header: BlockHeader) -> AsyncIterator[BlockHeader]:
        """
        Return `limit` number of ancestor headers from the specified header.
        """
        headers_returned = 0
        while header.parent_hash != GENESIS_PARENT_HASH and headers_returned < limit:
            parent = await self._chain.coro_get_block_header_by_hash(
                header.parent_hash)
            yield parent
            headers_returned += 1
            header = parent

    async def _all_verification_bodies_present(
            self,
            headers_with_potential_conflicts: Iterable[BlockHeader]) -> bool:
        """
        Return whether block bodies are already stored for all of the given
        headers, checked via the fast syncer's header-skipping logic.
        """
        for header in headers_with_potential_conflicts:
            if not await self._fast_syncer._should_skip_header(header):
                return False
        return True
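
A comparable sketch for this older BaseService variant, assuming the CancelToken-style API shown above (a run() coroutine plus cancellation via the token); the surrounding objects are hypothetical and the import path for CancelToken is an assumption.

# Assumed import path for the cancel-token primitive used by BaseService.
from cancel_token import CancelToken


async def run_beam_sync(chain, db, chain_db, peer_pool, event_bus):
    token = CancelToken('beam-sync')
    # Older service style: pass an optional CancelToken, then await run(),
    # which blocks until the service is cancelled via the token.
    syncer = BeamSyncer(
        chain,
        db,
        chain_db,
        peer_pool,
        event_bus,
        token=token,
    )
    try:
        await syncer.run()
    finally:
        token.trigger()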