def extract_tokens(contracts, provider_uri, output, max_workers, values_as_strings=False):
    """Extracts tokens from contracts file."""
    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)

        converters = [
            IntToStringItemConverter(keys=['decimals', 'total_supply'])
        ] if values_as_strings else []

        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(
                lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))

        job.run()
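# Usage sketch for extract_tokens (hypothetical file names and endpoint).
# values_as_strings routes 'decimals' and 'total_supply' through
# IntToStringItemConverter, which can help downstream tools that cannot
# represent very large integers.
#
# extract_tokens(
#     contracts='contracts.csv',
#     provider_uri='https://mainnet.infura.io',
#     output='tokens.csv',
#     max_workers=5,
#     values_as_strings=True)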
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
                  genesis_traces, daofork_traces, timeout=60, chain='ethereum'):
    """Exports traces from parity node."""
    if chain == 'classic' and daofork_traces:
        raise ValueError(
            'Classic chain does not include daofork traces. '
            'Disable daofork traces with --no-daofork-traces option.')

    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: Web3(
            get_provider_from_uri(provider_uri, timeout=timeout))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)
    job.run()
def stream(last_synced_block_file, lag, provider_uri, output, start_block, entity_types,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5,
           log_file=None, pid_file=None):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()
    entity_types = parse_entity_types(entity_types)

    from blockchainetl.streaming.streaming_utils import get_item_exporter
    from ethereumetl.streaming.eth_streamer_adapter import EthStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    # TODO: Implement fallback mechanism for provider uris instead of picking randomly
    provider_uri = pick_random_provider_uri(provider_uri)
    logging.info('Using ' + provider_uri)

    streamer_adapter = EthStreamerAdapter(
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        item_exporter=get_item_exporter(output),
        batch_size=batch_size,
        max_workers=max_workers,
        entity_types=entity_types
    )
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file
    )
    streamer.stream()
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri,
                                   max_workers, blocks_output, transactions_output,
                                   chain='ethereum'):
    """Exports blocks and transactions."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    if blocks_output is None and transactions_output is None:
        raise ValueError(
            'Either --blocks-output or --transactions-output options must be provided')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output),
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
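# Usage sketch for export_blocks_and_transactions (hypothetical range, output
# paths and endpoint). At least one of the two output paths must be given,
# otherwise the function raises ValueError.
#
# export_blocks_and_transactions(
#     start_block=0,
#     end_block=99999,
#     batch_size=100,
#     provider_uri='https://mainnet.infura.io',
#     max_workers=5,
#     blocks_output='blocks.csv',
#     transactions_output='transactions.csv')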
def export_tokens(token_addresses, output, max_workers, provider_uri):
    """Exports ERC20/ERC721 tokens."""
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(token_address.strip() for token_address in token_addresses_file),
            web3=ThreadLocalProxy(lambda: Web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)
        job.run()
def get_block_range_for_date(provider_uri, date, output):
    """Outputs start and end blocks for given date."""
    provider = get_provider_from_uri(provider_uri)
    web3 = Web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
def export_geth_traces(start_block, end_block, batch_size, output, max_workers, provider_uri):
    """Exports traces from geth node."""
    job = ExportGethTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
        max_workers=max_workers,
        item_exporter=geth_traces_item_exporter(output))
    job.run()
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = build_web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
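# Usage sketch (hypothetical endpoint; '-' writes the "start,end" pair to
# stdout). The date argument is whatever EthService.get_block_range_for_date
# accepts, e.g. a datetime.date.
#
# from datetime import date
# get_block_range_for_date('https://mainnet.infura.io', date(2018, 1, 1), '-')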
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()
        elif is_unix_time_range(start, end):
            if len(start) == 10 and len(end) == 10:
                start_date = datetime.utcfromtimestamp(int(start)).date()
                end_date = datetime.utcfromtimestamp(int(end)).date()
            elif len(start) == 13 and len(end) == 13:
                start_date = datetime.utcfromtimestamp(int(start) / 1e3).date()
                end_date = datetime.utcfromtimestamp(int(end) / 1e3).date()

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = Web3(provider)
        eth_service = EthService(web3)

        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(start_date)
            partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1, partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError('start and end must be either block numbers or ISO dates or Unix times')
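# Usage sketch for get_partitions (hypothetical ranges and endpoint). ISO dates
# and Unix timestamps yield one partition per day; plain block numbers yield
# fixed-size partitions of partition_batch_size blocks.
#
# for start, end, partition_dir in get_partitions('2018-01-01', '2018-01-03', 10000,
#                                                 'https://mainnet.infura.io'):
#     print(start, end, partition_dir)   # one tuple per calendar day
#
# for start, end, partition_dir in get_partitions('0', '99999', 10000,
#                                                 'https://mainnet.infura.io'):
#     print(start, end, partition_dir)   # e.g. 0 9999 /start_block=00000000/end_block=00009999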
def export_token_transfers(start_block, end_block, batch_size, output, max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()
def export_contracts(batch_size, contract_addresses, output, max_workers, provider_uri):
    """Exports contracts bytecode and sighashes."""
    with smart_open(contract_addresses, 'r') as contract_addresses_file:
        contract_addresses = (contract_address.strip() for contract_address in contract_addresses_file
                              if contract_address.strip())
        job = ExportContractsJob(
            contract_addresses_iterable=contract_addresses,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            item_exporter=contracts_item_exporter(output),
            max_workers=max_workers)
        job.run()
def export_origin(start_block, end_block, batch_size, marketplace_output, shop_output,
                  max_workers, provider_uri):
    """Exports Origin Protocol data."""
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(marketplace_output),
        shop_product_exporter=origin_shop_product_item_exporter(shop_output),
        max_workers=max_workers)
    job.run()
def export_traces(start_block, end_block, batch_size, output, max_workers, provider_uri,
                  genesis_traces, daofork_traces):
    """Exports traces from parity node."""
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(provider_uri))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)
    job.run()
def export_receipts_and_logs(batch_size, transaction_hashes, provider_uri, max_workers,
                             receipts_output, logs_output, chain='ethereum'):
    """Exports receipts and logs."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(transaction_hashes, 'r') as transaction_hashes_file:
        job = ExportReceiptsJob(
            transaction_hashes_iterable=(transaction_hash.strip() for transaction_hash in transaction_hashes_file),
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=receipts_and_logs_item_exporter(receipts_output, logs_output),
            export_receipts=receipts_output is not None,
            export_logs=logs_output is not None)
        job.run()
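# Usage sketch for export_receipts_and_logs (hypothetical files and endpoint).
# The transaction_hashes file is expected to contain one transaction hash per
# line, e.g. the 'hash' column extracted from a previously exported
# transactions CSV.
#
# export_receipts_and_logs(
#     batch_size=100,
#     transaction_hashes='transaction_hashes.txt',
#     provider_uri='https://mainnet.infura.io',
#     max_workers=5,
#     receipts_output='receipts.csv',
#     logs_output='logs.csv')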
def extract_tokens(contracts, provider_uri, output, max_workers):
    """Extracts tokens from contracts file."""
    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)

        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(
                lambda: Web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output))
        job.run()
def _extract(self, start_block, end_block):
    logging.info("Running extraction job for block range {}-{}".format(start_block, end_block))
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=JOB_BLOCK_BATCH_SIZE,
        web3=ThreadLocalProxy(
            lambda: Web3(get_provider_from_uri(self.provider_url))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(
            JOB_MARKETPLACE_OUTPUT_FILENAME),
        shop_product_exporter=origin_shop_product_item_exporter(
            JOB_DSHOP_OUTPUT_FILENAME),
        max_workers=JOB_MAX_WORKERS)
    job.run()
    logging.info("Extraction done.")
def connection_check_recursice(uri, backup_uri_list):
    """
    Recursively checks the connection of the backup URIs.

    input: backup_uri_list is the backup_provider_uri_list variable; needs to be comma-separated URIs.
    returns: a live URI
    """
    if not backup_uri_list:
        raise ConnectionError("No ethereum node is responding")

    backup_provider_uri = backup_uri_list.pop()
    provider = get_provider_from_uri(uri)
    web3 = Web3(provider)
    block_timestamp_graph = BlockTimestampGraph(web3)
    try:
        # A cheap sanity query: if the node answers, the URI is considered live.
        block_timestamp_graph.get_first_point()
        return uri
    except Exception:
        return connection_check_recursice(backup_provider_uri, backup_uri_list)
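# Usage sketch (hypothetical endpoints): try the primary node first and fall
# back to the remaining backups until one answers the first-point query.
#
# backups = 'https://backup1.example.com,https://backup2.example.com'.split(',')
# live_uri = connection_check_recursice('https://mainnet.infura.io', backups)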
    '--max-workers', default=5, type=int, help='The maximum number of workers.')
parser.add_argument('-p', '--provider-uri', default=None, type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc')
parser.add_argument('-t', '--tokens', default=None, type=str, nargs='+',
                    help='The list of token addresses to filter by.')

args = parser.parse_args()

job = ExportErc20TransfersJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    web3=ThreadLocalProxy(
        lambda: Web3(get_provider_from_uri(args.provider_uri))),
    item_exporter=export_erc20_transfers_job_item_exporter(args.output),
    max_workers=args.max_workers,
    tokens=args.tokens)
job.run()
def main() -> None:
    """Main function."""
    args = create_parser().parse_args()

    thread_proxy = ThreadLocalProxy(lambda: get_provider_from_uri(
        args.provider_uri, timeout=args.timeout, batch=True))
    adapter = EthStreamerAdapter(thread_proxy, batch_size=50)

    start_block = 0
    if args.start_block is None:
        if args.continue_export:
            block_files = sorted(pathlib.Path(args.dir).rglob("block*"))
            if block_files:
                last_file = block_files[-1].name
                print(f"Last exported file: {block_files[-1]}")
                start_block = int(re.match(r".*-(\d+)", last_file).group(1)) + 1
    else:
        start_block = args.start_block

    end_block = get_last_synced_block(thread_proxy)
    print(f"Last synced block: {end_block:,}")
    if args.end_block is not None:
        end_block = args.end_block
    if args.prev_day:
        end_block = get_last_block_yesterday(thread_proxy)

    time1 = datetime.now()
    count = 0

    block_bucket_size = args.file_batch_size
    if args.file_batch_size % args.batch_size != 0:
        print("Error: file_batch_size is not a multiple of batch_size")
        raise SystemExit(1)
    if args.partition_batch_size % args.file_batch_size != 0:
        print("Error: partition_batch_size is not a multiple of file_batch_size")
        raise SystemExit(1)

    rounded_start_block = start_block // block_bucket_size * block_bucket_size
    rounded_end_block = (end_block + 1) // block_bucket_size * block_bucket_size - 1

    if rounded_start_block > rounded_end_block:
        print("No blocks to export")
        raise SystemExit(0)

    block_range = (
        rounded_start_block,
        rounded_start_block + block_bucket_size - 1,
    )

    path = pathlib.Path(args.dir)
    try:
        path.mkdir(parents=True, exist_ok=True)
    except (PermissionError, NotADirectoryError) as exception:
        print(exception)
        raise SystemExit(1) from exception

    block_file = "block_%08d-%08d.csv.gz" % block_range
    tx_file = "tx_%08d-%08d.csv.gz" % block_range
    trace_file = "trace_%08d-%08d.csv.gz" % block_range

    print(f"[{time1}] Processing block range "
          f"{rounded_start_block:,}:{rounded_end_block:,}")

    block_list = []
    tx_list = []
    trace_list = []

    for block_id in range(rounded_start_block, rounded_end_block + 1, args.batch_size):
        current_end_block = min(end_block, block_id + args.batch_size - 1)

        blocks, txs = adapter.export_blocks_and_transactions(block_id, current_end_block)
        receipts, _ = adapter.export_receipts_and_logs(txs)
        traces = adapter.export_traces(block_id, current_end_block, True, True)
        enriched_txs = enrich_transactions(txs, receipts)

        block_list.extend(format_blocks(blocks))
        tx_list.extend(format_transactions(enriched_txs, TX_HASH_PREFIX_LEN))
        trace_list.extend(format_traces(traces))

        count += args.batch_size

        if count >= 1000:
            time2 = datetime.now()
            time_delta = (time2 - time1).total_seconds()
            print(f"[{time2}] Last processed block {current_end_block} "
                  f"({count/time_delta:.1f} blocks/s)")
            time1 = time2
            count = 0

        if (block_id + args.batch_size) % block_bucket_size == 0:
            time3 = datetime.now()
            partition_start = block_id - (block_id % args.partition_batch_size)
            partition_end = partition_start + args.partition_batch_size - 1
            sub_dir = f"{partition_start:08d}-{partition_end:08d}"
            full_path = path / sub_dir
            full_path.mkdir(parents=True, exist_ok=True)

            write_csv(full_path / trace_file, trace_list, TRACE_HEADER)
            write_csv(full_path / tx_file, tx_list, TX_HEADER)
            write_csv(full_path / block_file, block_list, BLOCK_HEADER)

            print(f"[{time3}] "
                  f"Exported blocks: {block_range[0]:,}:{block_range[1]:,} ")

            block_range = (
                block_id + args.batch_size,
                block_id + args.batch_size + block_bucket_size - 1,
            )
            block_file = "block_%08d-%08d.csv.gz" % block_range
            tx_file = "tx_%08d-%08d.csv.gz" % block_range
            trace_file = "trace_%08d-%08d.csv.gz" % block_range

            block_list.clear()
            tx_list.clear()
            trace_list.clear()

    print(f"[{datetime.now()}] Processed block range "
          f"{rounded_start_block:,}:{rounded_end_block:,}")
def export_all_common(partitions, output_dir, provider_uri, max_workers, batch_size):
    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #
        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block)
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block)

        # # # blocks_and_transactions # # #
        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir, file_name_suffix=file_name_suffix)
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir, file_name_suffix=file_name_suffix)
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range, blocks_file=blocks_file))
        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range, transactions_file=transactions_file))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #
        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir, partition_dir=partition_dir)
            os.makedirs(os.path.dirname(token_transfers_output_dir), exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir, file_name_suffix=file_name_suffix)
            logger.info('Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'.format(
                block_range=block_range, token_transfers_file=token_transfers_file))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: Web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #
        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir, file_name_suffix=file_name_suffix)
        logger.info('Extracting hash column from transaction file {transactions_file}'.format(
            transactions_file=transactions_file))
        extract_csv_column_unique(transactions_file, transaction_hashes_file, 'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir, file_name_suffix=file_name_suffix)
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir, file_name_suffix=file_name_suffix)
        logger.info('Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'.format(
            block_range=block_range, receipts_file=receipts_file, logs_file=logs_file))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip() for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #
        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir, file_name_suffix=file_name_suffix)
        logger.info('Extracting contract_address from receipt file {receipts_file}'.format(
            receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file, 'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir, partition_dir=partition_dir)
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir, file_name_suffix=file_name_suffix)
        logger.info('Exporting contracts from blocks {block_range} to {contracts_file}'.format(
            block_range=block_range, contracts_file=contracts_file))

        with smart_open(contract_addresses_file, 'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip() for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #
        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir, file_name_suffix=file_name_suffix)
            logger.info('Extracting token_address from token_transfers file {token_transfers_file}'.format(
                token_transfers_file=token_transfers_file))
            extract_csv_column_unique(token_transfers_file, token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir, partition_dir=partition_dir)
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir, file_name_suffix=file_name_suffix)
            logger.info('Exporting tokens from blocks {block_range} to {tokens_file}'.format(
                block_range=block_range, tokens_file=tokens_file))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip() for token_address in token_addresses),
                    web3=ThreadLocalProxy(
                        lambda: Web3(get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range, time_diff=time_diff))
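# Usage sketch tying get_partitions to export_all_common (hypothetical paths
# and endpoint): each yielded (start, end, partition_dir) tuple drives one full
# export pass of blocks, transactions, token transfers, receipts/logs,
# contracts and tokens.
#
# export_all_common(
#     partitions=get_partitions('0', '99999', 10000, 'https://mainnet.infura.io'),
#     output_dir='output',
#     provider_uri='https://mainnet.infura.io',
#     max_workers=5,
#     batch_size=100)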
parser = argparse.ArgumentParser(
    description='Outputs the start block and end block for a given date.')
parser.add_argument('-p', '--provider-uri', default=None, type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')
parser.add_argument('-d', '--date', required=True,
                    type=lambda d: datetime.strptime(d, '%Y-%m-%d'),
                    help='The date e.g. 2018-01-01.')
parser.add_argument('-o', '--output', default='-', type=str,
                    help='The output file. If not specified stdout is used.')

args = parser.parse_args()

provider = get_provider_from_uri(args.provider_uri)
web3 = Web3(provider)
eth_service = EthService(web3)

start_block, end_block = eth_service.get_block_range_for_date(args.date)

with smart_open(args.output, 'w') as output_file:
    output_file.write('{},{}'.format(start_block, end_block))
    description='Exports ERC20 transfers using eth_newFilter and eth_getFilterLogs JSON RPC APIs.')
parser.add_argument('-s', '--start-block', default=0, type=int, help='Start block')
parser.add_argument('-e', '--end-block', required=True, type=int, help='End block')
parser.add_argument('-b', '--batch-size', default=100, type=int,
                    help='The number of blocks to filter at a time.')
parser.add_argument('-o', '--output', default='-', type=str,
                    help='The output file. If not specified stdout is used.')
parser.add_argument('-w', '--max-workers', default=5, type=int,
                    help='The maximum number of workers.')
parser.add_argument('-p', '--provider-uri', required=True, type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc or http://localhost:8545/')
parser.add_argument('-t', '--tokens', default=None, type=str, nargs='+',
                    help='The list of token addresses to filter by.')

args = parser.parse_args()

# Use web3's HTTPProvider directly for HTTP(S) endpoints; fall back to
# get_provider_from_uri for anything else (e.g. IPC paths), and pass the
# resulting proxy to the job.
if re.compile('^https?://').match(args.provider_uri) is not None:
    tlp = ThreadLocalProxy(lambda: Web3(HTTPProvider(args.provider_uri)))
else:
    tlp = ThreadLocalProxy(lambda: Web3(get_provider_from_uri(args.provider_uri)))

job = ExportErc20TransfersJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    web3=tlp,
    item_exporter=erc20_transfers_item_exporter(args.output),
    max_workers=args.max_workers,
    tokens=args.tokens)
job.run()
def main() -> None:
    """Main function."""
    args = create_parser().parse_args()

    thread_proxy = ThreadLocalProxy(
        lambda: get_provider_from_uri(args.provider_uri, timeout=args.timeout, batch=True)
    )

    cluster = Cluster(args.db_nodes)
    session = cluster.connect(args.keyspace)

    last_synced_block = get_last_synced_block(thread_proxy)
    last_ingested_block = get_last_ingested_block(session)
    print_block_info(last_synced_block, last_ingested_block)

    if args.info:
        cluster.shutdown()
        raise SystemExit(0)

    adapter = EthStreamerAdapter(thread_proxy, batch_size=50)

    start_block = 0
    if args.start_block is None:
        if last_ingested_block is not None:
            start_block = last_ingested_block + 1
    else:
        start_block = args.start_block

    end_block = last_synced_block
    if args.end_block is not None:
        end_block = args.end_block
    if args.prev_day:
        end_block = get_last_block_yesterday(thread_proxy)

    if start_block > end_block:
        print("No blocks to ingest")
        raise SystemExit(0)

    time1 = datetime.now()
    count = 0

    print(
        f"[{time1}] Ingesting block range "
        f"{start_block:,}:{end_block:,} "
        f"into Cassandra nodes {args.db_nodes}"
    )

    prep_stmt = {
        elem: get_prepared_statement(session, args.keyspace, elem)
        for elem in ["trace", "transaction", "block"]
    }

    for block_id in range(start_block, end_block + 1, args.batch_size):
        current_end_block = min(end_block, block_id + args.batch_size - 1)

        blocks, txs = adapter.export_blocks_and_transactions(block_id, current_end_block)
        receipts, _ = adapter.export_receipts_and_logs(txs)
        traces = adapter.export_traces(block_id, current_end_block, True, True)
        enriched_txs = enrich_transactions(txs, receipts)

        # ingest into Cassandra
        ingest_traces(traces, session, prep_stmt["trace"], BLOCK_BUCKET_SIZE)
        ingest_transactions(enriched_txs, session, prep_stmt["transaction"], TX_HASH_PREFIX_LEN)
        ingest_blocks(blocks, session, prep_stmt["block"], BLOCK_BUCKET_SIZE)

        count += args.batch_size

        if count % 1000 == 0:
            time2 = datetime.now()
            time_delta = (time2 - time1).total_seconds()
            print(
                f"[{time2}] "
                f"Last processed block: {current_end_block:,} "
                f"({count/time_delta:.1f} blocks/s)"
            )
            time1 = time2
            count = 0

    print(
        f"[{datetime.now()}] Processed block range "
        f"{start_block:,}:{end_block:,}"
    )

    # store configuration details
    ingest_configuration(session, args.keyspace, int(BLOCK_BUCKET_SIZE), int(TX_HASH_PREFIX_LEN))

    cluster.shutdown()
parser.add_argument('--receipts-output', default=None, type=str,
                    help='The output file for receipts. If not provided receipts will not be exported. '
                         'Use "-" for stdout')
parser.add_argument('--logs-output', default=None, type=str,
                    help='The output file for receipt logs. If not provided receipt logs will not be exported. '
                         'Use "-" for stdout')

args = parser.parse_args()

with smart_open(args.tx_hashes, 'r') as tx_hashes_file:
    job = ExportReceiptsJob(
        tx_hashes_iterable=(tx_hash.strip() for tx_hash in tx_hashes_file),
        batch_size=args.batch_size,
        batch_web3_provider=ThreadLocalProxy(
            lambda: get_provider_from_uri(args.provider_uri, batch=True)),
        max_workers=args.max_workers,
        item_exporter=receipts_and_logs_item_exporter(args.receipts_output, args.logs_output),
        export_receipts=args.receipts_output is not None,
        export_logs=args.logs_output is not None)
    job.run()
from ethereumetl.providers.auto import get_provider_from_uri

parser = argparse.ArgumentParser(description='Export blocks and transactions.')
parser.add_argument('-s', '--start-block', default=0, type=int, help='Start block')
parser.add_argument('-e', '--end-block', required=True, type=int, help='End block')
parser.add_argument('-b', '--batch-size', default=100, type=int,
                    help='The number of blocks to export at a time.')
parser.add_argument('-p', '--provider-uri', default='https://mainnet.infura.io/', type=str,
                    help='The URI of the web3 provider e.g. '
                         'file://$HOME/Library/Ethereum/geth.ipc or https://mainnet.infura.io/')
parser.add_argument('-w', '--max-workers', default=5, type=int,
                    help='The maximum number of workers.')
parser.add_argument('--blocks-output', default=None, type=str,
                    help='The output file for blocks. If not provided blocks will not be exported. '
                         'Use "-" for stdout')
parser.add_argument('--transactions-output', default=None, type=str,
                    help='The output file for transactions. If not provided transactions will not be exported. '
                         'Use "-" for stdout')

args = parser.parse_args()

job = ExportBlocksJob(
    start_block=args.start_block,
    end_block=args.end_block,
    batch_size=args.batch_size,
    batch_web3_provider=ThreadLocalProxy(lambda: get_provider_from_uri(args.provider_uri, batch=True)),
    max_workers=args.max_workers,
    item_exporter=export_blocks_job_item_exporter(args.blocks_output, args.transactions_output),
    export_blocks=args.blocks_output is not None,
    export_transactions=args.transactions_output is not None)
job.run()