def test_export_blocks_job(tmpdir, start_block, end_block, batch_size, resource_group, provider_type, chain):
    blocks_output_file = str(tmpdir.join('actual_block.json'))
    transactions_output_file = str(tmpdir.join('actual_transactions.json'))

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        bitcoin_rpc=ThreadLocalProxy(
            lambda: get_bitcoin_rpc(
                provider_type,
                read_resource_lambda=lambda file: read_resource(resource_group, file),
                chain=chain)),
        max_workers=5,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output_file, transactions_output_file),
        chain=chain,
        export_blocks=blocks_output_file is not None,
        export_transactions=transactions_output_file is not None)
    job.run()

    print('=====================')
    print(read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.json'),
        read_file(blocks_output_file))

    print('=====================')
    print(read_file(transactions_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.json'),
        read_file(transactions_output_file))
def test_stream(tmpdir, start_block, end_block, batch_size, resource_group, provider_type, chain):
    try:
        os.remove('last_synced_block.txt')
    except OSError:
        pass

    blocks_output_file = str(tmpdir.join('actual_block.json'))
    transactions_output_file = str(tmpdir.join('actual_transactions.json'))

    stream(
        bitcoin_rpc=ThreadLocalProxy(
            lambda: get_bitcoin_rpc(
                provider_type,
                read_resource_lambda=lambda file: read_resource(resource_group, file),
                chain=chain)),
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output_file, transactions_output_file)
    )

    print('=====================')
    print(read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.json'),
        read_file(blocks_output_file)
    )

    print('=====================')
    print(read_file(transactions_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.json'),
        read_file(transactions_output_file)
    )
def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5, log_file=None,
           pid_file=None, enrich=True):
    """Streams all data types to console or Google Pub/Sub."""
    configure_logging(log_file)
    configure_signals()

    from bitcoinetl.streaming.streaming_utils import get_item_exporter
    from bitcoinetl.streaming.btc_streamer_adapter import BtcStreamerAdapter
    from blockchainetl.streaming.streamer import Streamer

    streamer_adapter = BtcStreamerAdapter(
        bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
        item_exporter=get_item_exporter(output),
        chain=chain,
        batch_size=batch_size,
        enable_enrich=enrich,
        max_workers=max_workers
    )
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        start_block=start_block,
        period_seconds=period_seconds,
        block_batch_size=block_batch_size,
        pid_file=pid_file,
    )
    streamer.stream()
def enrich_transactions(batch_size, provider_uri, max_workers, transactions_input, transactions_output, chain):
    """Enrich transactions."""
    with smart_open(transactions_input, 'r') as transactions_input_file:
        job = EnrichTransactionsJob(
            transactions_iterable=(json.loads(transaction) for transaction in transactions_input_file),
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(None, transactions_output),
            chain=chain)
        job.run()
def stream(last_synced_block_file, lag, provider_uri, output, start_block, chain=Chain.BITCOIN,
           period_seconds=10, batch_size=2, block_batch_size=10, max_workers=5):
    """Streams all data types to console or Google Pub/Sub."""
    from bitcoinetl.streaming.streaming_utils import get_item_exporter
    from bitcoinetl.streaming.stream import stream as do_stream

    do_stream(
        bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
        last_synced_block_file=last_synced_block_file,
        lag=lag,
        item_exporter=get_item_exporter(output),
        start_block=start_block,
        chain=chain,
        period_seconds=period_seconds,
        batch_size=batch_size,
        block_batch_size=block_batch_size,
        max_workers=max_workers
    )
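# Hedged usage sketch (not part of the original module): invoking the command body above
# directly with placeholder values. It assumes that output=None selects the console item
# exporter in get_item_exporter and that a Bitcoin node is reachable at the given URI;
# both the URI and the starting height are illustrative, not defaults from the CLI.
if __name__ == '__main__':
    stream(
        last_synced_block_file='last_synced_block.txt',
        lag=0,
        provider_uri='http://user:pass@localhost:8332',  # placeholder node URI
        output=None,          # assumption: None streams items to the console
        start_block=500000,   # illustrative starting height
    )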
def export_blocks_and_transactions(start_block, end_block, batch_size, provider_uri, max_workers,
                                   blocks_output, transactions_output, chain):
    """Export blocks and transactions."""
    if blocks_output is None and transactions_output is None:
        raise ValueError('Either --blocks-output or --transactions-output options must be provided')

    job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
        max_workers=max_workers,
        item_exporter=blocks_and_transactions_item_exporter(blocks_output, transactions_output),
        chain=chain,
        export_blocks=blocks_output is not None,
        export_transactions=transactions_output is not None)
    job.run()
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end):
        start_date = datetime.strptime(start, '%Y-%m-%d').date()
        end_date = datetime.strptime(end, '%Y-%m-%d').date()

        day = timedelta(days=1)

        # Block ranges for dates are resolved against the Bitcoin node.
        btc_service = BtcBlockRangeService(
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)))

        while start_date <= end_date:
            batch_start_block, batch_end_block = btc_service.get_block_range_for_date(start_date)
            partition_dir = '/date={start_date!s}/'.format(start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir, start_date
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1, partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError('start and end must be either block numbers or ISO dates')
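# Minimal usage sketch (not part of the original module): with a plain block range the
# RPC-backed date branch is never taken, so provider_uri is unused and can be None.
# The range and batch size below are illustrative assumptions.
if __name__ == '__main__':
    for partition in get_partitions(start='0', end='99', partition_batch_size=50, provider_uri=None):
        batch_start_block, batch_end_block, partition_dir = partition
        print(batch_start_block, batch_end_block, partition_dir)
        # e.g. 0 49 /start_block=00000000/end_block=00000049
        #      50 99 /start_block=00000050/end_block=00000099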
def test_stream(tmpdir, start_block, end_block, batch_size, resource_group, provider_type, chain):
    try:
        os.remove('last_synced_block.txt')
    except OSError:
        pass

    blocks_output_file = str(tmpdir.join('actual_block.json'))
    transactions_output_file = str(tmpdir.join('actual_transactions.json'))

    streamer_adapter = BtcStreamerAdapter(
        bitcoin_rpc=ThreadLocalProxy(
            lambda: get_bitcoin_rpc(
                provider_type,
                read_resource_lambda=lambda file: read_resource(resource_group, file),
                chain=chain)),
        batch_size=batch_size,
        item_exporter=CompositeItemExporter(
            filename_mapping={
                'block': blocks_output_file,
                'transaction': transactions_output_file,
            }),
    )
    streamer = Streamer(
        blockchain_streamer_adapter=streamer_adapter,
        start_block=start_block,
        end_block=end_block,
        retry_errors=False)
    streamer.stream()

    print('=====================')
    print(read_file(blocks_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_blocks.json'),
        read_file(blocks_output_file))

    print('=====================')
    print(read_file(transactions_output_file))
    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_transactions.json'),
        read_file(transactions_output_file))
def export_all(chain, partitions, output_dir, provider_uri, max_workers, batch_size, enrich):
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range,
            transactions_file=transactions_file,
        ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        if enrich:
            # Read the exported transactions back and write the enriched copies.
            # Use a separate name for the file handle so transactions_file keeps pointing
            # at the path, which the date-filtering step below still needs.
            with smart_open(transactions_file, 'r') as transactions_file_handle:
                job = EnrichTransactionsJob(
                    transactions_iterable=(json.loads(transaction) for transaction in transactions_file_handle),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(None, enriched_transactions_file),
                    chain=chain)
                job.run()

        if args is not None and len(args) > 0:
            date = args[0]
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field):
                return datetime.datetime.fromtimestamp(item[field]).astimezone(datetime.timezone.utc) \
                           .strftime('%Y-%m-%d') == date.strftime('%Y-%m-%d')

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info('Filtering transactions {transactions_file} by date {date}'.format(
                transactions_file=transactions_file,
                date=date,
            ))

            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #

        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range,
            time_diff=time_diff,
        ))
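# Minimal wiring sketch (illustrative only): feed partitions from get_partitions into
# export_all. The block range, node URI, and output directory are assumptions made for
# demonstration; Chain.BITCOIN is assumed to be importable from the package as in the
# functions above.
if __name__ == '__main__':
    provider_uri = 'http://user:pass@localhost:8332'  # placeholder node URI
    partitions = get_partitions(start='0', end='999', partition_batch_size=100,
                                provider_uri=provider_uri)
    export_all(
        chain=Chain.BITCOIN,
        partitions=partitions,
        output_dir='output',
        provider_uri=provider_uri,
        max_workers=5,
        batch_size=2,
        enrich=True,
    )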