def enrich_transactions(batch_size, provider_uri, max_workers, transactions_input,
                        transactions_output, chain):
    """Enrich transactions with the addresses and values of their inputs."""
    with smart_open(transactions_input, 'r') as transactions_input_file:
        job = EnrichTransactionsJob(
            # Each input line is a JSON-encoded transaction; parse lazily.
            transactions_iterable=(json.loads(transaction) for transaction in transactions_input_file),
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(None, transactions_output),
            chain=chain)
        job.run()
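# A minimal usage sketch for the function above. The node URI and file paths are
# hypothetical; in the project this function is normally invoked as a CLI command.
enrich_transactions(
    batch_size=100,
    provider_uri='http://user:pass@localhost:8332',
    max_workers=5,
    transactions_input='transactions.json',
    transactions_output='enriched_transactions.json',
    chain='bitcoin')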
def export_all(self, start_block, end_block):
    # Export blocks and transactions
    blocks_and_transactions_item_exporter = InMemoryItemExporter(item_types=['block', 'transaction'])
    blocks_and_transactions_job = ExportBlocksJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        bitcoin_rpc=self.bitcoin_rpc,
        max_workers=self.max_workers,
        item_exporter=blocks_and_transactions_item_exporter,
        chain=self.chain,
        export_blocks=True,
        export_transactions=True)
    blocks_and_transactions_job.run()

    blocks = blocks_and_transactions_item_exporter.get_items('block')
    transactions = blocks_and_transactions_item_exporter.get_items('transaction')

    if self.enable_enrich:
        # Enrich transactions
        enriched_transactions_item_exporter = InMemoryItemExporter(item_types=['transaction'])
        enrich_transactions_job = EnrichTransactionsJob(
            transactions_iterable=transactions,
            batch_size=self.batch_size,
            bitcoin_rpc=self.bitcoin_rpc,
            max_workers=self.max_workers,
            item_exporter=enriched_transactions_item_exporter,
            chain=self.chain)
        enrich_transactions_job.run()
        enriched_transactions = enriched_transactions_item_exporter.get_items('transaction')
        # Enrichment must preserve the transaction count; anything else means data loss.
        if len(enriched_transactions) != len(transactions):
            raise ValueError('Expected {} enriched transactions, got {}'.format(
                len(transactions), len(enriched_transactions)))
        transactions = enriched_transactions

    logging.info('Exporting with ' + type(self.item_exporter).__name__)

    all_items = blocks + transactions
    self.calculate_item_ids(all_items)

    self.item_exporter.export_items(all_items)
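# calculate_item_ids is referenced above but not shown. A plausible sketch, assuming
# every exported item carries 'type' and 'hash' fields (an assumption, not the
# project's verbatim helper):
def calculate_item_ids(self, items):
    for item in items:
        # Derive a stable, unique id so downstream consumers can deduplicate items.
        item['item_id'] = item['type'] + '_' + item['hash']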
def export_all(chain, partitions, output_dir, provider_uri, max_workers, batch_size, enrich):
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        # Create the partition directory itself, not just its parent, so the
        # output files below always have a place to land.
        os.makedirs(blocks_output_dir, exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(transactions_output_dir, exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info('Exporting transactions from blocks {block_range} to {transactions_file}'.format(
            block_range=block_range,
            transactions_file=transactions_file,
        ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        if enrich:
            # Use a distinct name for the file handle so transactions_file keeps
            # referring to the path; it is reused for date filtering below.
            with smart_open(transactions_file, 'r') as transactions_input_file:
                job = EnrichTransactionsJob(
                    transactions_iterable=(json.loads(transaction) for transaction in transactions_input_file),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(None, enriched_transactions_file),
                    chain=chain)
                job.run()

        if args:
            date = args[0]
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field):
                # Compare the item's UTC calendar date against the partition date.
                return datetime.datetime.fromtimestamp(item[field], tz=datetime.timezone.utc) \
                    .strftime('%Y-%m-%d') == date.strftime('%Y-%m-%d')

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info('Filtering transactions {transactions_file} by date {date}'.format(
                transactions_file=transactions_file,
                date=date,
            ))

            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #

        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info('Exporting blocks {block_range} took {time_diff} seconds'.format(
            block_range=block_range,
            time_diff=time_diff,
        ))
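# The loop above consumes (start_block, end_block, partition_dir, *args) tuples.
# A minimal sketch of a compatible partitioner, assuming fixed-size block ranges;
# the directory layout and batch size are illustrative, not the project's own.
# A date-based partitioner would additionally yield a datetime.date as a fourth
# element, which triggers the filtering branch above.
def block_range_partitions(start_block, end_block, partition_batch_size=100):
    for batch_start in range(start_block, end_block + 1, partition_batch_size):
        batch_end = min(batch_start + partition_batch_size - 1, end_block)
        partition_dir = '/start_block={start:08d}/end_block={end:08d}'.format(
            start=batch_start, end=batch_end)
        yield batch_start, batch_end, partition_dir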
def stream(bitcoin_rpc,
           last_synced_block_file='last_synced_block.txt',
           lag=0,
           item_exporter=ConsoleItemExporter(),
           start_block=None,
           end_block=None,
           chain=Chain.BITCOIN,
           period_seconds=10,
           batch_size=2,
           block_batch_size=10,
           max_workers=5):
    if start_block is not None or not os.path.isfile(last_synced_block_file):
        init_last_synced_block_file((start_block or 0) - 1, last_synced_block_file)

    last_synced_block = read_last_synced_block(last_synced_block_file)
    btc_service = BtcService(bitcoin_rpc, chain)

    item_exporter.open()

    while end_block is None or last_synced_block < end_block:
        blocks_to_sync = 0

        try:
            # Stay `lag` blocks behind the tip and cap the batch at block_batch_size.
            current_block = int(btc_service.get_latest_block().number)
            target_block = current_block - lag
            target_block = min(target_block, last_synced_block + block_batch_size)
            target_block = min(target_block, end_block) if end_block is not None else target_block
            blocks_to_sync = max(target_block - last_synced_block, 0)
            logging.info('Current block {}, target block {}, last synced block {}, blocks to sync {}'.format(
                current_block, target_block, last_synced_block, blocks_to_sync))

            if blocks_to_sync == 0:
                logging.info('Nothing to sync. Sleeping for {} seconds...'.format(period_seconds))
                time.sleep(period_seconds)
                continue

            # Export blocks and transactions
            blocks_and_transactions_item_exporter = InMemoryItemExporter(item_types=['block', 'transaction'])
            blocks_and_transactions_job = ExportBlocksJob(
                start_block=last_synced_block + 1,
                end_block=target_block,
                batch_size=batch_size,
                bitcoin_rpc=bitcoin_rpc,
                max_workers=max_workers,
                item_exporter=blocks_and_transactions_item_exporter,
                chain=chain,
                export_blocks=True,
                export_transactions=True)
            blocks_and_transactions_job.run()

            blocks = blocks_and_transactions_item_exporter.get_items('block')
            transactions = blocks_and_transactions_item_exporter.get_items('transaction')

            # Enrich transactions
            enriched_transactions_item_exporter = InMemoryItemExporter(item_types=['transaction'])
            enrich_transactions_job = EnrichTransactionsJob(
                transactions_iterable=transactions,
                batch_size=batch_size,
                bitcoin_rpc=bitcoin_rpc,
                max_workers=max_workers,
                item_exporter=enriched_transactions_item_exporter,
                chain=chain)
            enrich_transactions_job.run()
            enriched_transactions = enriched_transactions_item_exporter.get_items('transaction')
            if len(enriched_transactions) != len(transactions):
                raise ValueError('Expected {} enriched transactions, got {}'.format(
                    len(transactions), len(enriched_transactions)))

            logging.info('Exporting with ' + type(item_exporter).__name__)
            item_exporter.export_items(blocks + enriched_transactions)

            # Persist progress only after a successful export so a crash replays
            # the batch instead of skipping it.
            logging.info('Writing last synced block {}'.format(target_block))
            write_last_synced_block(last_synced_block_file, target_block)
            last_synced_block = target_block
        except Exception:
            # https://stackoverflow.com/a/4992124/1580227
            logging.exception('An exception occurred while fetching block data.')

        if blocks_to_sync != block_batch_size and last_synced_block != end_block:
            logging.info('Sleeping {} seconds...'.format(period_seconds))
            time.sleep(period_seconds)

    item_exporter.close()
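# A minimal sketch of running the streamer, assuming this module's imports are in
# scope (ThreadLocalProxy, BitcoinRpc); the node URI and block height below are
# hypothetical.
rpc = ThreadLocalProxy(lambda: BitcoinRpc('http://user:pass@localhost:8332'))
stream(
    bitcoin_rpc=rpc,
    start_block=600000,   # start from an explicit height instead of the state file
    lag=6,                # stay a few blocks behind the tip to reduce reorg risk
    period_seconds=10)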