def filter_items(input, output, predicate):
    def evaluated_predicate(item):
        eval_environment = globals()
        if 'datetime' in predicate:
            import datetime
            eval_environment['datetime'] = datetime
        return eval(predicate, eval_environment, {'item': item})

    misc_utils.filter_items(input, output, evaluated_predicate)
def test_filter_items(tmpdir):
    input_file = str(tmpdir.join('input.json'))
    open(input_file, 'w').write('''{"field1": "x1", "field2": "y1"}    
{"field1": "x2", "field2": "y2"}    
''')

    output_file = str(tmpdir.join('output.json'))
    filter_items(input_file, output_file, lambda item: item['field1'] == 'x1')

    expected_file = str(tmpdir.join('expected.json'))
    open(expected_file, 'w').write('''{"field1": "x1", "field2": "y1"}     
''')

    compare_lines_ignore_order(
        read_file(expected_file), read_file(output_file)
    )
示例#3
0
def export_all(chain, partitions, output_dir, provider_uri, max_workers,
               batch_size, enrich):
    for batch_start_block, batch_end_block, partition_dir, *args in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.json'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        enriched_transactions_file = '{transactions_output_dir}/enriched_transactions_{file_name_suffix}.json'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            chain=chain,
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            bitcoin_rpc=ThreadLocalProxy(lambda: BitcoinRpc(provider_uri)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        if enrich == True:
            with smart_open(transactions_file, 'r') as transactions_file:
                job = EnrichTransactionsJob(
                    transactions_iterable=(
                        json.loads(transaction)
                        for transaction in transactions_file),
                    batch_size=batch_size,
                    bitcoin_rpc=ThreadLocalProxy(
                        lambda: BitcoinRpc(provider_uri)),
                    max_workers=max_workers,
                    item_exporter=blocks_and_transactions_item_exporter(
                        None, enriched_transactions_file),
                    chain=chain)
                job.run()

        if args is not None and len(args) > 0:
            date = args[0]
            logger.info('Filtering blocks {blocks_file} by date {date}'.format(
                blocks_file=blocks_file,
                date=date,
            ))

            def filter_by_date(item, field):
                return datetime.datetime.fromtimestamp(item[field]).astimezone(datetime.timezone.utc) \
                           .strftime('%Y-%m-%d') == date.strftime('%Y-%m-%d')

            filtered_blocks_file = blocks_file + '.filtered'
            filter_items(blocks_file, filtered_blocks_file,
                         lambda item: filter_by_date(item, 'timestamp'))
            shutil.move(filtered_blocks_file, blocks_file)

            logger.info(
                'Filtering transactions {transactions_file} by date {date}'.
                format(
                    transactions_file=transactions_file,
                    date=date,
                ))

            filtered_transactions_file = transactions_file + '.filtered'
            filter_items(transactions_file, filtered_transactions_file,
                         lambda item: filter_by_date(item, 'block_timestamp'))
            shutil.move(filtered_transactions_file, transactions_file)

        # # # finish # # #
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))