def test_export_origin(tmpdir, start_block, end_block, batch_size,
                       output_format, resource_group, web3_provider_type,
                       ipfs_client_type):
    marketplace_output_file = str(
        tmpdir.join('actual_marketplace.' + output_format))
    shop_output_file = str(tmpdir.join('actual_shop.' + output_format))

    ipfs_client = MockIpfsClient(
        lambda file: read_resource(resource_group, file)
    ) if ipfs_client_type == 'mock' else get_origin_ipfs_client()

    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: build_web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(
                resource_group, file)))),
        ipfs_client=ipfs_client,
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(
            marketplace_output_file),
        shop_product_exporter=origin_shop_product_item_exporter(
            shop_output_file),
        max_workers=5)

    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_marketplace.' + output_format),
        read_file(marketplace_output_file))

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_shop.' + output_format),
        read_file(shop_output_file))
def export_traces(start_block,
                  end_block,
                  batch_size,
                  output,
                  max_workers,
                  provider_uri,
                  genesis_traces,
                  daofork_traces,
                  timeout=60,
                  chain='ethereum'):
    """Exports traces from parity node."""
    if chain == 'classic' and daofork_traces:
        raise ValueError(
            'Classic chain does not include daofork traces. Disable daofork traces with --no-daofork-traces option.'
        )
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: build_web3(
            get_provider_from_uri(provider_uri, timeout=timeout))),
        item_exporter=traces_item_exporter(output),
        max_workers=max_workers,
        include_genesis_traces=genesis_traces,
        include_daofork_traces=daofork_traces)

    job.run()
def get_block_range_for_date(provider_uri, date, output, chain='ethereum'):
    """Outputs start and end blocks for given date."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    provider = get_provider_from_uri(provider_uri)
    web3 = build_web3(provider)
    eth_service = EthService(web3)

    start_block, end_block = eth_service.get_block_range_for_date(date)

    with smart_open(output, 'w') as output_file:
        output_file.write('{},{}\n'.format(start_block, end_block))
Example #4
def _extract_tokens(self, contracts):
    exporter = InMemoryItemExporter(item_types=['token'])
    job = ExtractTokensJob(
        contracts_iterable=contracts,
        web3=ThreadLocalProxy(lambda: build_web3(self.batch_web3_provider)),
        max_workers=self.max_workers,
        item_exporter=exporter
    )
    job.run()
    tokens = exporter.get_items('token')
    return tokens
Example #5
def get_partitions(start, end, partition_batch_size, provider_uri):
    """Yield partitions based on input data type."""
    if is_date_range(start, end) or is_unix_time_range(start, end):
        if is_date_range(start, end):
            start_date = datetime.strptime(start, '%Y-%m-%d').date()
            end_date = datetime.strptime(end, '%Y-%m-%d').date()

        elif is_unix_time_range(start, end):
            if len(start) == 10 and len(end) == 10:
                # 10-digit Unix timestamps are interpreted as seconds
                start_date = datetime.utcfromtimestamp(int(start)).date()
                end_date = datetime.utcfromtimestamp(int(end)).date()

            elif len(start) == 13 and len(end) == 13:
                # 13-digit Unix timestamps are interpreted as milliseconds
                start_date = datetime.utcfromtimestamp(int(start) / 1e3).date()
                end_date = datetime.utcfromtimestamp(int(end) / 1e3).date()

        day = timedelta(days=1)

        provider = get_provider_from_uri(provider_uri)
        web3 = build_web3(provider)
        eth_service = EthService(web3)

        while start_date <= end_date:
            batch_start_block, batch_end_block = eth_service.get_block_range_for_date(
                start_date)
            partition_dir = '/date={start_date!s}/'.format(
                start_date=start_date)
            yield batch_start_block, batch_end_block, partition_dir
            start_date += day

    elif is_block_range(start, end):
        start_block = int(start)
        end_block = int(end)

        for batch_start_block in range(start_block, end_block + 1,
                                       partition_batch_size):
            batch_end_block = batch_start_block + partition_batch_size - 1
            if batch_end_block > end_block:
                batch_end_block = end_block

            padded_batch_start_block = str(batch_start_block).zfill(8)
            padded_batch_end_block = str(batch_end_block).zfill(8)
            partition_dir = '/start_block={padded_batch_start_block}/end_block={padded_batch_end_block}'.format(
                padded_batch_start_block=padded_batch_start_block,
                padded_batch_end_block=padded_batch_end_block,
            )
            yield batch_start_block, batch_end_block, partition_dir

    else:
        raise ValueError(
            'start and end must be either block numbers or ISO dates or Unix times'
        )
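# Usage sketch for the block-range branch of get_partitions above. The import
# path is hypothetical; adjust it to wherever the function lives. With plain
# block numbers the provider is never touched, so None is fine here (assuming
# is_block_range, not is_unix_time_range, matches short numeric strings).
# from exporter_utils import get_partitions  # hypothetical import path
for batch_start, batch_end, partition_dir in get_partitions('0', '249', 100, None):
    print(batch_start, batch_end, partition_dir)
# prints:
# 0 99 /start_block=00000000/end_block=00000099
# 100 199 /start_block=00000100/end_block=00000199
# 200 249 /start_block=00000200/end_block=00000249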
Example #6
def _export_traces(self, start_block, end_block):
    exporter = InMemoryItemExporter(item_types=['trace'])
    job = ExportTracesJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=self.batch_size,
        web3=ThreadLocalProxy(lambda: build_web3(self.batch_web3_provider)),
        max_workers=self.max_workers,
        item_exporter=exporter
    )
    job.run()
    traces = exporter.get_items('trace')
    return traces
def export_token_transfers(start_block, end_block, batch_size, output,
                           max_workers, provider_uri, tokens):
    """Exports ERC20/ERC721 transfers."""
    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: build_web3(get_provider_from_uri(provider_uri))),
        item_exporter=token_transfers_item_exporter(output),
        max_workers=max_workers,
        tokens=tokens)
    job.run()
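# Usage sketch (assumptions: the function above is importable and the endpoint
# is reachable; the provider URI is a placeholder). The tokens argument is
# forwarded to ExportTokenTransfersJob, where a list of token contract
# addresses appears to restrict the export and None exports all transfers.
export_token_transfers(
    start_block=10_000_000,
    end_block=10_000_100,
    batch_size=10,
    output='token_transfers.csv',
    max_workers=5,
    provider_uri='https://mainnet.infura.io/v3/<project-id>',
    tokens=None)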
def export_origin(start_block, end_block, batch_size, marketplace_output,
                  shop_output, max_workers, provider_uri):
    """Exports Origin Protocol data."""
    job = ExportOriginJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(
            lambda: build_web3(get_provider_from_uri(provider_uri))),
        ipfs_client=get_origin_ipfs_client(),
        marketplace_listing_exporter=origin_marketplace_listing_item_exporter(
            marketplace_output),
        shop_product_exporter=origin_shop_product_item_exporter(shop_output),
        max_workers=max_workers)
    job.run()
Example #9
def test_export_tokens_job(tmpdir, token_addresses, resource_group,
                           web3_provider_type):
    output_file = str(tmpdir.join('tokens.csv'))

    job = ExportTokensJob(
        token_addresses_iterable=token_addresses,
        web3=ThreadLocalProxy(lambda: build_web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(
                resource_group, file)))),
        item_exporter=tokens_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_tokens.csv'),
        read_file(output_file))
def export_tokens(token_addresses,
                  output,
                  max_workers,
                  provider_uri,
                  chain='ethereum'):
    """Exports ERC20/ERC721 tokens."""
    provider_uri = check_classic_provider_uri(chain, provider_uri)
    with smart_open(token_addresses, 'r') as token_addresses_file:
        job = ExportTokensJob(
            token_addresses_iterable=(
                token_address.strip()
                for token_address in token_addresses_file),
            web3=ThreadLocalProxy(
                lambda: build_web3(get_provider_from_uri(provider_uri))),
            item_exporter=tokens_item_exporter(output),
            max_workers=max_workers)

        job.run()
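# Usage sketch. The generator above strips one token address per line, so the
# input is a plain newline-separated address list. Import path and provider
# URI are assumptions/placeholders.
with open('token_addresses.txt', 'w') as addresses_file:
    addresses_file.write('0xdac17f958d2ee523a2206206994597c13d831ec7\n')  # USDT, for illustration

export_tokens(
    token_addresses='token_addresses.txt',
    output='tokens.csv',
    max_workers=5,
    provider_uri='https://mainnet.infura.io/v3/<project-id>')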
Example #11
def extract_tokens(contracts, provider_uri, output, max_workers, values_as_strings=False):
    """Extracts tokens from contracts file."""

    set_max_field_size_limit()

    with smart_open(contracts, 'r') as contracts_file:
        if contracts.endswith('.json'):
            contracts_iterable = (json.loads(line) for line in contracts_file)
        else:
            contracts_iterable = csv.DictReader(contracts_file)
        converters = [IntToStringItemConverter(keys=['decimals', 'total_supply'])] if values_as_strings else []
        job = ExtractTokensJob(
            contracts_iterable=contracts_iterable,
            web3=ThreadLocalProxy(lambda: build_web3(get_provider_from_uri(provider_uri))),
            max_workers=max_workers,
            item_exporter=tokens_item_exporter(output, converters))

        job.run()
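# Usage sketch. Per the branching above, a .json input is read as one JSON
# object per line and anything else as CSV via csv.DictReader; the exact
# contract fields ExtractTokensJob needs are an assumption (at minimum an
# address plus the ERC20/ERC721 flags from a prior contracts export).
extract_tokens(
    contracts='contracts.csv',     # or 'contracts.json' with one object per line
    provider_uri='https://mainnet.infura.io/v3/<project-id>',  # placeholder
    output='tokens.csv',
    max_workers=5,
    values_as_strings=True)        # emit decimals/total_supply as strings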
def test_export_token_transfers_job(tmpdir, start_block, end_block, batch_size,
                                    resource_group, web3_provider_type):
    output_file = str(tmpdir.join('token_transfers.csv'))

    job = ExportTokenTransfersJob(
        start_block=start_block,
        end_block=end_block,
        batch_size=batch_size,
        web3=ThreadLocalProxy(lambda: build_web3(
            get_web3_provider(web3_provider_type, lambda file: read_resource(
                resource_group, file)))),
        item_exporter=token_transfers_item_exporter(output_file),
        max_workers=5)
    job.run()

    compare_lines_ignore_order(
        read_resource(resource_group, 'expected_token_transfers.csv'),
        read_file(output_file))
Example #13
def get_current_block_number(self):
    w3 = build_web3(self.batch_web3_provider)
    return int(w3.eth.getBlock("latest").number)
Example #14
def export_all_common(partitions, output_dir, provider_uri, max_workers,
                      batch_size):

    for batch_start_block, batch_end_block, partition_dir in partitions:
        # # # start # # #

        start_time = time()

        padded_batch_start_block = str(batch_start_block).zfill(8)
        padded_batch_end_block = str(batch_end_block).zfill(8)
        block_range = '{padded_batch_start_block}-{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )
        file_name_suffix = '{padded_batch_start_block}_{padded_batch_end_block}'.format(
            padded_batch_start_block=padded_batch_start_block,
            padded_batch_end_block=padded_batch_end_block,
        )

        # # # blocks_and_transactions # # #

        blocks_output_dir = '{output_dir}/blocks{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(blocks_output_dir), exist_ok=True)

        transactions_output_dir = '{output_dir}/transactions{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(transactions_output_dir), exist_ok=True)

        blocks_file = '{blocks_output_dir}/blocks_{file_name_suffix}.csv'.format(
            blocks_output_dir=blocks_output_dir,
            file_name_suffix=file_name_suffix,
        )
        transactions_file = '{transactions_output_dir}/transactions_{file_name_suffix}.csv'.format(
            transactions_output_dir=transactions_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info('Exporting blocks {block_range} to {blocks_file}'.format(
            block_range=block_range,
            blocks_file=blocks_file,
        ))
        logger.info(
            'Exporting transactions from blocks {block_range} to {transactions_file}'
            .format(
                block_range=block_range,
                transactions_file=transactions_file,
            ))

        job = ExportBlocksJob(
            start_block=batch_start_block,
            end_block=batch_end_block,
            batch_size=batch_size,
            batch_web3_provider=ThreadLocalProxy(
                lambda: get_provider_from_uri(provider_uri, batch=True)),
            max_workers=max_workers,
            item_exporter=blocks_and_transactions_item_exporter(
                blocks_file, transactions_file),
            export_blocks=blocks_file is not None,
            export_transactions=transactions_file is not None)
        job.run()

        # # # token_transfers # # #

        token_transfers_file = None
        if is_log_filter_supported(provider_uri):
            token_transfers_output_dir = '{output_dir}/token_transfers{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(token_transfers_output_dir),
                        exist_ok=True)

            token_transfers_file = '{token_transfers_output_dir}/token_transfers_{file_name_suffix}.csv'.format(
                token_transfers_output_dir=token_transfers_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting ERC20 transfers from blocks {block_range} to {token_transfers_file}'
                .format(
                    block_range=block_range,
                    token_transfers_file=token_transfers_file,
                ))

            job = ExportTokenTransfersJob(
                start_block=batch_start_block,
                end_block=batch_end_block,
                batch_size=batch_size,
                web3=ThreadLocalProxy(
                    lambda: build_web3(get_provider_from_uri(provider_uri))),
                item_exporter=token_transfers_item_exporter(
                    token_transfers_file),
                max_workers=max_workers)
            job.run()

        # # # receipts_and_logs # # #

        cache_output_dir = '{output_dir}/.tmp{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(cache_output_dir), exist_ok=True)

        transaction_hashes_file = '{cache_output_dir}/transaction_hashes_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting hash column from transaction file {transactions_file}'.
            format(transactions_file=transactions_file, ))
        extract_csv_column_unique(transactions_file, transaction_hashes_file,
                                  'hash')

        receipts_output_dir = '{output_dir}/receipts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(receipts_output_dir), exist_ok=True)

        logs_output_dir = '{output_dir}/logs{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(logs_output_dir), exist_ok=True)

        receipts_file = '{receipts_output_dir}/receipts_{file_name_suffix}.csv'.format(
            receipts_output_dir=receipts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logs_file = '{logs_output_dir}/logs_{file_name_suffix}.csv'.format(
            logs_output_dir=logs_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting receipts and logs from blocks {block_range} to {receipts_file} and {logs_file}'
            .format(
                block_range=block_range,
                receipts_file=receipts_file,
                logs_file=logs_file,
            ))

        with smart_open(transaction_hashes_file, 'r') as transaction_hashes:
            job = ExportReceiptsJob(
                transaction_hashes_iterable=(
                    transaction_hash.strip()
                    for transaction_hash in transaction_hashes),
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                max_workers=max_workers,
                item_exporter=receipts_and_logs_item_exporter(
                    receipts_file, logs_file),
                export_receipts=receipts_file is not None,
                export_logs=logs_file is not None)
            job.run()

        # # # contracts # # #

        contract_addresses_file = '{cache_output_dir}/contract_addresses_{file_name_suffix}.csv'.format(
            cache_output_dir=cache_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Extracting contract_address from receipt file {receipts_file}'.
            format(receipts_file=receipts_file))
        extract_csv_column_unique(receipts_file, contract_addresses_file,
                                  'contract_address')

        contracts_output_dir = '{output_dir}/contracts{partition_dir}'.format(
            output_dir=output_dir,
            partition_dir=partition_dir,
        )
        os.makedirs(os.path.dirname(contracts_output_dir), exist_ok=True)

        contracts_file = '{contracts_output_dir}/contracts_{file_name_suffix}.csv'.format(
            contracts_output_dir=contracts_output_dir,
            file_name_suffix=file_name_suffix,
        )
        logger.info(
            'Exporting contracts from blocks {block_range} to {contracts_file}'
            .format(
                block_range=block_range,
                contracts_file=contracts_file,
            ))

        with smart_open(contract_addresses_file,
                        'r') as contract_addresses_file:
            contract_addresses = (
                contract_address.strip()
                for contract_address in contract_addresses_file
                if contract_address.strip())
            job = ExportContractsJob(
                contract_addresses_iterable=contract_addresses,
                batch_size=batch_size,
                batch_web3_provider=ThreadLocalProxy(
                    lambda: get_provider_from_uri(provider_uri, batch=True)),
                item_exporter=contracts_item_exporter(contracts_file),
                max_workers=max_workers)
            job.run()

        # # # tokens # # #

        if token_transfers_file is not None:
            token_addresses_file = '{cache_output_dir}/token_addresses_{file_name_suffix}'.format(
                cache_output_dir=cache_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Extracting token_address from token_transfers file {token_transfers_file}'
                .format(token_transfers_file=token_transfers_file, ))
            extract_csv_column_unique(token_transfers_file,
                                      token_addresses_file, 'token_address')

            tokens_output_dir = '{output_dir}/tokens{partition_dir}'.format(
                output_dir=output_dir,
                partition_dir=partition_dir,
            )
            os.makedirs(os.path.dirname(tokens_output_dir), exist_ok=True)

            tokens_file = '{tokens_output_dir}/tokens_{file_name_suffix}.csv'.format(
                tokens_output_dir=tokens_output_dir,
                file_name_suffix=file_name_suffix,
            )
            logger.info(
                'Exporting tokens from blocks {block_range} to {tokens_file}'.
                format(
                    block_range=block_range,
                    tokens_file=tokens_file,
                ))

            with smart_open(token_addresses_file, 'r') as token_addresses:
                job = ExportTokensJob(
                    token_addresses_iterable=(
                        token_address.strip()
                        for token_address in token_addresses),
                    web3=ThreadLocalProxy(lambda: build_web3(
                        get_provider_from_uri(provider_uri))),
                    item_exporter=tokens_item_exporter(tokens_file),
                    max_workers=max_workers)
                job.run()

        # # # finish # # #
        shutil.rmtree(os.path.dirname(cache_output_dir))
        end_time = time()
        time_diff = round(end_time - start_time, 5)
        logger.info(
            'Exporting blocks {block_range} took {time_diff} seconds'.format(
                block_range=block_range,
                time_diff=time_diff,
            ))
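# Composition sketch: export_all_common consumes the
# (start_block, end_block, partition_dir) tuples yielded by get_partitions
# above. Assumes both functions are importable and the endpoint is reachable;
# the provider URI is a placeholder.
provider_uri = 'https://mainnet.infura.io/v3/<project-id>'
export_all_common(
    partitions=get_partitions('10000000', '10000999', 500, provider_uri),
    output_dir='output',
    provider_uri=provider_uri,
    max_workers=5,
    batch_size=100)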
def get_new_eth_service():
    provider_url = os.environ.get('PROVIDER_URL', 'https://mainnet.infura.io/v3/7aef3f0cd1f64408b163814b22cc643c')
    web3 = build_web3(HTTPProvider(provider_url))
    return EthService(web3)
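# Usage sketch for the service returned above: EthService maps a calendar date
# to a block range (the same call get_block_range_for_date wraps). Assumes the
# PROVIDER_URL env var or the default endpoint is reachable.
from datetime import date

eth_service = get_new_eth_service()
start_block, end_block = eth_service.get_block_range_for_date(date(2020, 1, 1))
print(start_block, end_block)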