def setUp(self):
    """
    Recreate the ``test`` table and create both clients used by the tests

    ``self.client`` is the raw ``Client`` connected to localhost,
    ``self.new_client`` is the ``CustomClickhouse`` wrapper.
    """
    drop_sql = 'DROP TABLE IF EXISTS test'
    create_sql = (
        'CREATE TABLE test (id String, x Int32, dict String) '
        'ENGINE = ReplacingMergeTree() ORDER BY id'
    )
    raw_client = Client('localhost')
    self.client = raw_client
    # Drop first so that the table is always created from scratch
    for statement in (drop_sql, create_sql):
        raw_client.execute(statement)
    self.new_client = CustomClickhouse()
class ClickhouseTokenHolders:
    """
    Prepares the token transactions materialized view

    Parameters
    ----------
    indices : dict
        Names of database indices, looked up by keys such as "event"
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def extract_token_transactions(self):
        """
        Create a materialized view with token transactions built from Transfer events

        Sender and receiver are cut out of the second and third topics,
        the value expression is produced by utils.generate_sql_for_value("data").

        This function is an entry point for prepare-erc-transactions-view operation
        """
        value_sql = utils.generate_sql_for_value("data")
        template = """
      CREATE MATERIALIZED VIEW IF NOT EXISTS {index} 
      ENGINE = ReplacingMergeTree() ORDER BY id
      POPULATE
      AS 
      (
        SELECT 
          concat('0x', substring(topics[2], 27, 40)) AS from,
          concat('0x', substring(topics[3], 27, 40)) AS to,
          {value_sql},
          data_value AS value,
          id,
          address AS token,
          transactionHash,
          blockNumber
        FROM {event}
        ANY INNER JOIN (
          SELECT id AS address, decimals
          FROM {contract} 
        )
        USING address
        WHERE
          topics[1] = '{transfer_topic}'
      )
    """
        placeholders = {
            "index": self.indices["token_transaction"],
            "value_sql": value_sql,
            "transfer_topic": TRANSFER_EVENT,
            "event": self.indices["event"],
            "contract": self.indices["contract_description"],
        }
        self.client.send_sql_request(template.format(**placeholders))
Пример #3
0
class ClickhouseIndices:
    """
    Creates database tables ("indices") from the schema constants

    Parameters
    ----------
    indices : dict
        Names of database indices, keyed the same way as INDEX_FIELDS
    """

    def __init__(self, indices=INDICES):
        self.client = CustomClickhouse()
        self.indices = indices

    def _create_index(self, index, fields=None, primary_key=None):
        """
        Create specified index in database with specified field types and primary key

        An "id String" column is always added to the given fields.

        Parameters
        ----------
        index : str
            Name of index
        fields : dict
            Field names and their types. The dictionary is not modified.
            Defaults to no fields besides "id"
        primary_key : list
            All possible primary keys in index. Defaults to ["id"]
        """
        # Copy before adding "id": callers pass shared schema constants
        # (INDEX_FIELDS[key]) that must not be changed as a side effect,
        # and a mutable default would otherwise leak state between calls.
        columns = dict(fields or {})
        columns["id"] = "String"
        keys = ["id"] if primary_key is None else primary_key
        fields_string = ", ".join(
            "{} {}".format(name, column_type)
            for name, column_type in columns.items())
        primary_key_string = ",".join(keys)
        create_sql = """
            CREATE TABLE IF NOT EXISTS {} ({}) ENGINE = ReplacingMergeTree() ORDER BY ({})
        """.format(index, fields_string, primary_key_string)
        self.client.send_sql_request(create_sql)

    def prepare_indices(self):
        """
        Create all indices specified in schema/schema.py

        Indices whose key is missing from INDEX_FIELDS are skipped.

        This function is an entry point for prepare-indices operation
        """
        for key, index in self.indices.items():
            if key in INDEX_FIELDS:
                self._create_index(index, INDEX_FIELDS[key],
                                   PRIMARY_KEYS.get(key, ["id"]))
Пример #4
0
 def __init__(self, indices=INDICES):
     """
     Remember the index names and create a CustomClickhouse client

     Parameters
     ----------
     indices : dict
         Names of database indices
     """
     self.indices = indices
     self.client = CustomClickhouse()
class ClickhouseBancorTrades:
    """
    Prepares the view with Bancor trades

    Parameters
    ----------
    indices : dict
        Names of database indices, looked up by keys such as "bancor_trade"
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def extract_trades(self):
        """
        Create a view with trades decoded from conversion events

        Only events whose first topic equals CONVERSION_EVENT and whose address
        belongs to a contract with standard_bancor_converter = 1 are selected.
        Tokens and trader are cut out of topics, amount and return out of data.
        """
        return_raw_sql = utils.generate_sql_for_value("return_raw")
        amount_raw_sql = utils.generate_sql_for_value("amount_raw")
        # NOTE(review): return_raw is taken as substring(data, 65, 66) while
        # amount_raw is substring(data, 3, 64) - confirm the offsets are intended
        template = """
        CREATE VIEW {trades_index}
        AS (
            SELECT id, from_token, to_token, trader, amount, return, transactionHash
            FROM (
                SELECT
                    id,
                    from_token,
                    to_token,
                    trader,
                    amount,
                    substring(data, 65, 66) AS return_raw,
                    {return_raw_sql},
                    return_raw_value AS return,
                    transactionHash
                FROM (
                    SELECT
                        id,
                        concat('0x', substring(topics[2], 27)) AS from_token,
                        concat('0x', substring(topics[3], 27)) AS to_token,
                        concat('0x', substring(topics[4], 27)) AS trader,
                        data,
                        substring(data, 3, 64) AS amount_raw,
                        {amount_raw_sql},
                        amount_raw_value AS amount,
                        transactionHash
                    FROM (
                        SELECT *
                        FROM {events_index}
                        WHERE topics[1] = '{conversion_event}'
                        AND address IN(
                            SELECT address
                            FROM {contracts_index}
                            WHERE standard_bancor_converter = 1
                        )
                    )
                    ANY LEFT JOIN (
                        SELECT id AS from_token, decimals
                        FROM {tokens_index}
                    )
                    USING from_token
                )
                ANY LEFT JOIN (
                    SELECT id AS to_token, decimals
                    FROM {tokens_index}
                )
                USING to_token
            )
        )
        """
        placeholders = {
            "trades_index": self.indices["bancor_trade"],
            "events_index": self.indices["event"],
            "tokens_index": self.indices["contract_description"],
            "contracts_index": self.indices["contract"],
            # Not referenced by the template, but the lookup is kept as is
            "transactions_index": self.indices["internal_transaction"],
            "conversion_event": CONVERSION_EVENT,
            "amount_raw_sql": amount_raw_sql,
            "return_raw_sql": return_raw_sql,
        }
        self.client.send_sql_request(template.format(**placeholders))
Пример #6
0
class ClickhouseContracts(utils.ClickhouseContractTransactionsIterator):
    """
    Downloads ABIs for contracts and saves them to the contract_abi index

    Parameters
    ----------
    indices : dict
        Names of database indices
    parity_hosts : list
        Tuples whose first two elements are used as a block range
    """
    doc_type = "itx"
    index = "internal_transaction"
    block_prefix = "abi_extracted"

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        self.pool = Pool(processes=NUMBER_OF_PROCESSES)
        self.parity_hosts = parity_hosts

    def _split_on_chunks(self, iterable, size):
        """
        Delegate splitting of an iterable into chunks to utils.split_on_chunks
        """
        return utils.split_on_chunks(iterable, size)

    def _get_contracts_abi(self, all_addresses):
        """
        Fetch ABIs for the given contracts using the process pool

        Addresses are numbered, split into chunks and handed over to
        _get_contracts_abi_sync; results are put back in the numbering order.

        Parameters
        ----------
        all_addresses : list
            Contract addresses
        Returns
        -------
        list
            ABIs ordered by the position of the related address
        """
        numbered_addresses = list(enumerate(all_addresses))
        chunks = self._split_on_chunks(numbered_addresses, NUMBER_OF_PROCESSES)
        payloads = [dict(chunk) for chunk in chunks]
        abis = {}
        for partial_result in self.pool.map(_get_contracts_abi_sync, payloads):
            for position, abi in partial_result.items():
                abis[position] = abi
        return [abis[position] for position in sorted(abis)]

    def _get_range_query(self):
        """
        Build a blockNumber condition from the block ranges of all parity hosts

        Returns
        -------
        str
            SQL query for blockNumber located in specified range
        """
        block_ranges = [host[0:2] for host in self.parity_hosts]
        return utils.make_range_query("blockNumber", *block_ranges)

    def _iterate_contracts_without_abi(self):
        """
        Iterate through contracts with no abi_extracted value
        within the block range of the parity hosts

        Returns
        -------
        generator
            Generator that iterates through contracts by conditions above
        """
        template = 'ANY LEFT JOIN {} USING id WHERE abi_extracted IS NULL AND {}'
        abi_index = self.indices["contract_abi"]
        query = template.format(abi_index, self._get_range_query())
        return self._iterate_contracts(partial_query=query, fields=["address"])

    def _convert_abi(self, abi):
        """
        Serialize a non-empty ABI to a JSON string, return None for an empty one
        """
        return json.dumps(abi) if abi else None

    def save_contracts_abi(self):
        """
        Download ABIs for contracts without ABI and save them to a database

        Every processed contract gets abi_extracted = True, even if its ABI is empty.

        This function is an entry point for download-contracts-abi operation
        """
        for contracts in self._iterate_contracts_without_abi():
            addresses = [
                contract["_source"]["address"] for contract in contracts
            ]
            abis = self._get_contracts_abi(addresses)
            documents = []
            for position, contract in enumerate(contracts):
                documents.append({
                    'abi': self._convert_abi(abis[position]),
                    'abi_extracted': True,
                    "id": contract["_id"],
                })
            self.client.bulk_index(index=self.indices["contract_abi"],
                                   docs=documents)
Пример #7
0
 def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
     """
     Store configuration, create a database client and a process pool

     Parameters
     ----------
     indices : dict
         Names of database indices
     parity_hosts : list
         Parity hosts configuration
     """
     self.indices = indices
     self.parity_hosts = parity_hosts
     self.client = CustomClickhouse()
     # Pool size is taken from NUMBER_OF_PROCESSES
     self.pool = Pool(processes=NUMBER_OF_PROCESSES)
Пример #8
0
 def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
     """
     Initialize the parent class with a new CustomClickhouse client

     Parameters
     ----------
     indices : dict
         Names of database indices
     parity_host
         Defaults to the last element of the first PARITY_HOSTS entry
     """
     database_client = CustomClickhouse()
     super().__init__(indices, database_client, parity_host)
Пример #9
0
class ClickhouseInputs(utils.ClickhouseContractTransactionsIterator):
    """
    Decodes transaction inputs for contracts that have an ABI

    The attributes contract_field, input_index and block_flag_name are read
    but not defined here - presumably they come from subclasses (TODO confirm).

    Parameters
    ----------
    indices : dict
        Names of database indices
    parity_hosts : list
        Tuples whose first two elements are used as a block range
    """
    _contracts_abi = {}
    block_prefix = "inputs_decoded"

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        self.pool = Pool(processes=NUMBER_OF_PROCESSES)
        self.parity_hosts = parity_hosts

    def _set_contracts_abi(self, abis):
        """Parse JSON ABIs and store them for this object, keyed by address"""
        parsed_abis = {}
        for address, abi in abis.items():
            parsed_abis[address] = json.loads(abi)
        self._contracts_abi = parsed_abis

    def _split_on_chunks(self, iterable, size):
        """
        Delegate splitting of an iterable into chunks to utils.split_on_chunks
        """
        return utils.split_on_chunks(iterable, size)

    def _decode_inputs_batch(self, encoded_params):
        """
        Decode inputs using the process pool

        Parameters
        ----------
        encoded_params : dict
            Transaction hashes and attached tuples with contract ABI and transaction input

        Returns
        -------
        dict
            Transaction hashes and parsed inputs for each transaction
        """
        pairs = list(encoded_params.items())
        payloads = [
            dict(chunk)
            for chunk in list(self._split_on_chunks(pairs, NUMBER_OF_PROCESSES))
        ]
        merged = {}
        for partial_result in self.pool.map(_decode_inputs_batch_sync, payloads):
            for tx_hash, decoded in partial_result.items():
                merged[tx_hash] = decoded
        return merged

    def _get_range_query(self):
        """
        Build a blockNumber condition from the block ranges of all parity hosts

        Returns
        -------
        str
            SQL query to find blocks located in the configured ranges
        """
        block_ranges = [host[0:2] for host in self.parity_hosts]
        return utils.make_range_query("blockNumber", *block_ranges)

    def _iterate_contracts_with_abi(self, max_block):
        """
        Iterate through contracts with non-empty ABI
        within the block range of the parity hosts

        Parameters
        ----------
        max_block : int
            Block number, passed on to _iterate_contracts

        Returns
        -------
        generator
            Generator that iterates through contracts by conditions above
        """
        template = "ANY INNER JOIN {} USING id WHERE abi IS NOT NULL AND {}"
        abi_index = self.indices["contract_abi"]
        query = template.format(abi_index, self._get_range_query())
        return self._iterate_contracts(max_block, query, fields=["abi", "address"])

    def _add_id_to_inputs(self, decoded_inputs):
        """
        Put the transaction hash into each decoded input under the "id" key

        Parameters
        ----------
        decoded_inputs : dict
            Dictionary with transaction hashes and input info dicts
        """
        for tx_hash, decoded in decoded_inputs.items():
            decoded.update({"id": tx_hash})

    def _decode_inputs_for_contracts(self, contracts, max_block):
        """
        Decode and save inputs for specified contracts before specified block

        Any exception raised while handling a batch of transactions is printed
        and the next batch is processed.

        Parameters
        ----------
        contracts : list
            Contracts info in JSON format, i.e.
            {"_id": TRANSACTION_ID, "_source": {"document": "fields"}}
        max_block : int
            Block number
        """
        batches = self._iterate_transactions_by_targets(contracts, max_block)
        for transactions in batches:
            try:
                inputs = {}
                for transaction in transactions:
                    tx_id = transaction["_id"]
                    contract_address = transaction["_source"][self.contract_field]
                    abi = self._contracts_abi[contract_address]
                    inputs[tx_id] = (abi, transaction["_source"]["input"])
                decoded_inputs = self._decode_inputs_batch(inputs)
                self._add_id_to_inputs(decoded_inputs)
                documents = list(decoded_inputs.values())
                self.client.bulk_index(index=self.indices[self.input_index], docs=documents)
            except Exception as exception:
                print(exception)

    def decode_inputs(self):
        """
        Decode inputs for all transactions to contracts with ABI

        For each batch of contracts the ABIs are loaded, inputs are decoded
        and the max block is saved for the processed contracts.

        This function is an entry point for parse-*-inputs operation
        """
        max_block = self._get_max_block({self.block_flag_name: 1})
        for contracts in self._iterate_contracts_with_abi(max_block):
            abis_by_address = {}
            for contract in contracts:
                abis_by_address[contract["_source"]["address"]] = contract["_source"]["abi"]
            self._set_contracts_abi(abis_by_address)
            self._decode_inputs_for_contracts(contracts, max_block)
            contract_ids = [contract["_id"] for contract in contracts]
            self._save_max_block(contract_ids, max_block)
Пример #10
0
class ClickhouseEvents:
    """
    Extracts events from parity through web3 and saves them to a database

    Parameters
    ----------
    indices : dict
        Names of database indices
    parity_hosts : list
        Parity hosts; the last element of the first entry is used as the URL
    """

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.client = CustomClickhouse()
        self.indices = indices
        provider = HTTPProvider(parity_hosts[0][-1],
                                request_kwargs={'timeout': 100})
        self.web3 = Web3(provider)

    def _iterate_block_ranges(self, range_size=EVENTS_RANGE_SIZE):
        """
        Iterate over block ranges that contain blocks without events_extracted flag

        Parameters
        ----------
        range_size : int
            Number of blocks in each range
        Returns
        -------
        generator
            Generator that yields (start, end) tuples of unprocessed block ranges
        """
        range_query = "distinct(toInt32(floor(number / {}))) AS range".format(
            range_size)
        flags_query = (
            "ANY LEFT JOIN (SELECT id, value FROM {} FINAL "
            "WHERE name = 'events_extracted') USING id WHERE value IS NULL"
        ).format(self.indices["block_flag"])
        chunks = self.client.iterate(index=self.indices["block"],
                                     fields=[range_query],
                                     query=flags_query,
                                     return_id=False)
        for ranges_chunk in chunks:
            for range_record in ranges_chunk:
                # The end bound is the start of the next range
                start = range_record["_source"]["range"] * range_size
                end = (range_record["_source"]["range"] + 1) * range_size
                yield (start, end)

    def _get_events(self, block_range):
        """
        Get events from parity for given block range

        Parameters
        ----------
        block_range : tuple
            Start and end of block range
        Returns
        -------
        list
            Events inside given block range (not including end block)
        """
        filter_params = {
            "fromBlock": block_range[0],
            "toBlock": block_range[1] - 1
        }
        return self.web3.eth.filter(filter_params).get_all_entries()

    def _save_events(self, events):
        """
        Prepare each event and save all of them to a database

        Nothing is sent when there are no events.

        Parameters
        ----------
        events : list
            Events extracted from parity
        """
        prepared_events = [self._process_event(event) for event in events]
        if not prepared_events:
            return
        self.client.bulk_index(index=self.indices["event"],
                               docs=prepared_events)

    def _process_event(self, event):
        """
        Prepare event - parse log index, assign id, lowercase address, convert hashes to hex

        The id has the form "<transaction hash>.<transaction log index>".

        Parameters
        ----------
        event : dict
            Event extracted from parity

        Returns
        -------
        dict
            Prepared copy of the event
        """
        prepared = event.copy()
        # Base 0 lets int() detect the base from the string prefix
        log_index = int(event["transactionLogIndex"], 0)
        prepared["transactionLogIndex"] = log_index
        prepared["id"] = "{}.{}".format(event['transactionHash'].hex(),
                                        log_index)
        prepared["address"] = event["address"].lower()
        prepared["blockHash"] = event["blockHash"].hex()
        prepared["transactionHash"] = event["transactionHash"].hex()
        prepared["topics"] = [topic.hex() for topic in event["topics"]]
        return prepared

    def _save_processed_blocks(self, block_range):
        """
        Save events_extracted flag for every block of the processed range

        Parameters
        ----------
        block_range : tuple
            Start and end of processed block range
        """
        block_flags = []
        for block in range(*block_range):
            block_flags.append({
                "id": block,
                "name": "events_extracted",
                "value": 1
            })
        self.client.bulk_index(index=self.indices["block_flag"],
                               docs=block_flags)

    def extract_events(self):
        """
        Extract parity events to a database, range by range

        This function is an entry point for extract-events operation
        """
        for block_range in self._iterate_block_ranges():
            self._save_events(self._get_events(block_range))
            self._save_processed_blocks(block_range)
Пример #11
0
 def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
     """
     Create a database client and a web3 instance for the first parity host

     Parameters
     ----------
     indices : dict
         Names of database indices
     parity_hosts : list
         Parity hosts; the last element of the first entry is used as the URL
     """
     self.client = CustomClickhouse()
     self.indices = indices
     provider = HTTPProvider(parity_hosts[0][-1],
                             request_kwargs={'timeout': 100})
     self.web3 = Web3(provider)
class ClickhouseTestCase(unittest.TestCase):
    """
    Tests for CustomClickhouse that run against a database on localhost
    """

    def setUp(self):
        """Recreate the test table and create the raw and the custom client"""
        raw_client = Client('localhost')
        self.client = raw_client
        raw_client.execute('DROP TABLE IF EXISTS test')
        raw_client.execute(
            'CREATE TABLE test (id String, x Int32, dict String) '
            'ENGINE = ReplacingMergeTree() ORDER BY id')
        self.new_client = CustomClickhouse()

    def _add_records(self):
        """
        Insert four records and return them in {"_id", "_source"} format
        """
        documents = [{
            'x': value,
            "id": str(value)
        } for value in (1, 2, 3, 100)]
        formatted_documents = []
        for document in documents:
            formatted_documents.append({
                "_id": document["id"],
                "_source": {
                    'x': document["x"]
                }
            })
        self.client.execute('INSERT INTO test (id, x) VALUES', documents)
        return formatted_documents

    def test_search(self):
        expected = self._add_records()
        found = self.new_client.search(index="test", fields=["x"])
        self.assertCountEqual(expected, found)

    def test_search_with_query(self):
        expected = [
            record for record in self._add_records()
            if record["_source"]['x'] < 3
        ]
        found = self.new_client.search(index="test",
                                       query="WHERE x < 3",
                                       fields=["x"])
        self.assertSequenceEqual(expected, found)

    def test_count(self):
        expected = [
            record for record in self._add_records()
            if record["_source"]['x'] < 3
        ]
        found = self.new_client.count(index="test", query="WHERE x < 3")
        assert found == len(expected)

    def test_iterate(self):
        page_size = 2
        expected = [
            record for record in self._add_records()
            if record["_source"]['x'] < 4
        ]
        pages = self.new_client.iterate(index="test",
                                        fields=["x"],
                                        query="WHERE x < 4",
                                        per=page_size)
        first_expected = expected[0:page_size]
        self.assertSequenceEqual(first_expected, next(pages))
        second_expected = expected[page_size:2 * page_size]
        self.assertSequenceEqual(second_expected, next(pages))

    def test_multiple_iterate(self):
        page_size = 2
        self._add_records()
        # Two iterators over the same index must both be readable
        for _ in range(2):
            pages = self.new_client.iterate(index="test",
                                            fields=["x"],
                                            per=page_size)
            next(pages)

    def test_iterate_without_id(self):
        self._add_records()
        pages = self.new_client.iterate(index="test",
                                        fields=["distinct(ceiling(x / 10))"],
                                        return_id=False)
        first_page = next(pages)
        assert len(first_page) == 2

    def test_iterate_with_and_without_final(self):
        # Same records are inserted twice to get duplicates
        for _ in range(2):
            self._add_records()
        pages_with_final = self.new_client.iterate(index="test", fields=[])
        pages_without_final = self.new_client.iterate(index="test",
                                                      fields=[],
                                                      final=False)
        without_final_count = len(next(pages_without_final))
        with_final_count = len(next(pages_with_final))
        assert without_final_count > with_final_count

    def test_iterate_with_derived_fields(self):
        self._add_records()
        pages = self.new_client.iterate(index="test", fields=["x - 1 AS y"])
        first_record = next(pages)[0]
        assert "y" in first_record["_source"]

    def test_bulk_index(self):
        documents = [{"x": number} for number in range(10)]
        copies = [document.copy() for document in documents]
        self.new_client.bulk_index(index="test", docs=copies, id_field="x")
        stored = self.client.execute('SELECT id FROM test')
        expected = [(str(document["x"]), ) for document in documents]
        self.assertCountEqual(stored, expected)

    def test_bulk_index_check_schema(self):
        # "y" is not a column of the test table
        self.new_client.bulk_index(index="test", docs=[{"y": 1, "id": 1}])
        stored = self.client.execute('SELECT id FROM test')
        self.assertCountEqual(stored, [('1', )])

    def test_bulk_index_empty_fields(self):
        documents = [{"id": 1, "x": 1}]
        self.new_client.bulk_index(index="test", docs=list(documents))

    def test_bulk_index_dict_values(self):
        documents = [{"x": number, "dict": {"test": number}}
                     for number in range(10)]
        copies = [document.copy() for document in documents]
        self.new_client.bulk_index(index="test", docs=copies, id_field="x")
        stored = self.client.execute('SELECT dict FROM test')
        expected = [(json.dumps(document["dict"]), ) for document in documents]
        self.assertCountEqual(stored, expected)

    def test_bulk_index_split_records(self):
        test_docs = [{"docs": True}]
        test_chunks = ["records1", "records2"]
        custom_client = self.new_client
        custom_client._split_records = MagicMock(return_value=test_chunks)
        custom_client.client.execute = MagicMock()
        custom_client._set_id = MagicMock()
        custom_client._filter_schema = MagicMock()
        custom_client.bulk_index(index="test_index", docs=test_docs)

        custom_client._split_records.assert_called_with(test_docs)
        # One execute call is expected per chunk, in the same order
        expected_calls = [call(ANY, records) for records in test_chunks]
        custom_client.client.execute.assert_has_calls(expected_calls)

    def test_send_sql_request(self):
        expected = self._add_records()
        found = self.new_client.send_sql_request("SELECT max(x) FROM test")
        assert found == max(record["_source"]["x"] for record in expected)

    def test_split_records(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        records = [record] * 5
        # Limit fits two records, but not three
        chunks = list(
            self.new_client._split_records(records,
                                           max_bytes=record_size * 2 + 1))
        expected = [[record] * 2, [record] * 2, [record]]
        self.assertSequenceEqual(chunks, expected)

    def test_split_records_one_record(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        chunks = list(
            self.new_client._split_records([record], max_bytes=record_size))
        self.assertSequenceEqual(chunks, [[record]])

    def test_split_records_same_chunk(self):
        record = {"test": "123"}
        record_size = sys.getsizeof(record)
        records = [record] * 6
        chunks = list(
            self.new_client._split_records(records,
                                           max_bytes=record_size * 2))
        expected = [[record] * 2, [record] * 2, [record] * 2]
        self.assertSequenceEqual(chunks, expected)
class ClickhouseContractTransactions:
    """
    Prepares the materialized view with contracts

    Parameters
    ----------
    indices : dict
        Names of database indices, looked up by keys such as "contract"
    """

    def __init__(self, indices=INDICES):
        self.indices = indices
        self.client = CustomClickhouse()

    def _extract_first_bytes(self, func):
        """
        Hash a contract method signature and return first 4 bytes of the hash

        Parameters
        ----------
        func: str
            String that contains function name and arguments

        Returns
        -------
        str
            String with first 4 bytes of method signature in hex format,
            with the first two characters of Web3.toHex output cut off
        """
        signature_hash = Web3.sha3(text=func)
        prefixed_hex = str(Web3.toHex(signature_hash[0:4]))
        return prefixed_hex[2:]

    def _extract_methods_signatures(self):
        """
        Return dictionary with first bytes of standard method signatures

        Returns
        -------
        dict
            Standard names mapped to dictionaries with method names
            and first 4 bytes of their signatures in hex format
        """
        method_texts = {
            'erc20': {
                'totalSupply': 'totalSupply()',
                'balanceOf': 'balanceOf(address)',
                'allowance': 'allowance(address,address)',
                'transfer': 'transfer(address,uint256)',
                'transferFrom': 'transferFrom(address,address,uint256)',
                'approve': 'approve(address,uint256)',
            },
            'erc223': {
                'tokenFallback': 'tokenFallback(address,uint256,bytes)'
            },
            'bancor_converter': {
                'convert': 'convert(address,address,uint256,uint256)'
            }
        }
        return {
            standard: {
                method: self._extract_first_bytes(text)
                for method, text in methods.items()
            }
            for standard, methods in method_texts.items()
        }

    def _get_standards(self):
        """
        Create dict with sql to create "standard_*" flag fields

        A contract matches a standard when its bytecode contains
        every method signature of that standard.

        Returns
        -------
        dict
            Dictionary with keys "standard_*", where * is the standard name
            and values that are conditions for database to define related standard
        """
        flags = {}
        for standard, signatures in self._extract_methods_signatures().items():
            conditions = [
                "(bytecode LIKE '%{}%')".format(signature)
                for signature in signatures.values()
            ]
            flags["standard_" + standard] = " AND ".join(conditions)
        return flags

    def _get_fields(self):
        """
        Get string with materialized view field names and related expressions

        Returns
        -------
        str
            Part of SQL request to create the materialized view.
            Contains comma separated "expression AS alias" pairs
        """
        fields = {
            "id": "coalesce(address, id)",
            "blockNumber": "blockNumber",
            "address": "address",
            "owner": "from",
            "bytecode": "code"
        }
        # Standard flags go after the plain fields
        fields.update(self._get_standards())
        definitions = []
        for alias, expression in fields.items():
            definitions.append("{} AS {}".format(expression, alias))
        return ", ".join(definitions)

    def extract_contract_addresses(self):
        """
        Create materialized view for contracts extracted from internal transactions table

        Only successful transactions of type 'create' are selected.
        """
        fields_string = self._get_fields()
        template = "CREATE MATERIALIZED VIEW IF NOT EXISTS {} {} POPULATE AS (SELECT {} FROM {} WHERE {})"
        engine_string = 'ENGINE = ReplacingMergeTree() ORDER BY id'
        condition = "type = 'create' AND error IS NULL AND parent_error IS NULL"
        sql = template.format(
            self.indices["contract"],
            engine_string,
            fields_string,
            self.indices["internal_transaction"],
            condition
        )
        self.client.send_sql_request(sql)
Пример #14
0
 def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
     """
     Create database and web3 clients, then load external links

     Parameters
     ----------
     indices : dict
         Names of database indices
     parity_hosts : list
         Parity hosts; the third element of the first entry is used as the URL
     """
     self.indices = indices
     self.client = CustomClickhouse()
     parity_url = parity_hosts[0][2]
     self.w3 = Web3(HTTPProvider(parity_url))
     self.standard_token_abi = standard_token_abi
     self._set_external_links()
Пример #15
0
class ClickhouseContractMethods:
    """
    Check if contract is token, is it compliant with token standards and get variables from it such as name or symbol

    Parameters
    ----------
    indices: dict
        Dictionary containing existing database indices
    parity_hosts: list
        List of tuples that includes 3 elements: start block, end_block and Parity URL
    """
    # Class-level fallback only; _set_external_links() gives every instance its own dict
    _external_links = {}
    # (constant name, {ABI output type: converter or None}, placeholder),
    # listed in the order _get_constants() returns the values
    _constants_types = [
        ('name', {
            "string": lambda x: str(x).replace("\\x00", ""),
            # presumably x is a bytes value here: str() gives its repr (b'...'),
            # so [2:-1] strips the b' prefix and the closing quote - confirm
            "bytes32": lambda x: str(x).replace("\\x00", "")[2:-1].strip()
        }, ''),
        ('symbol', {
            "string": lambda x: str(x).replace("\\x00", ""),
            "bytes32": lambda x: str(x).replace("\\x00", "")[2:-1].strip()
        }, ''), ('decimals', {
            "uint8": None
        }, 18), ('totalSupply', {
            "uint256": None
        }, 0), ('owner', {
            "address": lambda x: x.lower()
        }, None)
    ]

    def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
        self.indices = indices
        self.client = CustomClickhouse()
        # The Parity URL is the third element of the first host tuple
        self.w3 = Web3(HTTPProvider(parity_hosts[0][2]))
        self.standard_token_abi = standard_token_abi
        self._set_external_links()

    def _set_external_links(self):
        """
        Load website slug and cmc_id for every token listed in tokens.json

        The result is stored on the instance, keyed by token address
        """
        with open('{}/tokens.json'.format(CURRENT_DIR), encoding='utf-8') as json_file:
            tokens = json.load(json_file)
        # Build a fresh per-instance dict instead of mutating the class-level
        # one, which would be shared by every instance of the class
        self._external_links = {
            token["address"]: {
                "website_slug": token["website_slug"],
                "cmc_id": token["cmc_id"],
            }
            for token in tokens
        }

    def _iterate_unprocessed_contracts(self):
        """
        Iterate over ERC20 contracts that were not processed yet

        A contract counts as processed when its id is already present
        in the contract_description index

        Returns
        -------
        generator
            Generator that iterates over contracts in database
        """
        query = """
                WHERE standard_erc20 = 1
                AND id not in(
                    SELECT id
                    FROM {} 
                )
            """.format(self.indices["contract_description"])
        return self.client.iterate(index=self.indices["contract"],
                                   fields=["address"],
                                   query=query)

    def _round_supply(self, supply, decimals):
        """
        Divide supply by 10 ** decimals, and round it

        Parameters
        ----------
        supply: int
            Contract total supply
        decimals: int
            Contract decimals

        Returns
        -------
        int
            Contract total supply without decimals, capped by MAX_TOTAL_SUPPLY
        """
        if decimals > 0:
            # Integer arithmetic keeps the result exact: a float division
            # silently loses precision once the supply exceeds 2 ** 53
            divisor = 10 ** decimals
            whole, remainder = divmod(supply, divisor)
            half = divisor // 2
            # Ties go to the even neighbour, matching round() on a Decimal
            if remainder > half or (remainder == half and whole % 2 == 1):
                whole += 1
            supply = whole

        return min(supply, MAX_TOTAL_SUPPLY)

    def _get_constant(self, address, constant, types, placeholder=None):
        """
        Get value through contract function marked as constant

        Tries every type from types dict and returns first value that is not empty
        If it fails, returns placeholder

        Parameters
        ----------
        address: str
            Contract address
        constant: str
            Name of constant
        types: dict
            Dict with all possible types and converter functions for target value
        placeholder
            Default value for target value

        Returns
        -------
            Value returned by a contract and converted with the function
            Last integer response (e.g. 0), if no non-empty value was found
            Placeholder otherwise
        """
        contract_checksum_addr = self.w3.toChecksumAddress(address)
        response = None
        for constant_type, convert in types.items():
            contract_abi = [{
                "constant": True,
                "inputs": [],
                "name": constant,
                "outputs": [{
                    "name": "",
                    "type": constant_type
                }],
                "payable": False,
                "type": "function"
            }]
            try:
                contract_instance = self.w3.eth.contract(
                    address=contract_checksum_addr, abi=contract_abi)
                response = getattr(contract_instance.functions,
                                   constant)().call()
                if convert:
                    response = convert(response)
            except Exception:
                # Best effort by design: a failure for one output type
                # just means the next candidate type is tried
                continue
            if response:
                return response
        # A falsy integer (0) is still a legitimate answer; bool is excluded
        # to keep the exact-int semantics of the former type() comparison
        if isinstance(response, int) and not isinstance(response, bool):
            return response
        return placeholder

    def _get_constants(self, address):
        """
        Return contract ERC20 info

        Parameters
        ----------
        address: str
            Contract address

        Returns
        -------
        list
            Name, symbol, decimals, total supply, owner address
        """
        contract_constants = [
            self._get_constant(address, constant, types, placeholder)
            for constant, types, placeholder in self._constants_types
        ]
        # Positions follow _constants_types: 2 is decimals, 3 is totalSupply
        contract_constants[3] = self._round_supply(contract_constants[3],
                                                   contract_constants[2])
        return contract_constants

    def _update_contract_descr(self, doc_id, body):
        """
        Store contract description in database

        Parameters
        ----------
        doc_id: str
          id of contract
        body: dict
          Dictionary with new values, the id is added to it in place
        """
        body["id"] = doc_id
        self.client.bulk_index(self.indices['contract_description'],
                               docs=[body])

    def _get_external_links(self, address):
        """
        Get website slug and cmc_id known for the address

        Parameters
        ----------
        address: str
            Contract address

        Returns
        -------
        tuple
            Website slug and cmc_id, None for each value that is unknown
        """
        external_links = self._external_links.get(address, {})
        return external_links.get("website_slug"), external_links.get("cmc_id")

    def _classify_contract(self, contract):
        """
        Extract contract ERC20 info and stores it into the database

        Extracts ERC20 token description from parity and from token.json file

        Parameters
        ----------
        contract: dict
            Dictionary with contract info
        """
        address = contract['_source']['address']
        name, symbol, decimals, total_supply, owner = self._get_constants(address)
        website_slug, cmc_id = self._get_external_links(address)
        update_body = {
            'token_name': name,
            'token_symbol': symbol,
            'decimals': decimals,
            'total_supply': total_supply,
            'token_owner': owner,
            "website_slug": website_slug,
            "cmc_id": cmc_id
        }
        self._update_contract_descr(contract['_id'], update_body)

    def search_methods(self):
        """
        Extract public values for ERC20 contracts

        This function is an entry point for extract-tokens operation
        """
        for contracts_chunk in self._iterate_unprocessed_contracts():
            for contract in contracts_chunk:
                self._classify_contract(contract)
Пример #16
0
 def __init__(self, indices=INDICES, parity_hosts=PARITY_HOSTS):
     """
     Initialize the parent with a fresh CustomClickhouse client and alias
     the miner_transaction index to the internal_transaction one

     NOTE(review): if the parent keeps `indices` by reference, the alias is
     also written into the caller's dict (the module-level INDICES when the
     default is used) - confirm this is intended
     """
     clickhouse = CustomClickhouse()
     super().__init__(indices, clickhouse, parity_hosts)
     internal_index = self.indices["internal_transaction"]
     self.indices["miner_transaction"] = internal_index
 def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
     """
     Store indices and create the database and Parity clients

     By default the Parity URL is the last element of the first PARITY_HOSTS entry
     """
     self.indices = indices
     self.client = CustomClickhouse()
     provider = HTTPProvider(parity_host)
     self.web3 = Web3(provider)
class ClickhouseTokenPrices(ClickhouseContractTransactionsIterator):
    doc_type = 'token'
    block_prefix = 'prices_extracted'

    def __init__(self, indices=INDICES, parity_host=PARITY_HOSTS[0][-1]):
        """
        Store indices and create the database and Parity clients

        By default the Parity URL is the last element of the first PARITY_HOSTS entry.
        The parent initializer is not called here
        """
        self.indices = indices
        self.client = CustomClickhouse()
        provider = HTTPProvider(parity_host)
        self.web3 = Web3(provider)

    def _iterate_cc_tokens(self):
        """
        Iterate over ERC20 tokens

        Returns
        -------
        generator
            Generator that iterates over ERC20 tokens
        """
        return self._iterate_contracts(partial_query='WHERE standard_erc20 = 1', fields=["address"])

    def _get_cc_tokens(self):
        """
        Extract list of tokens

        Returns
        -------
        list
            List of ERC20 contracts
        """
        tokens = [token_chunk for token_chunk in self._iterate_cc_tokens()]
        token_list = [t['_source'] for token_chunk in tokens for t in token_chunk]
        return token_list

    def _construct_bulk_insert_ops(self, docs):
        """
        Assign id to each document

        Parameters
        ----------
        docs: list
            List of price records
        """
        for doc in docs:
            doc["id"] = doc['address'] + '_' + doc['timestamp'].strftime("%Y-%m-%d")

    def _insert_multiple_docs(self, docs, index_name):
        """
        Index documents in chunks of up to 1000

        Every chunk gets ids assigned and is then sent to the database
        with a single bulk_index call

        Parameters
        ----------
        docs: list
            List of dictionaries with new data
        index_name: str
            Name of the index that contains inserted documents
        """
        batches = bulk_chunks(docs, docs_per_chunk=1000)
        for batch in batches:
            self._construct_bulk_insert_ops(batch)
            self.client.bulk_index(index=index_name, docs=batch)

    def _set_moving_average(self, prices, window_size=MOVING_AVERAGE_WINDOW):
        """
        Set the "average" field of every price in place

        Until window_size close prices are collected, the average is the
        close price itself. After that it is the mean of the last
        window_size close prices

        Parameters
        ----------
        prices: list
            List of prices with a "close" key
        window_size: int
            Size of window
        """
        window = []
        for entry in prices:
            close = entry["close"]
            window.append(close)
            if len(window) != window_size:
                # Window is not full yet
                entry["average"] = close
                continue
            entry["average"] = np.mean(window)
            # Drop the oldest close so the next one completes the window again
            del window[0]

    def _process_hist_prices(self, prices):
        """
        Prepare extracted prices to a database

        Performs moving average procedure over prices, sets address and timestamp fields

        Parameters
        ----------
        prices: list
            List of tokens prices

        Returns
        -------
        list
            List if prepared prices
        """
        points = []
        self._set_moving_average(prices)
        for price in prices:
            point = {}
            point['BTC'] = price["average"]
            point['BTC'] = float('{:0.10f}'.format(point['BTC']))
            point['timestamp'] = datetime.datetime.fromtimestamp(price['time'])
            point['address'] = price['address']
            points.append(point)
        return points

    def _make_historical_prices_req(self, address, days_count, timeout=30):
        """
        Make call to CryptoCompare API to extract token historical data

        Parameters
        ----------
        address: str
            Token address
        days_count: int
            Days limit
        timeout: float
            Seconds to wait for the API before giving up

        Returns
        -------
        list
            List of prices for specified symbol, each one tagged with the address
            None, if the request or the response parsing failed
        """
        symbol = self._get_symbol_by_address(address)
        url = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym=BTC&limit={}'.format(symbol, days_count)
        try:
            # Without a timeout a stalled connection would block the whole run
            points = requests.get(url, timeout=timeout).json()['Data']
            for point in points:
                point['address'] = address
        except Exception:
            # Best effort: any failure for one token is reported and skipped.
            # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through
            print("No exchange rate for {}".format(symbol))
            return None
        return points

    def _get_last_avail_price_date(self):
        """
        Get last price available in token_price index

        Returns
        -------
        string
            Timestamp of last available date
        """
        return self.client.send_sql_request('SELECT MAX(timestamp) FROM {}'.format(self.indices['price']))

    def _get_days_count(self, now, last_price_date, limit=DAYS_LIMIT):
        """
        Count number of days for that prices are unavailable

        Parameters
        ----------
        now: date
            Current date
        last_price_date: date
            Timestamp of last available price
        limit: int
            Upper bound for the returned number of days

        Returns
        -------
        int
            Number of days between current date and last price in database
            (both ends included), but not more than limit
        """
        days_count = (now - last_price_date).days + 1
        # Honour the limit argument; it used to be ignored in favour of DAYS_LIMIT
        return min(days_count, limit)

    def _get_symbol_abi(self, output_type):
        """Return mock ABI to get token symbol"""
        return [{
            "constant": True,
            "inputs": [],
            "name": "symbol",
            "outputs": [
                {
                    "name": "",
                    "type": output_type
                }
            ],
            "payable": False,
            "stateMutability": "view",
            "type": "function"
        }]

    # TODO replace with contract_methods.py call
    def _get_symbol_by_address(self, address):
        """
        Get symbol of specified token

        Parameters
        ----------
        address: str
            Address of token

        Returns
        -------
        str
           Symbol of specified token
        """
        address = self.web3.toChecksumAddress(address)
        symbols = {}
        for output_type in ['string', 'bytes32']:
            contract = self.web3.eth.contract(abi=self._get_symbol_abi(output_type), address=address)
            try:
                symbols[output_type] = contract.functions.symbol().call()
            except Exception as e:
                print(e)
                pass
        if 'string' in symbols:
            return symbols['string']
        else:
            return symbols.get('bytes32', "".encode('utf-8')).decode('utf-8').rstrip('\0')

    def _get_historical_multi_prices(self):
        """
        Extract historical token prices from CryptoCompare

        The number of requested days is derived from the last price
        available in the database. Tokens without prices are skipped

        Returns
        -------
        list
            Flat list of prepared historical prices of all tokens
        """
        token_addresses = [
            token['address']
            for token in self._get_cc_tokens()
        ]
        now = datetime.datetime.now()
        last_price_date = self._get_last_avail_price_date()
        days_count = self._get_days_count(now, last_price_date)
        prices = []
        for token in tqdm(token_addresses):
            token_prices = self._make_historical_prices_req(token, days_count)
            # None means the request failed for this token
            if token_prices is None:
                continue
            prices.extend(self._process_hist_prices(token_prices))
        return prices

    def get_prices_within_interval(self):
        """
        Extract historcial token prices and then add to this prices data from Coinmarketcap

        This function is an entry point for download-prices operation
        """
        prices = self._get_historical_multi_prices()
        if prices != None:
            self._insert_multiple_docs(prices, self.indices['price'])