Пример #1
0
    def _run(self, hash_list, **api_params):
        # Make sure the input is a list (not just a single hash).
        if not isinstance(hash_list, list):
            raise ValueError("The `hash_list` input is a list, not %s." %
                             type(hash_list))

        # If there is nothing in the list, don't waste time with a query.
        if not hash_list:
            return

        # Regularize and check the types of elements in the hash list.
        if isinstance(hash_list[0], str):
            hash_list = [int(h) for h in hash_list]
        if not all([isinstance(h, int) for h in hash_list]):
            raise ValueError("Hashes must be ints or strings that can be "
                             "converted into ints.")

        # Execute the query and load the results.
        resp = submit_statement_request('post',
                                        'from_hashes',
                                        data={'hashes': hash_list},
                                        **api_params)
        self._unload_and_merge_resp(resp)
        self._compile_statements()
        return
Пример #2
0
def get_statements_by_hash(hash_list, ev_limit=100, best_first=True, tries=2):
    """Get fully formed statements from a list of hashes.

    Parameters
    ----------
    hash_list : list[int or str]
        A list of statement hashes. String entries are converted to ints one
        by one; after conversion every entry must be an int.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 100.
    best_first : bool
        If True, the preassembled statements will be sorted by the amount of
        evidence they have, and those with the most evidence will be
        prioritized. If False, statements will be queried in arbitrary order.
    tries : int > 0
        Set the number of times to try the query. The database often caches
        results, so if a query times out the first time, trying again after a
        timeout will often succeed fast enough to avoid a timeout. This can
        also help gracefully handle an unreliable connection, if you're
        willing to wait. Default is 2.

    Returns
    -------
    stmts : list
        The result of `stmts_from_json` applied to the values of the
        'statements' entry of the JSON response, or an empty list when
        `hash_list` is empty (no request is sent in that case).

    Raises
    ------
    ValueError
        If `hash_list` is not a list, if a string entry cannot be parsed as
        an int, or if any entry is neither an int nor a string.
    """
    if not isinstance(hash_list, list):
        raise ValueError("The `hash_list` input is a list, not %s."
                         % type(hash_list))
    if not hash_list:
        return []

    # Convert string entries individually. Looking only at the first element
    # would reject a mixed list such as [1, "2"] and, for a list led by a
    # string, pass floats through `int()` and silently truncate them.
    hashes = [int(h) if isinstance(h, str) else h for h in hash_list]
    if not all(isinstance(h, int) for h in hashes):
        raise ValueError("Hashes must be ints or strings that can be "
                         "converted into ints.")

    resp = submit_statement_request('post', 'from_hashes', ev_limit=ev_limit,
                                    data={'hashes': hashes},
                                    best_first=best_first, tries=tries)
    return stmts_from_json(resp.json()['statements'].values())
Пример #3
0
 def _run(self, ids, **api_params):
     id_l = [{'id': id_val, 'type': id_type} for id_type, id_val in ids]
     resp = submit_statement_request('post', 'from_papers',
                                     data={'ids': id_l},
                                     **api_params)
     self._unload_and_merge_resp(resp)
     self._compile_statements()
     return
Пример #4
0
def get_statements_for_paper(ids, ev_limit=10, best_first=True, tries=2,
                             max_stmts=None):
    """Get the raw Statements extracted from the paper(s) with the given ids.

    Parameters
    ----------
    ids : list[(<id type>, <id value>)]
        Tuples pairing an id type with an id value. The type can be any one
        of 'pmid', 'pmcid', 'doi', 'pii', 'manuscript id', or 'trid', the
        last being the primary key id of the text references in the database.
    ev_limit : int or None
        Cap on the amount of evidence returned per Statement. Default is 10.
    best_first : bool
        If True, the preassembled statements are sorted by how much evidence
        they have and the best-supported ones are prioritized, so with
        `max_stmts` you get the "best" statements. If False, statements are
        queried in arbitrary order.
    tries : int > 0
        How many times to try the query. The database often caches results,
        so a retry after a timeout will often finish fast enough to succeed.
        This can also help with an unreliable connection, if you're willing
        to wait. Default is 2.
    max_stmts : int or None
        Maximum number of statements to be returned. Default is None.

    Returns
    -------
    stmts : list[:py:class:`indra.statements.Statement`]
        A list of INDRA Statement instances.
    """
    # Turn each (type, value) pair into the dict form sent in the payload.
    paper_ids = []
    for id_type, id_val in ids:
        paper_ids.append({'id': id_val, 'type': id_type})

    resp = submit_statement_request(
        'post', 'from_papers',
        data={'ids': paper_ids},
        ev_limit=ev_limit,
        best_first=best_first,
        tries=tries,
        max_stmts=max_stmts,
    )
    return stmts_from_json(resp.json()['statements'].values())
Пример #5
0
def get_statements_by_hash(hash_list, ev_limit=100, best_first=True, tries=2):
    """Get fully formed statements from a list of hashes.

    Parameters
    ----------
    hash_list : list[int or str]
        A list of statement hashes. Each string entry is converted to an int
        on its own; after conversion every entry must be an int.
    ev_limit : int or None
        Limit the amount of evidence returned per Statement. Default is 100.
    best_first : bool
        If True, the preassembled statements will be sorted by the amount of
        evidence they have, and those with the most evidence will be
        prioritized. If False, statements will be queried in arbitrary order.
    tries : int > 0
        Set the number of times to try the query. The database often caches
        results, so if a query times out the first time, trying again after a
        timeout will often succeed fast enough to avoid a timeout. This can
        also help gracefully handle an unreliable connection, if you're
        willing to wait. Default is 2.

    Returns
    -------
    stmts : list
        The result of `stmts_from_json` applied to the values of the
        'statements' entry of the JSON response, or an empty list when
        `hash_list` is empty (no request is sent in that case).

    Raises
    ------
    ValueError
        If `hash_list` is not a list, if a string entry cannot be parsed as
        an int, or if any entry is neither an int nor a string.
    """
    if not isinstance(hash_list, list):
        raise ValueError("The `hash_list` input is a list, not %s." %
                         type(hash_list))
    if not hash_list:
        return []

    # Regularize per element rather than keying off `hash_list[0]`: that
    # shortcut rejects mixed lists such as [1, "2"] and, when the first
    # entry is a string, truncates floats through `int()` without warning.
    hashes = [int(h) if isinstance(h, str) else h for h in hash_list]
    if not all(isinstance(h, int) for h in hashes):
        raise ValueError("Hashes must be ints or strings that can be "
                         "converted into ints.")

    resp = submit_statement_request('post',
                                    'from_hashes',
                                    ev_limit=ev_limit,
                                    data={'hashes': hashes},
                                    best_first=best_first,
                                    tries=tries)
    return stmts_from_json(resp.json()['statements'].values())
Пример #6
0
def get_statements_for_paper(ids, ev_limit=10, best_first=True, tries=2,
                             max_stmts=None):
    """Fetch the raw Statements extracted from papers identified by `ids`.

    Parameters
    ----------
    ids : list[(<id type>, <id value>)]
        A list of (type, value) tuples. Valid types are 'pmid', 'pmcid',
        'doi', 'pii', 'manuscript id', or 'trid', which is the primary key id
        of the text references in the database.
    ev_limit : int or None
        Upper bound on the evidence returned per Statement. Default is 10.
    best_first : bool
        When True, preassembled statements are sorted by their amount of
        evidence and those with the most are prioritized; combined with
        `max_stmts` this yields the "best" statements. When False, the query
        order is arbitrary.
    tries : int > 0
        Number of attempts for the query. Because the database often caches
        results, a second attempt after a timeout will often succeed quickly
        enough. This also helps with an unreliable connection, if you're
        willing to wait. Default is 2.
    max_stmts : int or None
        Select a maximum number of statements to be returned. Default is
        None.

    Returns
    -------
    stmts : list[:py:class:`indra.statements.Statement`]
        A list of INDRA Statement instances.
    """
    def _as_entry(pair):
        # Strict two-way unpacking: anything but a 2-item pair is an error.
        id_type, id_val = pair
        return {'id': id_val, 'type': id_type}

    request_ids = [_as_entry(pair) for pair in ids]
    resp = submit_statement_request('post', 'from_papers',
                                    data={'ids': request_ids},
                                    ev_limit=ev_limit, best_first=best_first,
                                    tries=tries, max_stmts=max_stmts)
    statements_by_key = resp.json()['statements']
    return stmts_from_json(statements_by_key.values())