Exemplo n.º 1
0
 def test_ROS_agents_no_creds(self):
     """Time an agent query for text "ROS" while "signed out"."""
     ros_query = HasAgent('ROS', 'TEXT')
     endpoint = 'agents/from_query_json'
     url_params = 'format=json-js&with_english=true'
     # The request is issued without credentials (with_auth=False).
     self.__simple_time_test('post', endpoint, url_params,
                             query=ros_query.to_json(), with_auth=False)
Exemplo n.º 2
0
 def test_IL6_agents_with_creds(self):
     """Time an agent query for text "IL-6" while "signed in"."""
     il6_query = HasAgent('IL-6', 'TEXT')
     endpoint = 'agents/from_query_json'
     url_params = 'format=json-js&with_english=true'
     # The request is issued with credentials (with_auth=True).
     self.__simple_time_test('post', endpoint, url_params,
                             query=il6_query.to_json(), with_auth=True)
Exemplo n.º 3
0
    def test_simple_json_query(self):
        """Compare a directly-run query with the same query sent as JSON.

        The query is executed once through `get_statements` and once through
        the 'query/statements' endpoint. The two results must contain the
        same statement keys, and the endpoint's evidence is checked for the
        expected medscan filtering and elsevier text truncation.
        """
        limit = 50
        ev_limit = 5
        max_text_len = 200  # Length beyond which elsevier text is cut off.

        query = (HasAgent('MEK', namespace='NAME')
                 & HasAgent('ERK', namespace='NAME')
                 & HasType(['Phosphorylation']))
        direct_result = query.get_statements(limit=limit, ev_limit=ev_limit)

        query_json_str = json.dumps(query.to_json())
        url_params = (f'json={query_json_str}&limit={limit}'
                      f'&ev_limit={ev_limit}')
        resp, dt, size = self.__time_query('get', 'query/statements',
                                           url_params)
        api_result = QueryResult.from_json(json.loads(resp.data))
        assert isinstance(api_result, StatementQueryResult)

        # Both routes must yield the same set of statements.
        assert direct_result.results.keys() == api_result.results.keys()

        # Make sure elsevier and medscan content was filtered out.
        stmt_pairs = zip(direct_result.statements(), api_result.statements())
        for direct_stmt, api_stmt in stmt_pairs:
            has_medscan = any(ev.source_api == 'medscan'
                              for ev in direct_stmt.evidence)
            if has_medscan:
                assert len(direct_stmt.evidence) > len(api_stmt.evidence), \
                    'Medscan result not filtered out.'
                # TODO: Figure out how to test elsevier in this case.
                continue

            assert len(direct_stmt.evidence) == len(api_stmt.evidence), \
                "Evidence counts don't match."

            for direct_ev, api_ev in zip(direct_stmt.evidence,
                                         api_stmt.evidence):
                # Only elsevier-sourced evidence is inspected further.
                if direct_ev.text_refs.get('SOURCE') != 'elsevier':
                    continue

                if len(direct_ev.text) > max_text_len:
                    assert api_ev.text.endswith(REDACT_MESSAGE), \
                        "Elsevier text not truncated."
                    expected_len = max_text_len + len(REDACT_MESSAGE)
                    assert len(api_ev.text) == expected_len, \
                        "Elsevier text not truncated."
                else:
                    assert len(direct_ev.text) == len(api_ev.text), \
                        "Evidence text lengths don't match."
Exemplo n.º 4
0
    def test_mesh_concept_ev_limit(self):
        """Test a specific bug in which evidence was duplicated.

        When querying for mesh concepts, with an evidence limit, the evidence
        was repeated numerous times.

        The test checks three things for every returned statement: the
        evidence list does not exceed the requested limit, the evidence
        entries are unique, and each evidence can be tied to at least one
        text reference. Finally, every pmid collected from the evidence must
        be annotated with the queried mesh concept in the database.
        """
        db = get_db('primary')
        ev_limit = 6
        q = HasAgent('ACE2') & FromMeshIds(['C000657245'])
        resp, dt, size = self.__time_query('post',
                                           'statements/from_query_json',
                                           f'limit=50&ev_limit={ev_limit}',
                                           query=q.to_json(),
                                           with_auth=True)
        assert resp.status_code == 200, f"Query failed: {resp.data.decode()}"
        assert dt < 30, "Query would have timed out."
        if dt > 15:
            logger.warning(f"Query took a long time: {dt} seconds.")

        resp_json = json.loads(resp.data)
        pmids = set()
        # Only the per-statement data is needed; the result keys are unused.
        for data in resp_json['results'].values():
            ev_list = data['evidence']
            assert len(ev_list) <= ev_limit, "Evidence limit exceeded."

            # Reduce each evidence to a hashable signature so duplicates
            # collapse in the set.
            ev_tuples = {(ev.get('text'), ev.get('source_hash'),
                          ev.get('source_api'), str(ev.get('text_refs')))
                         for ev in ev_list}
            assert len(ev_tuples) == len(ev_list), "Evidence is not unique."

            for ev in ev_list:
                found_pmid = False
                if 'pmid' in ev:
                    pmids.add(ev['pmid'])
                    found_pmid = True

                if 'text_refs' in ev:
                    tr_dict = ev['text_refs']
                    if 'TRID' in tr_dict:
                        # NOTE(review): assumes the TRID resolves to a row;
                        # confirm what select_one returns on no match.
                        tr = db.select_one(db.TextRef,
                                           db.TextRef.id == tr_dict['TRID'])
                        pmids.add(tr.pmid)
                        found_pmid = True
                    if 'PMID' in tr_dict:
                        pmids.add(tr_dict['PMID'])
                        found_pmid = True
                    if 'DOI' in tr_dict:
                        tr_list = db.select_all(
                            db.TextRef, db.TextRef.doi_in([tr_dict['DOI']]))
                        pmids |= {tr.pmid for tr in tr_list if tr.pmid}
                        found_pmid = True

                assert found_pmid,\
                    "How could this have been mapped to mesh?"

        # Missing pmids (None) are dropped before converting to integers.
        pmids = {int(pmid) for pmid in pmids if pmid is not None}

        # Collect the pmids annotated with the concept in either annotation
        # table. 657245 matches the digits of the concept ID queried above.
        mesh_pmids = {
            n
            for n, in db.select_all(db.MeshRefAnnotations.pmid_num,
                                    db.MeshRefAnnotations.pmid_num.in_(pmids),
                                    db.MeshRefAnnotations.mesh_num == 657245,
                                    db.MeshRefAnnotations.is_concept.is_(True))
        }
        mesh_pmids |= {
            n
            for n, in db.select_all(
                db.MtiRefAnnotationsTest.pmid_num,
                db.MtiRefAnnotationsTest.pmid_num.in_(pmids),
                db.MtiRefAnnotationsTest.mesh_num == 657245,
                db.MtiRefAnnotationsTest.is_concept.is_(True))
        }

        assert pmids == mesh_pmids, "Not all pmids mapped to mesh term."
Exemplo n.º 5
0
    def test_drill_down(self):
        """Check that source counts stay consistent when expanding results.

        An agent-level query for 'MEK' is run first. The first two relations
        are then expanded ('expand' endpoint) and each resulting relation is
        expanded again into statements. At every level the per-source counts
        of the children must add up to the counts reported by the parent.
        """
        def drill_down(relation, result_type):
            # `result_type` names what the request returns: 'relations' or
            # 'statements'.
            wants_relations = result_type == 'relations'
            wants_statements = result_type == 'statements'

            url_params = ['with_cur_counts=true']
            payload = {
                'agent_json': relation['agents'],
                'hashes': relation['hashes']
            }
            if wants_statements:
                payload['type'] = relation['type']
                url_params += ['ev_limit=10', 'format=json-js',
                               'filter_ev=true', 'with_english=true']
                endpoint = 'statements/from_agent_json'
            else:
                endpoint = 'expand'

            resp, dt, size = self.__time_query('post', endpoint,
                                               '&'.join(url_params),
                                               **payload)
            assert dt < 10
            body = json.loads(resp.data)

            if wants_relations:
                children = body['relations']
            elif wants_statements:
                children = body['statements'].values()
            assert all('english' in child for child in children)

            if wants_relations:
                assert all('cur_count' in child for child in children)
                assert all(child['hashes'] is not None for child in children)
                assert all(child['agents'] == relation['agents']
                           for child in body['relations'])
                complex_count = sum(child['type'] == 'Complex'
                                    for child in body['relations'])
                assert complex_count <= 1
            elif wants_statements:
                assert 'num_curations' in body
                assert all('evidence' in child for child in children)

            # Add up the per-source counts over all children.
            child_totals = defaultdict(int)
            for child in children:
                if wants_relations:
                    counts = child['source_counts']
                else:
                    counts = body['source_counts'][child['matches_hash']]
                for source, count in counts.items():
                    child_totals[source] += count
            child_totals = dict(child_totals)

            # The sources with a non-zero count must be the same for the
            # parent and for the summed children.
            parent_totals = relation['source_counts']
            parent_sources = {s for s, c in parent_totals.items() if c > 0}
            child_sources = {s for s, c in child_totals.items() if c > 0}
            assert parent_sources == child_sources, \
                f'Set mismatch: {parent_sources} vs. {child_sources}'

            # ...and so must the counts themselves, source by source.
            counts_agree = all(child_totals[s] == parent_totals[s]
                               for s in parent_sources)
            assert counts_agree, \
                '\n'.join((f"{s}: parent={parent_totals[s]}, "
                           f"child sum={child_totals[s]}")
                          if parent_totals[s] != child_totals[s]
                          else f'{s}: ok'
                          for s in parent_sources)

            # Relations are expanded one level further, into statements.
            if wants_relations:
                for child in children:
                    drill_down(child, 'statements')

        endpoint = "agents/from_query_json"
        mek_query = HasAgent('MEK')
        url_params = ("limit=50&with_cur_counts=true&with_english=true"
                      "&with_hashes=true")
        complexes_covered = []
        resp, dt, size = self.__time_query('post',
                                           endpoint,
                                           url_params,
                                           query=mek_query.to_json(),
                                           complexes_covered=complexes_covered)
        body = json.loads(resp.data)
        # The response is required to carry a 'complexes_covered' entry.
        complexes_covered = body['complexes_covered']
        relations = body['relations']
        assert len(relations) == 50
        assert isinstance(relations, list)
        assert dt < 15
        assert all('english' in rel for rel in relations)
        assert all('cur_count' in rel for rel in relations)
        assert all(rel['hashes'] is not None for rel in relations)

        first, second = relations[0], relations[1]
        assert first['id'] == 'Agents(None, MEK)'
        drill_down(first, 'relations')
        assert second['id'] == 'Agents(MEK, ERK)'
        assert second['agents'] == {'0': 'MEK', '1': 'ERK'}
        drill_down(second, 'relations')
        print(resp)
Exemplo n.º 6
0
def get_statements_by_gene_role_type(agent_id=None,
                                     agent_ns='HGNC-SYMBOL',
                                     role=None,
                                     stmt_type=None,
                                     count=1000,
                                     db=None,
                                     do_stmt_count=False,
                                     preassembled=True,
                                     fix_refs=True,
                                     with_evidence=True,
                                     with_support=False,
                                     essentials_only=False):
    """Get statements from the DB by stmt type, agent, and/or agent role.

    WARNING: This function will be removed in the future. Please look to
    indra_db.client.readonly.query and indra_db.client.principal.raw_statements
    for alternatives.

    Parameters
    ----------
    agent_id : str
        String representing the identifier of the agent from the given
        namespace. Note: if the agent namespace argument, `agent_ns`, is set
        to 'HGNC-SYMBOL', this function will treat `agent_id` as an HGNC gene
        symbol and perform an internal lookup of the corresponding HGNC ID.
        If the lookup fails, a warning is logged and an empty list returned.
    agent_ns : str
        Namespace for the identifier given in `agent_id`. Default is
        'HGNC-SYMBOL'.
    role : str
        String corresponding to the role of the agent in the statement.
        Options are 'SUBJECT', 'OBJECT', or 'OTHER' (in the case of `Complex`,
        `SelfModification`, and `ActiveForm` Statements). Only used together
        with `agent_id`.
    stmt_type : str
        Name of the Statement class.
    count : int (DEPRECATED)
        Number of statements to retrieve in each batch. This argument is no
        longer used by the function.
    db : :py:class:`DatabaseManager`
        Optionally specify a database manager that attaches to something
        besides the primary database, for example a local database instance.
    do_stmt_count : bool (DEPRECATED)
        Whether or not to perform an initial statement counting step to give
        more meaningful progress messages. This argument is no longer used
        by the function.
    preassembled : bool (DEPRECATED)
        Must be True (the default). Selecting from the raw statements is no
        longer supported and setting this to False raises an error.
    fix_refs : bool (DEPRECATED)
        Formerly controlled whether the `pmid` field of each Evidence was
        filled in from the database relations. This argument is no longer
        used by the function.
    with_evidence : bool
        Choose whether or not to populate the evidence list attribute of the
        Statements. If True (default) no evidence limit is applied; if False
        the evidence limit is set to 0.
    with_support : bool (DEPRECATED)
        Must be False (the default). Populating the supports and supported_by
        attributes is not supported and setting this to True raises an error.
    essentials_only : bool (DEPRECATED)
        Must be False (the default). Retrieving only statement metadata is no
        longer supported and setting this to True raises an error.

    Returns
    -------
    list
        The Statements produced by the query, or an empty list if `agent_id`
        was given as an HGNC symbol that could not be resolved.

    Raises
    ------
    DeprecationWarning
        If `preassembled` is False, `essentials_only` is True, or
        `with_support` is True.
    ValueError
        If none of `agent_id`, `role`, and `stmt_type` is given, or if the
        constructed query is not usable because neither `agent_id` nor
        `stmt_type` was given.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than
    # to this module, so users can find the call they need to migrate.
    warnings.warn(('This module is being taken out of service, as the tools '
                   'have become deprecated. Moreover, the service has been '
                   're-implemented to use newer tools as best as possible, '
                   'but some results may be subtly different.'),
                  DeprecationWarning, stacklevel=2)
    if db is None:
        db = get_ro('primary')

    if not preassembled:
        raise DeprecationWarning("This functionality is no longer supported. "
                                 "indra_db.client.principal.raw_statements "
                                 "has more functional features to search for "
                                 "raw statements.")

    if not (agent_id or role or stmt_type):
        raise ValueError('At least one of agent_id, role, or stmt_type '
                         'must be specified.')

    # Translate a gene symbol into the HGNC ID used for the actual query.
    if agent_id and agent_ns == 'HGNC-SYMBOL':
        hgnc_symbol = agent_id
        agent_id = hgnc_client.get_hgnc_id(hgnc_symbol)
        if not agent_id:
            logger.warning('Invalid gene name: %s', hgnc_symbol)
            return []
        agent_ns = 'HGNC'

    # Build the query up from whichever constraints were supplied.
    query = EmptyQuery()
    if agent_id:
        query &= HasAgent(agent_id, agent_ns, role)
    if stmt_type:
        query &= HasType([stmt_type])

    # A role on its own adds no constraint above, so the query may still be
    # unusable here even though the earlier argument check passed.
    if not isinstance(query, QueryCore):
        raise ValueError("Either agent_id or stmt_type must be given.")

    if essentials_only:
        raise DeprecationWarning("This functionality is no longer supported. "
                                 "Similar features are available in "
                                 "indra_db.client.readonly.query, especially "
                                 "the `get_interactions` methods.")

    # None means no evidence limit; 0 requests no evidence at all.
    ev_lim = None if with_evidence else 0

    if with_support:
        raise DeprecationWarning("This feature is not supported at this "
                                 "time, and was never truly supported.")

    result = query.get_statements(db, ev_limit=ev_lim)
    return result.statements()