Пример #1
0
def test_num_evidence():
    """Check that `HasNumEvidence` only returns statements in the count range.

    The query asks for statements with 5 through 9 pieces of evidence
    (``range(5, 10)``) and requests at most 8 pieces of evidence per
    statement via `ev_limit`.
    """
    ro = get_ro('primary')
    q = HasNumEvidence(tuple(range(5, 10)))
    res = q.get_statements(ro, limit=5, ev_limit=8)

    # The reported evidence totals must all lie in the requested range.
    assert all(5 <= n < 10 for n in res.evidence_counts.values())

    # A statement with exactly 5 pieces of evidence is a valid match, so the
    # lower bound has to be inclusive; `ev_limit` caps the upper end at 8.
    stmts = res.statements()
    assert all(5 <= len(s.evidence) <= 8 for s in stmts)
Пример #2
0
 def dump(self, continuing=False):
     """Run `dump_sif` against the selected database, targeting the S3 path.

     The principal handle is used when `self.use_principal` is truthy,
     otherwise the readonly handle. `continuing` is accepted but not used by
     this method.
     """
     handle = (get_db(self.db_label) if self.use_principal
               else get_ro(self.db_label))
     target = self.get_s3_path()
     dump_sif(target, ro=handle)
Пример #3
0
def test_evidence_filtering_trios():
    """Smoke-test evidence filters combined from three constraints.

    Every trio of constraints is intersected with a TP53 agent query, and
    statements are fetched with the trio's evidence filters combined by AND,
    by OR, and (over every ordering of the trio) by ``a & b | c``. Nothing is
    asserted about the content: the test passes if no query raises.
    """
    ro = get_ro('primary')
    q1 = HasAgent('TP53')
    q_list = [
        ~HasOnlySource('medscan'),
        HasSources(['reach', 'sparser']),
        HasDatabases(),
        HasReadings(),
        FromMeshIds(['D001943'])
    ]

    # Order within the trio does not matter for a pure AND or a pure OR of
    # the filters, so combinations are sufficient here.
    for q2, q3, q4 in combinations(q_list, 3):
        # Intersect the constraints so that every evidence filter is applied
        # to statements that were selected by the constraint it came from.
        query = q1 & q2 & q3 & q4
        ev_filter = q2.ev_filter() & q3.ev_filter() & q4.ev_filter()
        query.get_statements(ro,
                             limit=2,
                             ev_limit=5,
                             evidence_filter=ev_filter)

        ev_filter = q2.ev_filter() | q3.ev_filter() | q4.ev_filter()
        query.get_statements(ro,
                             limit=2,
                             ev_limit=5,
                             evidence_filter=ev_filter)

    # Mixing AND and OR is order-sensitive, so every permutation is tried.
    for q2, q3, q4 in permutations(q_list, 3):
        query = q1 & q2 & q3 & q4
        ev_filter = q2.ev_filter() & q3.ev_filter() | q4.ev_filter()
        query.get_statements(ro,
                             limit=2,
                             ev_limit=5,
                             evidence_filter=ev_filter)
Пример #4
0
def test_evidence_bounds_basic_case():
    """Check that a two-sided evidence bound restricts the evidence counts.

    MEK/ERK statements are requested with more than 5 and at most 10 pieces
    of evidence; every reported count must fall in that window.
    """
    ro = get_ro("primary")

    agents = HasAgent('MEK') & HasAgent('ERK')
    query = agents & HasEvidenceBound(["<= 10", " > 5"])
    res = query.get_hashes(ro)
    for count in res.evidence_counts.values():
        assert 5 < count <= 10
Пример #5
0
def test_has_hash():
    """Check that `HasHash` only returns statements with the given hashes."""
    ro = get_ro('primary')

    # Take ten hashes from the source meta table to search for.
    hash_rows = ro.session.query(ro.SourceMeta.mk_hash).limit(10)
    hashes = {row[0] for row in hash_rows}

    res = HasHash(hashes).get_statements(ro, limit=5, ev_limit=8)

    # The results must be a proper subset of the requested hashes, and the
    # source counts must be keyed by exactly the returned hashes.
    assert set(res.results.keys()) < hashes
    assert set(res.source_counts.keys()) == set(res.results.keys())
Пример #6
0
def test_has_agent_namespace_only():
    """Check an agent query that specifies only a namespace (HGNC).

    Each returned Inhibition statement must have at least one agent with an
    HGNC grounding.
    """
    ro = get_ro('primary')
    query = HasAgent(namespace='HGNC') & HasType(["Inhibition"])
    stmts = query.get_statements(ro, limit=100).statements()
    assert len(stmts) == 100
    for stmt in stmts:
        assert any('HGNC' in agent.db_refs for agent in stmt.agent_list())
Пример #7
0
def load_readonly_dump(db_label, ro_label, dump_file):
    """Load a dump file into the readonly database.

    Handles for the principal and readonly databases are looked up from the
    given labels, and the load runs inside a `ReadonlyTransferEnv` built from
    both handles.
    """
    source_db, target_db = get_db(db_label), get_ro(ro_label)
    logger.info("Using dump_file = \"%s\"." % dump_file)
    started_at = datetime.now()
    logger.info(
        "%s - Beginning upload of content (est. ~30 minutes)" % started_at)
    with ReadonlyTransferEnv(source_db, target_db):
        target_db.load_dump(dump_file)
Пример #8
0
def test_evidence_bounds_null_cases():
    """Check that evidence bounds which allow only zero evidence match nothing."""
    ro = get_ro('primary')
    for bound in ("< 1", "== 0"):
        query = HasAgent('MEK') & HasEvidenceBound([bound])
        res = query.get_hashes(ro, limit=10)
        assert len(res.results) == 0
Пример #9
0
def test_get_agents():
    """Check the basic shape of a `get_agents` result and its JSON form."""
    ro = get_ro('primary')
    res = HasAgent('TP53').get_agents(ro, limit=10)
    assert isinstance(res, QueryResult)

    # The limit is an upper bound on the number of results.
    num_results = len(res.results)
    assert num_results <= 10, num_results

    # The JSON form must carry one entry per result.
    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(res.results)
Пример #10
0
def load_readonly(from_dump):
    """Load the readonly database with readonly schema dump."""
    # Find the readonly dump listed in the manifest of the given start.
    start = Start.from_date(from_dump)
    dump_file = Readonly.from_list(start.manifest).get_s3_path()

    if dump_file:
        principal = get_db('primary', protected=True)
        readonly = get_ro('primary', protected=False)
        load_readonly_dump(principal, readonly, dump_file)
        return

    # Nothing to load: report it and stop.
    print(f"ERROR: No readonly dump for {start.date_stamp}")
Пример #11
0
def test_from_papers():
    """Check that `FromPapers` returns statements with evidence from the paper.

    Each returned statement needs at least one piece of evidence whose text
    refs carry the requested PMID.
    """
    ro = get_ro('primary')
    pmid = '27014235'
    res = FromPapers([('pmid', pmid)]).get_statements(ro, limit=5)
    assert res.statements()
    for stmt in res.statements():
        assert any(ev.text_refs.get('PMID') == pmid for ev in stmt.evidence)
Пример #12
0
 def dump(self, continuing=False):
     """Upload the distinct `pa_json` values of `FastRawPaLink` to S3 as JSON.

     The principal handle is used when `self.use_principal` is truthy,
     otherwise the readonly handle. `continuing` is accepted but not used by
     this method.
     """
     ro = (get_db(self.db_label) if self.use_principal
           else get_ro(self.db_label))

     # Each row holds a single JSON string; parse them all into one list.
     distinct_json = ro.FastRawPaLink.pa_json.distinct()
     rows = ro.session.query(distinct_json).all()
     json_list = [json.loads(row[0]) for row in rows]

     client = boto3.client('s3')
     client.put_object(Body=json.dumps(json_list),
                       **self.get_s3_path().kw())
Пример #13
0
def test_evidence_count_is_10():
    """Check evidence accounting when `ev_limit` is 10.

    Two statements are requested, each with at most 10 pieces of evidence;
    20 pieces are expected back in total, fewer than the full evidence counts.
    """
    ro = get_ro('primary')
    query = HasAgent('TP53') - HasOnlySource('medscan')
    res = query.get_statements(ro, limit=2, ev_limit=10)
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        assert len(stmt.evidence) <= 10

    assert res.returned_evidence == 20
    total_available = sum(res.evidence_counts.values())
    assert total_available > 20
Пример #14
0
    def dump(self, continuing=False):
        """Upload the pickled `(mk_hash, mesh_num)` rows of `MeshMeta` to S3.

        The principal handle is used when `self.use_principal` is truthy,
        otherwise the readonly handle. `continuing` is accepted but not used
        by this method.
        """
        if not self.use_principal:
            ro = get_ro(self.db_label)
        else:
            ro = get_db(self.db_label)

        columns = [ro.MeshMeta.mk_hash, ro.MeshMeta.mesh_num]
        query = ro.select_all(columns)

        client = boto3.client('s3')
        client.put_object(Body=pickle.dumps(query.all()),
                          **self.get_s3_path().kw())
Пример #15
0
def test_has_sources():
    """Check that `HasSources` returns statements supported by every source."""
    ro = get_ro('primary')
    res = HasSources(['reach', 'sparser']).get_statements(ro, limit=5,
                                                          ev_limit=8)
    assert len(res.results) == 5

    stmts = res.statements()
    res_json = res.json()
    assert 'results' in res_json
    assert len(stmts) == len(res.results)

    # Both sources must have a positive count for every statement.
    for counts in res.source_counts.values():
        for reader in ('reach', 'sparser'):
            assert counts[reader] > 0
Пример #16
0
def test_evidence_count_is_none():
    """Check evidence accounting when no `ev_limit` is given.

    Every statement is expected to come back with all of its evidence, so the
    returned evidence total must equal the sum of the evidence counts.
    """
    ro = get_ro('primary')
    query = HasAgent('TP53') - HasOnlySource('medscan')
    res = query.get_statements(ro, limit=2)
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2

    # The first statement should carry more than ten pieces of evidence.
    first_evidence = stmts[0].evidence
    assert len(first_evidence) > 10

    for stmt in stmts:
        assert len(stmt.evidence) == res.evidence_counts[stmt.get_hash()]
    assert sum(res.evidence_counts.values()) == res.returned_evidence
Пример #17
0
def test_has_only_source():
    """Check that `HasOnlySource` returns statements from just that source."""
    ro = get_ro('primary')
    res = HasOnlySource('signor').get_statements(ro, limit=5, ev_limit=8)

    res_json = res.json()
    assert 'results' in res_json
    assert set(res.results.keys()) == set(res.source_counts.keys())

    stmts = res.statements()
    assert len(stmts) == len(res.results)

    # Only signor may contribute: its count is positive, all others are zero.
    for counts in res.source_counts.values():
        for src, src_cnt in counts.items():
            if src == 'signor':
                assert src_cnt > 0
            else:
                assert src_cnt == 0
Пример #18
0
def test_from_mesh():
    """Check that `FromMeshIds` results are annotated with the mesh term.

    The mesh numbers recorded in `MeshTermMeta` for each returned hash must
    include 1943, the numeric part of 'D001943'.
    """
    ro = get_ro('primary')
    res = FromMeshIds(['D001943']).get_statements(ro, limit=5, ev_limit=8)

    result_hashes = set(res.results.keys())
    mm_entries = ro.select_all(
        [ro.MeshTermMeta.mk_hash, ro.MeshTermMeta.mesh_num],
        ro.MeshTermMeta.mk_hash.in_(result_hashes))

    # Group the mesh numbers by statement hash.
    mesh_nums_by_hash = {}
    for mk_hash, mesh_num in mm_entries:
        mesh_nums_by_hash.setdefault(mk_hash, []).append(mesh_num)

    for mesh_nums in mesh_nums_by_hash.values():
        assert 1943 in mesh_nums
Пример #19
0
def test_has_num_agents():
    """Check that `HasNumAgents` restricts the number of agents."""
    ro = get_ro('primary')

    # One or two agents, counting every slot of the agent list.
    res = HasNumAgents((1, 2)).get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        assert len(stmt.agent_list()) in (1, 2)

    # Exactly six agents, counting only the slots that are filled.
    res = HasNumAgents((6, )).get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        filled = sum(ag is not None for ag in stmt.agent_list())
        assert filled == 6
Пример #20
0
def test_has_databases():
    """Check that `HasDatabases` returns statements with database support."""
    ro = get_ro('primary')
    res = HasDatabases().get_statements(ro, limit=5, ev_limit=8)

    # Every statement needs a positive count from at least one database.
    for sc in res.source_counts.values():
        has_database = any(
            src in SOURCE_GROUPS['databases'] and cnt > 0
            for src, cnt in sc.items())
        assert has_database, f"No databases found in: {sc}"

    assert set(res.results.keys()) == set(res.source_counts.keys())
    stmts = res.statements()
    assert len(stmts) == len(res.results)
Пример #21
0
def _check_belief_sorted_result(query):
    """Check that `query` yields belief-sorted results from every endpoint.

    Covers `get_statements`, `get_hashes`, `get_agents`, and two successive
    levels of `AgentJsonExpander`, each requested with ``sort_by='belief'``.
    In every case the belief scores must be in non-increasing order.
    """
    def is_descending(scores):
        # True if the values of `scores`, in iteration order, never increase.
        values = list(scores.values())
        return all(hi >= lo for hi, lo in zip(values[:-1], values[1:]))

    ro = get_ro('primary')

    # Test `get_statements`
    res = query.get_statements(ro, sort_by='belief', limit=50, ev_limit=3)
    stmts = res.statements()
    assert len(res.belief_scores) == 50
    assert len(stmts) == 50
    for stmt in stmts:
        assert len(stmt.evidence) <= 3
    for earlier, later in zip(stmts[:-1], stmts[1:]):
        assert 0 <= later.belief <= earlier.belief <= 1

    # Test `get_hashes`
    res = query.get_hashes(ro, sort_by='belief', limit=500)
    assert len(res.belief_scores) <= 500, len(res.belief_scores)
    assert len(res.results) == len(res.belief_scores)
    assert is_descending(res.belief_scores)

    # Test `get_agents`
    res = query.get_agents(ro, limit=500, sort_by='belief', with_hashes=True)
    assert len(res.belief_scores) <= 500
    assert len(res.belief_scores) == len(res.results)
    assert is_descending(res.belief_scores)

    # Test AgentJsonExpander First level.
    top = next(iter(res.results.values()))
    expander = AgentJsonExpander(top['agents'], hashes=top['hashes'])
    res = expander.expand(ro, sort_by='belief')
    assert len(res.belief_scores) == len(res.results)
    allowed_hashes = set(top['hashes'])
    for entry in res.results.values():
        assert entry['agents'] == top['agents']
        assert set(entry['hashes']) <= allowed_hashes
    assert is_descending(res.belief_scores)

    # Test AgentJsonExpander Second level.
    top = next(iter(res.results.values()))
    expander = AgentJsonExpander(top['agents'],
                                 stmt_type=top['type'],
                                 hashes=top['hashes'])
    res = expander.expand(ro, sort_by='belief')
    assert len(res.belief_scores) == len(res.results.values())
    for entry in res.results.values():
        assert entry['agents'] == top['agents']
        assert entry['type'] == top['type']
        assert entry['hash'] in top['hashes']
    assert is_descending(res.belief_scores)
Пример #22
0
def test_real_world_examples():
    """Run two composite queries and check that each returns something."""
    ro = get_ro('primary')

    # MEK phosphorylating ERK, excluding medscan-only statements and
    # filtering evidence with the inverse of the medscan-only constraint.
    subject = HasAgent('MEK', namespace='FPLX', role='SUBJECT')
    obj = HasAgent('ERK', namespace='FPLX', role='OBJECT')
    query = (subject & obj
             & (HasType(['Phosphorylation']) - HasOnlySource('medscan')))
    ev_filter = HasOnlySource('medscan').invert().ev_filter()
    res = query.get_statements(ro,
                               limit=100,
                               ev_limit=10,
                               evidence_filter=ev_filter)
    assert len(res.results)

    # Three-agent statements involving both RAS and RAF.
    query = HasAgent('RAS') & HasAgent('RAF') & HasNumAgents((3, ))
    stmts = query.get_statements(ro, limit=100, ev_limit=10).statements()
    assert len(stmts)
Пример #23
0
def test_evidence_filtering_has_database():
    """Check evidence filtering with the `HasDatabases` constraint.

    None of the evidence returned may come from a source in the 'reading'
    source group.
    """
    ro = get_ro('primary')
    agent_q = HasAgent('TP53')
    db_q = HasDatabases()
    res = (agent_q & db_q).get_statements(ro,
                                          limit=2,
                                          ev_limit=None,
                                          evidence_filter=db_q.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        for ev in stmt.evidence:
            assert ev.source_api not in SOURCE_GROUPS['reading']

    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(stmts)
Пример #24
0
def test_evidence_filtering_mesh():
    """Check evidence filtering with the `FromMeshIds` constraint.

    With the filter applied, each statement must come back with fewer pieces
    of evidence than its total evidence count.
    """
    ro = get_ro('primary')
    agent_q = HasAgent('TP53')
    mesh_q = FromMeshIds(['D001943'])
    res = (agent_q & mesh_q).get_statements(ro,
                                            limit=2,
                                            ev_limit=None,
                                            evidence_filter=mesh_q.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        assert len(stmt.evidence) < res.evidence_counts[stmt.get_hash()]

    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(stmts)
Пример #25
0
def test_has_type():
    """Check `HasType` with exact types and with subclasses included."""
    ro = get_ro('primary')

    # Exact type match: only the named classes may appear.
    res = HasType(['Phosphorylation', 'Activation']).get_statements(
        ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        assert stmt.__class__.__name__ in ('Phosphorylation', 'Activation')

    # With subclasses: the named classes and all of their descendants.
    type_list = ['SelfModification', 'RegulateAmount', 'Translocation']
    q = HasType(type_list, include_subclasses=True)
    res = q.get_statements(ro, limit=5, ev_limit=8)
    stmts = res.statements()

    allowed_types = set()
    for type_name in type_list:
        base_type = get_statement_by_name(type_name)
        allowed_types.add(base_type)
        allowed_types.update(get_all_descendants(base_type))

    for stmt in stmts:
        assert type(stmt) in allowed_types
Пример #26
0
def run_all(continuing, delete_existing, load_only, dump_only,
            no_redirect_to_principal):
    """Generate new dumps and list existing dumps."""
    from indra_db import get_ro

    # A readonly handle is only created when this is not a dump-only run.
    ro_manager = None if dump_only else get_ro('primary', protected=False)

    principal = get_db('primary', protected=False)
    dump(principal,
         ro_manager,
         delete_existing,
         continuing,
         load_only,
         dump_only,
         no_redirect_to_principal=no_redirect_to_principal)
Пример #27
0
def test_evidence_filtering_has_only_source():
    """Check evidence filtering with an inverted `HasOnlySource` constraint.

    The query selects TP53 statements that are not medscan-only and applies
    the same constraint as an evidence filter, so none of the evidence
    returned may have been produced by medscan.
    """
    ro = get_ro('primary')
    q1 = HasAgent('TP53')
    q2 = ~HasOnlySource('medscan')
    query = q1 & q2
    res = query.get_statements(ro,
                               limit=2,
                               ev_limit=None,
                               evidence_filter=q2.ev_filter())
    assert isinstance(res, StatementQueryResult)
    stmts = res.statements()
    assert len(stmts) == 2

    # Compare the reader name case-insensitively: elsewhere READER values are
    # checked in upper case ('REACH', 'SPARSER'), so a lower-case-only match
    # on 'medscan' could never fire and the check would pass vacuously.
    # Evidence without a READER entry (e.g. from databases) is tolerated.
    assert not any(
        (ev.text_refs.get('READER') or '').lower() == 'medscan'
        for s in stmts for ev in s.evidence)

    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(stmts)
Пример #28
0
def test_evidence_filtering_has_source():
    """Check evidence filtering with the `HasSources` constraint.

    All evidence returned must have 'REACH' or 'SPARSER' as its READER.
    """
    ro = get_ro('primary')
    agent_q = HasAgent('TP53')
    source_q = HasSources(['reach', 'sparser'])
    res = (agent_q & source_q).get_statements(
        ro, limit=2, ev_limit=None, evidence_filter=source_q.ev_filter())
    assert isinstance(res, StatementQueryResult)

    stmts = res.statements()
    assert len(stmts) == 2
    for stmt in stmts:
        for ev in stmt.evidence:
            assert ev.text_refs.get('READER') in ['REACH', 'SPARSER']

    js = res.json()
    assert 'results' in js
    assert len(js['results']) == len(stmts)
Пример #29
0
    def _choose_db(cls, **kwargs):
        """Select the database handle to use, creating one if needed.

        Parameters
        ----------
        **kwargs
            The recognized keys are `ro` (a readonly database handle), `db`
            (a principal database handle), and `use_principal` (a bool that
            picks the database type when the class allows more than one).

        Returns
        -------
        The handle that was passed in, a newly created handle for the
        'primary' database, or None if `cls.db_required` is falsy.

        Raises
        ------
        ValueError
            If both `ro` and `db` are given, if the given handle is of a type
            not listed in `cls.db_options`, or if the database type cannot
            be determined.
        """
        # If we don't need a database handle, just keep on walking.
        if not cls.db_required:
            return None

        # If a database is required, the type must be specified.
        assert len(cls.db_options), \
            "If db is required, db_options are too."

        # If a handle was given, use it, regardless of other inputs, if it
        # is at all permissible.
        if 'ro' in kwargs and 'db' in kwargs:
            raise ValueError("Only one database handle may be defined at "
                             "a time.")
        if 'ro' in kwargs:
            if 'readonly' in cls.db_options:
                return kwargs['ro']
            raise ValueError("Cannot use readonly database, but ro "
                             "handle was given.")
        if 'db' in kwargs:
            if 'principal' in cls.db_options:
                return kwargs['db']
            raise ValueError("Cannot use principal database, but db "
                             "handle was given.")

        # Otherwise, read or guess the database type, and make a new instance.
        if len(cls.db_options) == 1:
            db_opt = cls.db_options[0]
        elif 'use_principal' in kwargs:
            db_opt = 'principal' if kwargs['use_principal'] else 'readonly'
        else:
            raise ValueError("No database specified.")

        if db_opt == 'principal':
            return get_db('primary', protected=False)
        # Any other option is treated as 'readonly'.
        return get_ro('primary', protected=False)
Пример #30
0
def test_has_agent():
    """Check `HasAgent` by name, by namespace and role, and by agent number."""
    ro = get_ro('primary')

    # Matched by name alone: some agent of every statement is named RAS.
    res = HasAgent('RAS').get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        names = [ag.name for ag in stmt.agent_list() if ag is not None]
        assert 'RAS' in names

    # Matched as subject by FPLX grounding: MEK is the first agent, or the
    # statement is a Complex that has MEK among its agents.
    q = HasAgent('MEK', namespace='FPLX', role='SUBJECT')
    res = q.get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        first_agent = stmt.agent_list(deep_sorted=True)[0]
        if 'MEK' == first_agent.db_refs['FPLX']:
            continue
        assert isinstance(stmt, Complex)
        assert 'MEK' in {a.db_refs.get('FPLX') for a in stmt.agent_list()}

    # Matched by position: the agent at index 3 must be vemurafenib.
    q = HasAgent('CHEBI:63637', namespace='CHEBI', agent_num=3)
    res = q.get_statements(ro, limit=5, ev_limit=8)
    for stmt in res.statements():
        agent = stmt.agent_list(deep_sorted=True)[3]
        assert agent.name == 'vemurafenib'
        assert agent.db_refs['CHEBI'] == 'CHEBI:63637'