Пример #1
0
def test_gilda_disambiguation():
    gm.gilda_mode = 'web'
    er1 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid1 = '18362890'
    stmt1 = Phosphorylation(None, er1,
                            evidence=[Evidence(pmid=pmid1,
                                               text_refs={'PMID': pmid1})])

    er2 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid2 = '16832411'
    stmt2 = Inhibition(None, er2,
                       evidence=[Evidence(pmid=pmid2,
                                          text_refs={'PMID': pmid2})])
    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'STK38', mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['HGNC'] == '17847', \
        mapped_stmts1[0].sub.db_refs
    assert mapped_stmts1[0].sub.db_refs['UP'] == 'Q15208', \
        mapped_stmts1[0].sub.db_refs

    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'NDRG1', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['HGNC'] == '7679', \
        mapped_stmts2[0].obj.db_refs
    assert mapped_stmts2[0].obj.db_refs['UP'] == 'Q92597', \
        mapped_stmts2[0].obj.db_refs

    annotations = mapped_stmts2[0].evidence[0].annotations
    assert len(annotations['agents']['gilda'][1]) == 2, \
        annotations
    assert annotations['agents']['gilda'][0] is None
    assert annotations['agents']['gilda'][1] is not None
Пример #2
0
def test_adeft_mapping_non_pos():
    er = Agent('ER', db_refs={'TEXT': 'ER'})
    # This is an exact definition of a pos_label entry so we
    # expect that it will be applied as a grounding even though the
    # Adeft model has low precision for this label.
    ev = Evidence(text='estradiol (ER)')
    stmt = Phosphorylation(None, er, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'CHEBI' in mapped_stmt.sub.db_refs, mapped_stmt.evidence
    # This one is not an exact definition so we expect the grounding to
    # be stripped out.
    ev = Evidence(text='Estradiol is one of the three estrogen hormones'
                  'naturally produced in the body.')
    stmt = Phosphorylation(None, er, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'CHEBI' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence
    # This is a non-positive label, and we expect it to be stripped out
    # whether it's an exact definition or not.
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    ev = Evidence(text='physical component summary (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
    ev = Evidence(text='physical component summary')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
Пример #3
0
def test_adeft_mapping():
    er1 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid1 = '30775882'
    stmt1 = Phosphorylation(None, er1, evidence=[Evidence(pmid=pmid1,
                                                          text_refs={'PMID':
                                                                     pmid1})])

    er2 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid2 = '28369137'
    stmt2 = Inhibition(None, er2, evidence=[Evidence(pmid=pmid2,
                                                     text_refs={'PMID':
                                                                pmid2})])

    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'ESR', \
        mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['FPLX'] == 'ESR', \
        mapped_stmts1[0].sub.db_refs

    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'endoplasmic reticulum', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['GO'] == 'GO:0005783', \
        mapped_stmts2[0].obj.db_refs

    annotations = mapped_stmts2[0].evidence[0].annotations
    assert 'GO:GO:0005783' in annotations['agents']['adeft'][1]
Пример #4
0
def test_adeft_mapping_non_pos():
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS'})
    # This is an exact definition of a non-positive label entry so we
    # expect that it will be applied as a grounding
    ev = Evidence(text='post concussive symptoms (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence

    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    ev = Evidence(text='physical component summary')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, \
        (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
Пример #5
0
def test_adeft_mapping_non_pos():
    pcs = Agent('PCS', db_refs={'TEXT': 'PCS'})
    # This is an exact definition of a non-positive label entry so we
    # expect that it will be applied as a grounding
    ev = Evidence(text='post concussive symptoms (PCS)')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence

    pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'})
    # There a non-positive entry is implied but not exactly, so
    # the prior grounding will be removed.
    ev = Evidence(text='post symptoms concussive concussion')
    stmt = Phosphorylation(None, pcs, evidence=[ev])
    mapped_stmt = gm.map_stmts([stmt])[0]
    assert 'MESH' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence
Пример #6
0
def test_misgrounding():
    baz1 = Agent('ZNF214', db_refs={'TEXT': 'baz1', 'HGNC': '13006'})
    stmt = Phosphorylation(None, baz1)
    stmts = gm.map_stmts([stmt])
    stmt = stmts[0]
    assert len(stmt.sub.db_refs) == 1, stmt.sub.db_refs
    assert stmt.sub.db_refs['TEXT'] == 'baz1'
    assert stmt.sub.name == 'baz1'
Пример #7
0
def test_map_agent():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    p_erk = Agent('P-ERK', db_refs={'TEXT': 'p-ERK'})
    stmt = Complex([erk, p_erk])
    mapped_stmts = gm.map_stmts([stmt])
    mapped_ag = mapped_stmts[0].members[1]
    assert mapped_ag.name == 'ERK'
    assert mapped_ag.db_refs.get('FPLX') == 'ERK'
Пример #8
0
def test_simple_mapping():
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    stmt = Phosphorylation(None, akt)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    mapped_akt = mapped_stmts[0].sub
    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'
Пример #9
0
def test_map_standardize_up_hgnc():
    a1 = Agent('MAPK1', db_refs={'HGNC': '6871'})
    a2 = Agent('MAPK1', db_refs={'UP': 'P28482'})
    stmt = Phosphorylation(a1, a2)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    st = mapped_stmts[0]
    assert st.enz.db_refs['HGNC'] == st.sub.db_refs['HGNC']
    assert st.enz.db_refs['UP'] == st.sub.db_refs['UP']
Пример #10
0
def test_in_place_overwrite_of_gm():
    """Make sure HGNC lookups don't modify the original grounding map by adding
    keys."""
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    gmap_after_mapping = gm.grounding_map
    assert set(gmap_after_mapping['ERK1'].keys()) == set(['TEXT', 'UP'])
Пример #11
0
def test_up_and_mismatched_hgnc():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482', 'HGNC': '6877'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert mapped_stmts[0].sub.db_refs['HGNC'] == '6877', \
        mapped_stmts[0].sub.db_refs
    assert mapped_stmts[0].sub.db_refs['UP'] == 'P27361', \
        mapped_stmts[0].sub.db_refs
Пример #12
0
def test_up_with_no_gene_name_with_hgnc_sym():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'UP': 'A0K5Q6', 'HGNC': '6871'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert mapped_stmts[0].sub.db_refs['HGNC'] == '6871', \
        mapped_stmts[0].sub.db_refs
    assert mapped_stmts[0].sub.db_refs['UP'] == 'P28482', \
        mapped_stmts[0].sub.db_refs
Пример #13
0
def test_map_standardize_chebi_hmdb():
    a1 = Agent('X', db_refs={'HMDB': 'HMDB0000122'})
    a2 = Agent('Y', db_refs={'CHEBI': 'CHEBI:15903'})
    stmt = Phosphorylation(a1, a2)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    st = mapped_stmts[0]
    assert st.enz.db_refs['CHEBI'] == st.sub.db_refs['CHEBI'], \
        (st.enz.db_refs, st.sub.db_refs)
    assert st.enz.name == 'beta-D-glucose', st.enz
    assert st.sub.name == 'beta-D-glucose', st.sub
Пример #14
0
def test_hgnc_but_not_up():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'HGNC': '6871'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    mapped_erk = mapped_stmts[0].sub
    assert mapped_erk.name == 'MAPK1'
    assert mapped_erk.db_refs['TEXT'] == 'ERK1'
    assert mapped_erk.db_refs['HGNC'] == '6871'
    assert mapped_erk.db_refs['UP'] == 'P28482'
Пример #15
0
def test_text_and_norm_text():
    gm.gilda_mode = 'local'

    # We should filter out ignores in both TEXT and TEXT_NORM
    ag = Agent('x', db_refs={'TEXT': 'XREF_BIBR', 'TEXT_NORM': 'ERK'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert not res
    ag = Agent('x', db_refs={'TEXT': 'ERK', 'TEXT_NORM': 'XREF_BIBR'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert not res

    # We should disambiguate based on both TEXT and TEXT_NORM
    ag = Agent('x', db_refs={'TEXT': 'AA', 'TEXT_NORM': 'XXX'},)
    stmt = Phosphorylation(None, ag,
                           evidence=Evidence(text='Arachidonic acid (AA)'))
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'arachidonic acid', res[0]
    ag = Agent('x', db_refs={'TEXT': 'XXX', 'TEXT_NORM': 'AA'})
    stmt = Phosphorylation(None, ag,
                           evidence=Evidence(text='Arachidonic acid (AA)'))
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'arachidonic acid', res[0]

    ag = Agent('x', db_refs={'TEXT': 'XXX', 'TEXT_NORM': 'ERK'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'ERK', res[0]

    ag = Agent('x', db_refs={'TEXT': 'ERK', 'TEXT_NORM': 'XXX'})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'ERK', res[0]
Пример #16
0
def test_map_standardize_chebi_pc():
    a1 = Agent('X', db_refs={'PUBCHEM': '42611257'})
    a2 = Agent('Y', db_refs={'CHEBI': 'CHEBI:63637'})
    stmt = Phosphorylation(a1, a2)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    st = mapped_stmts[0]
    assert st.enz.db_refs['PUBCHEM'] == st.sub.db_refs['PUBCHEM'], \
        (st.enz.db_refs, st.sub.db_refs)
    assert st.enz.db_refs['CHEBI'] == st.sub.db_refs['CHEBI'], \
        (st.enz.db_refs, st.sub.db_refs)
    assert st.enz.name == 'vemurafenib'
    assert st.sub.name == 'vemurafenib'
Пример #17
0
def test_up_id_with_no_gene_name():
    """Expect no HGNC entry; no error raised."""
    no_gn = Agent('NoGNname', db_refs={'TEXT': 'NoGN'})
    stmt = Phosphorylation(None, no_gn)
    g_map = {'NoGN': {'TEXT': 'NoGN', 'UP': 'A0K5Q6'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    mapped_ag = mapped_stmts[0].sub
    assert mapped_ag.name == 'NoGNname'
    assert mapped_ag.db_refs['TEXT'] == 'NoGN'
    assert mapped_ag.db_refs.get('HGNC') is None
    assert mapped_ag.db_refs['UP'] == 'A0K5Q6'
Пример #18
0
def test_up_id_with_no_hgnc_id():
    """Non human protein"""
    gag = Agent('Gag', db_refs={'TEXT': 'Gag'})
    stmt = Phosphorylation(None, gag)
    g_map = {'Gag': {'TEXT': 'Gag', 'UP': 'P04585'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    mapped_gag = mapped_stmts[0].sub
    assert mapped_gag.name == 'gag-pol'
    assert mapped_gag.db_refs['TEXT'] == 'Gag'
    assert mapped_gag.db_refs.get('HGNC') is None
    assert mapped_gag.db_refs['UP'] == 'P04585'
Пример #19
0
def test_map_entry_hgnc_and_up():
    """Make sure that HGNC symbol is replaced with HGNC ID when grounding map
    includes both UP ID and HGNC symbol."""
    rela = Agent('NF-kappaB p65', db_refs={'TEXT': 'NF-kappaB p65'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(erk, rela)
    g_map = {'NF-kappaB p65': {'TEXT': 'NF-kappaB p65', 'UP': 'Q04206',
                               'HGNC': '9955'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 1
    ms = mapped_stmts[0]
    assert ms.sub.db_refs == {'TEXT': 'NF-kappaB p65', 'UP': 'Q04206',
                              'HGNC': '9955'}
Пример #20
0
def test_gilda_disambiguation_local():
    gm.gilda_mode = 'local'
    er1 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid1 = '18362890'
    stmt1 = Phosphorylation(
        None, er1, evidence=[Evidence(pmid=pmid1, text_refs={'PMID': pmid1})])
    mapped_stmts1 = gm.map_stmts([stmt1])
    annotations = mapped_stmts1[0].evidence[0].annotations
    assert annotations['agents']['gilda'][0] is None
    assert annotations['agents']['gilda'][1] is not None
    assert len(annotations['agents']['gilda'][1]) == 2, \
        annotations
    # This is to make sure the to_json of the ScoredMatches works
    assert annotations['agents']['gilda'][1][0]['term']['db'] == 'HGNC'
Пример #21
0
def test_grounding_map_gilda_priority():
    gm.gilda_mode = 'web'
    fetal_bovine_serum = Agent('FBS', db_refs={'TEXT': 'FBS'})
    pmid = '28536624'
    stmt = Phosphorylation(None, fetal_bovine_serum,
                           evidence=[Evidence(pmid=pmid,
                                              text_refs={'PMID': pmid})])
    mapped_stmts = gm.map_stmts([stmt])
    annotations = mapped_stmts[0].evidence[0].annotations
    # agents should not be in annotations if gilda is run. Second condition
    # added as future proofing in case some future change causes this mapping
    # to add agent annotations in the future.
    assert 'agents' not in annotations or \
        'gilda' not in annotations['agents']
Пример #22
0
def test_bound_condition_mapping_multi():
    # Test with multiple agents
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    stmt = Phosphorylation(akt, erk)
    mapped_stmts = gm.map_stmts([stmt])
    s = mapped_stmts[0]
    mapped_akt = mapped_stmts[0].enz
    mapped_erk1 = mapped_akt.bound_conditions[0].agent
    mapped_erk2 = mapped_stmts[0].sub

    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'

    for e in (mapped_erk1, mapped_erk2):
        assert e.db_refs['TEXT'] == 'ERK1'
        assert e.db_refs['HGNC'] == '6877'
        assert e.db_refs['UP'] == 'P27361'
Пример #23
0
def test_bound_condition_mapping_agent_json():
    # Test with agent/json mapping
    akt = Agent('pkbA', db_refs={'TEXT': 'p-Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    stmt = Phosphorylation(None, akt)

    mapped_stmts = gm.map_stmts([stmt])

    s = mapped_stmts[0]
    mapped_akt = mapped_stmts[0].sub
    mapped_erk = mapped_akt.bound_conditions[0].agent

    #assert mapped_akt.db_refs['TEXT'] == 'p-AKT', mapped_akt.db_refs
    assert mapped_akt.db_refs['FPLX'] == 'AKT', mapped_akt.db_refs

    assert mapped_erk.db_refs['TEXT'] == 'ERK1'
    assert mapped_erk.db_refs['HGNC'] == '6877'
    assert mapped_erk.db_refs['UP'] == 'P27361'
Пример #24
0
def test_bound_condition_mapping():
    # Verify that the grounding mapper grounds the agents within a bound
    # condition
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]

    stmt = Phosphorylation(None, akt)

    mapped_stmts = gm.map_stmts([stmt])

    s = mapped_stmts[0]
    mapped_akt = mapped_stmts[0].sub
    mapped_erk = mapped_akt.bound_conditions[0].agent

    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'

    assert mapped_erk.db_refs['TEXT'] == 'ERK1'
    assert mapped_erk.db_refs['HGNC'] == '6877'
    assert mapped_erk.db_refs['UP'] == 'P27361'
Пример #25
0
def test_none_text_corner_case():
    ag = Agent('x', db_refs={'TEXT': None, 'TEXT_NORM': None})
    stmt = Phosphorylation(None, ag)
    res = gm.map_stmts([stmt])
    assert res[0].sub.name == 'x', res[0]
Пример #26
0
def test_ignore():
    agent = Agent('FA', db_refs={'TEXT': 'FA'})
    stmt = Phosphorylation(None, agent)
    mapped_stmts = gm.map_stmts([stmt])
    assert len(mapped_stmts) == 0
Пример #27
0
def test_hgnc_sym_with_no_id():
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'HGNC': 'foobar'}}
    gm = GroundingMapper(g_map)
    mapped_stmts = gm.map_stmts([stmt])