def test_text_and_norm_text():
    """Ignore-filtering and disambiguation use both TEXT and TEXT_NORM."""
    gm.gilda_mode = 'local'
    # An ignorable string (XREF_BIBR) in either TEXT or TEXT_NORM should
    # cause the whole statement to be filtered out.
    for refs in ({'TEXT': 'XREF_BIBR', 'TEXT_NORM': 'ERK'},
                 {'TEXT': 'ERK', 'TEXT_NORM': 'XREF_BIBR'}):
        agent = Agent('x', db_refs=refs)
        assert not gm.map_stmts([Phosphorylation(None, agent)])
    # Context-based disambiguation should trigger whether the ambiguous
    # string appears in TEXT or in TEXT_NORM.
    for refs in ({'TEXT': 'AA', 'TEXT_NORM': 'XXX'},
                 {'TEXT': 'XXX', 'TEXT_NORM': 'AA'}):
        agent = Agent('x', db_refs=refs)
        stmt = Phosphorylation(
            None, agent, evidence=Evidence(text='Arachidonic acid (AA)'))
        mapped = gm.map_stmts([stmt])
        assert mapped[0].sub.name == 'arachidonic acid', mapped[0]
    # Plain grounding should be picked up from either TEXT or TEXT_NORM.
    for refs in ({'TEXT': 'XXX', 'TEXT_NORM': 'ERK'},
                 {'TEXT': 'ERK', 'TEXT_NORM': 'XXX'}):
        agent = Agent('x', db_refs=refs)
        mapped = gm.map_stmts([Phosphorylation(None, agent)])
        assert mapped[0].sub.name == 'ERK', mapped[0]
def test_association_refinement():
    """Associations refine along the UN ontology regardless of member order."""
    health = 'UN/entities/human/health'
    food = 'UN/entities/human/food'
    food_security = 'UN/entities/human/food/food_security'
    eh = Event(Concept('health', db_refs={'UN': [(health, 1.0)]}))
    ef = Event(Concept('food', db_refs={'UN': [(food, 1.0)]}))
    efs = Event(
        Concept('food security', db_refs={'UN': [(food_security, 1.0)]}))
    # st1 and st2 are the same Association with members in opposite order,
    # so they should be treated as duplicates of each other
    st1 = Association([eh, ef], evidence=[Evidence(source_api='eidos1')])
    st2 = Association([ef, eh], evidence=[Evidence(source_api='eidos2')])
    st3 = Association([eh, efs], evidence=[Evidence(source_api='eidos3')])
    st4 = Association([ef, efs], evidence=[Evidence(source_api='eidos4')])
    eidos_ont = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             '../sources/eidos/eidos_ontology.rdf')
    hm = HierarchyManager(eidos_ont, True, True)
    hierarchies = {'entity': hm}
    pa = Preassembler(hierarchies, [st1, st2, st3, st4])
    # st1/st2 collapse into one, leaving 3 unique statements
    unique_stmts = pa.combine_duplicates()
    assert len(unique_stmts) == 3
    rel_stmts = pa.combine_related()
    assert len(rel_stmts) == 2
    # Find the top-level statement involving health and food security;
    # it should be supported by the more generic health/food association
    eh_efs_stmt = [
        st for st in rel_stmts
        if (st.members[0].concept.name in {'health', 'food security'}
            and st.members[1].concept.name in {'health', 'food security'})
    ][0]
    assert len(eh_efs_stmt.supported_by) == 1
    assert (eh_efs_stmt.supported_by[0].members[0].concept.name
            in {'food', 'health'})
    assert (eh_efs_stmt.supported_by[0].members[1].concept.name
            in {'food', 'health'})
def test_gilda_disambiguation():
    """Gilda web mode disambiguates 'NDR1' based on the publication context."""
    gm.gilda_mode = 'web'
    er1 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid1 = '18362890'
    stmt1 = Phosphorylation(None, er1,
                            evidence=[Evidence(pmid=pmid1,
                                               text_refs={'PMID': pmid1})])
    er2 = Agent('NDR1', db_refs={'TEXT': 'NDR1'})
    pmid2 = '16832411'
    stmt2 = Inhibition(None, er2,
                       evidence=[Evidence(pmid=pmid2,
                                          text_refs={'PMID': pmid2})])
    # In the first paper's context, NDR1 should ground to the kinase STK38
    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'STK38', mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['HGNC'] == '17847', \
        mapped_stmts1[0].sub.db_refs
    assert mapped_stmts1[0].sub.db_refs['UP'] == 'Q15208', \
        mapped_stmts1[0].sub.db_refs
    # In the second paper's context, NDR1 should ground to NDRG1 instead
    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'NDRG1', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['HGNC'] == '7679', \
        mapped_stmts2[0].obj.db_refs
    assert mapped_stmts2[0].obj.db_refs['UP'] == 'Q92597', \
        mapped_stmts2[0].obj.db_refs
    # gilda annotations are stored per agent position: index 0 corresponds
    # to the None subject, index 1 to the grounded object
    annotations = mapped_stmts2[0].evidence[0].annotations
    assert len(annotations['agents']['gilda'][1]) == 2, \
        annotations
    assert annotations['agents']['gilda'][0] is None
    assert annotations['agents']['gilda'][1] is not None
def test_adeft_mapping_non_pos(): er = Agent('ER', db_refs={'TEXT': 'ER'}) # This is an exact definition of a pos_label entry so we # expect that it will be applied as a grounding even though the # Adeft model has low precision for this label. ev = Evidence(text='estradiol (ER)') stmt = Phosphorylation(None, er, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'CHEBI' in mapped_stmt.sub.db_refs, mapped_stmt.evidence # This one is not an exact definition so we expect the grounding to # be stripped out. ev = Evidence(text='Estradiol is one of the three estrogen hormones' 'naturally produced in the body.') stmt = Phosphorylation(None, er, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'CHEBI' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence # This is a non-positive label, and we expect it to be stripped out # whether it's an exact definition or not. pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'}) ev = Evidence(text='physical component summary (PCS)') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' not in mapped_stmt.sub.db_refs, \ (mapped_stmt.sub.db_refs, mapped_stmt.evidence) ev = Evidence(text='physical component summary') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' not in mapped_stmt.sub.db_refs, \ (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
def add_raw_database_statements(self, stmt_lists):
    """Add raw statements that came from knowledge bases/databases.

    Parameters
    ----------
    stmt_lists : list of list of Statement
        One list of statements per entry in ``self.databases``; the list's
        index ties its statements to the corresponding db_info record.
    """
    assert self.databases is not None
    if self.raw_statements is None:
        self.raw_statements = []
    new_raw_statements = []
    for dbidx, stmt_list in enumerate(stmt_lists):
        db_info = self.databases[dbidx]
        for stmt in stmt_list:
            # Each statement gets a fresh Evidence tagged with the
            # database's source API
            ev = Evidence(db_info.source_api)
            stmt.evidence.append(ev)
            src_hash = ev.get_source_hash()
            raw_json = stmt.to_json()
            # batch_id is fixed to 1 in this test fixture
            db_rs = self.db.RawStatements(
                db_info_id=db_info.id,
                json=json.dumps(raw_json).encode('utf-8'),
                type=raw_json['type'],
                uuid=stmt.uuid,
                batch_id=1,
                source_hash=src_hash,
                mk_hash=stmt.get_hash(),
                indra_version="test")
            self.raw_statements.append(db_rs)
            new_raw_statements.append(db_rs)
    self.db.session.add_all(new_raw_statements)
    self.db.session.commit()
    # Also populate the raw agents table for all added statements
    insert_raw_agents(self.db, 1,
                      [s for slist in stmt_lists for s in slist])
def test_adeft_mapping():
    """Adeft disambiguates 'ER' differently depending on paper context.

    NOTE(review): another test_adeft_mapping is defined later in this module
    and shadows this one; only the last definition runs under pytest —
    consider renaming so both variants execute.
    """
    er1 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid1 = '30775882'
    stmt1 = Phosphorylation(None, er1,
                            evidence=[Evidence(pmid=pmid1,
                                               text_refs={'PMID': pmid1})])
    er2 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid2 = '28369137'
    stmt2 = Inhibition(None, er2,
                       evidence=[Evidence(pmid=pmid2,
                                          text_refs={'PMID': pmid2})])
    # In the first paper's context, ER maps to the estrogen receptor family
    mapped_stmts1 = gm.map_stmts([stmt1])
    assert mapped_stmts1[0].sub.name == 'ESR', \
        mapped_stmts1[0].sub.name
    assert mapped_stmts1[0].sub.db_refs['FPLX'] == 'ESR', \
        mapped_stmts1[0].sub.db_refs
    # In the second paper's context, ER maps to the endoplasmic reticulum
    mapped_stmts2 = gm.map_stmts([stmt2])
    assert mapped_stmts2[0].obj.name == 'endoplasmic reticulum', \
        mapped_stmts2[0].obj.name
    assert mapped_stmts2[0].obj.db_refs['GO'] == 'GO:0005783', \
        mapped_stmts2[0].obj.db_refs
    # The chosen grounding is recorded in the adeft evidence annotations
    annotations = mapped_stmts2[0].evidence[0].annotations
    assert 'GO:GO:0005783' in annotations['agents']['adeft'][1]
def test_adeft_mapping():
    """Adeft disambiguation of 'ER' via an explicitly built GroundingMapper.

    NOTE(review): this duplicates the name of an earlier test in this module
    (which asserts FPLX/ESR via map_stmts); the later definition shadows the
    earlier one under pytest — confirm which variant should be live.
    """
    er1 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid1 = '30775882'
    stmt1 = Phosphorylation(None, er1,
                            evidence=[Evidence(pmid=pmid1,
                                               text_refs={'PMID': pmid1})])
    er2 = Agent('ER', db_refs={'TEXT': 'ER'})
    pmid2 = '28369137'
    stmt2 = Inhibition(None, er2,
                       evidence=[Evidence(pmid=pmid2,
                                          text_refs={'PMID': pmid2})])
    gm = GroundingMapper(default_grounding_map, default_agent_map)
    # In the first paper's context, ER maps to the ESR1 gene
    mapped_stmts1 = gm.map_agents([stmt1])
    assert mapped_stmts1[0].sub.name == 'ESR1'
    assert mapped_stmts1[0].sub.db_refs['HGNC'] == '3467'
    assert mapped_stmts1[0].sub.db_refs['UP'] == 'P03372'
    # In the second paper's context, ER maps to the endoplasmic reticulum
    mapped_stmts2 = gm.map_agents([stmt2])
    assert mapped_stmts2[0].obj.name == 'Endoplasmic Reticulum'
    assert mapped_stmts2[0].obj.db_refs['GO'] == 'GO:0005783'
    annotations = mapped_stmts2[0].evidence[0].annotations
    assert 'GO:GO:0005783' in annotations['agents']['adeft'][1]
def fix_invalidities_evidence(ev: Evidence):
    """Fix invalidities of a single INDRA Evidence in place.

    Normalizes ``text_refs`` keys to upper case, drops None-valued and
    pattern-invalid PMID/DOI/PMC entries, keeps ``ev.pmid`` and
    ``ev.text_refs['PMID']`` in sync, and fixes the context if present.
    """
    # Iterate over a deep copy since the dict is mutated inside the loop
    for k, v in copy.deepcopy(ev.text_refs).items():
        if v is None:
            ev.text_refs.pop(k, None)
        elif not k.isupper():
            # Re-key non-uppercase entries under the upper-cased key
            ev.text_refs.pop(k)
            ev.text_refs[k.upper()] = v
    # Drop PMIDs that don't match the expected pattern, in both places
    if ev.pmid and not re.match(text_ref_patterns['PMID'], ev.pmid):
        ev.pmid = None
    if ev.text_refs.get('PMID') and not re.match(text_ref_patterns['PMID'],
                                                 ev.text_refs['PMID']):
        ev.text_refs.pop('PMID', None)
    # Propagate the surviving PMID between the attribute and text_refs in
    # whichever direction is needed so the two stay consistent
    if ev.pmid is None and ev.text_refs.get('PMID') is not None:
        ev.pmid = ev.text_refs['PMID']
    elif ev.text_refs.get('PMID') is None and ev.pmid is not None:
        ev.text_refs['PMID'] = ev.pmid
    if 'DOI' in ev.text_refs and not re.match(text_ref_patterns['DOI'],
                                              ev.text_refs['DOI']):
        ev.text_refs.pop('DOI', None)
    if 'PMC' in ev.text_refs and not re.match(text_ref_patterns['PMC'],
                                              ev.text_refs['PMC']):
        ev.text_refs.pop('PMC', None)
    if ev.context is not None:
        fix_invalidities_context(ev.context)
def test_flatten_evidence_hierarchy():
    """flatten_evidence copies supporting evidence onto the top statement."""
    braf = Agent('BRAF')
    mek = Agent('MAP2K1')
    st1 = Phosphorylation(braf, mek, evidence=[Evidence(text='foo')])
    st2 = Phosphorylation(braf, mek, 'S', '218',
                          evidence=[Evidence(text='bar')])
    pa = Preassembler(hierarchies, stmts=[st1, st2])
    pa.combine_related()
    assert len(pa.related_stmts) == 1
    flattened = flatten_evidence(pa.related_stmts)
    assert len(flattened) == 1
    top_stmt = flattened[0]
    assert len(top_stmt.evidence) == 2
    top_texts = [e.text for e in top_stmt.evidence]
    assert 'bar' in top_texts
    assert 'foo' in top_texts
    assert len(top_stmt.supported_by) == 1
    supporting_stmt = top_stmt.supported_by[0]
    assert len(supporting_stmt.evidence) == 1
    assert supporting_stmt.evidence[0].text == 'foo'
    # Mutating the supporting statement's evidence must not leak into the
    # flattened copies held by the top-level statement
    supporting_stmt.evidence[0].text = 'changed_foo'
    assert supporting_stmt.evidence[0].text == 'changed_foo'
    top_texts = [e.text for e in top_stmt.evidence]
    assert 'changed_foo' not in top_texts
    assert 'foo' in top_texts
    # Flattened evidences are tagged by where they came from
    support_types = {ev.annotations.get('support_type')
                     for ev in top_stmt.evidence}
    assert support_types == {'direct', 'supported_by'}
def test_model_json():
    """Test the json structure and content of EmmaaModel.to_json() output."""
    indra_stmts = \
        [Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                    Agent('MAP2K1'),
                    evidence=[Evidence(text='BRAF activates MAP2K1.')]),
         Activation(Agent('MAP2K1',
                          activity=ActivityCondition('activity', True)),
                    Agent('MAPK1'),
                    evidence=[Evidence(text='Active MAP2K1 activates '
                                            'MAPK1.')])
         ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [EmmaaStatement(stmt, datetime.datetime.now(), [st])
                   for stmt in indra_stmts]
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'},
                          'name': 'MAPK1',
                          'search_term': 'MAPK1',
                          'type': 'gene'}]}
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.add_statements(emmaa_stmts)
    emmaa_model_json = emmaa_model.to_json()
    # Test json structure
    assert emmaa_model_json['name'] == 'test'
    assert isinstance(emmaa_model_json['stmts'], list)
    assert emmaa_model_json['ndex_network'] == \
        'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
    # Test config
    assert emmaa_model_json['search_terms'][0]['type'] == 'gene'
    assert emmaa_model_json['search_terms'][0]['db_refs'] == {'HGNC': '20974'}
    # Test json statements (the first-evidence-text assertion was
    # accidentally duplicated in the original; the duplicate is removed)
    assert 'BRAF activates MAP2K1.' == \
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['text']
    assert 'Active MAP2K1 activates MAPK1.' == \
        emmaa_model_json['stmts'][1]['stmt']['evidence'][0]['text']
    assert emmaa_model_json['stmts'][0]['stmt']['subj']['name'] == 'BRAF'
    assert emmaa_model_json['stmts'][1]['stmt']['subj']['name'] == 'MAP2K1'
    assert emmaa_model_json['stmts'][1]['stmt']['obj']['name'] == 'MAPK1'
    # Need hashes to be strings so that javascript can read them
    assert isinstance(
        emmaa_model_json['stmts'][0]['stmt']['evidence'][0]['source_hash'],
        str)
def test_combine_duplicates():
    """Duplicate statements are merged and their evidences pooled."""
    raf = Agent('RAF1')
    mek = Agent('MEK1')
    erk = Agent('ERK2')
    # Four copies of RAF phosphorylates MEK with distinct evidence texts
    p1 = Phosphorylation(raf, mek, evidence=Evidence(text='foo'))
    p2 = Phosphorylation(raf, mek, evidence=Evidence(text='bar'))
    p3 = Phosphorylation(raf, mek, evidence=Evidence(text='baz'))
    p4 = Phosphorylation(raf, mek, evidence=Evidence(text='beep'))
    p5 = Phosphorylation(mek, erk, evidence=Evidence(text='foo2'))
    # Three copies of MEK dephosphorylates ERK
    p6 = Dephosphorylation(mek, erk, evidence=Evidence(text='bar2'))
    p7 = Dephosphorylation(mek, erk, evidence=Evidence(text='baz2'))
    p8 = Dephosphorylation(mek, erk, evidence=Evidence(text='beep2'))
    p9 = Dephosphorylation(Agent('SRC'), Agent('KRAS'),
                           evidence=Evidence(text='beep'))
    stmts = [p1, p2, p3, p4, p5, p6, p7, p8, p9]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    # The statements come out sorted by their matches_key
    assert len(pa.unique_stmts) == 4, len(pa.unique_stmts)
    num_evs = [len(s.evidence) for s in pa.unique_stmts]
    assert pa.unique_stmts[0].matches(p6)  # MEK dephos ERK
    assert num_evs[0] == 3, num_evs[0]
    assert pa.unique_stmts[1].matches(p9)  # SRC dephos KRAS
    assert num_evs[1] == 1, num_evs[1]
    assert pa.unique_stmts[2].matches(p5)  # MEK phos ERK
    assert num_evs[2] == 1, num_evs[2]
    assert pa.unique_stmts[3].matches(p1)  # RAF phos MEK
    assert num_evs[3] == 4, num_evs[3]
def test_duplicates_copy():
    """combine_duplicates must not mutate the input statement list."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    ras = Agent('RAS', db_refs={'FA': '03663'})
    originals = [
        Phosphorylation(src, ras, evidence=[Evidence(text='Text 1')]),
        Phosphorylation(src, ras, evidence=[Evidence(text='Text 2')]),
    ]
    pa = Preassembler(hierarchies, stmts=originals)
    pa.combine_duplicates()
    assert len(pa.unique_stmts) == 1
    # The original list and each statement's own evidence are untouched
    assert len(originals) == 2
    assert len(originals[0].evidence) == 1
    assert len(originals[1].evidence) == 1
def test_filter_relevance():
    """Assembly respects the filter_relevance configuration setting."""
    config_dict = {
        'ndex': {'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'},
        'search_terms': [{'db_refs': {'HGNC': '20974'},
                          'name': 'MAPK1',
                          'search_term': 'MAPK1',
                          'type': 'gene'}]}
    indra_stmts = \
        [Activation(Agent('BRAF', db_refs={'HGNC': '20974'}),
                    Agent('MAP2K1'),
                    evidence=[Evidence(text='BRAF activates MAP2K1.',
                                       source_api='assertion')]),
         Activation(Agent('MAP2K1',
                          activity=ActivityCondition('activity', True)),
                    Agent('MAPK1'),
                    evidence=[Evidence(text='Active MAP2K1 activates '
                                            'MAPK1.',
                                       source_api='assertion')])
         ]
    st = SearchTerm('gene', 'MAP2K1', db_refs={}, search_term='MAP2K1')
    emmaa_stmts = [EmmaaStatement(stmt, datetime.datetime.now(), [st])
                   for stmt in indra_stmts]
    # Try no filter first: both statements survive assembly
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 2, emmaa_model.assembled_stmts
    # Next do a prior_one filter: only statements with at least one agent
    # among the search terms are kept
    config_dict['assembly'] = {'filter_relevance': 'prior_one'}
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 1, emmaa_model.assembled_stmts
    assert emmaa_model.assembled_stmts[0].obj.name == 'MAPK1'
    # Next do a prior_all filter: all agents must match, so nothing is kept
    config_dict['assembly'] = {'filter_relevance': 'prior_all'}
    emmaa_model = EmmaaModel('test', config_dict)
    emmaa_model.extend_unique(emmaa_stmts)
    emmaa_model.run_assembly()
    assert len(emmaa_model.assembled_stmts) == 0
def test_ground_gilda_source():
    """Grounding applies only to statements from the requested sources."""
    evidences = [Evidence(source_api=src)
                 for src in ('reach', 'sparser', 'trips')]
    stmts = [Phosphorylation(None, Agent('x', db_refs={'TEXT': 'kras'}),
                             evidence=ev)
             for ev in evidences]
    # Restricting to trips leaves the reach/sparser statements ungrounded
    grounded_stmts = ground_statements(stmts, sources=['trips'])
    assert grounded_stmts[0].sub.name == 'x', stmts[0]
    assert grounded_stmts[1].sub.name == 'x'
    assert grounded_stmts[2].sub.name == 'KRAS'
    # Conversely, reach/sparser statements get grounded here
    grounded_stmts = ground_statements(stmts, sources=['reach', 'sparser'])
    assert all(stmt.sub.name == 'KRAS' for stmt in grounded_stmts[:2])
def test_combine_evidence_exact_duplicates():
    """Exactly duplicated evidence texts collapse to a single entry."""
    raf = Agent('RAF1')
    mek = Agent('MEK1')
    stmts = [Phosphorylation(raf, mek, evidence=Evidence(text=txt))
             for txt in ('foo', 'bar', 'bar')]
    pa = Preassembler(hierarchies, stmts=stmts)
    pa.combine_duplicates()
    # The statements come out sorted by their matches_key
    assert len(pa.unique_stmts) == 1
    unique = pa.unique_stmts[0]
    # The two identical 'bar' evidences merge into one
    assert len(unique.evidence) == 2
    assert {ev.text for ev in unique.evidence} == {'foo', 'bar'}
def test_adeft_mapping_non_pos(): pcs = Agent('PCS', db_refs={'TEXT': 'PCS'}) # This is an exact definition of a non-positive label entry so we # expect that it will be applied as a grounding ev = Evidence(text='post concussive symptoms (PCS)') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'}) ev = Evidence(text='physical component summary') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' not in mapped_stmt.sub.db_refs, \ (mapped_stmt.sub.db_refs, mapped_stmt.evidence)
def test_adeft_mapping_non_pos(): pcs = Agent('PCS', db_refs={'TEXT': 'PCS'}) # This is an exact definition of a non-positive label entry so we # expect that it will be applied as a grounding ev = Evidence(text='post concussive symptoms (PCS)') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' in mapped_stmt.sub.db_refs, mapped_stmt.evidence pcs = Agent('PCS', db_refs={'TEXT': 'PCS', 'MESH': 'xxx'}) # There a non-positive entry is implied but not exactly, so # the prior grounding will be removed. ev = Evidence(text='post symptoms concussive concussion') stmt = Phosphorylation(None, pcs, evidence=[ev]) mapped_stmt = gm.map_stmts([stmt])[0] assert 'MESH' not in mapped_stmt.sub.db_refs, mapped_stmt.evidence
def phosphosite_to_indra():
    """Convert the Phosphosite kinase-substrate table to INDRA Statements.

    Reads the CSV at ``psite_fname``, keeps only rows where both kinase and
    substrate are human, builds one Phosphorylation Statement per row, and
    pickles the result to ``phosphosite_indra.pkl``.

    Returns
    -------
    list of Phosphorylation
        The extracted statements.
    """
    # pandas.DataFrame.from_csv was deprecated and removed in pandas 1.0;
    # read_csv is the supported equivalent.
    df = pandas.read_csv(psite_fname, index_col=None)
    df = df[df['KIN_ORGANISM'] == 'human']
    # Bug fix: this filter was previously assigned to an unused variable
    # `dt` while the loop iterated the unfiltered `df`, so non-human
    # substrates were never actually excluded.
    df = df[df['SUB_ORGANISM'] == 'human']
    stmts = []
    for _, row in df.iterrows():
        enz_name = row['GENE']
        enz_up = row['KIN_ACC_ID']
        sub_name = row['SUB_GENE']
        sub_up = row['SUB_ACC_ID']
        # Skip rows with missing names (pandas yields NaN floats for blanks)
        if not enz_name or not sub_name or \
                isinstance(enz_name, float) or isinstance(sub_name, float):
            continue
        enz = Agent(enz_name, db_refs={'UP': enz_up})
        sub = Agent(sub_name, db_refs={'UP': sub_up})
        # Sites look like e.g. S15, T187, Y204: residue letter + position
        site = row['SUB_MOD_RSD']
        if site[0] in ('S', 'T', 'Y'):
            residue = site[0]
            position = site[1:]
        else:
            residue = None
            position = None
        ev = Evidence('phosphosite')
        st = Phosphorylation(enz, sub, residue, position, ev)
        stmts.append(st)
    logger.info('%d human-human phosphorylations in Phosphosite' % len(stmts))
    with open('phosphosite_indra.pkl', 'wb') as fh:
        pickle.dump(stmts, fh, protocol=2)
    return stmts
def _process_relations(relation_rows, event_dict):
    """Build Influence Statements from spreadsheet relation rows.

    Parameters
    ----------
    relation_rows : iterator of row tuples
        Rows of the relations sheet; the first row consumed is the header.
    event_dict : dict
        Mapping of event index -> event entry used to resolve the
        cause/effect indices referenced by each relation row.
    """
    header = [cell.value for cell in next(relation_rows)]
    stmts = []
    for row in relation_rows:
        row_values = [r.value for r in row]
        row_dict = {h: v for h, v in zip(header, row_values)}
        cause_entries = row_dict.get('Cause Index')
        effect_entries = row_dict.get('Effect Index')
        # FIXME: Handle cases in which there is a missing cause/effect
        if not cause_entries or not effect_entries:
            continue
        # A row can reference multiple causes/effects as a comma-separated
        # list of event indices
        causes = [c.strip() for c in cause_entries.split(',')]
        effects = [e.strip() for e in effect_entries.split(',')]
        rel = row_dict.get('Relation')
        if _in_rels(rel, pos_rels):
            pol = 1
        elif _in_rels(rel, neg_rels):
            pol = -1
        elif _in_rels(rel, neu_rels):
            pol = None
        # If we don't recognize this relation, we don't get any statements
        else:
            continue
        text = row_dict.get('Sentence')
        # Only the Relation is kept as an annotation; the fuller set below
        # was deliberately disabled
        #annot_keys = ['Relation', 'Event_Type', 'Location', 'Time']
        #annots = {k: row_dict.get(k) for k in annot_keys}
        annot_keys = ['Relation']
        annots = {k: row_dict.get(k) for k in annot_keys}
        ref = row_dict.get('Source_File')
        ev = Evidence(source_api='sofia', pmid=ref, annotations=annots,
                      text=text)
        # Emit one Influence per cause/effect pair, all sharing the same
        # Evidence object
        for cause_index, effect_index in itertools.product(causes, effects):
            cause_name = event_dict[cause_index]['Relation']
            cause_grounding = event_dict[cause_index]['Event_Type']
            effect_name = event_dict[effect_index]['Relation']
            effect_grounding = event_dict[effect_index]['Event_Type']
            cause_concept = Concept(cause_name,
                                    db_refs={'TEXT': cause_name,
                                             'SOFIA': cause_grounding})
            effect_concept = Concept(effect_name,
                                     db_refs={'TEXT': effect_name,
                                              'SOFIA': effect_grounding})
            stmt = Influence(cause_concept, effect_concept, evidence=[ev])
            # Assume unknown polarity on the subject, put the overall
            # polarity in the sign of the object
            stmt.subj_delta['polarity'] = None
            stmt.obj_delta['polarity'] = pol
            stmts.append(stmt)
    return stmts
def test_model_extend():
    """extend_unique only adds statements whose evidence is not yet present."""
    ev1 = Evidence(pmid='1234', text='abcd', source_api='x')
    ev2 = Evidence(pmid='1234', text='abcde', source_api='x')
    ev3 = Evidence(pmid='1234', text='abcd', source_api='x')
    now = datetime.datetime.now
    emmaa_sts = [EmmaaStatement(Phosphorylation(None, Agent('a'),
                                                evidence=ev),
                                now(), [])
                 for ev in (ev1, ev2, ev3)]
    em = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    em.add_statements([emmaa_sts[0]])
    # ev3 duplicates ev1, so only the ev2-based statement gets added
    em.extend_unique(emmaa_sts[1:])
    assert len(em.stmts) == 2
    # A statement on a different agent is new even with duplicate evidence
    new_stmt = EmmaaStatement(Phosphorylation(None, Agent('b'),
                                              evidence=ev1),
                              now(), [])
    em.extend_unique([new_stmt])
    assert len(em.stmts) == 3
def get_event(event_entry):
    """Return an Event statement built from a Sofia event entry dict.

    NOTE(review): another function named get_event exists in this file
    (using a plain dict for delta instead of QualitativeDelta); the later
    definition shadows the earlier one — confirm which is meant to be live.
    """
    name = event_entry['Relation']
    concept = Concept(name, db_refs={'TEXT': name})
    grounding = event_entry['Event_Type']
    if grounding:
        concept.db_refs['SOFIA'] = grounding
    # Attach temporal and geolocation context when available
    context = WorldContext()
    time = event_entry.get('Time')
    if time:
        context.time = TimeContext(text=time.strip())
    loc = event_entry.get('Location')
    if loc:
        context.geo_location = RefContext(name=loc)
    text = event_entry.get('Text')
    ref = event_entry.get('Source')
    # Agent/patient strings are preserved as evidence annotations
    agent = event_entry.get('Agent')
    patient = event_entry.get('Patient')
    anns = {}
    if agent:
        anns['agent'] = agent
    if patient:
        anns['patient'] = patient
    ev = Evidence(source_api='sofia', pmid=ref, text=text,
                  annotations=anns, source_id=event_entry['Event Index'])
    pol = event_entry.get('Polarity')
    event = Event(concept, context=context, evidence=[ev],
                  delta=QualitativeDelta(polarity=pol, adjectives=None))
    return event
def get_evidence(assay):
    """Given an activity, return an INDRA Evidence object.

    Parameters
    ----------
    assay : dict
        an activity from the activities list returned by a query
        to the API

    Returns
    -------
    ev : :py:class:`Evidence`
        an :py:class:`Evidence` object containing the kinetics of the
        assay as annotations, or None if no kinetics could be extracted
    """
    kin = get_kinetics(assay)
    source_id = assay.get('assay_chembl_id')
    # Without kinetics there is nothing worth reporting as evidence
    if not kin:
        return None
    annotations = {'kinetics': kin}
    # Resolve the assay's ChEMBL document to a PubMed ID
    chembl_doc_id = str(assay.get('document_chembl_id'))
    pmid = get_pmid(chembl_doc_id)
    ev = Evidence(source_api='chembl', pmid=pmid, source_id=source_id,
                  annotations=annotations)
    return ev
def node_to_evidence(self, entity_node, is_direct):
    """Computes an evidence object for a statement.

    We assume that the entire event happens within a single statement,
    and get the text of the sentence by getting the text of the sentence
    containing the provided node that corresponds to one of the entities
    participating in the event.

    The Evidence's pmid is whatever was provided to the constructor
    (perhaps None), and the annotations are the subgraph containing the
    provided node, its ancestors, and its descendants.
    """
    # We assume that the entire event is within a single sentence, and
    # get this sentence by getting the sentence containing one of the
    # entities
    sentence_text = self.G.node[entity_node]['sentence_text']
    # Make annotations object containing the fully connected subgraph
    # containing these nodes
    subgraph = self.connected_subgraph(entity_node)
    annotations = {'node_properties': subgraph.node,
                   'edge_properties': subgraph.edge}
    # Make evidence object; directness is recorded under epistemics.
    # (An unused `epistemics = dict()` local was removed here.)
    evidence = Evidence(source_api='tees',
                        pmid=self.pmid,
                        text=sentence_text,
                        epistemics={'direct': is_direct},
                        annotations=annotations)
    return evidence
def test_fix_stmts():
    """fix_invalidities drops invalid statements and cleans db_refs."""
    stmts = [
        # A Translocation with neither location is invalid and gets dropped
        Translocation(Agent('x'), to_location=None, from_location=None),
        Phosphorylation(Agent('a', db_refs={'TEXT': None, 'FPLX': 'ERK'}),
                        Agent('b'), evidence=[Evidence(text='x')])
    ]
    stmts_out = fix_invalidities(stmts)
    assert len(stmts_out) == 1
    # The None-valued TEXT entry is removed from db_refs
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}
    # Same behavior through the assembly-corpus wrapper
    stmts_out = ac.fix_invalidities(stmts)
    assert len(stmts_out) == 1
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}
    stmts_out = ac.fix_invalidities(stmts, in_place=True,
                                    print_report_before=True,
                                    print_report_after=True,
                                    prior_hash_annots=True)
    # Check the in-place effect
    assert stmts[1].enz.db_refs == {'FPLX': 'ERK'}
    assert stmts_out[0].enz.db_refs == {'FPLX': 'ERK'}
    # prior_hash_annots should record the original hash as an annotation
    assert stmts_out[0].evidence[0].annotations['prior_hash']
def test_make_evidence_html1():
    """Evidence HTML handles full through progressively sparser evidence."""
    evidences = [
        # Full evidence
        Evidence(source_api='trips', pmid='12345', text='Some evidence'),
        # Has PMID but no text
        Evidence(source_api='biopax', pmid='23456', text=None),
        # No PMID or text but has a source id
        Evidence(source_api='bel', pmid=None, text=None, source_id='bel_id'),
        # No evidence other than the source API
        Evidence(source_api='bel', pmid=None, text=None, source_id=None),
    ]
    stmt = Phosphorylation(Agent('A'), Agent('B'), evidence=evidences)
    ev_html = make_evidence_html([stmt], 'proof for a conclusion')
    assert 'Some evidence' in ev_html, ev_html
    assert 'Database entry in \'biopax\'' in ev_html, ev_html
    assert 'Database entry in \'bel\'' in ev_html, ev_html
def get_event_compositional(self, event_entry: Dict[str, str]) -> Event: """Get an Event with compositional grounding Parameters ---------- event_entry : The event to process Returns ------- event : An Event statement """ # Get get compositional grounding comp_name, comp_grnd = self.get_compositional_grounding(event_entry) if comp_name is not None and \ comp_grnd[0] is not None and \ comp_grnd[0][0] is not None: concept = Concept(comp_name, db_refs={ 'TEXT': comp_name, 'WM': [comp_grnd] }) # If not try to get old style Sofia grounding else: name = event_entry['Relation'] concept = Concept(name, db_refs={'TEXT': name}) if event_entry['Event_Type']: concept.db_refs['SOFIA'] = event_entry['Event_Type'] context = WorldContext() time = event_entry.get('Time') if time: context.time = TimeContext(text=time.strip()) loc = event_entry.get('Location') if loc: context.geo_location = RefContext(name=loc) text = event_entry.get('Text') ref = event_entry.get('Source') agent = event_entry.get('Agent') patient = event_entry.get('Patient') anns = {} if agent: anns['agent'] = agent if patient: anns['patient'] = patient text_refs = {'DART': ref} ev = Evidence(source_api='sofia', text_refs=text_refs, text=text, annotations=anns, source_id=event_entry['Event Index']) pol = event_entry.get('Polarity') event = Event(concept, context=context, evidence=[ev], delta=QualitativeDelta(polarity=pol, adjectives=None)) return event
def __make_test_statement(a, b, source_api, ev_num=None):
    """Build a Phosphorylation statement A -> B with one synthetic evidence.

    Parameters
    ----------
    a, b : str
        Names for the enzyme and substrate agents.
    source_api : str
        Source API to record on the evidence.
    ev_num : int or None
        Evidence counter included in the evidence text. Previously, the
        default None crashed the '%d' formatting; now it is simply omitted
        from the text.
    """
    A = Agent(a)
    B = Agent(b)
    if ev_num is None:
        ev_text = "Evidence for %s phosphorylates %s." % (a, b)
    else:
        ev_text = "Evidence %d for %s phosphorylates %s." % (ev_num, a, b)
    ev_list = [Evidence(text=ev_text, source_api=source_api)]
    stmt = Phosphorylation(A, B, evidence=ev_list)
    return stmt
def _get_evidence(self, event, subj_concept, obj_concept):
    """Return the Evidence object for the INDRA Statement.

    Looks up the source sentence via the event's provenance by scanning
    the document's sentences for one containing both agent texts.
    """
    provenance = event.get('provenance')
    # First try looking up the full sentence through provenance
    doc_info = provenance[0].get('document')
    doc_id = doc_info['@id']
    agent_strs = [ag.db_refs['TEXT'] for ag in [subj_concept, obj_concept]]
    text = None
    for sent in self.document_dict[doc_id]['sentences'].values():
        # We take the first match, which _might_ be wrong sometimes. Perhaps
        # refine further later.
        if all([agent_text in sent for agent_text in agent_strs]):
            text = self._sanitize(sent)
            break
    # for/else: this branch runs only when no sentence matched (no break)
    else:
        logger.warning("Could not find sentence in document %s for event "
                       "with agents: %s" % (doc_id, str(agent_strs)))
    annotations = {
        'found_by': event.get('rule'),
        'provenance': provenance,
    }
    location = self.document_dict[doc_id]['location']
    ev = Evidence(source_api='bbn', text=text, annotations=annotations,
                  pmid=location)
    return [ev]
def _get_evidence(self, event, subj_concept, obj_concept, adjectives):
    """Return the Evidence object for the INDRA Statement.

    The sentence text is resolved directly from the provenance's document
    and sentence identifiers rather than by text matching.
    """
    provenance = event.get('provenance')
    # First try looking up the full sentence through provenance
    doc_id = provenance[0]['document']['@id']
    sent_id = provenance[0]['sentence']
    text = self.document_dict[doc_id]['sentences'][sent_id]
    text = self._sanitize(text)
    # Character offsets of the event mention within the document
    bounds = [provenance[0]['documentCharPositions'][k]
              for k in ['start', 'end']]
    annotations = {
        'found_by': event.get('rule'),
        'provenance': provenance,
        'event_type': basename(event.get('type')),
        'adjectives': adjectives,
        'bounds': bounds
    }
    location = self.document_dict[doc_id]['location']
    ev = Evidence(source_api='hume', text=text, annotations=annotations,
                  pmid=location)
    return [ev]
def get_event(event_entry):
    """Return an Event built from a Sofia event entry dict.

    NOTE(review): another get_event in this file passes a QualitativeDelta
    for delta and records agent/patient annotations, while this one passes
    a plain dict; the later definition shadows the earlier one — confirm
    which variant should be live.
    """
    name = event_entry['Relation']
    concept = Concept(name, db_refs={'TEXT': name})
    grounding = event_entry['Event_Type']
    if grounding:
        concept.db_refs['SOFIA'] = grounding
    # Attach temporal and geolocation context when available
    context = WorldContext()
    time = event_entry.get('Time')
    if time:
        context.time = TimeContext(text=time.strip())
    loc = event_entry.get('Location')
    if loc:
        context.geo_location = RefContext(name=loc)
    text = event_entry.get('Text')
    ref = event_entry.get('Source')
    ev = Evidence(source_api='sofia', pmid=ref, text=text)
    pol = event_entry.get('Polarity')
    event = Event(concept, context=context, evidence=[ev],
                  delta={'polarity': pol, 'adjectives': []})
    return event