def test_modification_norefinement_noenz():
    """Statements that are each more specific on a different axis stay apart.

    One statement names the enzyme but no site; the other names the site but
    no enzyme. Neither refines the other, so both must survive
    ``combine_related``.
    """
    kinase = Agent('SRC', db_refs={'HGNC': '11283'})
    substrate = Agent('NRAS', db_refs={'HGNC': '7989'})
    enzyme_only = Phosphorylation(kinase, substrate)
    site_only = Phosphorylation(None, substrate, 'Y', '32',
                                evidence=[Evidence(text='foo')])
    preassembler = Preassembler(hierarchies, stmts=[enzyme_only, site_only])
    combined = preassembler.combine_related()
    # The modification is less specific but the enzyme more specific in the
    # first statement, so the two statements must not be merged.
    assert len(combined) == 2
    assert len(combined[1].evidence) == 1
def test_duplicates_sorting():
    """Three statements, two of them identical, collapse to two unique ones.

    The enzyme agents differ only in their modification conditions: one has a
    single unspecified phosphorylation, the other three site-specific ones.
    """
    generic_mod = ModCondition('phosphorylation')
    map2k1_generic = Agent('MAP2K1', mods=[generic_mod])
    site_mods = [ModCondition('phosphorylation', 'serine', position)
                 for position in ('218', '222', '298')]
    map2k1_sites = Agent('MAP2K1', mods=site_mods)
    mapk3 = Agent('MAPK3')
    stmts = [
        Phosphorylation(map2k1_generic, mapk3, position='218'),
        Phosphorylation(map2k1_sites, mapk3),
        # Same arguments as the first statement
        Phosphorylation(map2k1_generic, mapk3, position='218'),
    ]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    preassembler.combine_duplicates()
    assert len(preassembler.unique_stmts) == 2
def test_simple_mapping():
    """Map one statement through the module-level ``gm`` and check the refs.

    The substrate keeps its TEXT entry and gains the FPLX entry 'AKT'.
    """
    substrate = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    mapped = gm.map_stmts([Phosphorylation(None, substrate)])
    assert len(mapped) == 1
    mapped_refs = mapped[0].sub.db_refs
    assert mapped_refs['TEXT'] == 'Akt'
    assert mapped_refs['FPLX'] == 'AKT'
def test_misgrounding():
    """After mapping, the agent has only its TEXT ref and is named after it.

    The input agent is named 'ZNF214' and carries an HGNC ref; the mapped
    agent is expected to have exactly one ref (TEXT) and the name 'baz1'.
    """
    agent = Agent('ZNF214', db_refs={'TEXT': 'baz1', 'HGNC': '13006'})
    mapped = gm.map_stmts([Phosphorylation(None, agent)])
    substrate = mapped[0].sub
    assert len(substrate.db_refs) == 1, substrate.db_refs
    assert substrate.db_refs['TEXT'] == 'baz1'
    assert substrate.name == 'baz1'
def test_return_toplevel():
    """Exercise both values of ``return_toplevel`` on a two-statement input.

    One statement carries a residue and position, the other does not. With
    ``return_toplevel=True`` a single statement is expected; with ``False``
    both are returned and their supports/supported_by links are checked.
    """
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    nras = Agent('NRAS', db_refs={'HGNC': '7989'})
    with_site = Phosphorylation(src, nras, 'tyrosine', '32')
    without_site = Phosphorylation(src, nras)
    preassembler = Preassembler(hierarchies, stmts=[with_site, without_site])

    top_level = preassembler.combine_related(return_toplevel=True)
    assert len(top_level) == 1
    assert len(top_level[0].supported_by) == 1
    assert len(top_level[0].supported_by[0].supports) == 1

    everything = preassembler.combine_related(return_toplevel=False)
    assert len(everything) == 2
    # Output order is not assumed: the statement with a residue is the one
    # expected to be supported by the other.
    if everything[0].residue:
        refined, supporter = everything[0], everything[1]
    else:
        refined, supporter = everything[1], everything[0]
    assert len(refined.supported_by) == 1
    assert len(refined.supported_by[0].supports) == 1
    assert len(supporter.supports) == 1
    assert len(supporter.supports[0].supported_by) == 1
def test_get_search_terms():
    """Check the search terms a GeneListPrior builds from one Inhibition.

    The first term is expected to be the gene and the second the drug, each
    with its name wrapped in double quotes.
    """
    prior = GeneListPrior(['BRAF'], 'braf', 'BRAF model')
    assert prior.name == 'braf'
    assert prior.human_readable_name == 'BRAF model'

    drug = Agent('vemurafenib', db_refs={'CHEBI': 'CHEBI:63637'})
    gene = Agent('BRAF', db_refs={'HGNC': '1097', 'UP': 'P15056'})
    terms = prior.make_search_terms([Inhibition(drug, gene)])
    assert terms
    assert all(isinstance(term, SearchTerm) for term in terms)
    assert terms[0].type == 'gene'
    assert terms[0].search_term == '"BRAF"'
    assert terms[1].type == 'drug'
    assert terms[1].search_term == '"vemurafenib"', terms[1].search_term
def test_grounding_aggregation():
    """Four BRAF phosphorylations with different db_refs give three uniques.

    All agents are named 'BRAF'; they differ in TEXT and in which of the
    HGNC / UP references are present.
    """
    ref_variants = [
        {'TEXT': 'braf', 'HGNC': '1097'},
        {'TEXT': 'BRAF'},
        {'TEXT': 'Braf', 'UP': 'P15056'},
        {'TEXT': 'B-raf', 'UP': 'P15056', 'HGNC': '1097'},
    ]
    agents = [Agent('BRAF', db_refs=refs) for refs in ref_variants]
    stmts = [Phosphorylation(None, agent) for agent in agents]
    preassembler = Preassembler(hierarchies, stmts=stmts)
    unique_stmts = preassembler.combine_duplicates()
    assert len(unique_stmts) == 3, unique_stmts
def test_dynamic_property_to_english():
    """Check the English rendering of a DynamicProperty for two patterns.

    The same query object is reused; only ``pattern_type`` is changed between
    the two assertions.
    """
    phospho_egfr = Agent('EGFR', mods=[ModCondition('phosphorylation')],
                         db_refs={'HGNC': '3236'})
    query = DynamicProperty(phospho_egfr, 'always_value', 'low',
                            'qualitative')
    assert query.to_english() == 'Phosphorylated EGFR is always low.'

    query.pattern_type = 'eventual_value'
    assert query.to_english() == 'Phosphorylated EGFR is eventually low.'
def test_print_model():
    """Assemble four Influence statements and check a model can be printed."""
    influence_pairs = [
        ('rainfall', 'crop_yields'),
        ('irrigation', 'crop_yields'),
        ('temperature', 'crop_yields'),
        ('rainfall', 'temperature'),
    ]
    stmts = [Influence(Agent(subj), Agent(obj))
             for subj, obj in influence_pairs]
    assembler = FigaroAssembler(stmts)
    assembler.make_model()
    model_text = assembler.print_model()
    assert model_text is not None
def get_agent(concept):
    """Return an Agent grounded via Gilda from a concept's name.

    Parameters
    ----------
    concept
        Object with a ``name`` attribute; the name is both the text that is
        grounded and the name of the returned Agent.

    Returns
    -------
    Agent or None
        None if ``gilda.ground`` returns no matches. Otherwise an Agent whose
        db_refs hold the first match's term (db -> id) and a TEXT entry,
        after being passed through ``standardize_agent_name``.
    """
    name = concept.name
    matches = gilda.ground(name)
    if not matches:
        return None
    # Only the first match is used
    top_term = matches[0].term
    agent = Agent(name, db_refs={top_term.db: top_term.id, 'TEXT': name})
    # Called for its effect on the agent; the return value is not used
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def get_target_agent(target):
    """Return an Agent for a gene symbol with whatever grounding is found.

    Parameters
    ----------
    target : str
        Gene symbol; used as the Agent name and looked up via
        ``hgnc_client.get_hgnc_id``.

    Returns
    -------
    Agent
        Agent named ``target``. Its db_refs contain 'HGNC' only if an HGNC ID
        was found, and 'UP' only if a UniProt ID was found for that HGNC ID.
        Failed lookups are left out instead of being stored as empty values.
    """
    db_refs = {}
    hgnc_id = hgnc_client.get_hgnc_id(target)
    if hgnc_id:
        db_refs['HGNC'] = hgnc_id
        # Only ask for a UniProt ID when there is an HGNC ID to look up
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if up_id:
            db_refs['UP'] = up_id
    return Agent(target, db_refs=db_refs)
def test_open_query_to_english():
    """Check English renderings of OpenSearchQuery for three configurations.

    Covers the agent in the object role with two namespaces, in the subject
    role with one namespace, and in the subject role with no namespaces.
    """
    egfr = Agent('EGFR', db_refs={'HGNC': '3236'})
    as_object = OpenSearchQuery(egfr, 'Inhibition', 'object',
                                ['chebi', 'chembl'])
    as_subject = OpenSearchQuery(egfr, 'Inhibition', 'subject', ['hgnc'])
    no_namespaces = OpenSearchQuery(egfr, 'Activation', 'subject')

    assert as_object.to_english() == \
        'What inhibits EGFR? (CHEBI, CHEMBL)', as_object.to_english()
    assert as_subject.to_english() == 'What does EGFR inhibit? (HGNC)'
    assert no_namespaces.to_english() == 'What does EGFR activate?'
def test_stringify_dynamic_property():
    """Check the ``str()`` form of a DynamicProperty query."""
    phospho_egfr = Agent('EGFR', mods=[ModCondition('phosphorylation')],
                         db_refs={'HGNC': '3236'})
    query = DynamicProperty(phospho_egfr, 'always_value', 'low',
                            'qualitative')
    expected = ("DynamicPropertyQuery(entity=EGFR(mods: "
                "(phosphorylation)), pattern=always_value, "
                "molecular quantity=('qualitative', 'low'))")
    assert str(query) == expected
def _extract_protein(self, name, gene_id):
    """Return an Agent for a protein given a name and an Entrez gene ID.

    Parameters
    ----------
    name : str
        Fallback name, used when standardization yields no name.
    gene_id
        Entrez gene ID; stored under 'EGID' and used to look up an HGNC ID.

    Returns
    -------
    Agent
        Agent carrying the db_refs returned by ``standardize_name_db_refs``.
    """
    raw_refs = {'EGID': gene_id}
    hgnc_id = hgnc_client.get_hgnc_from_entrez(gene_id)
    if hgnc_id is not None:
        raw_refs['HGNC'] = hgnc_id
    standard_name, db_refs = standardize_name_db_refs(raw_refs)
    # Prefer the standardized name; keep the given one if there is none
    return Agent(standard_name or name, db_refs=db_refs)
def get_statements(gene_list):
    """Return Complex statements for the interactions of the given genes.

    Parameters
    ----------
    gene_list
        Passed to ``_send_request`` together with
        ``include_interactors=True``.

    Returns
    -------
    list
        One Complex per interaction in the response, or an empty list if the
        response is None.
    """
    res_dict = _send_request(gene_list, include_interactors=True)
    if res_dict is None:
        return []

    statements = []
    for int_id, interaction in res_dict.items():
        symbol_a = interaction['OFFICIAL_SYMBOL_A']
        symbol_b = interaction['OFFICIAL_SYMBOL_B']
        # NOTE(review): the official symbol is stored under the 'HGNC' key;
        # confirm whether an HGNC ID was intended here.
        members = [
            Agent(symbol_a, db_refs={'HGNC': symbol_a}),
            Agent(symbol_b, db_refs={'HGNC': symbol_b}),
        ]
        # The whole interaction record is kept as the evidence annotations
        evidence = Evidence(source_api='biogrid', source_id=int_id,
                            pmid=interaction['PUBMED_ID'], text=None,
                            annotations=interaction)
        statements.append(Complex(members, evidence=evidence))
    return statements
def get_agent(raw_name, entrez_id):
    """Return a standardized Agent from a raw name and an Entrez gene ID.

    Parameters
    ----------
    raw_name : str
        Used as the initial Agent name and stored under 'TEXT'.
    entrez_id
        Stored under 'EGID' and used to look up an HGNC ID.

    Returns
    -------
    Agent
        Agent with TEXT and EGID refs, plus HGNC when the lookup yields a
        truthy ID, after being passed through ``standardize_agent_name``.
    """
    logger.debug('Looking up grounding data for Entrez #%s' % entrez_id)
    hgnc_id = hgc.get_hgnc_from_entrez(entrez_id)
    grounding = {'TEXT': raw_name, 'EGID': entrez_id}
    if hgnc_id:
        grounding['HGNC'] = hgnc_id
    agent = Agent(raw_name, db_refs=grounding)
    # Called for its effect on the agent; the return value is not used
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_save_sentences_unicode():
    """Run the sentence helpers on evidence text with a non-BMP character.

    Writes 'test_save_sentences.csv' in the working directory; the file is
    not removed by this test.
    """
    mek = Agent('MEK', db_refs={'TEXT': 'MAP2K1'})
    evidence = Evidence(source_api='reach', pmid='PMID000asdf',
                        text='foo\U0001F4A9bar')
    stmts = [Phosphorylation(None, mek, evidence=[evidence])]

    sentences = get_sentences_for_agent('MAP2K1', stmts)
    assert unicode_strs(sentences)

    texts_with_grounding = agent_texts_with_grounding(stmts)
    save_sentences(texts_with_grounding, stmts, 'test_save_sentences.csv')
def test_find_contradicts():
    """Two opposing statement pairs should yield exactly two contradictions.

    The pairs are Inhibition/Activation and IncreaseAmount/DecreaseAmount,
    each between agents 'a' and 'b'.
    """
    inhibition = Inhibition(Agent('a'), Agent('b'))
    activation = Activation(Agent('a'), Agent('b'))
    increase = IncreaseAmount(Agent('a'), Agent('b'))
    decrease = DecreaseAmount(Agent('a'), Agent('b'))
    preassembler = Preassembler(
        hierarchies, [inhibition, activation, increase, decrease])
    contradicts = preassembler.find_contradicts()
    assert len(contradicts) == 2

    # Pairs are compared as sets of UUIDs so their internal order is ignored
    expected_pairs = ({inhibition.uuid, activation.uuid},
                      {increase.uuid, decrease.uuid})
    for first, second in contradicts:
        assert {first.uuid, second.uuid} in expected_pairs
def _extract_drug(self, line):
    """Return an Agent for the small molecule described by a data row.

    Parameters
    ----------
    line : dict
        Row with 'Small Molecule Name' and 'Small Molecule HMS LINCS ID'
        entries.

    Returns
    -------
    Agent
        Agent named after the small molecule, using the refs returned by
        ``self._lc.get_small_molecule_refs``. A CHEBI entry is added when a
        PUBCHEM ref is present and maps to a ChEBI ID.
    """
    drug_name = line['Small Molecule Name']
    lincs_id = line['Small Molecule HMS LINCS ID']
    refs = self._lc.get_small_molecule_refs(lincs_id)
    chebi_id = (chebi_client.get_chebi_id_from_pubchem(refs['PUBCHEM'])
                if 'PUBCHEM' in refs else None)
    if chebi_id:
        # Note: this updates the refs object returned by the lookup in place
        refs['CHEBI'] = chebi_id
    return Agent(drug_name, db_refs=refs)
def create_type_and_source(self):
    """Build a FIND-RELATIONS-FROM-LITERATURE request with a source only.

    The request has type 'unknown', BAP1 as source, ``NONE`` as target and
    three filter agents whose 'DB--REFS' entry is overwritten with a NIL
    token.

    Returns
    -------
    Whatever ``self._get_content`` returns for these arguments.
    """
    source = Bioagent.make_cljson(Agent('BAP1', db_refs={'HGNC': '950'}))
    filter_names = ['CHD8', 'SCN2A', 'ARID1B']
    ungrounded = [Agent(name, db_refs=None) for name in filter_names]
    filter_agents = Bioagent.make_cljson(ungrounded)

    from kqml import KQMLToken
    for filter_agent in filter_agents:
        filter_agent.set('DB--REFS', KQMLToken('NIL'))
    print(filter_agents)

    return self._get_content('FIND-RELATIONS-FROM-LITERATURE',
                             type='unknown',
                             source=source,
                             target=NONE,
                             filter_agents=filter_agents)
def test_up_and_mismatched_hgnc():
    """A grounding-map entry with differing UP and HGNC keeps the HGNC ID.

    The map supplies UP 'P28482' with HGNC '6877'; after mapping, the HGNC ID
    is expected unchanged and the UP ID is expected to be 'P27361'.
    """
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    grounding_map = {
        'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482', 'HGNC': '6877'},
    }
    mapper = GroundingMapper(grounding_map)
    mapped_stmts = mapper.map_agents([stmt])
    mapped_refs = mapped_stmts[0].sub.db_refs
    assert mapped_refs['HGNC'] == '6877', mapped_refs
    assert mapped_refs['UP'] == 'P27361', mapped_refs
def test_up_with_no_gene_name_with_hgnc_sym():
    """The mapped UP ID differs from the one in the map; the HGNC ID is kept.

    The map supplies UP 'A0K5Q6' with HGNC '6871'; after mapping, the HGNC ID
    is expected unchanged and the UP ID is expected to be 'P28482'.
    """
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    grounding_map = {
        'ERK1': {'TEXT': 'ERK1', 'UP': 'A0K5Q6', 'HGNC': '6871'},
    }
    mapper = GroundingMapper(grounding_map)
    mapped_stmts = mapper.map_agents([stmt])
    mapped_refs = mapped_stmts[0].sub.db_refs
    assert mapped_refs['HGNC'] == '6871', mapped_refs
    assert mapped_refs['UP'] == 'P28482', mapped_refs
def _get_agent_from_gene_name(gene_name):
    """Return an Agent for a gene name, grounded to HGNC/UniProt if found.

    Parameters
    ----------
    gene_name : str
        Used as the Agent name and looked up via ``hgnc_client.get_hgnc_id``.

    Returns
    -------
    Agent
        Agent whose db_refs contain 'HGNC' when an ID is found, plus 'UP'
        when a UniProt ID is found for that HGNC ID; empty otherwise.
    """
    hgnc_id = hgnc_client.get_hgnc_id(gene_name)
    if not hgnc_id:
        # No HGNC ID means nothing to ground against
        return Agent(gene_name, db_refs={})

    grounding = {'HGNC': hgnc_id}
    up_id = hgnc_client.get_uniprot_id(hgnc_id)
    if up_id:
        grounding['UP'] = up_id
    return Agent(gene_name, db_refs=grounding)
def test_bound_condition_mapping_multi():
    """Mapping covers the same agent both as substrate and as bound partner.

    ERK1 appears as the statement's substrate and inside the enzyme's bound
    condition; both occurrences are expected to be grounded identically.
    Uses the module-level ``gm``.
    """
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    mapped_stmt = gm.map_stmts([Phosphorylation(akt, erk)])[0]

    mapped_akt = mapped_stmt.enz
    bound_erk = mapped_akt.bound_conditions[0].agent
    substrate_erk = mapped_stmt.sub
    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'

    expected_erk_refs = (('TEXT', 'ERK1'), ('HGNC', '6877'),
                         ('UP', 'P27361'))
    for mapped_erk in (bound_erk, substrate_erk):
        for namespace, identifier in expected_erk_refs:
            assert mapped_erk.db_refs[namespace] == identifier
def get_statements(gene_list):
    """Return Complex statements for physical interactions of given genes.

    Interactions are skipped when either participant cannot be resolved to
    an HGNC ID, HGNC name and UniProt ID, or when the interaction's
    'EXPERIMENTAL_SYSTEM_TYPE' is not 'physical'.

    Parameters
    ----------
    gene_list
        Passed to ``_send_request`` together with
        ``include_interactors=True``.

    Returns
    -------
    list
        One Complex per retained interaction, or an empty list if the
        response is None.
    """
    res_dict = _send_request(gene_list, include_interactors=True)
    if res_dict is None:
        return []

    def resolve_entrez(egid):
        """Return (name, db_refs) for an Entrez ID, or (None, {}) on a miss."""
        hgnc_id = hgnc_client.get_hgnc_from_entrez(egid)
        if not hgnc_id:
            logger.info("No HGNC ID for Entrez ID: %s" % egid)
            return (None, {})
        hgnc_name = hgnc_client.get_hgnc_name(hgnc_id)
        if not hgnc_name:
            logger.info("No HGNC name for HGNC ID: %s" % hgnc_id)
            return (None, {})
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if not up_id:
            logger.info("No Uniprot ID for EGID / HGNC ID / Symbol "
                        "%s / %s / %s" % (egid, hgnc_id, hgnc_name))
            return (None, {})
        return (hgnc_name, {'HGNC': hgnc_id, 'UP': up_id})

    statements = []
    for int_id, interaction in res_dict.items():
        egid_a = interaction['ENTREZ_GENE_A']
        egid_b = interaction['ENTREZ_GENE_B']
        # Both participants are resolved before any filtering happens
        name_a, refs_a = resolve_entrez(egid_a)
        name_b, refs_b = resolve_entrez(egid_b)
        if name_a is None or name_b is None:
            continue
        if interaction['EXPERIMENTAL_SYSTEM_TYPE'] != 'physical':
            logger.info("Skipping non-physical interaction: %s" %
                        str(interaction))
            continue
        members = [Agent(name_a, db_refs=refs_a),
                   Agent(name_b, db_refs=refs_b)]
        # The whole interaction record is kept as the evidence annotations
        evidence = Evidence(source_api='biogrid', source_id=int_id,
                            pmid=interaction['PUBMED_ID'], text=None,
                            annotations=interaction)
        statements.append(Complex(members, evidence=evidence))
    return statements
def test_simple_mapping():
    """Map one statement with the default grounding map and check the refs.

    The substrate keeps its TEXT entry and gains the BE entry 'AKT'; the
    objects involved must also pass ``unicode_strs``.
    """
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    stmt = Phosphorylation(None, akt)
    gm = GroundingMapper(default_grounding_map)
    mapped = gm.map_agents([stmt])
    assert len(mapped) == 1
    mapped_akt = mapped[0].sub
    mapped_refs = mapped_akt.db_refs
    assert mapped_refs['TEXT'] == 'Akt'
    assert mapped_refs['BE'] == 'AKT'
    assert unicode_strs((akt, stmt, gm, mapped_akt))
def test_model_extend():
    """Check that ``extend_unique`` only adds statements not already present.

    The first and third evidences have identical fields, so after adding the
    first statement and extending with the other two, two statements are
    expected. A statement about a different agent then brings it to three.
    """
    def wrap(stmt):
        # Every EMMAA statement gets the current time and search terms ['x']
        return EmmaaStatement(stmt, datetime.datetime.now(), ['x'])

    evidences = [Evidence(pmid='1234', text=text, source_api='x')
                 for text in ('abcd', 'abcde', 'abcd')]
    indra_stmts = [Phosphorylation(None, Agent('a'), evidence=ev)
                   for ev in evidences]
    emmaa_stmts = [wrap(stmt) for stmt in indra_stmts]

    model = EmmaaModel('x', {'search_terms': [], 'ndex': {'network': None}})
    model.add_statements([emmaa_stmts[0]])
    model.extend_unique(emmaa_stmts[1:])
    assert len(model.stmts) == 2

    other = wrap(Phosphorylation(None, Agent('b'), evidence=evidences[0]))
    model.extend_unique([other])
    assert len(model.stmts) == 3
def test_in_place_overwrite_of_gm():
    """Mapping must not add keys to the grounding map entry it used.

    The map entry starts with TEXT and UP only; after mapping, the mapper's
    ``gm`` entry must still have exactly those two keys.
    """
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    grounding_map = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482'}}
    mapper = GroundingMapper(grounding_map)
    # Only the side effect on the mapper matters; the result is discarded
    mapper.map_agents([Phosphorylation(None, erk)])
    assert set(mapper.gm['ERK1'].keys()) == {'TEXT', 'UP'}
def test_find_contradicts():
    """Three opposing statement pairs should yield three contradictions.

    The pairs are Inhibition/Activation, IncreaseAmount/DecreaseAmount, and
    two ActiveForm statements on the same phosphorylated agent that differ
    only in their final boolean argument.
    """
    def phospho_a():
        # A fresh agent and ModCondition for every statement
        return Agent('a', mods=[ModCondition('phosphorylation', None, None,
                                             True)])

    inhibition = Inhibition(Agent('a'), Agent('b'))
    activation = Activation(Agent('a'), Agent('b'))
    increase = IncreaseAmount(Agent('a'), Agent('b'))
    decrease = DecreaseAmount(Agent('a'), Agent('b'))
    active = ActiveForm(phospho_a(), 'kinase', True)
    inactive = ActiveForm(phospho_a(), 'kinase', False)

    preassembler = Preassembler(
        hierarchies,
        [inhibition, activation, increase, decrease, active, inactive])
    contradicts = preassembler.find_contradicts()
    assert len(contradicts) == 3

    # Pairs are compared as sets of UUIDs so their internal order is ignored
    expected_pairs = ({inhibition.uuid, activation.uuid},
                      {increase.uuid, decrease.uuid},
                      {active.uuid, inactive.uuid})
    for first, second in contradicts:
        assert {first.uuid, second.uuid} in expected_pairs
def test_bound_condition_refinement():
    """A statement with more specific bound context should be supported by
    a less specific statement."""
    src = Agent('SRC', db_refs={'HGNC': '11283'})
    gtp = Agent('GTP', db_refs={'CHEBI': '15996'})
    nras_free = Agent('NRAS', db_refs={'HGNC': '7989'})
    nras_gtp_bound = Agent('NRAS', db_refs={'HGNC': '7989'},
                           bound_conditions=[BoundCondition(gtp, True)])
    less_specific = Phosphorylation(src, nras_free, 'tyrosine', '32')
    more_specific = Phosphorylation(src, nras_gtp_bound, 'tyrosine', '32')

    preassembler = Preassembler(hierarchies,
                                stmts=[less_specific, more_specific])
    top_level = preassembler.combine_related()

    # Only the statement with the bound condition is expected at the top
    # level, backed by the one without it.
    assert len(top_level) == 1
    winner = top_level[0]
    assert winner.equals(more_specific)
    assert len(winner.supported_by) == 1
    assert winner.supported_by[0].equals(less_specific)
def test_bound_condition_mapping_agent_json():
    """Map a substrate with a bound condition using grounding and agent maps.

    The mapper is built from both the default grounding map and the default
    agent map. The substrate is expected to gain FPLX 'AKT' and the agent in
    its bound condition to be grounded as ERK1.
    """
    akt = Agent('pkbA', db_refs={'TEXT': 'p-Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    mapper = GroundingMapper(default_grounding_map, default_agent_map)
    mapped_stmt = mapper.map_agents([Phosphorylation(None, akt)])[0]

    mapped_akt = mapped_stmt.sub
    mapped_erk = mapped_akt.bound_conditions[0].agent
    # A check of the mapped TEXT ref against 'p-AKT' is disabled in the
    # original test:
    # assert mapped_akt.db_refs['TEXT'] == 'p-AKT', mapped_akt.db_refs
    assert mapped_akt.db_refs['FPLX'] == 'AKT', mapped_akt.db_refs
    expected_erk_refs = (('TEXT', 'ERK1'), ('HGNC', '6877'),
                         ('UP', 'P27361'))
    for namespace, identifier in expected_erk_refs:
        assert mapped_erk.db_refs[namespace] == identifier
def test_bound_condition_mapping_multi():
    """Mapping covers the same agent both as substrate and as bound partner.

    ERK1 appears as the statement's substrate and inside the enzyme's bound
    condition; both occurrences are expected to be grounded identically.
    Uses a mapper built from the default grounding map.
    """
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    mapper = GroundingMapper(default_grounding_map)
    mapped_stmt = mapper.map_agents([Phosphorylation(akt, erk)])[0]

    mapped_akt = mapped_stmt.enz
    bound_erk = mapped_akt.bound_conditions[0].agent
    substrate_erk = mapped_stmt.sub
    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'

    expected_erk_refs = (('TEXT', 'ERK1'), ('HGNC', '6877'),
                         ('UP', 'P27361'))
    for mapped_erk in (bound_erk, substrate_erk):
        for namespace, identifier in expected_erk_refs:
            assert mapped_erk.db_refs[namespace] == identifier
def test_bound_condition_mapping():
    """The grounding mapper also grounds agents inside a bound condition.

    The substrate is expected to gain FPLX 'AKT' and the agent in its bound
    condition to be grounded as ERK1.
    """
    akt = Agent('pkbA', db_refs={'TEXT': 'Akt', 'UP': 'XXXXXX'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    akt.bound_conditions = [BoundCondition(erk)]
    mapper = GroundingMapper(default_grounding_map)
    mapped_stmt = mapper.map_agents([Phosphorylation(None, akt)])[0]

    mapped_akt = mapped_stmt.sub
    mapped_erk = mapped_akt.bound_conditions[0].agent
    assert mapped_akt.db_refs['TEXT'] == 'Akt'
    assert mapped_akt.db_refs['FPLX'] == 'AKT'
    expected_erk_refs = (('TEXT', 'ERK1'), ('HGNC', '6877'),
                         ('UP', 'P27361'))
    for namespace, identifier in expected_erk_refs:
        assert mapped_erk.db_refs[namespace] == identifier
def map_agent(self, agent, do_rename):
    """Return the given Agent with its grounding mapped.

    Grounds a single agent based on its TEXT db_refs entry. The agent map is
    consulted first, then the grounding map.

    Parameters
    ----------
    agent : :py:class:`indra.statements.Agent`
        The Agent to map.
    do_rename : bool
        Passed on to ``update_agent_db_refs``. If True, the Agent name is
        updated based on the mapped grounding, with priority FamPlex ID,
        HGNC symbol, then the gene name from Uniprot.

    Returns
    -------
    grounded_agent : :py:class:`indra.statements.Agent`
        The grounded Agent. This is a new object when the agent was loaded
        from the agent map's JSON, None when the text maps to None, and the
        input object otherwise.
    maps_to_none : bool
        True if the Agent is in the grounding map and maps to None.
    """
    agent_text = agent.db_refs.get('TEXT')

    # An agent-map entry replaces the agent with one rebuilt from JSON
    agent_map_entry = self.agent_map.get(agent_text)
    if agent_map_entry:
        return Agent._from_json(agent_map_entry['agent']), False

    # Text that is not in the grounding map leaves the agent untouched
    if agent_text not in self.gm:
        return agent, False

    # In the map but mapped to None: signal the caller to filter it out
    if self.gm[agent_text] is None:
        logger.debug("Skipping %s" % agent_text)
        return None, True

    # Any other entry is applied to the agent's db_refs
    self.update_agent_db_refs(agent, agent_text, do_rename)
    return agent, False