def test_msa_custom_corpus_stmt_type():
    """Query a pickled three-statement corpus by verb and check the top hit.

    The corpus holds a Phosphorylation, an Inhibition and an Activation that
    all act on KRAS; each verb-constrained query is expected to return the
    statement of the corresponding type first.
    """
    corpus_path = 'test_corpus2.pkl'
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    corpus_stmts = [
        Phosphorylation(Agent('x'), kras),
        Inhibition(Agent('y'), kras),
        Activation(Agent('z'), kras),
    ]
    # Write the corpus to disk so the MSA can load it from a pickle.
    with open(corpus_path, 'wb') as fh:
        pickle.dump(corpus_stmts, fh)

    msa = MSA(corpus_config='pickle:%s' % corpus_path)

    # The 'activate' query should give back the Activation by 'z'.
    activation_finder = msa.find_mechanisms('to_target', target=kras,
                                            verb='activate')
    top_activation = activation_finder.get_statements()[0]
    assert type(top_activation).__name__ == 'Activation'
    assert top_activation.subj.name == 'z'

    # The 'phosphorylate' query should give back the Phosphorylation by 'x'.
    phos_finder = msa.find_mechanisms('to_target', target=kras,
                                      verb='phosphorylate')
    top_phos = phos_finder.get_statements()[0]
    assert type(top_phos).__name__ == "Phosphorylation"
    assert top_phos.enz.name == 'x'
def test_valid_keys_no_text():
    """An agent carrying only a PUBCHEM ID should still yield statements."""
    msa = MSA()
    drug = Agent('vemurafenib', db_refs={'PUBCHEM': '42611257'})
    # block=True is passed so the call waits for the statements.
    found = msa.find_mechanisms('from_source', drug).get_statements(block=True)
    assert found
def test_get_finder_agents():
    """Check the "other" and "fixed" agents reported by a finder.

    Queries for phosphorylation of SOCS1 and verifies that the other agents
    are Agent instances led by PIM1, and that SOCS1 is reported as the fixed
    agent under the 'object' role.
    """
    msa = MSA()
    ag = Agent('SOCS1', db_refs={'HGNC': '19383'})
    finder = msa.find_mechanisms('to_target', ag, verb='phosphorylate')

    other_agents = finder.get_other_agents()
    assert all(isinstance(a, Agent) for a in other_agents)
    # The other names should be sorted with PIM1 first (most evidence)
    assert other_agents[0].name == 'PIM1'

    fixed_agents = finder.get_fixed_agents()
    assert 'object' in fixed_agents, fixed_agents
    # The failure message must index the same key that is being checked;
    # looking up a different key here could raise KeyError and hide the
    # real assertion failure.
    assert fixed_agents['object'][0].name == 'SOCS1', fixed_agents['object']
def test_statements_from_neo4j():
    """Run KRAS activation queries in both directions against a neo4j corpus.

    Connection details are read from the INDRA_NEO4J_USER,
    INDRA_NEO4J_PASSWORD and INDRA_NEO4J_URL environment variables.
    """
    user, pw, url = (os.environ.get(key) for key in
                     ('INDRA_NEO4J_USER', 'INDRA_NEO4J_PASSWORD',
                      'INDRA_NEO4J_URL'))
    msa = MSA(corpus_config='neo4j:bolt://%s:%s@%s' % (user, pw, url))
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})

    # Same expectations for KRAS as the target and as the source.
    for method, role in (('to_target', 'target'), ('from_source', 'source')):
        finder = msa.find_mechanisms(method, **{role: kras,
                                                'verb': 'activate'})
        res_stmts = finder.get_statements()
        assert len(res_stmts) > 1000
        assert isinstance(res_stmts[0], Activation)
def test_msa_custom_corpus():
    """Load a one-statement pickle corpus and query it with a hit and a miss."""
    corpus_path = 'test_corpus.pkl'
    expected = Phosphorylation(Agent('XXXX'),
                               Agent('YYYY', db_refs={'HGNC': '1'}))
    # Write the single test statement out as the corpus pickle.
    with open(corpus_path, 'wb') as fh:
        pickle.dump([expected], fh)

    msa = MSA(corpus_config='pickle:%s' % corpus_path)

    # Querying for the statement's own target should return that statement.
    hit_finder = msa.find_mechanisms(
        'to_target', target=Agent('YYYY', db_refs={'HGNC': '1'}))
    hits = hit_finder.get_statements()
    assert hits[0].matches(expected)

    # A target that is not in the corpus should return nothing.
    miss_finder = msa.find_mechanisms(
        'to_target', target=Agent('ERK', db_refs={'FPLX': 'ERK'}))
    misses = miss_finder.get_statements()
    assert not misses
from config import read_from_config
from entity_sign import EntitySign
from indra.assemblers.english import EnglishAssembler
from bioagents.msa.msa import MSA
from indra.statements import Agent
from indra.databases import hgnc_client, chebi_client
from indra.preassembler.grounding_mapper import GroundingMapper
import json
import requests

msa = MSA()


def get_agent(name):
    """Return an Agent for `name`, grounded via the INDRA grounding service.

    The name is posted as ``{'text': name}`` to the URL configured under
    ``INDRA_GROUND_URL``. The first entry of the JSON response supplies the
    grounding (its ``term['db']`` / ``term['id']``), after which the agent's
    name and references are standardized.

    Parameters
    ----------
    name : str
        The entity text to ground.

    Returns
    -------
    Agent
        A grounded and standardized Agent, or an Agent carrying only a TEXT
        reference if the service does not answer with status 200 or returns
        no results.
    """
    opts = {'text': name}
    indra_url = read_from_config('INDRA_GROUND_URL')
    res = requests.post(indra_url, json=opts)

    # Fall back to a TEXT-only agent if EITHER the request failed OR it
    # produced no results. The status is checked first so that a failed
    # response body is never parsed or indexed.
    if res.status_code != 200:
        return Agent(name, db_refs={'TEXT': name})
    js = res.json()
    if not js:
        return Agent(name, db_refs={'TEXT': name})

    top_term = js[0]['term']
    agent = Agent(name, db_refs={'TEXT': name,
                                 top_term['db']: top_term['id']})
    GroundingMapper.standardize_agent_name(agent, standardize_refs=True)
    return agent
def __init__(self, *args, **kwargs):
    """Attach an MSA instance, then run the parent class initializer.

    All positional and keyword arguments are forwarded unchanged to the
    parent initializer.
    """
    # self.msa is assigned before the parent initializer is invoked.
    self.msa = MSA()
    super(MSA_Module, self).__init__(*args, **kwargs)
class MSA_Module(Bioagent):
    """Bioagent whose handlers query an MSA instance held in ``self.msa``.

    Each ``respond_*`` method takes the KQML ``content`` of a request and
    returns either a ``KQMLPerformative('SUCCESS')`` reply or the result of
    ``self.make_failure``.
    """
    name = 'MSA'
    tasks = ['PHOSPHORYLATION-ACTIVATING', 'FIND-RELATIONS-FROM-LITERATURE',
             'GET-PAPER-MODEL', 'CONFIRM-RELATION-FROM-LITERATURE',
             'GET-COMMON']
    signor_afs = _read_signor_afs()

    def __init__(self, *args, **kwargs):
        """Create the MSA, then run the parent initializer."""
        # self.msa is assigned before the parent initializer is invoked.
        self.msa = MSA()
        super(MSA_Module, self).__init__(*args, **kwargs)

    def respond_get_common(self, content):
        """Find the common up/down streams of a protein.

        Reads the 'genes' and 'up-down' arguments from `content`. Fails with
        NO_KNOWLEDGE_ACCESS, NO_TARGET (fewer than two agents),
        UNKNOWN_ACTION (unrecognized direction) or MISSING_TARGET (the MSA
        raised EntityError). On success the reply carries 'commons' and
        'prefix'.
        """
        # TODO: This entire function could be part of the MSA.
        if not CAN_CHECK_STATEMENTS:
            return self.make_failure(
                'NO_KNOWLEDGE_ACCESS',
                'Cannot access the database through the web api.'
                )
        genes_ekb = content.gets('genes')
        agents = _get_agents(genes_ekb)
        if len(agents) < 2:
            return self.make_failure('NO_TARGET',
                                     'Only %d < 2 agents given.' % len(agents))

        direction = content.gets('up-down')
        logger.info("Got genes: %s and direction %s.", agents, direction)

        # Choose some parameters based on direction.
        if direction == 'ONT::MORE':
            method = 'common_upstreams'
            prefix = 'up'
        elif direction == 'ONT::SUCCESSOR':
            method = 'common_downstreams'
            prefix = 'down'
        else:
            # TODO: With the new MSA we could handle common neighbors.
            return self.make_failure("UNKNOWN_ACTION", direction)

        # Find the commonalities.
        try:
            finder = self.msa.find_mechanisms(method, *agents)
        except EntityError as e:
            return self.make_failure("MISSING_TARGET", e.args[0])

        # Build a readable name list ("A and B" or "A, B, and C") and post
        # the statements to provenance.
        if len(agents) > 2:
            name_list = ', '.join(ag.name for ag in agents[:-1]) + ','
        else:
            name_list = agents[0].name
        name_list += ' and ' + agents[-1].name
        msg = ('%sstreams of ' % prefix).capitalize() + name_list
        self.send_provenance_for_stmts(finder.get_statements(), msg,
                                       ev_counts=finder.get_ev_totals())

        # Create the reply
        resp = KQMLPerformative('SUCCESS')
        gene_list = KQMLList()
        for gene in finder.get_common_entities():
            gene_list.append(gene)
        resp.set('commons', gene_list)
        resp.sets('prefix', prefix)
        return resp

    def respond_phosphorylation_activating(self, content):
        """Return response content to phosphorylation_activating request.

        The request head must match ``<action>-<polarity>``. The 'target'
        argument is required; the optional 'site' argument must have the
        form ``<residue>-<position>``. Fails with NO_KNOWLEDGE_ACCESS,
        UNKNOWN_ACTION, MISSING_TARGET, INVALID_SITE or MISSING_MECHANISM.
        """
        if not CAN_CHECK_STATEMENTS:
            return self.make_failure(
                'NO_KNOWLEDGE_ACCESS',
                'Cannot access the database through the web api.'
                )
        heading = content.head()
        # Raw string: \w must reach the regex engine as written.
        m = re.match(r'(\w+)-(\w+)', heading)
        if m is None:
            return self.make_failure('UNKNOWN_ACTION')
        action, polarity = [s.lower() for s in m.groups()]

        target_ekb = content.gets('target')
        if not target_ekb:
            return self.make_failure('MISSING_TARGET')
        agent = _get_agent(target_ekb)
        logger.debug('Found agent (target): %s.', agent.name)

        site = content.gets('site')
        if site is None:
            residue = None
            position = None
        else:
            try:
                residue, position = site.split('-')
            except ValueError:
                # The site did not split into exactly two parts.
                return self.make_failure('INVALID_SITE')

        finder = self.msa.find_phos_activeforms(agent, residue=residue,
                                                position=position,
                                                action=action,
                                                polarity=polarity)
        stmts = finder.get_statements()
        self.say(finder.describe())
        logger.info("Found %d matching statements.", len(stmts))
        if not stmts:
            return self.make_failure(
                'MISSING_MECHANISM',
                "Could not find statement matching phosphorylation activating "
                "%s, %s, %s, %s." % (agent.name, residue, position,
                                     'phosphorylation')
                )

        msg = "phosphorylation at %s%s activates %s." \
            % (residue, position, agent.name)
        self.send_provenance_for_stmts(stmts, msg,
                                       ev_counts=finder.get_ev_totals())
        reply = KQMLPerformative('SUCCESS')
        reply.set('is-activating', 'TRUE')
        return reply

    def _get_query_info(self, content):
        """Extract (subject, object, statement type) from a request.

        Raises MSALookupError('MISSING_MECHANISM') when neither a source nor
        a target agent is available. A type of 'unknown' is returned as None.
        """
        subj = _get_agent(content.gets('source'))
        obj = _get_agent(content.gets('target'))
        if not subj and not obj:
            raise MSALookupError('MISSING_MECHANISM')

        stmt_type = content.gets('type')
        if stmt_type == 'unknown':
            stmt_type = None
        return subj, obj, stmt_type

    def _send_provenance_async(self, finder, desc):
        """Start a thread that sends provenance for the finder's statements.

        `desc` is prefixed to a textual summary of the finder's query, which
        is passed along as the natural-language description.
        """
        q = finder.query
        nl_input = {k: ag.name if ag else 'unknown'
                    for k, ag in [('subject', q.subj), ('object', q.obj)]}
        nl_input['stmt_type'] = q.stmt_type
        fmt = ('subject={subject}, statement type={stmt_type}, '
               'object={object}')
        nl = fmt.format(**nl_input)
        nl = "%s: %s" % (desc, nl)

        # Non-blocking peek, used only for the log message below.
        stmts = finder.get_statements(block=False)
        num_stmts = 'no' if stmts is None else len(stmts)
        logger.info("Retrieved %s statements so far. Sending provenance in a "
                    "thread...", num_stmts)
        try:
            th = Thread(target=self._send_display_stmts, args=(finder, nl))
            th.start()
        except Exception as e:
            # A failure to start the thread is logged and not propagated.
            logger.warning("Failed to start thread to send provenance.")
            logger.exception(e)
        return

    def respond_find_relations_from_literature(self, content):
        """Find statements matching some subject, verb, object information.

        Replies with status 'WORKING' if no statements are available after
        the 15 second timeout, and 'FINISHED' with the statement count
        otherwise.
        """
        try:
            subj, obj, stmt_type = self._get_query_info(content)
            finder = \
                self.msa.find_mechanism_from_input(subj, obj, None, stmt_type,
                                                   ev_limit=3, persist=False,
                                                   timeout=5)
            self._send_provenance_async(finder,
                                        'finding statements that match')
        except MSALookupError as mle:
            return self.make_failure(mle.args[0])

        stmts = finder.get_statements(timeout=15)
        if stmts is None:
            # Calling this success may be a bit ambitious.
            resp = KQMLPerformative('SUCCESS')
            resp.set('status', 'WORKING')
            resp.set('relations-found', 'nil')
            resp.set('dump-limit', str(DUMP_LIMIT))
            return resp

        self.say(finder.describe())
        resp = KQMLPerformative('SUCCESS')
        resp.set('status', 'FINISHED')
        resp.set('relations-found', str(len(stmts)))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp

    def respond_confirm_relation_from_literature(self, content):
        """Confirm a protein-protein interaction given subject, object, verb.

        Fails with MISSING_MECHANISM if no statements are available after
        the 20 second timeout.
        """
        try:
            subj, obj, stmt_type = self._get_query_info(content)
            finder = \
                self.msa.find_mechanism_from_input(subj, obj, None, stmt_type,
                                                   ev_limit=5, persist=False,
                                                   timeout=5)
            self._send_provenance_async(
                finder, 'confirming that some statements match')
        except MSALookupError as mle:
            return self.make_failure(mle.args[0])

        stmts = finder.get_statements(timeout=20)
        if stmts is None:
            # TODO: Handle this more gracefully, if possible.
            return self.make_failure('MISSING_MECHANISM')

        num_stmts = len(stmts)
        self.say(finder.describe())
        resp = KQMLPerformative('SUCCESS')
        resp.set('some-relations-found', 'TRUE' if num_stmts else 'FALSE')
        resp.set('num-relations-found', str(num_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp

    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid.

        The 'pmid' argument must look like ``PMID-<digits>``; a missing or
        malformed value fails with BAD_INPUT. A 404 "Invalid or unavailable"
        error from the statement lookup fails with MISSING_MECHANISM; any
        other IndraDBRestAPIError is re-raised.
        """
        pmid_raw = content.gets('pmid')
        prefix = 'PMID-'
        # A missing pmid is rejected the same way as a malformed one.
        if not pmid_raw or not pmid_raw.startswith(prefix) \
                or not pmid_raw[len(prefix):].isdigit():
            return self.make_failure('BAD_INPUT')
        pmid = pmid_raw[len(prefix):]

        try:
            stmts = get_statements_for_paper([('pmid', pmid)])
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s", e.reason)
                return self.make_failure('MISSING_MECHANISM')
            # Bare raise keeps the original traceback.
            raise

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp

        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        # Diagrams are built from the mapped statements, while the reported
        # count uses the preassembled top-level statements.
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp

    def send_display_model(self, diagrams):
        """Send one display message per non-empty diagram resource.

        `diagrams` maps a diagram type to a resource. 'sbgn' resources are
        sent as a 'display-sbgn' graph; every other type is sent as a
        'display-image' path.
        """
        for diagram_type, resource in diagrams.items():
            if not resource:
                continue
            if diagram_type == 'sbgn':
                content = KQMLList('display-sbgn')
                content.set('type', diagram_type)
                content.sets('graph', resource)
            else:
                content = KQMLList('display-image')
                content.set('type', diagram_type)
                content.sets('path', resource)
            self.tell(content)

    def _send_display_stmts(self, finder, nl_question):
        """Wait for the finder's statements and send them to provenance.

        Nothing is sent when there are no statements. Any exception is
        logged and then re-raised.
        """
        try:
            logger.debug("Waiting for statements to finish...")
            stmts = finder.get_statements(block=True)
            if not stmts:
                return
            start_time = datetime.now()
            logger.info('Sending display statements.')
            self.send_provenance_for_stmts(stmts, nl_question,
                                           ev_counts=finder.get_ev_totals())
            logger.info("Finished sending provenance after %s seconds.",
                        (datetime.now() - start_time).total_seconds())
        except Exception as e:
            logger.exception(e)
            logger.error("Failed to post provenance.")
            raise
class MSA_Module(Bioagent):
    """Bioagent whose handlers query an MSA instance held in ``self.msa``.

    Each ``respond_*`` method takes the KQML ``content`` of a request and
    returns either a ``KQMLPerformative('SUCCESS')`` reply or the result of
    ``self.make_failure``.
    """
    name = 'MSA'
    tasks = ['PHOSPHORYLATION-ACTIVATING', 'FIND-RELATIONS-FROM-LITERATURE',
             'GET-PAPER-MODEL', 'CONFIRM-RELATION-FROM-LITERATURE',
             'GET-COMMON']
    signor_afs = _read_signor_afs()

    def __init__(self, *args, **kwargs):
        """Create the MSA, then run the parent initializer."""
        # self.msa is assigned before the parent initializer is invoked.
        self.msa = MSA()
        super(MSA_Module, self).__init__(*args, **kwargs)

    def respond_get_common(self, content):
        """Find the common up/down streams of a protein.

        Reads the 'genes' and 'up-down' arguments from `content`. Fails with
        NO_KNOWLEDGE_ACCESS, NO_TARGET (fewer than two agents),
        UNKNOWN_ACTION (unrecognized direction) or MISSING_TARGET (the MSA
        raised EntityError). On success the reply carries 'entities-found'
        and 'prefix'.
        """
        # TODO: This entire function could be part of the MSA.
        if not CAN_CHECK_STATEMENTS:
            return self.make_failure(
                'NO_KNOWLEDGE_ACCESS',
                'Cannot access the database through the web api.'
                )
        genes_cljson = content.get('genes')
        agents = [self.get_agent(ag) for ag in genes_cljson]
        if len(agents) < 2:
            return self.make_failure('NO_TARGET',
                                     'Only %d < 2 agents given.' % len(agents))

        direction = content.gets('up-down')
        logger.info("Got genes: %s and direction %s.", agents, direction)

        # Choose some parameters based on direction.
        if direction == 'ONT::MORE':
            method = 'common_upstreams'
            prefix = 'up'
        elif direction == 'ONT::SUCCESSOR':
            method = 'common_downstreams'
            prefix = 'down'
        else:
            # TODO: With the new MSA we could handle common neighbors.
            return self.make_failure("UNKNOWN_ACTION", direction)

        # Find the commonalities.
        try:
            finder = self.msa.find_mechanisms(method, *agents)
        except EntityError as e:
            return self.make_failure("MISSING_TARGET", e.args[0])

        # Build a readable name list ("A and B" or "A, B, and C") and post
        # the statements to provenance.
        if len(agents) > 2:
            name_list = ', '.join(ag.name for ag in agents[:-1]) + ','
        else:
            name_list = agents[0].name
        name_list += ' and ' + agents[-1].name
        msg = ('%sstreams of ' % prefix).capitalize() + name_list
        self.send_provenance_for_stmts(
            finder.get_statements(), msg,
            ev_counts=finder.get_ev_totals(),
            source_counts=finder.get_source_counts())

        # Create the reply
        resp = KQMLPerformative('SUCCESS')
        other_agents = finder.get_other_agents()
        resp.set('entities-found', self.make_cljson(other_agents))
        resp.sets('prefix', prefix)
        return resp

    def respond_phosphorylation_activating(self, content):
        """Return response content to phosphorylation_activating request.

        The request head must match ``<action>-<polarity>``. The 'target'
        argument is required; the optional 'site' argument must have the
        form ``<residue>-<position>``. Fails with NO_KNOWLEDGE_ACCESS,
        UNKNOWN_ACTION, MISSING_TARGET, INVALID_SITE or MISSING_MECHANISM.
        On success the reply carries 'is-activating' and 'suggestion'.
        """
        if not CAN_CHECK_STATEMENTS:
            return self.make_failure(
                'NO_KNOWLEDGE_ACCESS',
                'Cannot access the database through the web api.'
                )
        heading = content.head()
        m = re.match(r'(\w+)-(\w+)', heading)
        if m is None:
            return self.make_failure('UNKNOWN_ACTION')
        action, polarity = [s.lower() for s in m.groups()]

        target_cljson = content.get('target')
        if target_cljson is None or not len(target_cljson):
            return self.make_failure('MISSING_TARGET')
        agent = self.get_agent(target_cljson)
        # This is a potential bug in the BA that we can handle here
        if isinstance(agent, list):
            agent = agent[0]
        logger.debug('Found agent (target): %s.', agent.name)

        site = content.gets('site')
        if site is None:
            residue = None
            position = None
        else:
            try:
                residue, position = site.split('-')
            except ValueError:
                # The site did not split into exactly two parts.
                return self.make_failure('INVALID_SITE')

        finder = self.msa.find_phos_activeforms(agent, residue=residue,
                                                position=position,
                                                action=action,
                                                polarity=polarity)
        stmts = finder.get_statements()
        logger.info("Found %d matching statements.", len(stmts))
        if not stmts:
            return self.make_failure(
                'MISSING_MECHANISM',
                "Could not find statement matching phosphorylation activating "
                "%s, %s, %s, %s." % (agent.name, residue, position,
                                     'phosphorylation')
                )

        # The description is returned as a 'suggestion' in the reply; it is
        # not spoken via self.say.
        description = finder.describe(include_negative=False)
        msg = "phosphorylation at %s%s activates %s." \
            % (residue, position, agent.name)
        self.send_provenance_for_stmts(
            stmts, msg,
            ev_counts=finder.get_ev_totals(),
            source_counts=finder.get_source_counts())
        reply = KQMLPerformative('SUCCESS')
        reply.set('is-activating', 'TRUE')
        reply.sets('suggestion', description)
        return reply

    def _get_query_info(self, content):
        """Extract (subject, object, statement type, filter agents).

        Raises MSALookupError('MISSING_MECHANISM') when neither a source nor
        a target agent is available. A type of 'unknown' is returned as
        None, and a missing 'filter_agents' argument yields an empty list.
        """
        subj = _get_agent_if_present(content, 'source')
        obj = _get_agent_if_present(content, 'target')
        if not subj and not obj:
            raise MSALookupError('MISSING_MECHANISM')

        kfilter_agents = content.get('filter_agents')
        if kfilter_agents:
            filter_agents = Bioagent.get_agent(kfilter_agents)
        else:
            filter_agents = []

        stmt_type = content.gets('type')
        if stmt_type == 'unknown':
            stmt_type = None
        return subj, obj, stmt_type, filter_agents

    def _send_provenance_async(self, finder, desc):
        """Start a thread that sends provenance for the finder's statements.

        `desc` is prefixed to a textual summary of the finder's query, which
        is passed along as the natural-language description.
        """
        q = finder.query
        nl_input = {k: ag.name if ag else 'unknown'
                    for k, ag in [('subject', q.subj), ('object', q.obj)]}
        nl_input['stmt_type'] = q.stmt_type
        fmt = ('subject={subject}, statement type={stmt_type}, '
               'object={object}')
        nl = fmt.format(**nl_input)
        nl = "%s: %s" % (desc, nl)

        # Non-blocking peek, used only for the log message below.
        stmts = finder.get_statements(block=False)
        num_stmts = 'no' if stmts is None else len(stmts)
        logger.info("Retrieved %s statements so far. Sending provenance in a "
                    "thread...", num_stmts)
        try:
            th = Thread(target=self._send_display_stmts, args=(finder, nl))
            th.start()
        except Exception as e:
            # A failure to start the thread is logged and not propagated.
            logger.warning("Failed to start thread to send provenance.")
            logger.exception(e)
        return

    def respond_find_relations_from_literature(self, content):
        """Find statements matching some subject, verb, object information.

        Replies with status 'WORKING' if no statements are available after
        the 15 second timeout. Otherwise replies 'FINISHED' with the other
        agents found, the statement count, a suggestion text and up to ten
        of the statements.
        """
        try:
            subj, obj, stmt_type, filter_agents = \
                self._get_query_info(content)
            finder = \
                self.msa.find_mechanism_from_input(subj, obj, None, stmt_type,
                                                   ev_limit=3, persist=False,
                                                   timeout=5,
                                                   filter_agents=filter_agents)
            self._send_provenance_async(finder,
                                        'finding statements that match')
        except MSALookupError as mle:
            return self.make_failure(mle.args[0])

        stmts = finder.get_statements(timeout=15)
        if stmts is None:
            # Calling this success may be a bit ambitious.
            resp = KQMLPerformative('SUCCESS')
            resp.set('status', 'WORKING')
            resp.set('entities-found', 'nil')
            resp.set('num-relations-found', '0')
            resp.set('dump-limit', str(DUMP_LIMIT))
            return resp

        # The finder is only asked for agents and a description when there
        # are statements. The description is returned as a 'suggestion'; it
        # is not spoken via self.say.
        agents = finder.get_other_agents() if stmts else []
        description = finder.describe(include_negative=False) \
            if stmts else None

        resp = KQMLPerformative('SUCCESS')
        resp.set('status', 'FINISHED')
        resp.set('entities-found',
                 self.make_cljson(agents) if agents else KQMLList([]))
        resp.set('num-relations-found', str(len(stmts)))
        resp.set('dump-limit', str(DUMP_LIMIT))
        resp.sets('suggestion', description if description else 'nil')
        top_stmts = self.make_cljson(stmts[:10]) if stmts else KQMLList([])
        resp.set('top-stmts', top_stmts)
        return resp

    def respond_confirm_relation_from_literature(self, content):
        """Confirm a protein-protein interaction given subject, object, verb.

        Fails with MISSING_MECHANISM if no statements are available after
        the 20 second timeout.
        """
        try:
            subj, obj, stmt_type, filter_agents = \
                self._get_query_info(content)
            finder = \
                self.msa.find_mechanism_from_input(subj, obj, None, stmt_type,
                                                   ev_limit=5, persist=False,
                                                   timeout=5,
                                                   filter_agents=filter_agents)
            self._send_provenance_async(
                finder, 'confirming that some statements match')
        except MSALookupError as mle:
            return self.make_failure(mle.args[0])

        stmts = finder.get_statements(timeout=20)
        if stmts is None:
            # TODO: Handle this more gracefully, if possible.
            return self.make_failure('MISSING_MECHANISM')

        num_stmts = len(stmts)
        # The description is returned as a 'suggestion'; it is not spoken
        # via self.say.
        description = finder.describe(include_negative=False) \
            if stmts else None
        resp = KQMLPerformative('SUCCESS')
        resp.set('some-relations-found', 'TRUE' if num_stmts else 'FALSE')
        resp.set('num-relations-found', str(num_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        resp.sets('suggestion', description if description else 'nil')
        return resp

    def respond_get_paper_model(self, content):
        """Get and display the model from a paper, indicated by pmid.

        The 'pmid' argument must look like ``PMID-<digits>``; a missing or
        malformed value fails with BAD_INPUT. A 404 "Invalid or unavailable"
        error from the statement lookup fails with MISSING_MECHANISM; any
        other IndraDBRestAPIError is re-raised.
        """
        pmid_raw = content.gets('pmid')
        prefix = 'PMID-'
        # A missing pmid is rejected the same way as a malformed one.
        if not pmid_raw or not pmid_raw.startswith(prefix) \
                or not pmid_raw[len(prefix):].isdigit():
            return self.make_failure('BAD_INPUT')
        pmid = pmid_raw[len(prefix):]

        try:
            stmts = get_statements_for_paper([('pmid', pmid)],
                                             simple_response=True)
        except IndraDBRestAPIError as e:
            if e.status_code == 404 and 'Invalid or unavailable' in e.reason:
                logger.error("Could not find pmid: %s", e.reason)
                return self.make_failure('MISSING_MECHANISM')
            # Bare raise keeps the original traceback.
            raise

        if not stmts:
            resp = KQMLPerformative('SUCCESS')
            resp.set('relations-found', 0)
            return resp

        stmts = ac.map_grounding(stmts)
        stmts = ac.map_sequence(stmts)
        unique_stmts = ac.run_preassembly(stmts, return_toplevel=True)
        # Diagrams are built from the mapped statements, while the reported
        # count uses the preassembled top-level statements.
        diagrams = _make_diagrams(stmts)
        self.send_display_model(diagrams)
        resp = KQMLPerformative('SUCCESS')
        resp.set('relations-found', len(unique_stmts))
        resp.set('dump-limit', str(DUMP_LIMIT))
        return resp

    def send_display_model(self, diagrams):
        """Send one display message per non-empty diagram resource.

        `diagrams` maps a diagram type to a resource. 'sbgn' resources are
        sent as a 'display-sbgn' graph; every other type is sent as a
        'display-image' path.
        """
        for diagram_type, resource in diagrams.items():
            if not resource:
                continue
            if diagram_type == 'sbgn':
                content = KQMLList('display-sbgn')
                content.set('type', diagram_type)
                content.sets('graph', resource)
            else:
                content = KQMLList('display-image')
                content.set('type', diagram_type)
                content.sets('path', resource)
            self.tell(content)

    def _send_display_stmts(self, finder, nl_question):
        """Wait for the finder's statements and send them to provenance.

        Nothing is sent when the finder returns None. Any exception is
        logged and then re-raised.
        """
        try:
            logger.debug("Waiting for statements to finish...")
            stmts = finder.get_statements(block=True)
            if stmts is None:
                return
            start_time = datetime.now()
            logger.info('Sending display statements.')
            self.send_provenance_for_stmts(
                stmts, nl_question,
                ev_counts=finder.get_ev_totals(),
                source_counts=finder.get_source_counts())
            logger.info("Finished sending provenance after %s seconds.",
                        (datetime.now() - start_time).total_seconds())
        except Exception as e:
            logger.exception(e)
            logger.error("Failed to post provenance.")
            raise
def __init__(self, *args, **kwargs):
    """Create the MSA from the CWC_MSA_CORPUS setting, then init the parent.

    The value of the CWC_MSA_CORPUS environment variable (None when unset)
    is passed to MSA as `corpus_config`. All positional and keyword
    arguments are forwarded unchanged to the parent initializer.
    """
    # self.msa is assigned before the parent initializer is invoked.
    self.msa = MSA(corpus_config=os.environ.get('CWC_MSA_CORPUS'))
    super(MSA_Module, self).__init__(*args, **kwargs)