def process_paper(model_name, pmid):
    """Return a REACH processor and a text-format label for one paper.

    If a cached JSON file for the paper exists it is processed directly.
    Otherwise the text is fetched with ``get_full_text`` and read with
    REACH; a ``reach_output.json`` found in the working directory
    afterwards is moved to the cache location.

    Parameters
    ----------
    model_name : str
        Name of the model folder under ``model_path`` whose ``jsons``
        sub-folder holds the cached outputs.
    pmid : str
        Identifier of the paper. It is used in the cache file name and
        passed to REACH as the citation. A warning is logged when it starts
        with 'api' or 'PMID', but processing still continues.

    Returns
    -------
    rp
        Whatever the ``reach`` call returned, or None when the format
        reported by ``get_full_text`` is not one of 'pmc_oa_xml',
        'elsevier_xml' or 'abstract'.
    txt_format : str
        'existing_json' when the cached file was used, otherwise the
        format reported by ``get_full_text``.
    """
    json_path = os.path.join(model_path, model_name,
                             'jsons', 'PMID%s.json' % pmid)

    if pmid.startswith(('api', 'PMID')):
        logger.warning('Invalid PMID: %s', pmid)

    # If the paper has been read, use the json output file
    if os.path.exists(json_path):
        rp = reach.process_json_file(json_path, citation=pmid)
        txt_format = 'existing_json'
    # If the paper has not been read, download the text and read
    else:
        txt, txt_format = get_full_text(pmid, 'pmid')
        reach_ran = True
        if txt_format == 'pmc_oa_xml':
            rp = reach.process_nxml_str(txt, citation=pmid, offline=True)
        elif txt_format == 'elsevier_xml':
            # Extract the raw text from the Elsevier XML
            txt = elsevier_client.extract_text(txt)
            rp = reach.process_text(txt, citation=pmid, offline=True)
        elif txt_format == 'abstract':
            rp = reach.process_text(txt, citation=pmid, offline=True)
        else:
            reach_ran = False
            rp = None
        # Cache the output only when REACH was invoked in this call, so a
        # leftover reach_output.json is never filed under this paper.
        if reach_ran and os.path.exists('reach_output.json'):
            shutil.move('reach_output.json', json_path)

    if rp is not None:
        check_pmids(rp.statements)
    return rp, txt_format
def test_phosphorylate():
    """Check the reading of 'MEK1 phosphorylates ERK2.' in every mode.

    For each entry of ``offline_modes`` exactly one statement is expected,
    with enzyme MAP2K1 and substrate MAPK1, and ``unicode_strs`` must
    accept the statements.
    """
    sentence = 'MEK1 phosphorylates ERK2.'
    for mode in offline_modes:
        processor = reach.process_text(sentence, offline=mode)
        extracted = processor.statements
        assert len(extracted) == 1
        stmt = extracted[0]
        assert stmt.enz.name == 'MAP2K1'
        assert stmt.sub.name == 'MAPK1'
        assert unicode_strs(extracted)
def test_activate():
    """Check the reading of 'HRAS activates BRAF.' in every mode.

    For each entry of ``offline_modes`` exactly one statement is expected,
    with subject HRAS and object BRAF, and ``unicode_strs`` must accept
    the statements.
    """
    sentence = 'HRAS activates BRAF.'
    for mode in offline_modes:
        processor = reach.process_text(sentence, offline=mode)
        extracted = processor.statements
        assert len(extracted) == 1
        stmt = extracted[0]
        assert stmt.subj.name == 'HRAS'
        assert stmt.obj.name == 'BRAF'
        assert unicode_strs(extracted)
def test_activate():
    """Read 'HRAS activates BRAF.' once per offline mode and verify it.

    One statement must come back, its subject named HRAS and its object
    named BRAF; the statements must also pass ``unicode_strs``.
    """
    for use_offline in offline_modes:
        result = reach.process_text('HRAS activates BRAF.',
                                    offline=use_offline)
        stmts = result.statements
        assert len(stmts) == 1
        only = stmts[0]
        assert only.subj.name == 'HRAS'
        assert only.obj.name == 'BRAF'
        assert unicode_strs(stmts)
def test_phosphorylate():
    """Read 'MEK1 phosphorylates ERK2.' once per offline mode and verify it.

    One statement must come back, its enzyme named MAP2K1 and its
    substrate named MAPK1; the statements must also pass ``unicode_strs``.
    """
    for use_offline in offline_modes:
        result = reach.process_text('MEK1 phosphorylates ERK2.',
                                    offline=use_offline)
        stmts = result.statements
        assert len(stmts) == 1
        only = stmts[0]
        assert only.enz.name == 'MAP2K1'
        assert only.sub.name == 'MAPK1'
        assert unicode_strs(stmts)
def test_regulate_amount():
    """Check amount-regulation sentences in every offline mode.

    Each sentence must produce exactly one statement of the listed type
    with subject ERK and object DUSP, and the statements must pass
    ``unicode_strs``.
    """
    # (sentence, expected statement class), read in this order per mode
    cases = (
        ('ERK increases the transcription of DUSP.', IncreaseAmount),
        ('ERK decreases the amount of DUSP.', DecreaseAmount),
    )
    for mode in offline_modes:
        for sentence, expected_type in cases:
            processor = reach.process_text(sentence, offline=mode)
            extracted = processor.statements
            assert len(extracted) == 1
            stmt = extracted[0]
            assert isinstance(stmt, expected_type)
            assert stmt.subj.name == 'ERK'
            assert stmt.obj.name == 'DUSP'
            assert unicode_strs(extracted)
def test_be_grounding():
    """Check 'BE' grounding for 'MEK activates ERK.' in every mode.

    Every mode must yield exactly one statement that passes
    ``unicode_strs``. The 'BE' entries of the subject and object
    ``db_refs`` are only checked when the mode is truthy.
    """
    for offline in offline_modes:
        rp = reach.process_text('MEK activates ERK.', offline=offline)
        assert len(rp.statements) == 1
        assert unicode_strs(rp.statements)
        # Truthiness test instead of comparing against True with ==
        if offline:
            st = rp.statements[0]
            assert st.subj.db_refs.get('BE') == 'MEK'
            assert st.obj.db_refs.get('BE') == 'ERK'
def test_mutation():
    """Check that 'BRAF(V600E) phosphorylates MEK.' carries the mutation.

    One statement is expected whose enzyme is BRAF with a single mutation
    at position '600' from residue 'V' to residue 'E'.
    """
    processor = reach.process_text('BRAF(V600E) phosphorylates MEK.')
    assert len(processor.statements) == 1
    enzyme = processor.statements[0].enz
    assert enzyme.name == 'BRAF'
    assert len(enzyme.mutations) == 1
    mutation = enzyme.mutations[0]
    assert mutation.position == '600'
    assert mutation.residue_from == 'V'
    assert mutation.residue_to == 'E'
def test_be_grounding():
    """Verify the 'BE' db_refs produced for 'MEK activates ERK.'.

    For each entry of ``offline_modes`` one statement passing
    ``unicode_strs`` is required. When the mode is truthy, the subject and
    object must additionally carry 'MEK' and 'ERK' under the 'BE' key.
    """
    for offline in offline_modes:
        rp = reach.process_text('MEK activates ERK.', offline=offline)
        assert len(rp.statements) == 1
        assert unicode_strs(rp.statements)
        if not offline:
            # Grounding is only checked for truthy modes
            continue
        st = rp.statements[0]
        assert st.subj.db_refs.get('BE') == 'MEK'
        assert st.obj.db_refs.get('BE') == 'ERK'
def process_reach(txt):
    """Read `txt` with REACH (``offline=False``) and return its statements.

    Prints a banner, then one tab-separated line per statement holding the
    statement and the text of its first evidence, then the elapsed time.
    """
    print('Using REACH')
    started = time.time()
    processor = reach.process_text(txt, offline=False)
    for stmt in processor.statements:
        first_evidence = stmt.evidence[0]
        print('%s\t%s' % (stmt, first_evidence.text))
    finished = time.time()
    print('Time taken: %.2fs' % (finished - started))
    return processor.statements
def test_regulate_amount():
    """Verify increase/decrease-amount extraction once per offline mode.

    'ERK increases the transcription of DUSP.' must give one
    IncreaseAmount and 'ERK decreases the amount of DUSP.' one
    DecreaseAmount, both with subject ERK and object DUSP.
    """
    expectations = [
        (IncreaseAmount, 'ERK increases the transcription of DUSP.'),
        (DecreaseAmount, 'ERK decreases the amount of DUSP.'),
    ]
    for use_offline in offline_modes:
        for stmt_class, text in expectations:
            result = reach.process_text(text, offline=use_offline)
            stmts = result.statements
            assert len(stmts) == 1
            only = stmts[0]
            assert isinstance(only, stmt_class)
            assert only.subj.name == 'ERK'
            assert only.obj.name == 'DUSP'
            assert unicode_strs(stmts)
def test_mutation():
    """Check the V600E mutation on BRAF in every offline mode.

    'BRAF(V600E) phosphorylates MEK.' must give one statement whose
    enzyme is BRAF with a single mutation at position '600' from 'V' to
    'E'; the statements must also pass ``unicode_strs``.
    """
    sentence = 'BRAF(V600E) phosphorylates MEK.'
    for mode in offline_modes:
        processor = reach.process_text(sentence, offline=mode)
        extracted = processor.statements
        assert len(extracted) == 1
        enzyme = extracted[0].enz
        assert enzyme.name == 'BRAF'
        assert len(enzyme.mutations) == 1
        mutation = enzyme.mutations[0]
        assert mutation.position == '600'
        assert mutation.residue_from == 'V'
        assert mutation.residue_to == 'E'
        assert unicode_strs(extracted)
def test_mutation():
    """Read 'BRAF(V600E) phosphorylates MEK.' per offline mode and verify it.

    The single resulting statement must have BRAF as enzyme, carrying
    exactly one mutation with position '600', residue_from 'V' and
    residue_to 'E'. ``unicode_strs`` must accept the statements.
    """
    for use_offline in offline_modes:
        result = reach.process_text('BRAF(V600E) phosphorylates MEK.',
                                    offline=use_offline)
        stmts = result.statements
        assert len(stmts) == 1
        kinase = stmts[0].enz
        assert kinase.name == 'BRAF'
        assert len(kinase.mutations) == 1
        change = kinase.mutations[0]
        assert change.position == '600'
        assert change.residue_from == 'V'
        assert change.residue_to == 'E'
        assert unicode_strs(stmts)
def reach_process_text():
    """Process text with REACH and return INDRA Statements.

    The request body is decoded as UTF-8 JSON and its 'text' entry is
    handed to REACH. The result is a dict with a single 'statements' key:
    the JSON form of the extracted statements, or an empty list when
    REACH gave no processor or no statements.
    """
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = reach.process_text(payload.get('text'))
    if not processor or not processor.statements:
        return {'statements': []}
    return {'statements': stmts_to_json(processor.statements)}
def test_hgnc_from_up():
    """Check names and HGNC/UP db_refs for 'MEK1 phosphorylates ERK2.'.

    In every offline mode the single statement must list two agents:
    MAP2K1 (HGNC 6840, UP Q02750) followed by MAPK1 (HGNC 6871,
    UP P28482). The statements must also pass ``unicode_strs``.
    """
    # (name, HGNC id, UniProt id) in the order agent_list() returns them
    expected = (
        ('MAP2K1', '6840', 'Q02750'),
        ('MAPK1', '6871', 'P28482'),
    )
    for mode in offline_modes:
        processor = reach.process_text('MEK1 phosphorylates ERK2.',
                                       offline=mode)
        assert len(processor.statements) == 1
        stmt = processor.statements[0]
        first, second = stmt.agent_list()
        for agent, (name, hgnc_id, up_id) in zip((first, second), expected):
            assert agent.name == name
            assert agent.db_refs['HGNC'] == hgnc_id
            assert agent.db_refs['UP'] == up_id
        assert unicode_strs(processor.statements)
def test_multiple_enzymes():
    """Check that two enzymes in one sentence give two statements.

    'MEK1 and MEK2 phosphorylate ERK1.' must produce two statements in
    every offline mode, one with enzyme MAP2K1 and one with MAP2K2 (in
    either order), both with substrate MAPK3.
    """
    for offline in offline_modes:
        rp = reach.process_text('MEK1 and MEK2 phosphorylate ERK1.',
                                offline=offline)
        stmts = rp.statements
        assert len(stmts) == 2
        # Either order is accepted, but both enzymes must be present; the
        # previous if/else never checked the first enzyme in its else
        # branch, so an unexpected first enzyme could slip through.
        enzyme_names = sorted(s.enz.name for s in stmts)
        assert enzyme_names == ['MAP2K1', 'MAP2K2']
        for s in stmts:
            assert s.sub.name == 'MAPK3'
        assert unicode_strs(stmts)
def test_multiple_enzymes():
    """Verify 'MEK1 and MEK2 phosphorylate ERK1.' in every offline mode.

    Two statements are required. Their enzymes must be MAP2K1 and MAP2K2,
    in any order, and each must have MAPK3 as substrate. The statements
    must also pass ``unicode_strs``.
    """
    for offline in offline_modes:
        rp = reach.process_text('MEK1 and MEK2 phosphorylate ERK1.',
                                offline=offline)
        assert len(rp.statements) == 2
        first, second = rp.statements
        # Compare as a set: order is free, but neither enzyme may be
        # something else (the old else branch left the first unchecked).
        assert {first.enz.name, second.enz.name} == {'MAP2K1', 'MAP2K2'}
        assert first.sub.name == 'MAPK3'
        assert second.sub.name == 'MAPK3'
        assert unicode_strs(rp.statements)
def test_process_unicode():
    """Check statements read from a sentence containing a \\U escape.

    In every offline mode the statements produced from the sentence must
    pass ``unicode_strs``.
    """
    sentence = 'MEK1 binds ERK2\U0001F4A9.'
    for mode in offline_modes:
        processor = reach.process_text(sentence, offline=mode)
        assert unicode_strs(processor.statements)
# When True, REACH is run again even if a .json output already exists
rerun = False

# Download the papers if they are not available yet
for pmcid in pmc_ids:
    prefix = folder + "/" + pmcid
    if not have_file(prefix + ".nxml") and not have_file(prefix + ".txt"):
        txt, txt_format = get_full_text(pmcid)
        if txt_format == "nxml":
            fname = prefix + ".nxml"
        else:
            fname = prefix + ".txt"
        with open(fname, "wt") as fh:
            fh.write(txt.encode("utf-8"))

# Read each paper if it hasn't been read yet.
# Otherwise use the existing json extractions.
for pmcid, pmid in zip(pmc_ids, pmids):
    prefix = folder + "/" + pmcid
    print("Processing %s..." % pmcid)
    # If REACH already processed it then don't run it again
    if rerun or not have_file(prefix + ".json"):
        if have_file(prefix + ".txt"):
            # Close the file deterministically instead of leaking the handle
            with open(prefix + ".txt") as fh:
                txt = fh.read().decode("utf-8")
            rp = reach.process_text(txt, citation=pmid, offline=True)
        elif have_file(prefix + ".nxml"):
            rp = reach.process_nxml_file(prefix + ".nxml", citation=pmid,
                                         offline=True)
        else:
            # Without this guard `rp` would be undefined (or left over from
            # the previous paper) and a stale reach_output.json would be
            # filed under this paper.
            print("No text found for %s, skipping" % pmcid)
            continue
        shutil.move("reach_output.json", prefix + ".json")
    else:
        rp = reach.process_json_file(prefix + ".json", citation=pmid)
    run_assembly(rp.statements, folder, pmcid)
import sys
import pickle
from indra import reach
from indra.assemblers import GraphAssembler

# Read the input text, closing the file as soon as it has been read
with open('extension.txt', 'rt') as fh:
    txt = fh.read()

# Extract statements with REACH and list each with its first evidence text
rp = reach.process_text(txt, offline=True)
st = rp.statements
for s in st:
    print('%s\t%s' % (s, s.evidence[0].text))

# Keep the extracted statements for later use
with open('extension.pkl', 'wb') as fh:
    pickle.dump(st, fh)

# Draw the statements as a graph and save it in dot and pdf form
graphpr = {'rankdir': 'TD'}
nodepr = {'fontsize': 12, 'shape': 'plaintext', 'margin': '0,0', 'pad': 0}
ga = GraphAssembler(st, graph_properties=graphpr, node_properties=nodepr)
ga.make_model()
ga.save_dot('jnk_extension.dot')
ga.save_pdf('jnk_extension.pdf')
def test_phosphorylate():
    """Check the reading of 'MEK1 phosphorylates ERK2.'.

    Exactly one statement is expected, with enzyme MAP2K1 and substrate
    MAPK1.
    """
    processor = reach.process_text('MEK1 phosphorylates ERK2.')
    extracted = processor.statements
    assert len(extracted) == 1
    stmt = extracted[0]
    assert stmt.enz.name == 'MAP2K1'
    assert stmt.sub.name == 'MAPK1'
def test_activity():
    """Check the reading of 'MEK1 activates ERK2.' in every offline mode.

    Exactly one statement is expected each time, and the statements must
    pass ``unicode_strs``.
    """
    sentence = 'MEK1 activates ERK2.'
    for mode in offline_modes:
        processor = reach.process_text(sentence, offline=mode)
        extracted = processor.statements
        assert len(extracted) == 1
        assert unicode_strs(extracted)
def test_activate():
    """Check the reading of 'HRAS activates BRAF.'.

    Exactly one statement is expected, with subject HRAS and object BRAF.
    """
    processor = reach.process_text('HRAS activates BRAF.')
    extracted = processor.statements
    assert len(extracted) == 1
    stmt = extracted[0]
    assert stmt.subj.name == 'HRAS'
    assert stmt.obj.name == 'BRAF'
def test_bind():
    """Check that 'MEK1 binds ERK2.' produces exactly one statement."""
    processor = reach.process_text('MEK1 binds ERK2.')
    n_statements = len(processor.statements)
    assert n_statements == 1
def test_activity():
    """Check that 'MEK1 activates ERK2.' produces exactly one statement."""
    processor = reach.process_text('MEK1 activates ERK2.')
    n_statements = len(processor.statements)
    assert n_statements == 1
def test_activity():
    """Read 'MEK1 activates ERK2.' once per offline mode and verify it.

    A single statement must come back each time, and ``unicode_strs``
    must accept the statements.
    """
    for use_offline in offline_modes:
        result = reach.process_text('MEK1 activates ERK2.',
                                    offline=use_offline)
        stmts = result.statements
        assert len(stmts) == 1
        assert unicode_strs(stmts)
# Download the text of each paper that is not available locally yet and
# look up the PMID for every PMC ID.
for pmcid in pmc_ids:
    prefix = folder + '/' + pmcid
    if not have_file(prefix + '.nxml') and not have_file(prefix + '.txt'):
        txt, txt_format = get_full_text(pmcid)
        if txt_format == 'nxml':
            fname = prefix + '.nxml'
        else:
            fname = prefix + '.txt'
        with open(fname, 'wt') as fh:
            fh.write(txt.encode('utf-8'))
    # Appended for every paper so that pmids stays aligned with pmc_ids
    # in the zip() below.
    pmids.append(id_lookup(pmcid)['pmid'])

# Read each paper if it hasn't been read yet.
# Otherwise use the existing json extractions.
for pmcid, pmid in zip(pmc_ids, pmids):
    prefix = folder + '/' + pmcid
    print('Processing %s...' % pmcid)
    # If REACH already processed it then don't run it again
    if rerun or not have_file(prefix + '.json'):
        if have_file(prefix + '.txt'):
            # Close the file deterministically instead of leaking the handle
            with open(prefix + '.txt') as fh:
                txt = fh.read().decode('utf-8')
            rp = reach.process_text(txt, citation=pmid)
        elif have_file(prefix + '.nxml'):
            rp = reach.process_nxml_file(prefix + '.nxml', citation=pmid)
        else:
            # Without this guard `rp` would be undefined (or left over from
            # the previous paper) and a stale reach_output.json would be
            # filed under this paper.
            print('No text found for %s, skipping' % pmcid)
            continue
        shutil.move('reach_output.json', prefix + '.json')
    else:
        rp = reach.process_json_file(prefix + '.json', citation=pmid)
    run_assembly(rp.statements, folder, pmcid)