def process_sentence_xml(sentence):
    """Process the stored TRIPS EKB XML that belongs to a sentence.

    The EKB file name is built from the sentence with its last character
    dropped and every character outside ``[a-zA-Z0-9]`` replaced by an
    underscore, plus the ``.ekb`` extension. The file is looked up in the
    ``trips_ekbs`` folder under ``path_this``.

    Returns the result of ``trips.process_xml`` on the UTF-8 decoded content.
    """
    stem = re.sub('[^a-zA-Z0-9]', '_', sentence[:-1])
    ekb_path = os.path.join(path_this, 'trips_ekbs', stem + '.ekb')
    with open(ekb_path, 'rb') as ekb_file:
        raw = ekb_file.read()
    return trips.process_xml(raw.decode('utf-8'))
def get_file_stmts(fname):
    """Return the statements extracted from the EKB XML file `fname`.

    An empty list is returned when ``trips.process_xml`` returns None.
    """
    with open(fname, 'rt') as xml_file:
        content = xml_file.read()
    processor = trips.process_xml(content)
    return [] if processor is None else processor.statements
def trips_process_xml():
    """Process TRIPS EKB XML and return INDRA Statements.

    The request body is read as UTF-8 JSON and its ``xml_str`` entry is
    handed to ``trips.process_xml``. The result is a dict with a single
    ``statements`` key holding the JSON-serialized statements, or an empty
    list when no processor or no statements were produced.
    """
    payload = json.loads(request.body.read().decode('utf-8'))
    tp = trips.process_xml(payload.get('xml_str'))
    # Guard clause: nothing was extracted
    if not (tp and tp.statements):
        return {'statements': []}
    return {'statements': stmts_to_json(tp.statements)}
def process_trips(txt, reread=True):
    """Extract statements with TRIPS and print them with their evidence.

    Parameters
    ----------
    txt : str
        Text to read; it is stripped and split on newlines, and each
        resulting line is sent to ``trips.process_text`` separately.
        Only used when `reread` is True.
    reread : bool
        If True, read `txt` with TRIPS. If False, process the previously
        saved ``trips_output.xml`` in the current directory instead.

    Returns
    -------
    list
        The collected statements.
    """
    print('Using TRIPS')
    ts = time.time()
    if reread:
        stmts = []
        sentences = txt.strip().split('\n')
        for sentence in sentences:
            print(sentence)
            tp = trips.process_text(sentence)
            stmts += tp.statements
    else:
        # Use a context manager so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open('trips_output.xml', 'r') as fh:
            xml_str = fh.read()
        tp = trips.process_xml(xml_str)
        stmts = tp.statements
    te = time.time()
    print('Time taken: %.2fs' % (te - ts))
    # Print each statement next to the text of its first evidence
    for st in stmts:
        print('%s\t%s' % (st, st.evidence[0].text))
    return stmts
def assemble_model(model_name, reread=False):
    """Build, save and return the PySB model called `model_name`.

    When `reread` is False and ``<model_name>.xml`` exists, that XML is
    processed with ``trips.process_xml``. Otherwise ``<model_name>.txt`` is
    read with ``trips.process_text`` (which is also given the XML file name)
    and the reading time is printed. The statements are then assembled with
    ``PysbAssembler``, the model is named, ``add_observable`` and
    ``set_parameters`` are applied, and the model is saved as
    ``<model_name>.py``.
    """
    ekb_path = model_name + '.xml'
    if not reread:
        print('Processing %s' % ekb_path)
        if not os.path.exists(ekb_path):
            # No stored XML to process, so fall back to reading the text
            reread = True
        else:
            with open(ekb_path, 'rb') as ekb_file:
                tp = trips.process_xml(ekb_file.read())
    if reread:
        text_path = model_name + '.txt'
        print('Reading %s' % text_path)
        with open(text_path, 'rb') as text_file:
            started = time.time()
            tp = trips.process_text(text_file.read(), ekb_path)
            finished = time.time()
        print('Reading took %.2fs' % (finished - started))

    print('Assembling statements:')
    for idx, stmt in enumerate(tp.statements):
        print('%d: %s' % (idx, stmt))
    print('----------------------')

    assembler = PysbAssembler()
    assembler.add_statements(tp.statements)
    started = time.time()
    model = assembler.make_model()
    finished = time.time()
    print('Assembly took %.2fs' % (finished - started))

    model.name = model_name
    add_observable(model)
    set_parameters(model)

    # Store the finished model on the assembler, save it and return it
    assembler.model = model
    assembler.save_model('%s.py' % model_name)
    return model
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
from indra import trips
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read the PMC IDs to process, one per line. The context manager
    # closes the file as soon as the IDs have been collected.
    with open('pmcids.txt', 'rt') as fh:
        pmc_ids = [line.strip() for line in fh]

    # Use the existing EKB extractions.
    for pmcid in pmc_ids:
        folder = 'trips'
        prefix = folder + '/' + pmcid
        print('Processing %s...' % pmcid)
        with open(prefix + '.ekb', 'r') as f:
            tp = trips.process_xml(f.read())
        # NOTE(review): an earlier comment here said that PMIDs need to be
        # set because TRIPS propagates the PMCID by default, but this script
        # does not modify the evidence before assembly -- confirm whether
        # that is intended.
        run_assembly(tp.statements, folder, pmcid)
# Load the REACH reading output
# NOTE(review): pickle.load can execute arbitrary code contained in the
# file; only use it on trusted, locally generated pickles.
with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
    reach_stmts = pickle.load(f)

# Load the PMCID to PMID map (first column is the key, second the value)
csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
pmcid_to_pmid = {row[0]: row[1] for row in csvreader}

for pmcid in pmc_ids:
    print('Processing %s...' % pmcid)
    # Process TRIPS; the context manager closes each EKB file right after
    # it has been read instead of leaking one handle per iteration.
    trips_fname = 'trips/' + pmcid + '.ekb'
    with open(trips_fname) as fh:
        tp = trips.process_xml(fh.read())
    # Get REACH statements, which are keyed by the mapped ID
    reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
    if not reach_stmts_for_pmcid:
        print("No REACH statements for %s" % pmcid)
    # Get prior statements (fetched anew for every paper, as before)
    rasmodel_stmts = rasmodel.get_statements()
    # Combine all statements and give each one a fresh UUID
    all_statements = tp.statements + reach_stmts_for_pmcid
    for stmt in all_statements:
        stmt.uuid = str(uuid.uuid4())
    # Run assembly
    run_assembly(all_statements, 'combined', pmcid,
                 background_assertions=rasmodel_stmts)
def _set_parameter_values(model, name_value_pairs):
    """Assign each value to ``.value`` of the model parameter of that name."""
    for name, value in name_value_pairs:
        model.parameters[name].value = value


def assemble_model(model_id, reread=False):
    """Assemble, parametrize, save and return PySB model number `model_id`.

    Parameters
    ----------
    model_id : int
        Number of the model; used to build the ``model<id>`` file names and
        to decide which extra parameter sets apply (``>= 2`` and ``>= 3``).
    reread : bool
        If False and ``model<id>.xml`` exists, that EKB XML is processed.
        Otherwise the text is rebuilt from ``model1.txt`` by applying the
        ``model<j+1>_from<j>.txt`` patches in order, and then processed with
        ``trips.process_text``, which is also given the XML file name.

    Returns
    -------
    The assembled model, which is also saved as ``model<id>.py``.
    """
    model_name = 'model%d' % model_id
    xml_fname = model_name + '.xml'
    # If model has already been read, just process the EKB XML
    if os.path.exists(xml_fname) and not reread:
        with open(xml_fname) as fh:
            tp = trips.process_xml(fh.read())
    else:
        # Start with the basic model
        with open('model1.txt') as fh:
            model_txt = fh.read()
        # Apply patches one by one to get to the current model text
        for j in range(1, model_id):
            with open('model%d_from%d.txt' % (j + 1, j)) as fh:
                patch_txt = fh.read()
            model_txt = apply_patch(model_txt, patch_txt)
        print('Reading model %d text:' % model_id)
        print(model_txt)
        # Process model text and save result EKB XML
        tp = trips.process_text(model_txt, xml_fname)

    print('Assembling statements:')
    for i, st in enumerate(tp.statements):
        print('%d: %s' % (i, st))

    # Assemble the PySB model
    pa = PysbAssembler()
    pa.add_statements(tp.statements)
    model = pa.make_model(policies='two_step')

    # Add observables (and zero the default BRAF initial amount)
    erk = model.monomers['ERK']
    model.add_component(Observable(b'ERK_p', erk(phospho='p')))
    vem = model.monomers['VEMURAFENIB']
    model.add_component(Observable(b'Vem_free', vem(map3k=None)))
    ras = model.monomers['RAS']
    model.add_component(Observable(b'RAS_active', ras(gtp=ANY)))
    braf = model.monomers['BRAF']
    model.add_component(Observable(b'BRAF_active', braf(vemurafenib=None)))
    model.parameters[b'BRAF_0'].value = 0
    egf = model.monomers['EGF']
    model.add_component(Observable(b'EGF_free', egf(erbb=None)))

    # Add mutated form of BRAF as initial condition: every site takes its
    # first listed state (or None if it has no states), and V600 is set to E
    sites_dict = {}
    for site in braf.sites:
        if site in braf.site_states:
            sites_dict[site] = braf.site_states[site][0]
        else:
            sites_dict[site] = None
    sites_dict['V600'] = 'E'
    model.add_component(Parameter('BRAF_mut_0', 1e5))
    model.initial(braf(**sites_dict), model.parameters['BRAF_mut_0'])

    # Set up model parameters; kept as ordered pairs so values are assigned
    # in the same order as before
    _set_parameter_values(model, (
        ('kf_ee_bind_1', 1),
        ('kr_ee_bind_1', 0.1),
        ('kf_ee_bind_2', 1),
        ('kr_ee_bind_2', 0.1),
        ('kf_eg_bind_1', 1),
        ('kr_eg_bind_1', 0.1),
        ('kf_gs_bind_1', 1),
        ('kr_gs_bind_1', 0.1),
        ('kf_sr_bind_1', 1),
        ('kr_sr_bind_1', 50),
        ('kf_rg_bind_1', 50),
        ('kr_rg_bind_1', 0.5),
        ('kf_rb_bind_1', 1),
        ('kr_rb_bind_1', 0.5),
        ('kf_vb_bind_1', 10),
        ('kr_vb_bind_1', 1),
        ('kf_bm_bind_1', 1),
        ('kr_bm_bind_1', 0.1),
        ('kc_bm_phosphorylation_1', 3),
        ('kf_pm_bind_1', 1),
        ('kr_pm_bind_1', 0.001),
        ('kc_pm_dephosphorylation_1', 10),
        ('kf_me_bind_1', 1),
        ('kr_me_bind_1', 0.1),
        ('kc_me_phosphorylation_1', 10),
        ('kf_de_bind_1', 1),
        ('kr_de_bind_1', 0.001),
        ('kc_de_dephosphorylation_1', 10),
        ('VEMURAFENIB_0', 0),
        ('EGF_0', 1e3),
        ('EGFR_0', 1e5),
        ('SOS_0', 1e3),
        ('GRB2_0', 1e5),
        ('RAS_0', 2e5),
        ('GTP_0', 1e7),
        ('MEK_0', 1e5),
        ('ERK_0', 1e5),
        ('DUSP6_0', 1e3),
        ('PPP2CA_0', 1e5),
    ))

    # NOTE(review): the extent of the two conditional sections below was
    # reconstructed from whitespace-collapsed source -- confirm.
    if model_id >= 2:
        _set_parameter_values(model, (
            ('Phosphatase_0', 1e2),
            ('kf_es_bind_1', 1e-05),
            ('kr_es_bind_1', 1e-04),
            ('kc_es_phosphorylation_1', 1),
            ('kf_ps_bind_1', 1),
            ('kr_ps_bind_1', 0.1),
            ('kc_ps_dephosphorylation_1', 1e-04),
        ))

    if model_id >= 3:
        _set_parameter_values(model, (
            ('kf_bb_bind_1', 10),
            ('kr_bb_bind_1', 1),
            ('kf_vb_bind_2', 1e-04),
        ))

    # Store the finished model on the assembler and save it
    pa.model = model
    pa.save_model('model%d.py' % model_id)
    return model
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    pmc_ids = ['PMC1234335', 'PMC3178447', 'PMC3690480',
               'PMC4345513', 'PMC534114']
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]

    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        folder = 'trips'
        prefix = folder + '/' + pmcid
        # Parenthesized form works as a statement in Python 2 and as a
        # function call in Python 3.
        print('Processing %s...' % pmcid)
        # Close the EKB file right after reading it
        with open(prefix + '-20160503T1152.ekb') as fh:
            tp = trips.process_xml(fh.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
from indra import trips, reach
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == "__main__":
    pmc_ids = ["PMC1234335", "PMC3178447", "PMC3690480",
               "PMC4345513", "PMC534114"]
    pmids = [id_lookup(pmcid)["pmid"] for pmcid in pmc_ids]

    for pmid, pmcid in zip(pmids, pmc_ids):
        # Parenthesized form works as a statement in Python 2 and as a
        # function call in Python 3.
        print("Processing %s..." % pmcid)

        # Process the stored TRIPS EKB; close the file right after reading
        trips_fname = "trips/" + pmcid + "-20160503T1152.ekb"
        with open(trips_fname) as fh:
            tp = trips.process_xml(fh.read())
        # Overwrite the PMID on every piece of TRIPS evidence
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid

        # Process the REACH JSON output for the same paper
        reach_fname = "reach/" + pmcid + ".json"
        rp = reach.process_json_file(reach_fname)

        # Assemble the statements of both readers together
        all_statements = tp.statements + rp.statements
        run_assembly(all_statements, "combined", pmcid)
def assemble_model(model_name, reread=False):
    """Build, customize, save and return the PySB model `model_name`.

    When `reread` is False and ``<model_name>.xml`` exists, that XML is
    processed with ``trips.process_xml``; otherwise ``<model_name>.txt`` is
    read with ``trips.process_text`` (which is also given the XML file
    name). The statements are assembled with ``PysbAssembler``, a
    ``p53_active`` observable is added, and model-name specific parameters,
    initial conditions and observables are set before the model is saved as
    ``<model_name>.py``.

    NOTE(review): the nesting below was reconstructed from source whose
    whitespace had been collapsed; in particular confirm which statements
    belong to ``if not model_name.endswith('var')``.
    """
    xml_fname = model_name + '.xml'
    if not reread:
        print('Processing %s' % xml_fname)
        if os.path.exists(xml_fname):
            with open(xml_fname, 'rb') as fh:
                tp = trips.process_xml(fh.read())
        else:
            # No stored XML available, so fall through to reading the text
            reread = True
    if reread:
        fname = model_name + '.txt'
        print('Reading %s' % fname)
        with open(fname, 'rb') as fh:
            tp = trips.process_text(fh.read(), xml_fname)
    # List the extracted statements with their index
    print('Assembling statements:')
    for i, st in enumerate(tp.statements):
        print('%d: %s' % (i, st))
    print('----------------------')
    # Assemble the PySB model and name it
    pa = PysbAssembler()
    pa.add_statements(tp.statements)
    model = pa.make_model()
    model.name = model_name
    # Observable for TP53 with activity 'active'; added for every model
    p53 = model.monomers['TP53']
    obs = Observable(b'p53_active', p53(activity='active'))
    model.add_component(obs)
    if not model_name.endswith('var'):
        model.parameters['kf_aa_act_1'].value = 5e-06
    # NOTE(review): assumed to apply to all models (outside the `if`
    # above) -- confirm against the original indentation.
    model.parameters['kf_pt_act_1'].value = 1e-05
    if model_name == 'p53_ATM':
        # Start with one unit of ATM whose activity is 'active', and
        # observe that form
        model.add_component(Parameter('ATMa_0', 1))
        atm = model.monomers['ATM']
        model.initial(atm(activity='active'), model.parameters['ATMa_0'])
        model.parameters['kf_pa_act_1'].value = 1e-04
        obs = Observable(b'atm_active', atm(activity='active'))
        model.add_component(obs)
    if model_name == 'p53_ATR':
        # Same setup as above, for ATR
        model.add_component(Parameter('ATRa_0', 1))
        atr = model.monomers['ATR']
        model.initial(atr(activity='active'), model.parameters['ATRa_0'])
        obs = Observable(b'atr_active', atr(activity='active'))
        model.add_component(obs)
    if model_name == 'p53_ATM_var':
        # Disabled earlier variant that used the activity-based ATM form:
        #model.add_component(Parameter('ATMa_0', 1))
        #atm = model.monomers['ATM']
        #model.initial(atm(activity='active'),
        #              model.parameters['ATMa_0'])
        # This variant uses ATM with phospho='p' for both the initial
        # condition and the 'atm_active' observable
        model.add_component(Parameter('ATMa_0', 1))
        atm = model.monomers['ATM']
        model.initial(atm(phospho='p'), model.parameters['ATMa_0'])
        model.parameters['kf_pa_dephosphorylation_1'].value = 1e-04
        model.parameters['MDM2_0'].value = 0
        model.parameters['kf_m_deg_1'].value = 8e-01
        model.parameters['kf_tm_synth_1'].value = 0.2
        model.parameters['kf_aa_phosphorylation_1'].value = 5e-06
        obs = Observable(b'atm_active', atm(phospho='p'))
        model.add_component(obs)
    # Store the finished model on the assembler, save it and return it
    pa.model = model
    pa.save_model('%s.py' % model_name)
    return model
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read the PMC IDs to process, one per line. The context manager
    # closes the file as soon as the IDs have been collected.
    with open('pmcids.txt', 'rt') as fh:
        pmc_ids = [line.strip() for line in fh]
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]

    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        folder = 'trips'
        prefix = folder + '/' + pmcid
        # Parenthesized form works as a statement in Python 2 and as a
        # function call in Python 3.
        print('Processing %s...' % pmcid)
        # Close the EKB file right after reading it
        with open(prefix + '_20160614.ekb') as fh:
            tp = trips.process_xml(fh.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
def test_trips_processor_offline():
    """Smoke test to see if imports and executes without error. Doesn't
    check for correctness of parse or of assembled model."""
    # Read through a context manager so the test does not leak the handle
    with open(test_small_file) as fh:
        xml_str = fh.read()
    # Only the absence of an exception matters; the result is not inspected
    trips.process_xml(xml_str)
db_refs_str = ', '.join(db_refs) ev_txt = (s.evidence[0].text).encode('utf-8') fh.write('%s\t%s\t%s\t%s\n' % (s, db_refs_str, 'PMC'+s.evidence[0].pmid, ev_txt)) if __name__ == '__main__': fnames = glob.glob('*.ekb') pa = Preassembler(eh, mh) for fn in fnames: print '\n\n----------------------------' print 'Processing %s...' % fn xml_str = open(fn, 'rt').read() tp = trips.process_xml(xml_str) print 'Extracted events by type' print '------------------------' for k,v in tp.extracted_events.iteritems(): print k, len(v) print '------------------------' print '%s statements collected.' % len(tp.statements) pa.add_statements(tp.statements) print '----------------------------\n\n' print '%d statements collected in total.' % len(pa.stmts) duplicate_stmts = pa.combine_duplicates() print '%d statements after combining duplicates.' % len(duplicate_stmts) related_stmts = pa.combine_related() print '%d statements after combining related.' % len(related_stmts)