# Run assembly on the existing TRIPS EKB extractions for a fixed list of
# PMC papers.
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    pmc_ids = ['PMC1234335', 'PMC3178447', 'PMC3690480', 'PMC4345513',
               'PMC534114']
    # Look up the PMID of every paper up front; it is written onto the
    # TRIPS evidence below.
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        print('Processing %s...' % pmcid)
        ekb_path = folder + '/' + pmcid + '-20160503T1152.ekb'
        # Read through a context manager so the file handle is closed
        # before moving on to the next paper.
        with open(ekb_path) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for stmt in tp.statements:
            for ev in stmt.evidence:
                ev.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
# NOTE(review): this fragment uses pmc_ids, pickle, uuid, trips, rasmodel,
# read_unicode_csv and run_assembly, none of which are defined in the visible
# code -- presumably they come from a script header that is not shown here.

# Load the REACH reading output
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load pickles produced by a trusted pipeline.
with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
    reach_stmts = pickle.load(f)

# Build the PMCID -> PMID lookup from the tab-separated ID map
# (column 0 is used as the key, column 1 as the value).
pmcid_to_pmid = {}
csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
for row in csvreader:
    pmcid_to_pmid[row[0]] = row[1]

for pmcid in pmc_ids:
    print('Processing %s...' % pmcid)
    # Process TRIPS
    trips_fname = 'trips/' + pmcid + '.ekb'
    # Read through a context manager so the file handle is closed.
    with open(trips_fname) as ekb_file:
        tp = trips.process_xml(ekb_file.read())
    # Get REACH statements (keyed by PMID in the loaded dict)
    reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
    if not reach_stmts_for_pmcid:
        print("No REACH statements for %s" % pmcid)
    # Get prior statements
    # NOTE(review): fetched again for every paper; kept inside the loop
    # because it is not visible here whether run_assembly mutates them.
    rasmodel_stmts = rasmodel.get_statements()
    # Combine all statements
    all_statements = tp.statements + reach_stmts_for_pmcid
    # Give every statement a fresh random UUID before assembly.
    for stmt in all_statements:
        stmt.uuid = str(uuid.uuid4())
    # Run assembly
    run_assembly(all_statements, 'combined', pmcid,
                 background_assertions=rasmodel_stmts)
# Combine the TRIPS EKB and REACH JSON extractions for a fixed list of PMC
# papers and run assembly on the union.
from indra import trips, reach
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == "__main__":
    pmc_ids = ["PMC1234335", "PMC3178447", "PMC3690480", "PMC4345513",
               "PMC534114"]
    pmids = [id_lookup(pmcid)["pmid"] for pmcid in pmc_ids]
    for pmid, pmcid in zip(pmids, pmc_ids):
        print("Processing %s..." % pmcid)
        trips_fname = "trips/" + pmcid + "-20160503T1152.ekb"
        # Read through a context manager so the file handle is closed
        # before the next paper is processed.
        with open(trips_fname) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # Overwrite the PMID on every TRIPS evidence with the looked-up one;
        # the REACH statements added below are left untouched.
        for stmt in tp.statements:
            for ev in stmt.evidence:
                ev.pmid = pmid
        reach_fname = "reach/" + pmcid + ".json"
        rp = reach.process_json_file(reach_fname)
        all_statements = tp.statements + rp.statements
        run_assembly(all_statements, "combined", pmcid)
# Run assembly on pre-computed REACH statements, one paper (PMID) at a time.
import os  # required by os.path.join below; missing in the original
import sys
import csv
import shutil
import pickle
from indra.sources import reach
from indra.util import read_unicode_csv
from indra.literature import pmc_client, get_full_text, id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # This script assumes that the papers have been processed offline,
    # e.g., using the submit_reading_pipeline.py script on Amazon,
    # and the results placed in a dict (mapping PMID -> lists of statements)
    # stored in the file reach/reach_stmts_batch_4_eval.pkl.
    folder = 'reach'

    # Build the PMID -> PMCID lookup from the tab-separated ID map
    # (column 1 is used as the key, column 0 as the value).
    pmid_to_pmcid = {}
    csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
    for row in csvreader:
        pmid_to_pmcid[row[1]] = row[0]

    # Load the REACH reading output
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load pickles produced by a trusted pipeline.
    pkl_path = os.path.join(folder, 'reach_stmts_batch_4_eval.pkl')
    with open(pkl_path, 'rb') as f:
        stmts_by_pmid = pickle.load(f)

    # Iterate over all of the PMIDs. The per-paper list gets its own name so
    # it no longer shadows the dict being iterated.
    for pmid, pmid_stmts in stmts_by_pmid.items():
        pmcid = pmid_to_pmcid[pmid]
        run_assembly(pmid_stmts, folder, pmcid)
if __name__ == '__main__':
    # Read the PMCIDs to process, one per line, closing the file afterwards.
    with open('pmcids.txt', 'rt') as pmcid_file:
        pmc_ids = [line.strip() for line in pmcid_file]

    # Load the REACH reading output
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load pickles produced by a trusted pipeline.
    with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
        reach_stmts = pickle.load(f)

    # Build the PMCID -> PMID lookup from the tab-separated ID map
    # (column 0 is used as the key, column 1 as the value).
    pmcid_to_pmid = {}
    csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
    for row in csvreader:
        pmcid_to_pmid[row[0]] = row[1]

    for pmcid in pmc_ids:
        print('Processing %s...' % pmcid)
        # Process TRIPS
        trips_fname = 'trips/' + pmcid + '.ekb'
        # Read through a context manager so the file handle is closed.
        with open(trips_fname) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # Get REACH statements (keyed by PMID in the loaded dict)
        reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
        if not reach_stmts_for_pmcid:
            print("No REACH statements for %s" % pmcid)
        # Get prior statements
        # NOTE(review): fetched again for every paper; kept inside the loop
        # because it is not visible here whether run_assembly mutates them.
        rasmodel_stmts = rasmodel.get_statements()
        # Combine all statements
        all_statements = tp.statements + reach_stmts_for_pmcid
        # Run assembly
        run_assembly(all_statements, 'combined', pmcid,
                     background_assertions=rasmodel_stmts)
# NOTE(review): this fragment starts mid-script. `f` and `pmc_ids` are not
# defined in the visible code -- presumably `f` is bound by an enclosing
# `with open(...)` and `pmc_ids` by an earlier statement; confirm against the
# full script.
reach_stmts = pickle.load(f)

# Build the PMCID -> PMID lookup from the tab-separated ID map
# (column 0 is used as the key, column 1 as the value).
pmcid_to_pmid = {}
with open('pmc_batch_4_id_map.txt') as f:
    csvreader = csv.reader(f, delimiter='\t')
    for row in csvreader:
        pmcid_to_pmid[row[0]] = row[1]

for pmcid in pmc_ids:
    print('Processing %s...' % pmcid)
    # Process TRIPS
    trips_fname = 'trips/' + pmcid + '.ekb'
    # Read through a context manager so the file handle is closed.
    with open(trips_fname) as ekb_file:
        tp = trips.process_xml(ekb_file.read())
    # Get REACH statements (keyed by PMID in the loaded dict)
    reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
    if not reach_stmts_for_pmcid:
        print("No REACH statements for %s" % pmcid)
    # Get NACTEM/ISI statements; a paper without a .cards file contributes
    # no statements from this source.
    fname = 'nactem/' + pmcid + '.cards'
    if os.path.exists(fname):
        icp = index_cards.process_json_file(fname, 'nactem')
        nactem_stmts = icp.statements
    else:
        nactem_stmts = []
    # Combine all statements
    all_statements = tp.statements + reach_stmts_for_pmcid + nactem_stmts
    # Run assembly
    run_assembly(all_statements, 'combined', pmcid)
# Run assembly on the existing TRIPS EKB extractions for the PMCIDs listed
# in pmcids.txt.
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
from indra.sources import trips
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read the PMCIDs to process, one per line, closing the file afterwards.
    with open('pmcids.txt', 'rt') as pmcid_file:
        pmc_ids = [line.strip() for line in pmcid_file]

    folder = 'trips'
    # Use the existing EKB extractions.
    for pmcid in pmc_ids:
        print('Processing %s...' % pmcid)
        ekb_path = folder + '/' + pmcid + '.ekb'
        with open(ekb_path, 'r') as f:
            tp = trips.process_xml(f.read())
        # NOTE(review): the original comment here said PMIDs from TRIPS need
        # to be set because it propagates the PMCID by default, but no code
        # in this script sets them -- confirm whether that is intentional.
        run_assembly(tp.statements, folder, pmcid)