Exemplo n.º 1
0
from indra import trips
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Run assembly on the pre-computed TRIPS EKB extraction of each paper.
    pmc_ids = ['PMC1234335', 'PMC3178447', 'PMC3690480',
               'PMC4345513', 'PMC534114']
    # Resolve the PMID that corresponds to each PMCID up front.
    pmids = [id_lookup(pmcid)['pmid'] for pmcid in pmc_ids]
    # Input EKB files are read from, and results are filed under, this folder.
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmid, pmcid in zip(pmids, pmc_ids):
        prefix = folder + '/' + pmcid
        # Parenthesized single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Processing %s...' % pmcid)
        # Close the EKB file deterministically instead of leaking the handle.
        with open(prefix + '-20160503T1152.ekb') as f:
            tp = trips.process_xml(f.read())
        # PMIDs from TRIPS need to be set here because it propagates
        # the PMCID by default
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        run_assembly(tp.statements, folder, pmcid)
Exemplo n.º 2
0
    # Load the REACH reading output
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
        reach_stmts = pickle.load(f)

    # Build the PMCID -> PMID lookup (column 0 -> column 1 of the
    # tab-separated ID map file).
    pmcid_to_pmid = {}
    csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
    for row in csvreader:
        pmcid_to_pmid[row[0]] = row[1]

    for pmcid in pmc_ids:
        print('Processing %s...' % pmcid)
        # Process TRIPS; the context manager closes the EKB file instead of
        # leaking the handle.
        trips_fname = 'trips/' + pmcid + '.ekb'
        with open(trips_fname) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # Get REACH statements (keyed by PMID); fall back to an empty list
        # when REACH produced nothing for this paper.
        reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
        if not reach_stmts_for_pmcid:
            print("No REACH statements for %s" % pmcid)
        # Get prior statements
        rasmodel_stmts = rasmodel.get_statements()
        # Combine all statements
        all_statements = tp.statements + reach_stmts_for_pmcid
        # Give every statement a freshly generated UUID before assembly.
        for stmt in all_statements:
            stmt.uuid = str(uuid.uuid4())
        # Run assembly
        run_assembly(all_statements,
                     'combined',
                     pmcid,
                     background_assertions=rasmodel_stmts)
Exemplo n.º 3
0
from indra import trips, reach
from indra.literature import id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == "__main__":
    # Combine the TRIPS and REACH extractions of each paper and assemble them.
    pmc_ids = ["PMC1234335", "PMC3178447", "PMC3690480", "PMC4345513", "PMC534114"]
    # Resolve the PMID that corresponds to each PMCID up front.
    pmids = [id_lookup(pmcid)["pmid"] for pmcid in pmc_ids]

    for pmid, pmcid in zip(pmids, pmc_ids):
        # Parenthesized single-argument print behaves the same on
        # Python 2 and Python 3.
        print("Processing %s..." % pmcid)
        trips_fname = "trips/" + pmcid + "-20160503T1152.ekb"
        # Close the EKB file deterministically instead of leaking the handle.
        with open(trips_fname) as f:
            tp = trips.process_xml(f.read())
        # Stamp the looked-up PMID onto every evidence of the TRIPS statements.
        for s in tp.statements:
            for e in s.evidence:
                e.pmid = pmid
        reach_fname = "reach/" + pmcid + ".json"
        rp = reach.process_json_file(reach_fname)
        # TRIPS statements first, then REACH statements.
        all_statements = tp.statements + rp.statements
        run_assembly(all_statements, "combined", pmcid)
Exemplo n.º 4
0
import sys
import csv
import shutil
import pickle
from indra.sources import reach
from indra.util import read_unicode_csv
from indra.literature import pmc_client, get_full_text, id_lookup
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # `os` is needed for os.path.join below; import it here so this script
    # block does not depend on a module-level import being present.
    import os

    # This script assumes that the papers have been processed offline,
    # e.g., using the submit_reading_pipeline.py script on Amazon,
    # and the results placed in a dict (mapping PMID -> lists of statements)
    # and put in the folder reach/reach_stmts_batch_4_eval.pkl.
    folder = 'reach'

    # Load the PMID to PMCID map (column 1 -> column 0 of the
    # tab-separated ID map file).
    pmid_to_pmcid = {}
    csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
    for row in csvreader:
        pmid_to_pmcid[row[1]] = row[0]

    # Load the REACH reading output
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open(os.path.join(folder, 'reach_stmts_batch_4_eval.pkl'), 'rb') as f:
        stmts_by_pmid = pickle.load(f)

    # Iterate over all of the PMIDs. Distinct names are used for the dict and
    # the per-paper list so the loop does not rebind what it is iterating.
    for pmid, pmid_stmts in stmts_by_pmid.items():
        pmcid = pmid_to_pmcid[pmid]
        run_assembly(pmid_stmts, folder, pmcid)
Exemplo n.º 5
0
if __name__ == '__main__':
    # Read one PMCID per line; the context manager closes the list file
    # instead of leaking the handle.
    with open('pmcids.txt', 'rt') as id_file:
        pmc_ids = [line.strip() for line in id_file]

    # Load the REACH reading output
    # NOTE: unpickling can execute arbitrary code; only load a trusted file.
    with open('reach/reach_stmts_batch_4_eval.pkl', 'rb') as f:
        reach_stmts = pickle.load(f)

    # Build the PMCID -> PMID lookup (column 0 -> column 1 of the
    # tab-separated ID map file).
    pmcid_to_pmid = {}
    csvreader = read_unicode_csv('pmc_batch_4_id_map.txt', delimiter='\t')
    for row in csvreader:
        pmcid_to_pmid[row[0]] = row[1]

    for pmcid in pmc_ids:
        print('Processing %s...' % pmcid)
        # Process TRIPS; close the EKB file once it has been read.
        trips_fname = 'trips/' + pmcid + '.ekb'
        with open(trips_fname) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # Get REACH statements (keyed by PMID); fall back to an empty list
        # when REACH produced nothing for this paper.
        reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
        if not reach_stmts_for_pmcid:
            print("No REACH statements for %s" % pmcid)
        # Get prior statements
        rasmodel_stmts = rasmodel.get_statements()
        # Combine all statements
        all_statements = tp.statements + reach_stmts_for_pmcid
        # Run assembly
        run_assembly(all_statements, 'combined', pmcid,
                     background_assertions=rasmodel_stmts)
Exemplo n.º 6
0
        reach_stmts = pickle.load(f)

    # Build the PMCID -> PMID lookup (column 0 -> column 1 of the
    # tab-separated ID map file).
    pmcid_to_pmid = {}
    with open('pmc_batch_4_id_map.txt') as f:
        csvreader = csv.reader(f, delimiter='\t')
        for row in csvreader:
            pmcid_to_pmid[row[0]] = row[1]

    for pmcid in pmc_ids:
        # Parenthesized single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Processing %s...' % pmcid)
        # Process TRIPS; the context manager closes the EKB file instead of
        # leaking the handle.
        trips_fname = 'trips/' + pmcid + '.ekb'
        with open(trips_fname) as ekb_file:
            tp = trips.process_xml(ekb_file.read())
        # Get REACH statements (keyed by PMID); fall back to an empty list
        # when REACH produced nothing for this paper.
        reach_stmts_for_pmcid = reach_stmts.get(pmcid_to_pmid[pmcid], [])
        if not reach_stmts_for_pmcid:
            print("No REACH statements for %s" % pmcid)
        # Get NACTEM/ISI statements; a missing index-card file simply
        # contributes no statements.
        fname = 'nactem/' + pmcid + '.cards'
        if not os.path.exists(fname):
            nactem_stmts = []
        else:
            icp = index_cards.process_json_file(fname, 'nactem')
            nactem_stmts = icp.statements

        # Combine all statements
        all_statements = tp.statements + reach_stmts_for_pmcid + nactem_stmts
        # Run assembly
        run_assembly(all_statements, 'combined', pmcid)
Exemplo n.º 7
0
from __future__ import absolute_import, print_function, unicode_literals
from builtins import dict, str
from indra.sources import trips
from assembly_eval import have_file, run_assembly

if __name__ == '__main__':
    # Read one PMCID per line; the context manager closes the list file
    # instead of leaking the handle.
    with open('pmcids.txt', 'rt') as id_file:
        pmc_ids = [line.strip() for line in id_file]
    # Input EKB files are read from, and results are filed under, this folder.
    folder = 'trips'
    # Use the existing EKB extractions.
    for pmcid in pmc_ids:
        prefix = folder + '/' + pmcid
        print('Processing %s...' % pmcid)
        with open(prefix + '.ekb', 'r') as f:
            tp = trips.process_xml(f.read())
        # NOTE(review): the previous comment here said PMIDs from TRIPS need
        # to be set because it propagates the PMCID by default, but this
        # script does not override any evidence PMIDs - confirm whether that
        # step is still required.
        run_assembly(tp.statements, folder, pmcid)