Python process_text示例，indra.reach.process_text Python示例

示例#1

0

显示文件

def process_paper(model_name, pmid):
    """Read one paper with REACH, reusing a stored JSON output if present.

    Parameters
    ----------
    model_name : str
        Name of the model folder (below the module-level ``model_path``)
        whose ``jsons`` sub-folder holds the stored REACH outputs.
    pmid : str
        Identifier of the paper; also handed to REACH as the citation.

    Returns
    -------
    tuple
        ``(rp, txt_format)``. ``rp`` is whatever the ``reach`` call returned,
        or None when the text format is not one of the handled ones.
        ``txt_format`` is ``'existing_json'`` when a stored output was used,
        otherwise the format reported by ``get_full_text``.
    """
    reach_output = 'reach_output.json'
    stored_json = os.path.join(model_path, model_name, 'jsons',
                               'PMID%s.json' % pmid)

    # An invalid-looking PMID is only reported; processing still continues.
    if pmid.startswith(('api', 'PMID')):
        logger.warning('Invalid PMID: %s' % pmid)

    if os.path.exists(stored_json):
        # The paper was read before: load the stored output.
        processor = reach.process_json_file(stored_json, citation=pmid)
        txt_format = 'existing_json'
    else:
        # The paper was not read before: fetch its text and read it now.
        txt, txt_format = get_full_text(pmid, 'pmid')
        was_read = True
        if txt_format == 'pmc_oa_xml':
            processor = reach.process_nxml_str(txt, citation=pmid,
                                               offline=True)
        elif txt_format in ('elsevier_xml', 'abstract'):
            if txt_format == 'elsevier_xml':
                # Extract the raw text from the Elsevier XML
                txt = elsevier_client.extract_text(txt)
            processor = reach.process_text(txt, citation=pmid, offline=True)
        else:
            processor = None
            was_read = False
        # Keep the output file of this run next to the other stored outputs.
        if was_read and os.path.exists(reach_output):
            shutil.move(reach_output, stored_json)

    if processor is not None:
        check_pmids(processor.statements)
    return processor, txt_format

示例#2

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_phosphorylate():
    """'MEK1 phosphorylates ERK2.' yields one statement: MAP2K1 -> MAPK1."""
    for mode in offline_modes:
        processor = reach.process_text('MEK1 phosphorylates ERK2.',
                                       offline=mode)
        assert len(processor.statements) == 1
        stmt = processor.statements[0]
        # Enzyme and substrate are checked by their normalized names.
        assert stmt.enz.name == 'MAP2K1'
        assert stmt.sub.name == 'MAPK1'
        assert unicode_strs(processor.statements)

示例#3

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_activate():
    """'HRAS activates BRAF.' yields one statement with subj HRAS, obj BRAF."""
    for mode in offline_modes:
        processor = reach.process_text('HRAS activates BRAF.', offline=mode)
        assert len(processor.statements) == 1
        stmt = processor.statements[0]
        assert stmt.subj.name == 'HRAS'
        assert stmt.obj.name == 'BRAF'
        assert unicode_strs(processor.statements)

示例#4

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_activate():
    """Check the single statement read from 'HRAS activates BRAF.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'HRAS activates BRAF.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        activation = result.statements[0]
        # Subject is the activator, object the activated agent.
        assert activation.subj.name == 'HRAS'
        assert activation.obj.name == 'BRAF'
        assert unicode_strs(result.statements)

示例#5

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_phosphorylate():
    """Check the single statement read from 'MEK1 phosphorylates ERK2.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK1 phosphorylates ERK2.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        phos = result.statements[0]
        assert phos.enz.name == 'MAP2K1'
        assert phos.sub.name == 'MAPK1'
        assert unicode_strs(result.statements)

示例#6

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_regulate_amount():
    """Each amount-regulation sentence yields one ERK -> DUSP statement.

    The 'increases the transcription' sentence must give an IncreaseAmount
    and the 'decreases the amount' sentence a DecreaseAmount.
    """
    cases = (
        ('ERK increases the transcription of DUSP.', IncreaseAmount),
        ('ERK decreases the amount of DUSP.', DecreaseAmount),
    )
    for mode in offline_modes:
        for sentence, expected_type in cases:
            processor = reach.process_text(sentence, offline=mode)
            assert len(processor.statements) == 1
            stmt = processor.statements[0]
            assert isinstance(stmt, expected_type)
            assert stmt.subj.name == 'ERK'
            assert stmt.obj.name == 'DUSP'
            assert unicode_strs(processor.statements)

示例#7

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_be_grounding():
    """'MEK activates ERK.' yields one statement; check 'BE' groundings.

    The 'BE' entries of ``db_refs`` are only checked when the mode compares
    equal to True.
    """
    for mode in offline_modes:
        processor = reach.process_text('MEK activates ERK.', offline=mode)
        assert len(processor.statements) == 1
        assert unicode_strs(processor.statements)
        if mode == True:
            activation = processor.statements[0]
            assert activation.subj.db_refs.get('BE') == 'MEK'
            assert activation.obj.db_refs.get('BE') == 'ERK'

示例#8

0

显示文件

def test_mutation():
    """'BRAF(V600E) phosphorylates MEK.' yields BRAF with one V600E mutation."""
    processor = reach.process_text('BRAF(V600E) phosphorylates MEK.')
    assert len(processor.statements) == 1
    enzyme = processor.statements[0].enz
    assert enzyme.name == 'BRAF'
    assert len(enzyme.mutations) == 1
    # Attribute/value pairs expected on the single mutation, in check order.
    expected = (('position', '600'),
                ('residue_from', 'V'),
                ('residue_to', 'E'))
    for attr, value in expected:
        assert getattr(enzyme.mutations[0], attr) == value

示例#9

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_be_grounding():
    """Check the statement count and 'BE' groundings for 'MEK activates ERK.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK activates ERK.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        assert unicode_strs(result.statements)
        # Groundings are only verified for a mode that compares equal to True.
        if not (offline_flag == True):
            continue
        stmt = result.statements[0]
        assert stmt.subj.db_refs.get('BE') == 'MEK'
        assert stmt.obj.db_refs.get('BE') == 'ERK'

示例#10

0

显示文件

def process_reach(txt):
    """Read ``txt`` with REACH (``offline=False``) and return the statements.

    Prints each statement with the text of its first evidence, then the
    elapsed wall-clock time.
    """
    print('Using REACH')
    started = time.time()
    processor = reach.process_text(txt, offline=False)
    for stmt in processor.statements:
        print('%s\t%s' % (stmt, stmt.evidence[0].text))
    elapsed = time.time() - started
    print('Time taken: %.2fs' % elapsed)
    return processor.statements

示例#11

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_regulate_amount():
    """Check IncreaseAmount / DecreaseAmount extraction for ERK and DUSP.

    Runs once per entry of ``offline_modes``.
    """
    for offline_flag in offline_modes:
        # Transcription increase -> IncreaseAmount
        result = reach.process_text(
            'ERK increases the transcription of DUSP.', offline=offline_flag)
        assert len(result.statements) == 1
        increase = result.statements[0]
        assert isinstance(increase, IncreaseAmount)
        assert increase.subj.name == 'ERK'
        assert increase.obj.name == 'DUSP'
        assert unicode_strs(result.statements)

        # Amount decrease -> DecreaseAmount
        result = reach.process_text(
            'ERK decreases the amount of DUSP.', offline=offline_flag)
        assert len(result.statements) == 1
        decrease = result.statements[0]
        assert isinstance(decrease, DecreaseAmount)
        assert decrease.subj.name == 'ERK'
        assert decrease.obj.name == 'DUSP'
        assert unicode_strs(result.statements)

示例#12

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_mutation():
    """'BRAF(V600E) phosphorylates MEK.' yields BRAF with one V600E mutation.

    Runs once per entry of ``offline_modes``.
    """
    for mode in offline_modes:
        processor = reach.process_text('BRAF(V600E) phosphorylates MEK.',
                                       offline=mode)
        assert len(processor.statements) == 1
        enzyme = processor.statements[0].enz
        assert enzyme.name == 'BRAF'
        assert len(enzyme.mutations) == 1
        mutation = enzyme.mutations[0]
        assert mutation.position == '600'
        assert mutation.residue_from == 'V'
        assert mutation.residue_to == 'E'
        assert unicode_strs(processor.statements)

示例#13

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_mutation():
    """Check the mutation read from 'BRAF(V600E) phosphorylates MEK.'.

    The enzyme must be BRAF with exactly one mutation: position '600',
    from residue 'V' to residue 'E'.
    """
    sentence = 'BRAF(V600E) phosphorylates MEK.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        kinase = result.statements[0].enz
        assert kinase.name == 'BRAF'
        assert len(kinase.mutations) == 1
        assert kinase.mutations[0].position == '600'
        assert kinase.mutations[0].residue_from == 'V'
        assert kinase.mutations[0].residue_to == 'E'
        assert unicode_strs(result.statements)

示例#14

0

显示文件

文件： api.py 项目： lijielife/indra

def reach_process_text():
    """Process text with REACH and return INDRA Statements.

    Reads a UTF-8 JSON request body, passes its 'text' entry to REACH and
    returns ``{'statements': [...]}``; the list is empty when the processor
    or its statement list is falsy.
    """
    payload = json.loads(request.body.read().decode('utf-8'))
    processor = reach.process_text(payload.get('text'))
    if processor and processor.statements:
        statements_json = stmts_to_json(processor.statements)
    else:
        statements_json = []
    return {'statements': statements_json}

示例#15

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_hgnc_from_up():
    """Both agents of 'MEK1 phosphorylates ERK2.' carry HGNC and UP refs."""
    for mode in offline_modes:
        processor = reach.process_text('MEK1 phosphorylates ERK2.',
                                       offline=mode)
        assert len(processor.statements) == 1
        stmt = processor.statements[0]
        (enzyme, substrate) = stmt.agent_list()
        # (agent, expected name, expected HGNC id, expected UniProt id)
        expectations = (
            (enzyme, 'MAP2K1', '6840', 'Q02750'),
            (substrate, 'MAPK1', '6871', 'P28482'),
        )
        for agent, name, hgnc_id, up_id in expectations:
            assert agent.name == name
            assert agent.db_refs['HGNC'] == hgnc_id
            assert agent.db_refs['UP'] == up_id
        assert unicode_strs(processor.statements)

示例#16

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_multiple_enzymes():
    """'MEK1 and MEK2 phosphorylate ERK1.' yields two statements.

    The enzymes must be MAP2K1 and MAP2K2 (in either order) and both
    substrates MAPK3.
    """
    for mode in offline_modes:
        processor = reach.process_text('MEK1 and MEK2 phosphorylate ERK1.',
                                       offline=mode)
        assert len(processor.statements) == 2
        first = processor.statements[0]
        # Whichever enzyme came first, the second statement holds the other.
        other_enzyme = 'MAP2K2' if first.enz.name == 'MAP2K1' else 'MAP2K1'
        assert processor.statements[1].enz.name == other_enzyme
        assert first.sub.name == 'MAPK3'
        second = processor.statements[1]
        assert second.sub.name == 'MAPK3'
        assert unicode_strs(processor.statements)

示例#17

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_hgnc_from_up():
    """Check names and HGNC / UP references for 'MEK1 phosphorylates ERK2.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK1 phosphorylates ERK2.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        phos = result.statements[0]
        (kinase, target) = phos.agent_list()
        # First agent
        assert kinase.name == 'MAP2K1'
        assert kinase.db_refs['HGNC'] == '6840'
        assert kinase.db_refs['UP'] == 'Q02750'
        # Second agent
        assert target.name == 'MAPK1'
        assert target.db_refs['HGNC'] == '6871'
        assert target.db_refs['UP'] == 'P28482'
        assert unicode_strs(result.statements)

示例#18

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_multiple_enzymes():
    """Check the two statements read from 'MEK1 and MEK2 phosphorylate ERK1.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK1 and MEK2 phosphorylate ERK1.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 2
        stmt = result.statements[0]
        # The two enzymes may come in either order.
        if stmt.enz.name != 'MAP2K1':
            assert result.statements[1].enz.name == 'MAP2K1'
        else:
            assert result.statements[1].enz.name == 'MAP2K2'
        assert stmt.sub.name == 'MAPK3'
        stmt = result.statements[1]
        assert stmt.sub.name == 'MAPK3'
        assert unicode_strs(result.statements)

示例#19

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_process_unicode():
    """Statements read from text with a \\U0001F4A9 escape pass unicode_strs."""
    for mode in offline_modes:
        processor = reach.process_text('MEK1 binds ERK2\U0001F4A9.',
                                       offline=mode)
        assert unicode_strs(processor.statements)

示例#20

0

显示文件

    # When True, papers are read again even if a .json output already exists
    # (see the check in the reading loop below).
    rerun = False

    # Download the papers if they are not available yet
    for pmcid in pmc_ids:
        prefix = folder + "/" + pmcid
        if not have_file(prefix + ".nxml") and not have_file(prefix + ".txt"):
            txt, txt_format = get_full_text(pmcid)
            # "nxml" content is stored as .nxml, every other format as .txt
            if txt_format == "nxml":
                fname = prefix + ".nxml"
            else:
                fname = prefix + ".txt"
            # NOTE(review): encoded bytes are written to a text-mode handle;
            # that works on Python 2 (which the print statement below
            # requires) but raises TypeError on Python 3.
            with open(fname, "wt") as fh:
                fh.write(txt.encode("utf-8"))

    # Read each paper if it hasn't been read yet.
    # Otherwise use the existing json extractions.
    for pmcid, pmid in zip(pmc_ids, pmids):
        prefix = folder + "/" + pmcid
        print "Processing %s..." % pmcid
        # If REACH already processed it then don't run it again
        if rerun or not have_file(prefix + ".json"):
            if have_file(prefix + ".txt"):
                # NOTE(review): this file handle is never closed explicitly.
                txt = open(prefix + ".txt").read().decode("utf-8")
                rp = reach.process_text(txt, citation=pmid, offline=True)
            elif have_file(prefix + ".nxml"):
                rp = reach.process_nxml_file(prefix + ".nxml", citation=pmid, offline=True)
            # NOTE(review): also reached when neither the .txt nor the .nxml
            # file exists; rp is then not assigned in this iteration.
            # Presumably reach_output.json is written by the REACH call above
            # into the working directory -- confirm.
            shutil.move("reach_output.json", prefix + ".json")
        else:
            rp = reach.process_json_file(prefix + ".json", citation=pmid)
        run_assembly(rp.statements, folder, pmcid)

示例#21

0

显示文件

文件： run_extension.py 项目： jmuhlich/indra

import sys
import pickle
from indra import reach
from indra.assemblers import GraphAssembler

# Read the input text; the context manager closes the file right after reading.
with open('extension.txt', 'rt') as fh:
    txt = fh.read()

# Extract statements from the text with REACH (offline=True).
rp = reach.process_text(txt, offline=True)
st = rp.statements
for s in st:
    # Single-argument print(...) prints the same line on Python 2 and 3.
    print('%s\t%s' % (s, s.evidence[0].text))

# Store the extracted statements for later reuse.
with open('extension.pkl', 'wb') as fh:
    pickle.dump(st, fh)

# Assemble the statements into a graph and save it as .dot and .pdf.
graphpr = {'rankdir': 'TD'}
nodepr = {'fontsize': 12, 'shape': 'plaintext', 'margin': '0,0', 'pad': 0}
ga = GraphAssembler(st, graph_properties=graphpr, node_properties=nodepr)
ga.make_model()
ga.save_dot('jnk_extension.dot')
ga.save_pdf('jnk_extension.pdf')

示例#22

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_process_unicode():
    """Check unicode_strs on statements read from text with a non-BMP escape.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK1 binds ERK2\U0001F4A9.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert unicode_strs(result.statements)

示例#23

0

显示文件

def test_phosphorylate():
    """'MEK1 phosphorylates ERK2.' yields one statement: MAP2K1 -> MAPK1."""
    processor = reach.process_text('MEK1 phosphorylates ERK2.')
    assert len(processor.statements) == 1
    stmt = processor.statements[0]
    assert stmt.enz.name == 'MAP2K1'
    assert stmt.sub.name == 'MAPK1'

示例#24

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_activity():
    """'MEK1 activates ERK2.' yields exactly one statement in every mode."""
    for mode in offline_modes:
        processor = reach.process_text('MEK1 activates ERK2.', offline=mode)
        assert len(processor.statements) == 1
        assert unicode_strs(processor.statements)

示例#25

0

显示文件

def test_activate():
    """'HRAS activates BRAF.' yields one statement with subj HRAS, obj BRAF."""
    processor = reach.process_text('HRAS activates BRAF.')
    assert len(processor.statements) == 1
    stmt = processor.statements[0]
    assert stmt.subj.name == 'HRAS'
    assert stmt.obj.name == 'BRAF'

示例#26

0

显示文件

def test_bind():
    """'MEK1 binds ERK2.' yields exactly one statement."""
    processor = reach.process_text('MEK1 binds ERK2.')
    assert len(processor.statements) == 1

示例#27

0

显示文件

def test_activity():
    """'MEK1 activates ERK2.' yields exactly one statement."""
    processor = reach.process_text('MEK1 activates ERK2.')
    assert len(processor.statements) == 1

示例#28

0

显示文件

文件： test_reach.py 项目： jmuhlich/indra

def test_activity():
    """Check the statement count for 'MEK1 activates ERK2.'.

    Runs once per entry of ``offline_modes``.
    """
    sentence = 'MEK1 activates ERK2.'
    for offline_flag in offline_modes:
        result = reach.process_text(sentence, offline=offline_flag)
        assert len(result.statements) == 1
        assert unicode_strs(result.statements)

示例#29

0

显示文件

    # Download every paper that has neither a .nxml nor a .txt file yet, and
    # collect the PMID looked up for each PMC ID.
    for pmcid in pmc_ids:
        prefix = folder + '/' + pmcid
        if not have_file(prefix + '.nxml') and\
           not have_file(prefix + '.txt'):
            txt, txt_format = get_full_text(pmcid)
            # 'nxml' content is stored as .nxml, every other format as .txt
            if txt_format == 'nxml':
                fname = prefix + '.nxml'
            else:
                fname = prefix + '.txt'
            # NOTE(review): encoded bytes are written to a text-mode handle;
            # that works on Python 2 (which the print statement below
            # requires) but raises TypeError on Python 3.
            with open(fname, 'wt') as fh:
                fh.write(txt.encode('utf-8'))
        # The PMID is used as the citation in the reading loop below.
        pmids.append(id_lookup(pmcid)['pmid'])


    # Read each paper if it hasn't been read yet.
    # Otherwise use the existing json extractions.
    for pmcid, pmid in zip(pmc_ids, pmids):
        prefix = folder + '/' + pmcid
        print 'Processing %s...' % pmcid
        # If REACH already processed it then don't run it again
        if rerun or not have_file(prefix + '.json'):
            if have_file(prefix + '.txt'):
                # NOTE(review): this file handle is never closed explicitly.
                txt = open(prefix + '.txt').read().decode('utf-8')
                rp = reach.process_text(txt, citation=pmid)
            elif have_file(prefix + '.nxml'):
                rp = reach.process_nxml_file(prefix + '.nxml', citation=pmid)
            # NOTE(review): also reached when neither the .txt nor the .nxml
            # file exists; rp is then not assigned in this iteration.
            # Presumably reach_output.json is written by the REACH call above
            # into the working directory -- confirm.
            shutil.move('reach_output.json', prefix + '.json')
        else:
            rp = reach.process_json_file(prefix + '.json', citation=pmid)
        run_assembly(rp.statements, folder, pmcid)