Exemplo n.º 1
0
def run_reading(text_contents, cached=True):
    """Run REACH over a set of text contents and gather the statements.

    Parameters
    ----------
    text_contents : dict
        Maps an identifier to the text content to read. The identifier
        is also used to name the JSON output file under NEW_REACH_PATH.
    cached : bool
        If True and the JSON output file for an identifier already
        exists, that file is processed instead of reading the content
        again. Default: True

    Returns
    -------
    dict
        Maps each identifier for which a processor was obtained to the
        statements of that processor.
    """
    organism_preference = None
    collected = {}
    for text_id, content in text_contents.items():
        print('Reading %s' % text_id)
        json_fname = os.path.join(NEW_REACH_PATH, '%s.json' % text_id)
        if cached and os.path.exists(json_fname):
            # Reuse the output of an earlier reading run
            processor = reach.process_json_file(json_fname)
        elif content.startswith('<!DOCTYPE'):
            # Content starting with a DOCTYPE is handed over as NXML
            processor = reach.process_nxml_str(
                content,
                url=reach.local_nxml_url,
                output_fname=json_fname,
                organism_priority=organism_preference)
        else:
            processor = reach.process_text(
                content,
                url=reach.local_text_url,
                output_fname=json_fname,
                organism_priority=organism_preference)
        # Entries for which no processor was produced are left out
        if processor is None:
            continue
        collected[text_id] = processor.statements
    return collected
Exemplo n.º 2
0
def process_paper(model_name, pmid):
    """Process a paper with REACH, reusing an existing JSON output if any.

    Parameters
    ----------
    model_name : str
        Base directory; outputs are looked up and stored in its 'jsons'
        sub-folder as 'PMID<pmid>.json'.
    pmid : str
        The PMID of the paper to process.

    Returns
    -------
    rp
        The processor returned by the reach module, or None if the text
        could not be obtained or its format is not handled here.
    txt_format : str
        'existing_json' if a stored output was reused, otherwise the
        format reported by get_full_text; None if getting the text
        raised an exception.
    """
    json_path = os.path.join(model_name, 'jsons', 'PMID%s.json' % pmid)

    if pmid.startswith(('api', 'PMID')):
        logger.warning('Invalid PMID: %s', pmid)
    # If the paper has been read, use the json output file
    if os.path.exists(json_path):
        rp = reach.process_json_file(json_path, citation=pmid)
        txt_format = 'existing_json'
    # If the paper has not been read, download the text and read
    else:
        try:
            txt, txt_format = get_full_text(pmid, 'pmid')
        # Only swallow regular errors; a bare except would also hide
        # KeyboardInterrupt and SystemExit.
        except Exception:
            return None, None

        was_read = True
        if txt_format == 'pmc_oa_xml':
            rp = reach.process_nxml_str(txt, citation=pmid, offline=True)
        elif txt_format == 'elsevier_xml':
            # Extract the raw text from the Elsevier XML
            txt = elsevier_client.extract_text(txt)
            rp = reach.process_text(txt, citation=pmid, offline=True)
        elif txt_format == 'abstract':
            rp = reach.process_text(txt, citation=pmid, offline=True)
        else:
            rp = None
            was_read = False
        # After a reading attempt, store the output file (if one was
        # written to the working directory) where the next call finds it
        if was_read and os.path.exists('reach_output.json'):
            shutil.move('reach_output.json', json_path)
    if rp is not None:
        check_pmids(rp.statements)
    return rp, txt_format
Exemplo n.º 3
0
def test_phosphorylation_regulation():
    """Check the statement extracted from the 'reach_reg_phos.json' fixture.

    The fixture is expected to yield exactly one Phosphorylation
    statement whose substrate carries no modifications.
    """
    fixture = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'reach_reg_phos.json')
    processor = reach.process_json_file(fixture)
    assert processor is not None
    assert len(processor.statements) == 1
    only_stmt = processor.statements[0]
    assert isinstance(only_stmt, Phosphorylation), only_stmt
    assert not only_stmt.sub.mods
Exemplo n.º 4
0
def test_amount_embedded_in_activation():
    """Check the statement extracted from the 'reach_act_amt.json' fixture.

    The fixture is expected to yield exactly one IncreaseAmount
    statement with both a subject and an object set.
    """
    fixture = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'reach_act_amt.json')
    processor = reach.process_json_file(fixture)
    assert processor is not None
    assert len(processor.statements) == 1
    assert isinstance(processor.statements[0], IncreaseAmount)
    assert processor.statements[0].subj is not None
    assert processor.statements[0].obj is not None
Exemplo n.º 5
0
def process_paper(model_name, pmid):
    """Read the paper with the given PMID, reusing stored output if present.

    Parameters
    ----------
    model_name : str
        The directory for the INDRA machine; REACH outputs are kept in
        its 'jsons' sub-folder.
    pmid : str
        The PMID to process.

    Returns
    -------
    rp : ReachProcessor
        A ReachProcessor containing the extracted INDRA Statements
        in rp.statements, or None if the text could not be obtained or
        its format is not handled.
    txt_format : str
        A string representing the format of the text; 'existing_json'
        when a stored output was reused, None when getting the text
        raised an exception.
    """
    json_path = os.path.join(model_name, 'jsons', 'PMID%s.json' % pmid)

    if pmid.startswith(('api', 'PMID')):
        logger.warning('Invalid PMID: %s' % pmid)

    if os.path.exists(json_path):
        # The paper was read before: load the stored output
        processor = reach.process_json_file(json_path, citation=pmid)
        txt_format = 'existing_json'
    else:
        # The paper was not read yet: get the text and read it
        try:
            txt, txt_format = get_full_text(pmid, 'pmid')
        except Exception:
            return None, None

        # Keyword arguments shared by every reading call below
        read_kwargs = dict(citation=pmid, offline=True,
                           output_fname=json_path)
        if txt_format == 'pmc_oa_xml':
            processor = reach.process_nxml_str(txt, **read_kwargs)
        elif txt_format in ('elsevier_xml', 'abstract'):
            if txt_format == 'elsevier_xml':
                # Extract the raw text from the Elsevier XML
                txt = elsevier_client.extract_text(txt)
            processor = reach.process_text(txt, **read_kwargs)
        else:
            processor = None

    if processor is not None:
        check_pmids(processor.statements)
    return processor, txt_format
Exemplo n.º 6
0
def test_agent_coordinates():
    """Check agent annotations on the first preassembled statement.

    Statements from the 'reach_coordinates.json' fixture are combined
    with a Preassembler; every evidence of the first resulting statement
    must carry the expected raw agent texts, and the set of agent
    coordinates across evidences must match the expected pairs.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    fixture = os.path.join(here, 'reach_coordinates.json')
    processor = reach.process_json_file(fixture)
    preassembler = Preassembler(bio_ontology, processor.statements)
    first_unique = preassembler.combine_duplicates()[0]
    annotations = [evidence.annotations['agents']
                   for evidence in first_unique.evidence]
    assert all(annot['raw_text'] == ['MEK1', 'ERK2']
               for annot in annotations)
    observed_coords = {tuple(annot['coords']) for annot in annotations}
    assert observed_coords == {((21, 25), (0, 4)), ((0, 4), (15, 19))}
Exemplo n.º 7
0
def test_conversions():
    """Check the statement extracted from the 'reach_conversion.json' fixture.

    The fixture is expected to yield exactly one statement with subject
    'ACE' that has a single 'angiotensin-I' in obj_from and
    'angiotensin-II' as the first entry of obj_to.
    """
    fixture = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                           'reach_conversion.json')
    processor = reach.process_json_file(fixture)
    assert processor is not None
    assert len(processor.statements) == 1
    conversion = processor.statements[0]
    assert conversion.subj.name == 'ACE'
    assert len(conversion.obj_from) == 1
    assert conversion.obj_from[0].name == 'angiotensin-I'
    assert conversion.obj_to[0].name == 'angiotensin-II'
Exemplo n.º 8
0
def test_agent_coordinates():
    """Check agent annotations on the first preassembled statement.

    Statements from the 'reach_coordinates.json' fixture are combined
    with a Preassembler; every evidence of the first resulting statement
    must carry the expected raw agent texts, and the set of agent
    coordinates across evidences must match the expected pairs.
    """
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        'reach_coordinates.json')
    stmts = reach.process_json_file(path).statements
    pa = Preassembler(hierarchies, stmts)
    unique_stmt = pa.combine_duplicates()[0]
    # Iterate over the evidence list that was looked up once, rather
    # than leaving an unused local and fetching the attribute again
    evidence_list = unique_stmt.evidence
    agent_annots = [ev.annotations['agents'] for ev in evidence_list]
    assert all(a['raw_text'] == ['MEK1', 'ERK2'] for a in agent_annots)
    assert {tuple(a['coords']) for a in agent_annots} == {((21, 25), (0, 4)),
                                                          ((0, 4), (15, 19))}
Exemplo n.º 9
0
def process_paper(model_name, pmid):
    """Process a paper with the given pubmed identifier

    Parameters
    ----------
    model_name : str
        The directory for the INDRA machine
    pmid : str
        The PMID to process.

    Returns
    -------
    rp : ReachProcessor
        A ReachProcessor containing the extracted INDRA Statements
        in rp.statements. None if the text could not be obtained or
        its format is not handled.
    txt_format : str
        A string representing the format of the text. 'existing_json'
        if a stored output was reused; None if getting the text raised
        an exception.
    """
    json_directory = os.path.join(model_name, 'jsons')
    json_path = os.path.join(json_directory, 'PMID%s.json' % pmid)

    if pmid.startswith(('api', 'PMID')):
        logger.warning('Invalid PMID: %s', pmid)
    # If the paper has been read, use the json output file
    if os.path.exists(json_path):
        rp = reach.process_json_file(json_path, citation=pmid)
        txt_format = 'existing_json'
    # If the paper has not been read, download the text and read
    else:
        try:
            txt, txt_format = get_full_text(pmid, 'pmid')
        except Exception as err:
            # Still best-effort, but leave a trace of why the paper was
            # skipped instead of swallowing the error silently
            logger.warning('Could not get full text for PMID %s: %s',
                           pmid, err)
            return None, None

        if txt_format == 'pmc_oa_xml':
            rp = reach.process_nxml_str(txt, citation=pmid, offline=True,
                                        output_fname=json_path)
        elif txt_format == 'elsevier_xml':
            # Extract the raw text from the Elsevier XML
            txt = elsevier_client.extract_text(txt)
            rp = reach.process_text(txt, citation=pmid, offline=True,
                                    output_fname=json_path)
        elif txt_format == 'abstract':
            rp = reach.process_text(txt, citation=pmid, offline=True,
                                    output_fname=json_path)
        else:
            rp = None
    if rp is not None:
        check_pmids(rp.statements)
    return rp, txt_format
Exemplo n.º 10
0
 def process(organism_priority, expected_up_id):
     """Process the JSON file with the given organism priority and assert
     that the first statement's subject has the expected 'UPPRO' ref.

     Note: test_file is not defined here; it is taken from the
     surrounding scope.
     """
     processor = reach.process_json_file(
         test_file, organism_priority=organism_priority)
     uppro_id = processor.statements[0].subj.db_refs['UPPRO']
     # The found ID is the assertion message so a mismatch shows it
     assert uppro_id == expected_up_id, uppro_id