Exemplo n.º 1
0
def extract_eidos_text(docnames):
    """Collect the non-redundant Eidos evidence sentences of each document.

    For every name in ``docnames`` the Eidos JSON-LD file
    ``eidos/<docname>.txt.jsonld`` is processed, and the text of every
    evidence of every resulting statement is gathered with newlines
    replaced by spaces. Sentences that are exact repeats of, or are
    contained in, another sentence of the same document are then dropped.

    Parameters
    ----------
    docnames : iterable of str
        Document names used to build the JSON-LD file paths.

    Returns
    -------
    dict
        Maps each document name to the list of its remaining evidence
        sentences, in order of first appearance.
    """
    texts = {}
    for docname in docnames:
        print(docname)
        # Only the Eidos output is needed here; the raw document text is
        # never used, so it is not read.
        json_fname = 'eidos/%s.txt.jsonld' % docname
        ep = eidos.process_json_ld_file(json_fname)
        sentences = [ev.text.replace('\n', ' ')
                     for stmt in ep.statements
                     for ev in stmt.evidence]
        texts[docname] = _drop_redundant_sentences(sentences)
    return texts


def _drop_redundant_sentences(sentences):
    """Return sentences minus repeats and ones contained in another one."""
    # Collapse exact repeats first, keeping the first occurrence. Removing
    # one copy per matching pair would delete every copy once a sentence
    # occurs three or more times.
    seen = set()
    unique = []
    for sentence in sentences:
        if sentence not in seen:
            seen.add(sentence)
            unique.append(sentence)
    # A sentence is redundant if it is a substring of a different sentence.
    return [sentence for sentence in unique
            if not any(sentence != other and sentence in other
                       for other in unique)]
Exemplo n.º 2
0
def text_to_stmts(text):
    """Run Eidos reading on a given text and return INDRA Statements.

    The Eidos output is cached in the current working directory in a file
    named after the text (spaces and commas replaced by underscores, with
    a ``.jsonld`` extension). If that file already exists it is processed
    instead of reading the text again.

    Parameters
    ----------
    text : str
        The text to read. It is also used to derive the cache file name.

    Returns
    -------
    list
        The ``statements`` of the Eidos processor for this text.
    """
    # We use some caching here so that sentences we have already read
    # are not re-read.
    fname = text.replace(' ', '_').replace(',', '_') + '.jsonld'
    if os.path.exists(fname):
        ep = eidos.process_json_ld_file(fname)
    else:
        # Let Eidos write its output straight to the cache file rather
        # than moving the shared default output file afterwards, which
        # other calls running in the same directory could overwrite.
        ep = eidos.process_text(text, save_json=fname)
    return ep.statements
Exemplo n.º 3
0
def read_eidos(docnames):
    """Return the Eidos statements extracted from the given documents.

    For each document name, the cached JSON-LD output
    ``eidos/<docname>.txt.jsonld`` is processed if it exists. Otherwise
    ``docs/<docname>.txt`` is read and passed to Eidos, which is asked to
    save its JSON-LD output at the cache path.

    Parameters
    ----------
    docnames : iterable of str
        Document names used to build the text and JSON-LD file paths.

    Returns
    -------
    list
        The statements of all documents, in the order of ``docnames``.
    """
    stmts = []
    for docname in docnames:
        fname = os.path.join('docs', '%s.txt' % docname)
        jsonname = os.path.join('eidos', '%s.txt.jsonld' % docname)
        if os.path.exists(jsonname):
            ep = eidos.process_json_ld_file(jsonname)
        else:
            with open(fname, 'r') as fh:
                print('Reading %s' % docname)
                txt = fh.read()
            ep = eidos.process_text(txt,
                                    save_json=jsonname,
                                    out_format='json_ld')
        print('%d stmts from %s' % (len(ep.statements), docname))
        # Set the PMID on these statements so that we can get the document ID
        # during assembly. Every evidence is tagged, so a statement without
        # evidence does not raise and none is left without a document ID.
        for stmt in ep.statements:
            for ev in stmt.evidence:
                ev.pmid = docname
        stmts.extend(ep.statements)
    return stmts
Exemplo n.º 4
0
def test_process_json_ld_file():
    """Check the statement extracted from the test JSON-LD file.

    Exactly one statement is expected, and both its subject and its
    object must have a 'UN' entry in their db_refs.
    """
    processor = eidos.process_json_ld_file(test_jsonld)
    statements = processor.statements
    assert len(statements) == 1
    only_stmt = statements[0]
    assert 'UN' in only_stmt.subj.db_refs
    assert 'UN' in only_stmt.obj.db_refs