Example #1
def dump_local(self, base_folder):
    """Dump assembled corpus into local files."""
    corpus_folder = os.path.join(base_folder, self.corpus_id)
    os.makedirs(corpus_folder, exist_ok=True)
    stmts_to_json_file(self.assembled_stmts,
                       os.path.join(corpus_folder, 'statements.json'),
                       format='jsonl')
    with open(os.path.join(corpus_folder, 'metadata.json'), 'w') as fh:
        # json.dump takes the object first, then the file handle
        json.dump(self.metadata, fh)
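A minimal usage sketch, assuming `corpus` is an instance of the (not shown) corpus class that defines `dump_local` along with `corpus_id`, `assembled_stmts`, and `metadata` attributes:

# Hypothetical call; the class definition is not part of this snippet
corpus.dump_local('output')
# Writes output/<corpus_id>/statements.json (JSON Lines format) and
# output/<corpus_id>/metadata.json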
Example #2
        #stmts = load_eidos()
        #stmts = ac.filter_by_type(stmts, Influence)
        #remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN', 'PROPS',
        #                          'INTERVENTIONS'])
        ont = load_world_ontology(ont_url)
        if key != 'no_regrounding':
            stmts = reground_stmts(stmts, ont, 'WM', None, True)

        scorer = get_eidos_scorer()

        matches_fun, refinement_fun = None, None
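        # run_preassembly deduplicates statements, finds refinement
        # relations among them, and calculates belief scores using the
        # given scorer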
        assembled_stmts = ac.run_preassembly(stmts,
                                             belief_scorer=scorer,
                                             matches_fun=matches_fun,
                                             refinement_fun=refinement_fun,
                                             normalize_equivalences=True,
                                             normalize_opposites=True,
                                             normalize_ns='WM',
                                             ontology=ont,
                                             return_toplevel=False,
                                             poolsize=4)
        print('-----Finished assembly-----')
        remove_raw_grounding(assembled_stmts)
        corpus_name = 'eidos-regrounding-20191214-%s' % key
        fname = os.path.join('.', corpus_name + '.json')
        stmts_to_json_file(assembled_stmts,
                           fname,
                           matches_fun=matches_fun)
        corpus = Corpus(corpus_name, assembled_stmts, raw_statements=stmts)
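        # s3_put uploads the corpus to S3 (assumes AWS credentials are
        # configured in the environment)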
        corpus.s3_put()
Example #3
from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach
tp = reach.process_pmc('PMC4455820', url=reach.local_nxml_url)
if tp:
    stmts = tp.statements
    print(stmts)
    stmts = ac.filter_grounded_only(stmts)  # Filter out ungrounded agents
    stmts = ac.run_preassembly(
        stmts,  # Run preassembly
        return_toplevel=False,
        normalize_equivalences=True,  # Optional: rewrite equivalent groundings to one standard
        normalize_opposites=True,  # Optional: rewrite opposite groundings to one standard
        normalize_ns='WM')  # Use 'WM' namespace for equivalences and opposites
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    stmts_to_json_file(stmts, 'PMC4455820.json')
    ha = HtmlAssembler(stmts)
    ha.save_model('PMC4455820.html')

Example #4
# Attempt to combine many statements

from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach

pmcids = ["PMC3717945", "PMC5906628"]
stmts = []

for pmcid in pmcids:
    tp = reach.process_pmc(pmcid)
    if tp:  # process_pmc returns None if reading fails
        stmts += tp.statements

stmts = ac.filter_grounded_only(stmts)  # Filter out ungrounded agents
stmts = ac.run_preassembly(
    stmts,  # Run preassembly
    return_toplevel=False,
    normalize_equivalences=True,  # Optional: rewrite equivalent groundings to one standard
    normalize_opposites=True,  # Optional: rewrite opposite groundings to one standard
    normalize_ns='WM')  # Use 'WM' namespace for equivalences and opposites
stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
stmts_to_json_file(stmts, 'bigresults.json')
ha = HtmlAssembler(stmts)
ha.save_model('bigresults.html')
Example #5
def export_json(statements, fname):
    """Export statements into JSON."""
    stmts_to_json_file(statements, fname)
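A minimal usage sketch; `stmts` here stands for any list of INDRA Statements, e.g. the output of one of the assembly pipelines above, and the filename is hypothetical:

export_json(stmts, 'exported_statements.json')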
Example #6
    # Querying for and assembling statements
    all_stmts = []
    for db_ns, db_id, name in groundings:
        if db_id in black_list:
            print('Skipping %s in black list' % name)
            continue
        print('Looking up %s' % name)
        db_stmts = get_db_stmts_by_grounding(db_ns, db_id)
        tas_stmts = get_tas_stmts(db_ns, db_id) if db_ns == 'HGNC' else []
        stmts = db_stmts + tas_stmts
        stmts = ac.filter_by_curation(stmts, db_curations)
        stmts = reground_stmts(stmts, grounding_map, misgrounding_map)
        all_stmts += stmts
    all_stmts = make_unique_hashes(all_stmts)
    all_stmts = ac.run_preassembly(all_stmts)
    ########################################

    # Dump results
    with open('disease_map_indra_stmts_full.pkl', 'wb') as fh:
        pickle.dump(all_stmts, fh)

    stmts_to_json_file(all_stmts, 'disease_map_indra_stmts_full.json')

    filtered_stmts = filter_prior_all(all_stmts, groundings)
    with open('disease_map_indra_stmts_filtered.pkl', 'wb') as fh:
        pickle.dump(filtered_stmts, fh)

    stmts_to_json_file(filtered_stmts, 'disease_map_indra_stmts_filtered.json')
    ##################
Example #7
from indra.statements import stmts_to_json_file
import indra.tools.assemble_corpus as ac
from assemble_model import process_eidos, assemble_stmts

if __name__ == '__main__':
    stmts = process_eidos()
    stmts_to_json_file(stmts, 'eidos_500m_raw.json')
    stmts = assemble_stmts(stmts)
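    # merge_groundings and merge_deltas consolidate grounding and
    # (for Influence statements) polarity/adjective information across
    # statements grouped during assembly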
    stmts = ac.merge_groundings(stmts)
    stmts = ac.merge_deltas(stmts)
    stmts = ac.standardize_names_groundings(stmts)
    stmts_to_json_file(stmts, 'eidos_500m_assembled.json')
Example #8
                                '@id': doc_id
                            }
                        }]
                    else:
                        prov = ev.annotations['provenance'][0]['document']
                        prov['@id'] = doc_id
            stmts += pp.statements
        if grounding == 'compositional':
            validate_grounding_format(stmts)

    ap = AssemblyPipeline.from_json_file('assembly_%s.json' % grounding)
    assembled_stmts = ap.run(stmts)

    if do_upload:
        corpus_id = 'compositional_v4'
        stmts_to_json_file(assembled_stmts, '%s.json' % corpus_id)

        meta_data = {
            'corpus_id': corpus_id,
            'description': ('Assembly of 4 reader outputs with the '
                            'compositional ontology (%s).' % ont_url),
            'display_name': 'Compositional ontology assembly v3',
            'readers': readers,
            'assembly': {
                'level': 'grounding',
                'grounding_threshold': 0.6,
            },
            'num_statements':
Example #9
    all_stmts = []
    for db_ns, db_id, name in tqdm.tqdm(groundings):
        if db_id in black_list:
            print('Skipping %s in black list' % name)
            continue
        print('Looking up %s' % name)
        db_stmts = get_db_stmts_by_grounding(db_ns, db_id)
        tas_stmts = get_tas_stmts(db_ns, db_id) if db_ns == 'HGNC' else []
        stmts = db_stmts + tas_stmts
        stmts = ac.filter_by_curation(stmts, db_curations)
        stmts = reground_stmts(stmts, grounding_map, misgrounding_map)
        all_stmts += stmts
    all_stmts = make_unique_hashes(all_stmts)
    all_stmts = ac.run_preassembly(all_stmts)
    ########################################

    # Dump results
    with open(f'disease_map_indra_stmts_full_{version}.pkl', 'wb') as fh:
        pickle.dump(all_stmts, fh)

    stmts_to_json_file(all_stmts,
                       f'disease_map_indra_stmts_full_{version}.json')

    filtered_stmts = filter_prior_all(all_stmts, groundings)
    with open(f'disease_map_indra_stmts_filtered_{version}.pkl', 'wb') as fh:
        pickle.dump(filtered_stmts, fh)

    stmts_to_json_file(filtered_stmts,
                       f'disease_map_indra_stmts_filtered_{version}.json')
    ##################
Example #10
def do_regrounding(stmts):
    concepts = []
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept_txt = concept.db_refs.get('TEXT')
            concepts.append(concept_txt)
    groundings = er.reground_texts(concepts)
    # Update the corpus with new groundings
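    # Note: this second pass must visit agents in the same order as the
    # collection pass above so that groundings[idx] lines up with each concept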
    idx = 0
    for stmt in stmts:
        for concept in stmt.agent_list():
            concept.db_refs['UN'] = groundings[idx]
            idx += 1
    return stmts


if __name__ == '__main__':
    config = load_config()
    fnames = config['files']
    for fname in fnames:
        print('Processing %s' % fname)
        hp = hume.process_jsonld_file(fname)
        parts = fname.split('/')
        new_fname = '%s_%s' % (parts[-2], parts[-1])
        new_fname = new_fname.replace('json-ld', 'json')
        print('Running regrounding')
        stmts = do_regrounding(hp.statements)
        print('Saving to JSON')
        stmts_to_json_file(stmts, new_fname)