def assemble_one_corpus():
    """For assembling one of the four corpora."""
    path = '/home/bmg16/data/wm/2-Jsonld'
    corpus_size = '16k'
    prefix = '%s%s' % (path, corpus_size)
    # For large corpus
    fnames = glob.glob('%s/*.jsonld' % prefix)

    all_statements = []
    for idx, fname in enumerate(fnames):
        ep = eidos.process_json_file(fname)
        doc_id = os.path.basename(fname)
        # Stamp every evidence with the document it was extracted from so
        # provenance survives the merge into a single statement list.
        for stmt in ep.statements:
            for ev in stmt.evidence:
                ev.annotations['provenance'][0]['document']['@id'] = doc_id
        all_statements.extend(ep.statements)
        print('%d: %d' % (idx, len(all_statements)))

    # Persist the raw (pre-assembly) statements as a pickle.
    with open('%s/3-Indra%s.pkl' % (prefix, corpus_size), 'wb') as fh:
        pickle.dump(all_statements, fh)

    # Preassemble with the Eidos-specific belief scorer and dump JSON.
    scorer = get_eidos_scorer()
    assembled_stmts = ac.run_preassembly(all_statements,
                                         belief_scorer=scorer,
                                         return_toplevel=False)
    jd = stmts_to_json(assembled_stmts, use_sbo=False)
    with open('%s/3-Indra%s.json' % (prefix, corpus_size), 'w') as fh:
        json.dump(jd, fh, indent=1)
def process_args(self, args_json):
    """Deserialize JSON-encoded pipeline arguments into Python objects.

    Known keys are replaced in place with their resolved values
    (statement classes, pipeline functions, Curation tuples, scorers,
    ontologies, or tuple-ified modification lists); unknown keys are
    left untouched. The mutated dict is also returned.
    """
    for key in args_json:
        value = args_json[key]
        if key == 'stmt_type':
            args_json[key] = get_statement_by_name(value)
        elif key in ('matches_fun', 'refinement_fun'):
            args_json[key] = pipeline_functions[value]
        elif key == 'curations':
            Curation = namedtuple('Curation',
                                  ['pa_hash', 'source_hash', 'tag'])
            args_json[key] = [
                Curation(cur['pa_hash'], cur['source_hash'], cur['tag'])
                for cur in value]
        elif key == 'belief_scorer':
            # Only the world-modelers scorer is supported by name.
            args_json[key] = get_eidos_scorer() if value == 'wm' else None
        elif key == 'ontology':
            args_json[key] = world_ontology if value == 'wm' \
                else bio_ontology
        elif key in ('whitelist', 'mutations'):
            # JSON has no tuples; convert each inner mod list back.
            args_json[key] = {gene: [tuple(mod) for mod in mods]
                              for gene, mods in value.items()}
    return args_json
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Sequentially applies a series of filters and assembly steps from
    ``assemble_corpus`` (``ac``), most of them gated by flags in
    ``self.assembly_config``; the final result is stored on
    ``self.assembled_stmts`` rather than returned.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    # Grounding mapping runs by default; the config key opts out.
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        # NOTE(review): return value discarded — presumably this mutates
        # the statements in place; confirm against assemble_corpus.
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        # score_threshold may be None, in which case the filter's own
        # default applies.
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    # Belief cutoff is applied before reducing to top-level statements.
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    if self.assembly_config.get('filter_direct'):
        # NOTE(review): the three curated filters below are grouped under
        # the filter_direct flag per the source's formatting — confirm
        # this grouping against the upstream repository.
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    if self.assembly_config.get('mechanism_linking'):
        # Mechanism linking: infer/reduce activities and modifications,
        # then replace activations and require active forms.
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
def test_wm_scorer():
    """Check that the Eidos scorer retains non-Eidos source priors."""
    scorer = wm_scorer.get_eidos_scorer()
    ev = Evidence(source_api='eidos')
    stmt = Influence(Concept('a'), Concept('b'), evidence=[ev])
    # Make sure other sources are still in the map
    assert 'hume' in scorer.prior_probs['rand']
    assert 'biopax' in scorer.prior_probs['syst']
    engine = BeliefEngine(scorer)
    engine.set_prior_probs([stmt])
def assemble_stmts(stmts):
    """Preassemble statements with the Eidos belief scorer.

    Parameters
    ----------
    stmts : list
        INDRA Statements to preassemble.

    Returns
    -------
    list
        Top-level preassembled statements with evidence flattened from
        their supported_by descendants.
    """
    print('Running preassembly')
    # NOTE(review): the original also called get_wm_hierarchies() here but
    # never passed the result to run_preassembly; the unused call was
    # removed. If WM hierarchies should drive refinement, pass them in
    # explicitly as in the wm branch of run_assembly.
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=2)
    return stmts
def default_assembly(stmts):
    """Run the default WM assembly: preassembly plus grounding merges.

    Parameters
    ----------
    stmts : list
        INDRA Statements to assemble.

    Returns
    -------
    list
        Preassembled top-level statements with merged groundings and
        deltas and standardized names.
    """
    from indra.belief.wm_scorer import get_eidos_scorer
    # NOTE(review): the original imported get_wm_hierarchies and bound the
    # result to an unused local; both the call and the import were removed
    # since the hierarchies were never passed to run_preassembly.
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, belief_scorer=scorer,
                               return_toplevel=True,
                               flatten_evidence=True,
                               flatten_evidence_collect_from='supported_by',
                               poolsize=4)
    stmts = ac.merge_groundings(stmts)
    stmts = ac.merge_deltas(stmts)
    stmts = ac.standardize_names_groundings(stmts)
    return stmts
def test_readme_wm_pipeline():
    """Exercise the README's world-modelers assembly example."""
    from indra.tools import assemble_corpus as ac
    from indra.belief.wm_scorer import get_eidos_scorer
    from indra.ontology.world import world_ontology

    stmts = wm_raw_stmts
    # stmts = ac.filter_grounded_only(stmts)  # Does not work on test stmts
    scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts,
                               return_toplevel=False,
                               belief_scorer=scorer,
                               ontology=world_ontology,
                               normalize_opposites=True,
                               normalize_ns='WM')
    # Apply belief cutoff of e.g., 0.8
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
def run_preassembly():
    """Run preassembly on a list of INDRA Statements."""
    # CORS preflight: no body to process.
    if request.method == 'OPTIONS':
        return {}
    body = json.loads(request.body.read().decode('utf-8'))
    stmts = stmts_from_json(body.get('statements'))
    return_toplevel = body.get('return_toplevel')
    # 'wm' selects the Eidos scorer; anything else falls back to the
    # default scorer (None).
    if body.get('scorer') == 'wm':
        belief_scorer = get_eidos_scorer()
    else:
        belief_scorer = None
    stmts_out = ac.run_preassembly(stmts, belief_scorer=belief_scorer,
                                   return_toplevel=return_toplevel)
    return _return_stmts(stmts_out)
#'50': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \ # 'wm_fao/20181101/2-Jsonld50', '500': '/home/bmg16/Dropbox/postdoc/darpa/src/indra_apps/' + \ 'wm_fao/20181101/2-Jsonld500', '16k': '/home/bmg16/data/wm/2-Jsonld16k', } all_statements = [] for corpus_size, path in corpora.items(): fnames = glob.glob('%s/*.jsonld' % path) for idx, fname in enumerate(fnames): ep = eidos.process_json_file(fname) for stmt in ep.statements: for ev in stmt.evidence: ev.annotations['provenance'][0]['document']['@id'] = \ os.path.basename(fname) ev.annotations['provenance'][0]['document']['corpus'] = \ corpus_size all_statements += ep.statements print('%d: %d' % (idx, len(all_statements))) scorer = get_eidos_scorer() assembled_stmts = ac.run_preassembly(all_statements, belief_scorer=scorer, return_toplevel=False) jd = stmts_to_json(assembled_stmts, use_sbo=False) with open('3-Indra-merged-500-16k.json', 'w') as fh: json.dump(jd, fh, indent=1) # assemble_all()
from indra.tools import assemble_corpus as ac
from indra.sources.eidos import migration_table_processor as mtp
from indra.preassembler.hierarchy_manager import YamlHierarchyManager
from indra.preassembler.make_eidos_hume_ontologies import load_yaml_from_url, \
    rdf_graph_from_yaml
from indra.preassembler.custom_preassembly import location_time_delta_matches, \
    location_time_delta_refinement
from indra.belief.wm_scorer import get_eidos_scorer

# Location of the World Modelers ontology YAML used for the entity hierarchy.
wm_ont_url = ('https://raw.githubusercontent.com/WorldModelers/'
              'Ontologies/master/wm.yml')


if __name__ == '__main__':
    fname = 'grounded CAG links - New Ontology.xlsx'
    stmts = mtp.process_workbook(fname)
    hm = YamlHierarchyManager(load_yaml_from_url(wm_ont_url),
                              rdf_graph_from_yaml, True)
    # Fixed: the keyword is belief_scorer (the original passed
    # belief_score, so the Eidos scorer was never applied), consistent
    # with every other run_preassembly call site in this codebase.
    stmts = ac.run_preassembly(stmts, return_toplevel=False,
                               belief_scorer=get_eidos_scorer(),
                               hierarchies={'entity': hm},
                               matches_fun=location_time_delta_matches,
                               refinement_fun=location_time_delta_refinement)
    stmts = ac.standardize_names_groundings(stmts)