def test_filter_no_hypothesis():
    """Check the number of Statements kept by ``ac.filter_no_hypothesis``.

    Two Phosphorylation Statements are built on the same Agent: ``st1`` has
    one Evidence flagged as a hypothesis and one flagged as not a hypothesis,
    while ``st2`` has only hypothesis-flagged Evidence. Exactly one of the two
    is expected to come back from the filter.
    """
    a = Agent('MAPK1')
    ev1 = Evidence(epistemics={'hypothesis': True})
    ev2 = Evidence(epistemics={'hypothesis': False})
    st1 = Phosphorylation(None, a, evidence=[ev1, ev2])
    st2 = Phosphorylation(None, a, evidence=[ev1, ev1])
    st_out = ac.filter_no_hypothesis([st1, st2])
    # Without this check the test could never fail, whatever the filter did
    assert len(st_out) == 1
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    The Statements are obtained from ``self.get_indra_stmts()`` after
    ``self.eliminate_copies()`` has been called, and are then passed through
    a sequence of ``assemble_corpus`` steps. Most steps are switched on or
    off by keys looked up in ``self.assembly_config``. Nothing is returned;
    the final list is stored in ``self.assembled_stmts``.

    NOTE(review): the block structure below was reconstructed from a
    flattened source line. Confirm in particular that
    ``ac.filter_top_level`` runs unconditionally and that the three filters
    following ``ac.filter_direct`` belong to the ``filter_direct`` branch.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    # Grounding mapping runs unless the config explicitly skips it
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        # The return value is not captured here, unlike the other steps
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    # The relevance filter is applied only when a policy is configured
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    # Human-only filtering and sequence mapping run unless skipped
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    # A cutoff of 0 still triggers the belief filter; only None disables it
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    if self.assembly_config.get('mechanism_linking'):
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        # Explicit activities are gathered a second time after the
        # reduce steps, before activations are replaced
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
def test_filter_no_hypothesis():
    """Expect one of two Statements to remain after hypothesis filtering.

    One Statement mixes hypothesis and non-hypothesis Evidence; the other
    has hypothesis Evidence only.
    """
    agent = Agent('MAPK1')
    hypothesis_ev = Evidence(epistemics={'hypothesis': True})
    non_hypothesis_ev = Evidence(epistemics={'hypothesis': False})
    mixed_stmt = Phosphorylation(
        None, agent, evidence=[hypothesis_ev, non_hypothesis_ev])
    hypothesis_only_stmt = Phosphorylation(
        None, agent, evidence=[hypothesis_ev, hypothesis_ev])
    filtered = ac.filter_no_hypothesis([mixed_stmt, hypothesis_only_stmt])
    assert len(filtered) == 1
def test_readme_pipeline():
    """Run the README's assembly pipeline and require a non-empty result.

    The input Statements come from ``gn_stmts``; each step feeds its output
    into the next one.
    """
    statements = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    statements = ac.filter_no_hypothesis(statements)
    statements = ac.map_grounding(statements)
    statements = ac.filter_grounded_only(statements)
    statements = ac.filter_human_only(statements)
    statements = ac.map_sequence(statements)
    statements = ac.run_preassembly(statements, return_toplevel=False)
    statements = ac.filter_belief(statements, 0.8)
    assert statements, \
        'Update example to yield statements list of non-zero length'
def preassemble(self, filters=None, grounding_map=None):
    """Assemble the model's Statements and store the result.

    The Statements returned by ``self.get_statements()`` are stripped of
    hypotheses, grounding-mapped, sequence-mapped, preassembled and passed
    through the relevance filter. The outcome is saved in
    ``self.assembled_stmts``; nothing is returned.

    Currently the following filter options are implemented:
    - grounding: require that all Agents in statements are grounded
    - human_only: require that all proteins are human proteins
    - prior_one: require that at least one Agent is in the prior model
    - prior_all: require that all Agents are in the prior model

    Parameters
    ----------
    filters : Optional[list[str]]
        A list of filter options to apply when choosing the statements.
        See description above for more details. Default: None
    grounding_map : Optional[dict]
        A user supplied grounding map which maps a string to a
        dictionary of database IDs (in the format used by Agents'
        db_refs). When None, ``ac.map_grounding`` is called without it.
    """
    # An absent or empty filter list switches every optional filter off
    requested = filters if filters else ()

    # Forward the grounding map only when the caller supplied one
    grounding_kwargs = {}
    if grounding_map is not None:
        grounding_kwargs['grounding_map'] = grounding_map

    assembled = ac.filter_no_hypothesis(self.get_statements())
    assembled = ac.map_grounding(assembled, **grounding_kwargs)
    if 'grounding' in requested:
        assembled = ac.filter_grounded_only(assembled)
    assembled = ac.map_sequence(assembled)
    if 'human_only' in requested:
        assembled = ac.filter_human_only(assembled)
    assembled = ac.run_preassembly(assembled, return_toplevel=False)
    # The relevance filter receives the caller's original filters argument
    self.assembled_stmts = self._relevance_filter(assembled, filters)
def preassemble(self, filters=None, grounding_map=None):
    """Run the preassembly pipeline over the collected Statements.

    Starting from ``self.get_statements()``, the pipeline removes
    hypotheses, maps groundings and sequences, optionally filters, runs
    preassembly and the relevance filter, and finally stores the list in
    ``self.assembled_stmts``.

    Currently the following filter options are implemented:
    - grounding: require that all Agents in statements are grounded
    - human_only: require that all proteins are human proteins
    - prior_one: require that at least one Agent is in the prior model
    - prior_all: require that all Agents are in the prior model

    Parameters
    ----------
    filters : Optional[list[str]]
        A list of filter options to apply when choosing the statements.
        See description above for more details. Default: None
    grounding_map : Optional[dict]
        A user supplied grounding map which maps a string to a
        dictionary of database IDs (in the format used by Agents'
        db_refs).
    """
    def _selected(option):
        # True only when a non-empty filters argument contains the option
        return bool(filters) and option in filters

    result = self.get_statements()

    # Drop hypotheses before any mapping is attempted
    result = ac.filter_no_hypothesis(result)

    # Grounding: use the default map unless a custom one was provided
    if grounding_map is None:
        result = ac.map_grounding(result)
    else:
        result = ac.map_grounding(result, grounding_map=grounding_map)
    if _selected('grounding'):
        result = ac.filter_grounded_only(result)

    # Sequence sites
    result = ac.map_sequence(result)
    if _selected('human_only'):
        result = ac.filter_human_only(result)

    # Preassembly followed by the relevance filter
    result = ac.run_preassembly(result, return_toplevel=False)
    result = self._relevance_filter(result, filters)

    # Keep the outcome on the instance
    self.assembled_stmts = result
def run_assembly(self):
    """Assemble the model's Statements with INDRA's pipeline.

    Returns
    -------
    stmts : list[indra.statements.Statement]
        The list of assembled INDRA Statements.
    """
    assembled = self.get_indra_smts()
    # These steps each take the Statement list as their only argument
    # and are applied in exactly this order
    single_argument_steps = (
        ac.filter_no_hypothesis,
        ac.map_grounding,
        ac.map_sequence,
        ac.filter_human_only,
    )
    for step in single_argument_steps:
        assembled = step(assembled)
    return ac.run_preassembly(assembled, return_toplevel=False)
', '.join(assemble_models)) print('##############') outf = 'output/' data = process_data.read_data(process_data.data_file) data_genes = process_data.get_all_gene_names(data) reassemble = False if not reassemble: stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl')) else: #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl')) prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl')) prior_stmts = ac.map_grounding(prior_stmts, save=pjoin(outf, 'gmapped_prior.pkl')) reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl')) reach_stmts = ac.filter_no_hypothesis(reach_stmts) #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl')) extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl')) reading_stmts = reach_stmts + extra_stmts reading_stmts = ac.map_grounding(reading_stmts, save=pjoin(outf, 'gmapped_reading.pkl')) stmts = prior_stmts + reading_stmts + extra_stmts stmts = ac.filter_grounded_only(stmts) stmts = ac.filter_genes_only(stmts, specific_only=False) stmts = ac.filter_human_only(stmts) stmts = ac.expand_families(stmts) stmts = ac.filter_gene_list(stmts, data_genes, 'one') stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl')) #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
# Switch to True to redo the generic assembly rather than relying on a
# previously saved result
reassemble = False

# Pickle file that receives the preassembled statements
pre_stmts_file = prefixed_pkl('preassembled')

if reassemble:
    # Collect the statements saved earlier for each source
    sources = [
        'indradb',
        'trips',
        'bel',
        'biopax',
        'phosphosite',
        'r3',
        'sparser',
    ]
    stmts = []
    for source in sources:
        stmts.extend(ac.load_statements(prefixed_pkl(source)))
    stmts = ac.filter_no_hypothesis(stmts)

    # Map groundings, then keep grounded entities only and, for proteins,
    # human ones only
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)

    # Expand protein families combinatorially
    stmts = ac.expand_families(stmts)

    # Strict gene-name filter (policy 'all')
    stmts = ac.filter_gene_list(stmts, gene_names, 'all')

    # Correct erroneous references to protein sequences
    stmts = ac.map_sequence(stmts)

    # Preassemble and persist the result
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    ac.dump_statements(stmts, pre_stmts_file)