def filter(stmts, cutoff, filename):
    stmts = ac.filter_belief(stmts, cutoff)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    # stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, filename)
    return stmts
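# Minimal usage sketch for the filter() helper above (note that it shadows
# the built-in filter). The pickle paths and the 0.95 cutoff are
# illustrative assumptions; it assumes `ac` is indra.tools.assemble_corpus,
# whose load_statements/dump_statements read and write pickle files.
raw_stmts = ac.load_statements('raw_stmts.pkl')
high_belief_stmts = filter(raw_stmts, 0.95, 'filtered_stmts.pkl')
print('%d statements above cutoff' % len(high_belief_stmts))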
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    if self.assembly_config.get('mechanism_linking'):
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts
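# For reference, a hedged sketch of an assembly_config dict covering every
# key that run_assembly() above reads. The values shown are illustrative
# assumptions, not taken from any real model configuration.
example_assembly_config = {
    'skip_map_grounding': False,
    'standardize_names': True,
    'filter_ungrounded': True,
    'score_threshold': 0.7,
    'merge_groundings': False,
    'merge_deltas': False,
    'filter_relevance': None,   # or a policy name passed to filter_relevance
    'skip_filter_human': False,
    'skip_map_sequence': False,
    'preassembly_mode': None,   # 'wm' selects WM hierarchies + Eidos scorer
    'belief_cutoff': 0.8,
    'filter_direct': False,
    'mechanism_linking': False,
}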
def test_belief_cut_plus_filter_top():
    st1 = Phosphorylation(None, Agent('a'))
    st2 = Phosphorylation(Agent('b'), Agent('a'))
    st1.supports = [st2]
    st2.supported_by = [st1]
    st1.belief = 0.9
    st2.belief = 0.1
    st_high_belief = ac.filter_belief([st1, st2], 0.5)
    st_top_level = ac.filter_top_level(st_high_belief)
    assert len(st_top_level) == 1
def assemble_cx(stmts, out_file):
    """Return a CX assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.strip_agent_context(stmts)
    ca = CxAssembler()
    ca.add_statements(stmts)
    model = ca.make_model()
    ca.save_model(out_file)
    return ca
def test_readme_pipeline():
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
def print_statements(
    statements: List[Statement],
    file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    allow_duplicates: bool = False,
    keep_only_pmids: Union[None, str, Collection[str]] = None,
    sort_attrs: Iterable[str] = ('uuid', 'pmid'),
    allow_ungrounded: bool = True,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Write statements to a CSV for curation.

    Statements are preassembled, optionally filtered to grounded-only
    statements and to a minimum belief, then written as rows sorted by
    ``sort_attrs``.
    """
    sep = sep or '\t'
    extra_columns = extra_columns or []
    extra_columns_placeholders = [''] * len(extra_columns)
    statements = run_preassembly(statements)
    if not allow_ungrounded:
        statements = filter_grounded_only(statements)
    if minimum_belief is not None:
        statements = filter_belief(statements, minimum_belief)
    rows = get_rows_from_statements(statements,
                                    allow_duplicates=allow_duplicates,
                                    keep_only_pmids=keep_only_pmids)
    rows = sorted(rows, key=attrgetter(*sort_attrs))
    if limit is not None:
        rows = rows[:limit]
    if not rows:
        logger.warning('no rows written')
        return

    def _write(_file):
        print(*start_header, *extra_columns, *end_header, sep=sep, file=_file)
        for row in rows:
            print(*row.start_tuple, *extra_columns_placeholders,
                  *row.end_tuple, sep=sep, file=_file)

    if isinstance(file, str):
        with open(file, 'w') as _file:
            _write(_file)
    else:
        _write(file)
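# Usage sketch for print_statements() above: write preassembled, grounded,
# belief-filtered rows to a TSV for curation. The file name and the 0.9
# threshold are illustrative assumptions.
print_statements(statements, file='curation.tsv',
                 allow_ungrounded=False, minimum_belief=0.9)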
def test_readme_wm_pipeline():
    stmts = wm_raw_stmts
    # stmts = ac.filter_grounded_only(stmts)  # Does not work on test stmts
    belief_scorer = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, return_toplevel=False,
                               belief_scorer=belief_scorer,
                               ontology=world_ontology,
                               normalize_opposites=True,
                               normalize_ns='WM')
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    assert stmts, 'Update example to yield statements list of non-zero length'
def filter_belief():
    """Filter Statements to those with belief above a given threshold."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    belief_cutoff = body.get('belief_cutoff')
    if belief_cutoff is not None:
        belief_cutoff = float(belief_cutoff)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_belief(stmts, belief_cutoff)
    return _return_stmts(stmts_out)
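# Hedged client-side sketch for the endpoint above: POST Statements JSON
# plus a cutoff. The payload keys match what the handler reads; the host,
# port, and route are assumptions for illustration only.
import requests

payload = {'statements': [s.to_json() for s in stmts],
           'belief_cutoff': 0.8}
resp = requests.post('http://localhost:8080/preassembly/filter_belief',
                     json=payload)
filtered_json = resp.json().get('statements')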
def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
def assemble_cx(stmts, out_file_prefix, network_type):
    """Return a CX assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    if network_type == 'direct':
        stmts = ac.filter_direct(stmts)
    out_file = '%s_%s.cx' % (out_file_prefix, network_type)
    ca = CxAssembler()
    ca.add_statements(stmts)
    model = ca.make_model()
    ca.save_model(out_file)
    return ca
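# Usage sketch: build both a full and a direct-only CX network from the
# same statements. 'output/korkut' is an illustrative prefix; any
# network_type other than 'direct' skips the filter_direct step.
assemble_cx(stmts, 'output/korkut', 'full')
assemble_cx(stmts, 'output/korkut', 'direct')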
def assemble_pybel(stmts, out_file_prefix):
    """Return a PyBEL Assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    pba = PybelAssembler(stmts, name='INDRA/REACH Korkut Model',
                         description='Automatically assembled model of '
                                     'cancer signaling.',
                         version='0.0.10')
    pba.make_model()
    pybel.to_bel_path(pba.model, out_file_prefix + '.bel')
    with open(out_file_prefix, 'wt') as f:
        pybel.to_json_file(pba.model, f)
    url = 'https://pybel.scai.fraunhofer.de/api/receive'
    headers = {'content-type': 'application/json'}
    requests.post(url, json=pybel.to_json(pba.model), headers=headers)
    return pba
def assemble_pysb(stmts, data_genes, out_file):
    """Return an assembled PySB model."""
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.reduce_activities(stmts)
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    # Add observables
    o = Observable('MAPK1p', model.monomers['MAPK1'](T185='p', Y187='p'))
    model.add_component(o)
    o = Observable('MAPK3p', model.monomers['MAPK3'](T202='p', Y204='p'))
    model.add_component(o)
    o = Observable('GSK3Ap', model.monomers['GSK3A'](S21='p'))
    model.add_component(o)
    o = Observable('GSK3Bp', model.monomers['GSK3B'](S9='p'))
    model.add_component(o)
    o = Observable('RPS6p', model.monomers['RPS6'](S235='p'))
    model.add_component(o)
    o = Observable('EIF4EBP1p', model.monomers['EIF4EBP1'](S65='p'))
    model.add_component(o)
    o = Observable('JUNp', model.monomers['JUN'](S73='p'))
    model.add_component(o)
    o = Observable('FOXO3p', model.monomers['FOXO3'](S315='p'))
    model.add_component(o)
    o = Observable('AKT1p', model.monomers['AKT1'](S473='p'))
    model.add_component(o)
    o = Observable('AKT2p', model.monomers['AKT2'](S474='p'))
    model.add_component(o)
    o = Observable('AKT3p', model.monomers['AKT3'](S='p'))
    model.add_component(o)
    o = Observable('ELK1p', model.monomers['ELK1'](S383='p'))
    model.add_component(o)
    # Set context
    pa.set_context('SKMEL28_SKIN')
    pa.save_model(out_file)
    # Compute the base file name before opening the Kappa export file
    base_file, _ = os.path.splitext(out_file)
    ke = KappaExporter(model)
    with open('%s.ka' % base_file, 'wb') as fh:
        fh.write(ke.export().encode('utf-8'))
    return model
def preprocess_stmts(stmts, data_genes):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    return stmts
def assemble_pysb(stmts, data_genes, contextualize=False):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))
    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts
    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)
    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return
    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)
def assemble_sif(stmts, data, out_file):
    """Return an assembled SIF."""
    # Filter for high-belief statements
    stmts = ac.filter_belief(stmts, 0.99)
    stmts = ac.filter_top_level(stmts)
    # Filter for Activation / Inhibition
    stmts_act = ac.filter_by_type(stmts, Activation)
    stmts_inact = ac.filter_by_type(stmts, Inhibition)
    stmts = stmts_act + stmts_inact
    # Get Ras227 genes and filter statements
    ras_genes = process_data.get_ras227_genes()
    ras_genes = [x for x in ras_genes if x not in ['YAP1']]
    stmts = ac.filter_gene_list(stmts, ras_genes, 'all')

    # Get the drugs inhibiting their targets as INDRA statements
    def get_drug_statements():
        drug_targets = process_data.get_drug_targets()
        drug_stmts = []
        for dn, tns in drug_targets.items():
            da = Agent(dn + ':Drugs')
            for tn in tns:
                ta = Agent(tn)
                drug_stmt = Inhibition(da, ta)
                drug_stmts.append(drug_stmt)
        return drug_stmts

    drug_stmts = get_drug_statements()
    stmts = stmts + drug_stmts

    # Because of a bug in CNO, node names containing AND
    # need to be replaced
    def rename_and_nodes(st):
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('AND') != -1:
                        a.name = a.name.replace('AND', 'A_ND')

    rename_and_nodes(stmts)
    # Rewrite statements to replace genes with their corresponding
    # antibodies when possible
    stmts = rewrite_ab_stmts(stmts, data)

    def filter_ab_edges(st, policy='all'):
        st_out = []
        for s in st:
            if policy == 'all':
                all_ab = True
                for a in s.agent_list():
                    if a is not None:
                        if a.name.find('_p') == -1 and \
                           a.name.find('Drugs') == -1:
                            all_ab = False
                            break
                if all_ab:
                    st_out.append(s)
            elif policy == 'one':
                any_ab = False
                for a in s.agent_list():
                    if a is not None and a.name.find('_p') != -1:
                        any_ab = True
                        break
                if any_ab:
                    st_out.append(s)
        return st_out

    stmts = filter_ab_edges(stmts, 'all')

    # Get a list of the AB names that end up being covered in the prior
    # network. This is important because other ABs will need to be taken
    # out of the MIDAS file to work.
    def get_ab_names(st):
        prior_abs = set()
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('_p') != -1:
                        prior_abs.add(a.name)
        return sorted(list(prior_abs))

    pkn_abs = get_ab_names(stmts)
    print('Boolean PKN contains these antibodies: %s' % ', '.join(pkn_abs))
    # Make the SIF model
    sa = SifAssembler(stmts)
    sa.make_model(use_name_as_key=True)
    sif_str = sa.print_model()
    with open(out_file, 'wb') as fh:
        fh.write(sif_str.encode('utf-8'))
    # Make the MIDAS data file used for training the model
    midas_data = process_data.get_midas_data(data, pkn_abs)
    return sif_str
for pmid_sample_size, num_trials in sample_sizes_trials:
    print("\n\nSample size: %d\n\n" % pmid_sample_size)
    trial_results = []
    trial_results_uniq = []
    trial_results_top = []
    trial_results_filt = []
    for i in range(num_trials):
        sample_pmids = np.random.choice(pmids, pmid_sample_size,
                                        replace=False)
        trial_stmts = [s for pmid in sample_pmids
                       for s in stmts_by_pmid[pmid]]
        trial_results.append(len(trial_stmts))
        # be = BeliefEngine()
        pa = Preassembler(hierarchies, trial_stmts)
        trial_stmts_top = pa.combine_related(poolsize=16,
                                             return_toplevel=True)
        trial_stmts_uniq = pa.unique_stmts
        trial_stmts_filt = ac.filter_belief(trial_stmts_top, 0.90)
        # trial_stmts_uniq = ac.run_preassembly_duplicate(pa, be)
        trial_results_uniq.append(len(trial_stmts_uniq))
        trial_results_top.append(len(trial_stmts_top))
        trial_results_filt.append(len(trial_stmts_filt))
    results.append((np.mean(trial_results), np.std(trial_results)))
    results_uniq.append((np.mean(trial_results_uniq),
                         np.std(trial_results_uniq)))
    results_top.append((np.mean(trial_results_top),
                        np.std(trial_results_top)))
    results_filt.append((np.mean(trial_results_filt),
                         np.std(trial_results_filt)))
results = np.array(results)
results_uniq = np.array(results_uniq)
results_top = np.array(results_top)
results_filt = np.array(results_filt)
def test_filter_belief():
    st_out = ac.filter_belief([st1, st2, st3], 0.75)
    assert len(st_out) == 2
stats = {}
logger.info(time.strftime('%c'))
logger.info('Preassembling original model.')
model.preassemble(filters=global_filters)
logger.info(time.strftime('%c'))
# Original statistics
stats['orig_stmts'] = len(model.get_statements())
stats['orig_assembled'] = len(model.assembled_stmts)
db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                     ['biopax', 'bel'], policy='one')
no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                        ['biopax', 'bel'], policy='none')
no_db_stmts = ac.filter_belief(no_db_stmts, belief_threshold)
orig_stmts = db_stmts + no_db_stmts
stats['orig_final'] = len(orig_stmts)
logger.info('%d final statements' % len(orig_stmts))
# Extend the model with PMIDs
logger.info('----------------')
logger.info(time.strftime('%c'))
logger.info('Extending model.')
stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
    extend_model(model_name, model, pmids)
# Having added new statements, we preassemble the model
model.preassemble(filters=global_filters)
# New statistics
stats['new_stmts'] = len(model.get_statements())
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

stmts = ac.load_statements('output/preassembled.pkl')
stmts = ac.filter_belief(stmts, 0.95)
stmts = ac.filter_direct(stmts)
sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    for s, t, d in sa.graph.edges(data=True):
        source = sa.graph.nodes[s]['name']
        target = sa.graph.nodes[t]['name']
        fh.write('%s %f %s\n' % (source, d['weight'], target))
from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach

tp = reach.process_pmc('PMC4455820', url=reach.local_nxml_url)
if tp:
    stmts = tp.statements
    print(stmts)
    stmts = ac.filter_grounded_only(stmts)  # Filter out ungrounded agents
    stmts = ac.run_preassembly(
        stmts,  # Run preassembly
        return_toplevel=False,
        normalize_equivalences=True,  # Optional: rewrite equivalent groundings to one standard
        normalize_opposites=True,  # Optional: rewrite opposite groundings to one standard
        normalize_ns='WM')  # Use 'WM' namespace to normalize equivalences and opposites
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    stmts_to_json_file(stmts, 'PMC4455820.json')
    ha = HtmlAssembler(stmts)
    ha.save_model('PMC4455820.html')