def main(args):
    """Write a tab-separated file of agent-name pairs from INDRA statements.

    Statements are loaded from ``args.infile``, passed through family
    expansion, grounding mapping and preassembly, filtered, and every
    remaining statement with exactly two non-None agents is written to
    ``args.outfile`` as one row holding the two agent names.

    Parameters
    ----------
    args : object
        Namespace-like object with the attributes ``infile``, ``outfile``,
        ``human_only`` and ``filter_direct``. A falsy ``infile`` or
        ``outfile`` is replaced *in place* with a default path.
    """
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
        assemble_corpus.map_grounding(
            assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
        assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))

    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    # Keep only binary statements. Both agents must be present: the names of
    # both are read below, so a None in either slot would raise
    # AttributeError. The agent list is fetched once per statement.
    rows = []
    for stmt in stmts_filtered:
        agents = stmt.agent_list()
        if len(agents) == 2 and all(ag is not None for ag in agents):
            rows.append([ag.name for ag in agents])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        csv.writer(csvfile, delimiter='\t').writerows(rows)
def filter(stmts, cutoff, filename):
    """Apply the belief, top-level and direct filters, then dump the result.

    Parameters
    ----------
    stmts : list
        Statements to filter.
    cutoff : float
        Threshold passed to ``ac.filter_belief``.
    filename : str
        Destination passed to ``ac.dump_statements``.

    Returns
    -------
    list
        The statements that passed all three filters.
    """
    # NOTE: the enzyme/kinase filter (ac.filter_enzyme_kinase) is currently
    # disabled in this pipeline.
    remaining = ac.filter_direct(
        ac.filter_top_level(
            ac.filter_belief(stmts, cutoff)))
    ac.dump_statements(remaining, filename)
    return remaining
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements.

    Each optional step is switched on or off by a key of
    ``self.assembly_config``. The assembled statements are stored in
    ``self.assembled_stmts``; nothing is returned.
    """
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)

    config = self.assembly_config

    # Grounding-related steps
    if not config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if config.get('standardize_names'):
        # The return value is not used here, as in the original pipeline.
        ac.standardize_names_groundings(stmts)
    if config.get('filter_ungrounded'):
        stmts = ac.filter_grounded_only(
            stmts, score_threshold=config.get('score_threshold'))
    if config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)

    # Relevance, organism and sequence steps
    relevance_policy = config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)

    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_kwargs = {'return_toplevel': False}
    if config.get('preassembly_mode') == 'wm':
        preassembly_kwargs['hierarchies'] = get_wm_hierarchies()
        preassembly_kwargs['belief_scorer'] = get_eidos_scorer()
    stmts = ac.run_preassembly(stmts, **preassembly_kwargs)

    # Belief filtering is optional; the top-level filter always runs.
    belief_cutoff = config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
    stmts = ac.filter_top_level(stmts)

    if config.get('filter_direct'):
        for direct_filter in (ac.filter_direct,
                              ac.filter_enzyme_kinase,
                              ac.filter_mod_nokinase,
                              ac.filter_transcription_factor):
            stmts = direct_filter(stmts)

    if config.get('mechanism_linking'):
        linker = MechLinker(stmts)
        linker.gather_explicit_activities()
        linker.reduce_activities()
        linker.gather_modifications()
        linker.reduce_modifications()
        # Explicit activities are gathered a second time before
        # activations are replaced.
        linker.gather_explicit_activities()
        linker.replace_activations()
        linker.require_active_forms()
        stmts = linker.statements

    self.assembled_stmts = stmts
def assemble_cx(stmts, out_file_prefix, network_type):
    """Assemble statements into a CX model, save it, and return the assembler.

    Parameters
    ----------
    stmts : list
        Statements to assemble; filtered by belief (0.95) and top level.
    out_file_prefix : str
        Prefix of the output path ``<out_file_prefix>_<network_type>.cx``.
    network_type : str
        When equal to ``'direct'`` the direct filter is applied as well.

    Returns
    -------
    CxAssembler
        The assembler after the model has been made and saved.
    """
    selected = ac.filter_top_level(ac.filter_belief(stmts, 0.95))
    if network_type == 'direct':
        selected = ac.filter_direct(selected)
    target_path = '%s_%s.cx' % (out_file_prefix, network_type)

    assembler = CxAssembler()
    assembler.add_statements(selected)
    # The model must be built before it can be saved; the return value of
    # make_model() itself is not needed here.
    assembler.make_model()
    assembler.save_model(target_path)
    return assembler
def run_assembly(stmts, save_file):
    """Run the fixed assembly pipeline on stmts and dump the result.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    save_file : str
        Destination passed to ``ac.dump_statements``.

    Returns
    -------
    list
        The statements left after the final pipeline step.
    """
    # Ordered pipeline; every step takes the current statements and returns
    # the next ones. The order is significant.
    steps = (
        ac.map_grounding,
        ac.filter_grounded_only,
        ac.filter_human_only,
        ac.expand_families,
        lambda current: ac.filter_gene_list(current, gene_names, 'one'),
        ac.map_sequence,
        lambda current: ac.run_preassembly(current, return_toplevel=False),
        lambda current: ac.filter_belief(current, 0.95),
        ac.filter_top_level,
        ac.filter_direct,
        ac.filter_enzyme_kinase,
    )
    assembled = stmts
    for step in steps:
        assembled = step(assembled)

    ac.dump_statements(assembled, save_file)
    return assembled
def assemble_pysb(stmts, data_genes, out_file):
    """Return an assembled PySB model.

    The statements are filtered (direct, belief 0.95, top level, gene list
    restricted to ``data_genes``), activities are reduced, and a PySB model
    is assembled. A fixed set of phospho-site observables is added, the
    context is set to ``'SKMEL28_SKIN'``, the model is saved to ``out_file``
    and a Kappa export is written next to it with a ``.ka`` extension.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    data_genes : list
        Gene names passed to ``ac.filter_gene_list`` with policy ``'all'``.
    out_file : str
        Path the PySB model is saved to; its stem also names the Kappa file.

    Returns
    -------
    The model returned by ``PysbAssembler.make_model``.
    """
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.reduce_activities(stmts)
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()

    # Add observables: (observable name, monomer name, site -> state)
    # NOTE(review): the AKT3 site is the bare residue 'S' with no position,
    # unlike every other entry - confirm this matches the assembled monomer.
    # NOTE(review): the last observable is named 'ELK1' (no 'p' suffix),
    # i.e. the same name as its monomer - confirm this is intended.
    observables = (
        ('MAPK1p', 'MAPK1', {'T185': 'p', 'Y187': 'p'}),
        ('MAPK3p', 'MAPK3', {'T202': 'p', 'Y204': 'p'}),
        ('GSK3Ap', 'GSK3A', {'S21': 'p'}),
        ('GSK3Bp', 'GSK3B', {'S9': 'p'}),
        ('RPS6p', 'RPS6', {'S235': 'p'}),
        ('EIF4EBP1p', 'EIF4EBP1', {'S65': 'p'}),
        ('JUNp', 'JUN', {'S73': 'p'}),
        ('FOXO3p', 'FOXO3', {'S315': 'p'}),
        ('AKT1p', 'AKT1', {'S473': 'p'}),
        ('AKT2p', 'AKT2', {'S474': 'p'}),
        ('AKT3p', 'AKT3', {'S': 'p'}),
        ('ELK1', 'ELK1', {'S383': 'p'}),
    )
    for obs_name, monomer_name, site_states in observables:
        pattern = model.monomers[monomer_name](**site_states)
        model.add_component(Observable(obs_name, pattern))

    # Set context
    pa.set_context('SKMEL28_SKIN')
    pa.save_model(out_file)

    # The Kappa file name is derived from out_file. It has to be computed
    # BEFORE the file is opened: previously base_file was assigned inside
    # the with-block that already used it, raising UnboundLocalError.
    base_file, _ = os.path.splitext(out_file)
    ke = KappaExporter(model)
    with open('%s.ka' % base_file, 'wb') as fh:
        fh.write(ke.export().encode('utf-8'))
    return model
def preprocess_stmts(stmts, data_genes):
    """Filter and mechanistically link the statements going into the model.

    Parameters
    ----------
    stmts : list
        Statements to preprocess.
    data_genes : list
        Gene names passed to ``ac.filter_gene_list`` with policy ``'all'``.

    Returns
    -------
    list
        The statements held by the final MechLinker once the
        inconsequential-modification/activity filters stop removing any.
    """
    # Filter the INDRA Statements to be put into the model
    braf_mutations = {'BRAF': [('V', '600', 'E')]}
    stmts = ac.filter_mutation_status(stmts, braf_mutations, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)

    # Simplify activity types
    simplifier = MechLinker(stmts)
    simplifier.gather_explicit_activities()
    simplifier.reduce_activities()
    simplifier.gather_modifications()
    simplifier.reduce_modifications()

    # Only the ActiveForm statements are run through preassembly again;
    # all other statements are carried over unchanged.
    active_forms = ac.filter_by_type(simplifier.statements, ActiveForm)
    others = ac.filter_by_type(simplifier.statements, ActiveForm,
                               invert=True)
    active_forms = ac.run_preassembly(active_forms)
    stmts = active_forms + others

    # Replace activations when possible
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs, repeating until a pass no longer
    # shrinks the statement list.
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if previous_count <= current_count:
            break
        previous_count = current_count
    return linker.statements
def test_filter_direct():
    """filter_direct keeps st12 and removes st13."""
    kept = ac.filter_direct([st12])
    assert len(kept) == 1
    removed = ac.filter_direct([st13])
    assert len(removed) == 0
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

# Load the preassembled statements and keep only the direct ones with a
# belief of at least 0.95.
stmts = ac.filter_direct(
    ac.filter_belief(
        ac.load_statements('output/preassembled.pkl'), 0.95))

# Build the SIF graph and attach 'support_all' edge weights.
sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')

# Write one "<source name> <weight> <target name>" line per graph edge.
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    fh.writelines(
        '%s %f %s\n' % (sa.graph.nodes[src]['name'],
                        attrs['weight'],
                        sa.graph.nodes[dst]['name'])
        for src, dst, attrs in sa.graph.edges(data=True))
with open(LIGANDS_IN_DATA[count], 'rb') as fh: ligands_in_data = pickle.load(fh) with open(NATURE_HASHES[count], 'rb') as fh: hashes = pickle.load(fh) # get the union of all the statement hashes all_hashes = set.union(*hashes.values()) # Download the statements by hashes stmts_by_hash = download_statements(all_hashes) # get only the list of all the available statemtns indra_db_stmts = list(stmts_by_hash.values()) # Filtering out the indirect INDRA statements indra_db_stmts = ac.filter_direct(indra_db_stmts) # Filter statements which are not ligands/receptors from # OmniPath database and filter op statemeents which are not # in 2015 paper op_filtered = filter_op_stmts(op.statements, ligands_in_data.values(), receptors_in_data, lg_rg) # Merge omnipath/INDRA statements and run assembly indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered, run_refinement=False) # Filter incorrect curations indra_op_filtered = filter_incorrect_curations(indra_op_stmts) # Filter complex statements indra_op_filtered = filter_complex_statements(indra_op_filtered,
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Assemble and pickle a generic PySB model, optionally one per cell line.

    The statements are filtered and mechanistically linked, dumped under
    ``prefixed_pkl('pysb_stmts')``, extended with the drug-target
    statements, and assembled into a model pickled under
    ``prefixed_pkl('pysb_model')``. When ``contextualize`` is true, a
    separate set of statements and a separate model are additionally
    written for every entry of the module-level ``cell_lines``.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    data_genes : list
        Gene names passed to ``ac.filter_gene_list`` with policy ``'all'``
        and to the contextualization helpers.
    contextualize : bool
        Whether to also produce the per-cell-line outputs.

    Returns
    -------
    None
        All results are written to disk.
    """
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)

    # Simplify activity types
    simplifier = MechLinker(stmts)
    simplifier.gather_explicit_activities()
    simplifier.reduce_activities()
    simplifier.gather_modifications()
    simplifier.reduce_modifications()
    stmts = normalize_active_forms(simplifier.statements)

    # Replace activations when possible
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    # Require active forms
    linker.require_active_forms()

    # Remove inconsequential PTMs, repeating until a pass no longer
    # shrinks the statement list.
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if previous_count <= current_count:
            break
        previous_count = current_count
    stmts = linker.statements

    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements (after the dump, so the dumped file does
    # not contain them)
    stmts += get_drug_target_statements()

    # Just generate the generic model
    generic_assembler = PysbAssembler()
    generic_assembler.add_statements(stmts)
    generic_model = generic_assembler.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(generic_model, f)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        # Cell lines listed above get the generic statements and an
        # uncontextualized model.
        has_data = cell_line not in cell_lines_no_data
        if has_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts

        cell_assembler = PysbAssembler()
        cell_assembler.add_statements(stmtsc)
        cell_model = cell_assembler.make_model()
        if has_data:
            contextualize_model(cell_model, cell_line, data_genes)

        ac.dump_statements(stmtsc,
                           prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(cell_model, f)
def test_filter_direct():
    """filter_direct returns one statement for st12 and none for st13."""
    # Each case pairs an input statement with the expected result length.
    for stmt, expected_count in ((st12, 1), (st13, 0)):
        assert len(ac.filter_direct([stmt])) == expected_count
# remove all the receptors from the surface_protein_set full_ligand_set = get_ligands() - receptor_genes_go # Now get INDRA DB Statements for the receptor-ligand pairs hashes_by_gene_pair = get_hashes_by_gene_pair(indra_df, full_ligand_set, receptor_genes_go) # get the union of all the statement hashes all_hashes = set.union(*hashes_by_gene_pair.values()) # Download the statements by hashes stmts_by_hash = download_statements(all_hashes) # get only the list of all the available statemtns indra_db_stmts = list(stmts_by_hash.values()) # Filtering out the indirect INDRA statements indra_db_stmts = ac.filter_direct(indra_db_stmts) # Fetch omnipath database biomolecular interactions and # process them into INDRA statements op = process_from_web() # Filter statements which are not ligands/receptors from # OmniPath database op_filtered = filter_op_stmts(op.statements, full_ligand_set, receptor_genes_go) op_filtered = ac.filter_direct(op_filtered) op_filtered = ac.filter_by_curation(op_filtered, curations=db_curations) # Merge omnipath/INDRA statements and run assembly indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered,