def main(args): # This file takes about 32 GB to load if not args.infile: args.infile = './Data/indra_raw/bioexp_all_raw.pkl' if not args.outfile: args.outfile = './filtered_indra_network.sif' # Load statements from file stmts_raw = assemble_corpus.load_statements(args.infile) # Expand families, fix grounding errors and run run preassembly stmts_fixed = assemble_corpus.run_preassembly( assemble_corpus.map_grounding( assemble_corpus.expand_families(stmts_raw))) # Default filtering: specific (unique) genes that are grounded. stmts_filtered = assemble_corpus.filter_grounded_only( assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True)) # Custom filters if args.human_only: stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered) if args.filter_direct: stmts_filtered = assemble_corpus.filter_direct(stmts_filtered) binary_stmts = [s for s in stmts_filtered if len(s.agent_list()) == 2 and s.agent_list()[0] is not None] rows = [] for s in binary_stmts: rows.append([ag.name for ag in s.agent_list()]) # Write rows to .sif file with open(args.outfile, 'w', newline='') as csvfile: wrtr = csv.writer(csvfile, delimiter='\t') for row in rows: wrtr.writerow(row)
def run_assembly(stmts, save_file): stmts = ac.map_grounding(stmts) stmts = ac.filter_grounded_only(stmts) stmts = ac.filter_human_only(stmts) stmts = ac.expand_families(stmts) stmts = ac.filter_gene_list(stmts, gene_names, 'one') stmts = ac.map_sequence(stmts) stmts = ac.run_preassembly(stmts, return_toplevel=False) stmts = ac.filter_belief(stmts, 0.95) stmts = ac.filter_top_level(stmts) stmts = ac.filter_direct(stmts) stmts = ac.filter_enzyme_kinase(stmts) ac.dump_statements(stmts, save_file) return stmts
def get_indra_phos_stmts(): stmts = by_gene_role_type(stmt_type='Phosphorylation') stmts += by_gene_role_type(stmt_type='Dephosphorylation') stmts = ac.map_grounding(stmts) # Expand families before site mapping stmts = ac.expand_families(stmts) stmts = ac.filter_grounded_only(stmts) stmts = ac.map_sequence(stmts) ac.dump_statements(stmts, 'sources/indra_phos_sitemap.pkl') stmts = ac.run_preassembly(stmts, poolsize=4, save='sources/indra_phos_stmts_pre.pkl') stmts = ac.filter_human_only(stmts) stmts = ac.filter_genes_only(stmts, specific_only=True) ac.dump_statements(stmts, 'sources/indra_phos_stmts.pkl') return stmts
def test_expand_families(): st_out = ac.expand_families([st10]) assert (len(st_out) == 2)
def test_expand_families(): st_out = ac.expand_families([st10]) assert len(st_out) == 2
prior_stmts = ac.map_grounding(prior_stmts, save=pjoin(outf, 'gmapped_prior.pkl')) reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl')) reach_stmts = ac.filter_no_hypothesis(reach_stmts) #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl')) extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl')) reading_stmts = reach_stmts + extra_stmts reading_stmts = ac.map_grounding(reading_stmts, save=pjoin(outf, 'gmapped_reading.pkl')) stmts = prior_stmts + reading_stmts + extra_stmts stmts = ac.filter_grounded_only(stmts) stmts = ac.filter_genes_only(stmts, specific_only=False) stmts = ac.filter_human_only(stmts) stmts = ac.expand_families(stmts) stmts = ac.filter_gene_list(stmts, data_genes, 'one') stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl')) #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl')) stmts = ac.run_preassembly(stmts, return_toplevel=False, save=pjoin(outf, 'preassembled.pkl'), poolsize=4) ### PySB assembly if 'pysb' in assemble_models: pysb_model = assemble_pysb(stmts, data_genes, pjoin(outf, 'korkut_model_pysb.py')) ### SIF assembly if 'sif' in assemble_models: sif_str = assemble_sif(stmts, data, pjoin(outf,
stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl')) #stmts = ac.load_statements(pjoin(outf, 'prior.pkl')) else: #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl')) prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl')) prior_stmts = ac.map_grounding(prior_stmts, save=pjoin(outf, 'gmapped_prior.pkl')) reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl')) reading_stmts = ac.map_grounding(reading_stmts, save=pjoin(outf, 'gmapped_reading.pkl')) stmts = prior_stmts + reading_stmts stmts = ac.filter_grounded_only(stmts) stmts = ac.filter_genes_only(stmts, specific_only=False) stmts = ac.filter_human_only(stmts) stmts = ac.expand_families(stmts) stmts = ac.filter_gene_list(stmts, data_genes, 'one') stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl')) stmts = ac.run_preassembly(stmts, return_toplevel=False, save=pjoin(outf, 'preassembled.pkl')) assemble_models = [] assemble_models.append('sif') assemble_models.append('pysb') assemble_models.append('cx') ### PySB assembly if 'pysb' in assemble_models: pysb_model = assemble_pysb(stmts, data_genes, pjoin(outf, 'korkut_model_pysb.py')) ### SIF assembly