def optimize_tss(args_ops):
    """Run the TSSpredator parameter optimization for the project strain.

    Checks that the gff, fasta and wiggle input folders are not empty,
    runs the Multiparser over each of them, picks the gff and fasta file
    whose name contains ``args_ops.project_strain`` from the ``tmp``
    sub-folders, runs ``optimization`` and finally cleans up.

    Args:
        args_ops: option container. This function reads ``gffs``,
            ``fastas``, ``wigs``, ``project_strain`` and ``output_folder``
            and passes the whole object on to ``optimization``.

    Raises:
        SystemExit: if an input folder is empty or no gff/fasta file
            matches the project strain.
    """
    # Fail early when any of the mandatory input folders is empty.
    if not os.listdir(args_ops.gffs):
        print("Error: there is no gff files!!!")
        sys.exit()
    if not os.listdir(args_ops.fastas):
        print("Error: there is no fasta files!!!")
        sys.exit()
    if not os.listdir(args_ops.wigs):
        print("Error: there is no wiggle files!!!")
        sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    # The files are read from the "tmp" sub-folders; presumably these are
    # populated by the Multiparser calls above - TODO confirm.
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    # First file whose name contains the project strain, or None. The
    # explicit None default avoids an UnboundLocalError further down when
    # nothing matches.
    gff_file = next(
        (os.path.join(gff_path, gff) for gff in os.listdir(gff_path)
         if args_ops.project_strain in gff),
        None)
    fasta_file = next(
        (os.path.join(fasta_path, fa) for fa in os.listdir(fasta_path)
         if args_ops.project_strain in fa),
        None)
    if gff_file is None:
        print("Error: there is no gff file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    if fasta_file is None:
        print("Error: there is no fasta file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    Helper().check_uni_attributes(gff_file)
    optimization(wig_path, fasta_file, gff_file, args_ops)
    # Remove the intermediate "config" files and "Master" folders from the
    # result folder, then the temporary folders of the inputs.
    result_folder = os.path.join(args_ops.output_folder,
                                 "optimized_TSSpredator")
    Helper().remove_all_content(result_folder, "config", "file")
    Helper().remove_all_content(result_folder, "Master", "dir")
    Helper().remove_tmp(args_ops.wigs)
    Helper().remove_tmp(args_ops.gffs)
    Helper().remove_tmp(args_ops.fastas)
def optimize_tss(args_ops):
    """Run the TSSpredator parameter optimization for the project strain.

    Checks that the gff, fasta and wiggle input folders are not empty,
    runs the Multiparser over each of them, picks the gff and fasta file
    whose name contains ``args_ops.project_strain`` from the ``tmp``
    sub-folders, runs ``optimization`` and finally cleans up.

    Args:
        args_ops: option container. This function reads ``gffs``,
            ``fastas``, ``wigs``, ``project_strain`` and ``output_folder``
            and passes the whole object on to ``optimization``.

    Raises:
        SystemExit: if an input folder is empty or no gff/fasta file
            matches the project strain.
    """
    # Fail early when any of the mandatory input folders is empty.
    if not os.listdir(args_ops.gffs):
        print("Error: There is no gff files!!!")
        sys.exit()
    if not os.listdir(args_ops.fastas):
        print("Error: There is no fasta files!!!")
        sys.exit()
    if not os.listdir(args_ops.wigs):
        print("Error: There is no wiggle files!!!")
        sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    # The files are read from the "tmp" sub-folders; presumably these are
    # populated by the Multiparser calls above - TODO confirm.
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    # First file whose name contains the project strain, or None. The
    # explicit None default avoids an UnboundLocalError further down when
    # nothing matches.
    gff_file = next(
        (os.path.join(gff_path, gff) for gff in os.listdir(gff_path)
         if args_ops.project_strain in gff),
        None)
    fasta_file = next(
        (os.path.join(fasta_path, fa) for fa in os.listdir(fasta_path)
         if args_ops.project_strain in fa),
        None)
    if gff_file is None:
        print("Error: There is no gff file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    if fasta_file is None:
        print("Error: There is no fasta file of {0}!!!".format(
            args_ops.project_strain))
        sys.exit()
    Helper().check_uni_attributes(gff_file)
    optimization(wig_path, fasta_file, gff_file, args_ops)
    # Remove the intermediate "config" files and "Master" folders from the
    # result folder, then the temporary folders of the inputs.
    result_folder = os.path.join(args_ops.output_folder,
                                 "optimized_TSSpredator")
    Helper().remove_all_content(result_folder, "config", "file")
    Helper().remove_all_content(result_folder, "Master", "dir")
    Helper().remove_tmp_dir(args_ops.wigs)
    Helper().remove_tmp_dir(args_ops.gffs)
    Helper().remove_tmp_dir(args_ops.fastas)
def test_optimization(self):
    """Check that ``ot.optimization`` writes ``stat_aaa.csv``.

    ``ot.run_TSSpredator_paralle`` and ``ot.convert2gff`` are replaced by
    the ``Mock_func`` stand-ins, the input files are generated inside
    ``self.test_folder``, and the test passes when the statistics file
    exists in ``optimized_TSSpredator`` afterwards.
    """
    ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
    ot.convert2gff = Mock_func().mock_convert2gff
    # Creates "wigs" and "wigs/tmp" unless they already exist.
    wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
    os.makedirs(wig_folder, exist_ok=True)
    fasta = os.path.join(self.test_folder, "aaa.fa")
    gff = os.path.join(self.test_folder, "aaa.gff")
    gen_file(fasta, self.example.fasta)
    gen_file(gff, self.example.gff_file)
    args = self.mock_args.mock()
    args.libs = self.example.libs
    args.cores = 1
    args.cluster = 3
    args.program = "TSS"
    args.project_strain = "aaa"
    args.replicate = "all_1"
    args.utr = 200
    args.steps = 2
    args.gene_length = 2000
    args.height = 0.9
    args.height_reduction = 0.8
    args.factor = 0.9
    args.factor_reduction = 0.8
    args.base_height = 0.01
    args.enrichment = 0.5
    args.processing = 0.5
    args.length = None
    args.replicate_name = "test"
    args.tsspredator_path = "test"
    args.manual = os.path.join(self.test_folder, "manual.gff")
    gen_file(args.manual, self.example.manual_file)
    args.output_folder = self.test_folder
    # Deliberately os.mkdir: a leftover folder from an earlier run makes
    # the test fail instead of passing on a stale stat file.
    os.mkdir(os.path.join(self.test_folder, "optimized_TSSpredator"))
    # The context manager closes (and flushes) the log file even when
    # the optimization raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        ot.optimization(wig_folder, fasta, gff, args, args.manual,
                        2000, "aaa", log)
    self.assertTrue(
        os.path.exists(
            os.path.join(self.test_folder, "optimized_TSSpredator",
                         "stat_aaa.csv")))
def test_optimization(self):
    """Check that ``ot.optimization`` writes ``stat_aaa.csv``.

    ``ot.run_TSSpredator_paralle`` and ``ot.convert2gff`` are replaced by
    the ``Mock_func`` stand-ins, the input files are generated inside
    ``self.test_folder``, and the test passes when the statistics file
    exists in ``optimized_TSSpredator`` afterwards.
    """
    ot.run_TSSpredator_paralle = Mock_func().mock_run_TSSpredator_paralle
    ot.convert2gff = Mock_func().mock_convert2gff
    # Creates "wigs" and "wigs/tmp" unless they already exist.
    wig_folder = os.path.join(self.test_folder, "wigs", "tmp")
    os.makedirs(wig_folder, exist_ok=True)
    fasta = os.path.join(self.test_folder, "aaa.fa")
    gff = os.path.join(self.test_folder, "aaa.gff")
    gen_file(fasta, self.example.fasta)
    gen_file(gff, self.example.gff_file)
    args = self.mock_args.mock()
    args.libs = self.example.libs
    args.cores = 1
    args.cluster = 3
    args.program = "TSS"
    args.project_strain = "aaa"
    args.replicate = "all_1"
    args.utr = 200
    args.steps = 2
    args.gene_length = 2000
    args.height = 0.9
    args.height_reduction = 0.8
    args.factor = 0.9
    args.factor_reduction = 0.8
    args.base_height = 0.01
    args.enrichment = 0.5
    args.processing = 0.5
    args.length = None
    args.replicate_name = "test"
    args.tsspredator_path = "test"
    args.manual = os.path.join(self.test_folder, "manual.gff")
    gen_file(args.manual, self.example.manual_file)
    args.output_folder = self.test_folder
    # Deliberately os.mkdir: a leftover folder from an earlier run makes
    # the test fail instead of passing on a stale stat file.
    os.mkdir(os.path.join(self.test_folder, "optimized_TSSpredator"))
    # The context manager closes (and flushes) the log file even when
    # the optimization raises.
    with open(os.path.join(self.test_folder, "test.log"), "w") as log:
        ot.optimization(wig_folder, fasta, gff, args, args.manual,
                        2000, "aaa", log)
    self.assertTrue(os.path.exists(os.path.join(
        self.test_folder, "optimized_TSSpredator", "stat_aaa.csv")))
def optimize_tss(args_ops, log):
    """Run the TSSpredator parameter optimization for each curated genome.

    Checks that the gff, fasta and wiggle input folders are not empty,
    runs the Multiparser over the inputs and the manually-curated TSS
    files, and then calls ``optimization`` once for every manual gff file
    that is selected by ``args_ops.strain_lengths``.

    Args:
        args_ops: option container. This function reads ``gffs``,
            ``fastas``, ``wigs``, ``manuals``, ``strain_lengths`` and
            ``output_folder`` and passes the whole object on to
            ``optimization``. ``strain_lengths`` is used as a mapping from
            genome name to the length to compare; the key "all" selects
            every genome and takes the length from ``get_length`` on the
            fasta file.
        log: writable text stream that receives progress messages.

    Raises:
        SystemExit: if an input folder is empty, a genome listed in
            ``strain_lengths`` has no manual file, or a selected genome
            has no gff/fasta file.
    """
    # Fail early when any of the mandatory input folders is empty.
    if not os.listdir(args_ops.gffs):
        print("Error: There is no gff file!")
        sys.exit()
    if not os.listdir(args_ops.fastas):
        print("Error: There is no fasta file!")
        sys.exit()
    if not os.listdir(args_ops.wigs):
        print("Error: There is no wiggle file!")
        sys.exit()
    Multiparser().parser_wig(args_ops.wigs)
    Multiparser().parser_gff(args_ops.gffs, None)
    Multiparser().parser_fasta(args_ops.fastas)
    Multiparser().parser_gff(args_ops.manuals, None)
    # The files are read from the "tmp" sub-folders; presumably these are
    # populated by the Multiparser calls above - TODO confirm.
    gff_path = os.path.join(args_ops.gffs, "tmp")
    wig_path = os.path.join(args_ops.wigs, "tmp")
    fasta_path = os.path.join(args_ops.fastas, "tmp")
    manual_path = os.path.join(args_ops.manuals, "tmp")
    strain_lengths = args_ops.strain_lengths
    use_all = "all" in strain_lengths
    if not use_all:
        # Every explicitly requested genome needs a manual TSS file.
        for strain in strain_lengths:
            detect = False
            for man in os.listdir(manual_path):
                if strain == man.replace(".gff", ""):
                    detect = True
                    log.write("The manually-curated set is found - "
                              "{0}\n".format(os.path.join(manual_path, man)))
            if not detect:
                log.write(
                    "The manually-curated set of {0} is not found.\n".format(
                        strain))
                print("Error: There are genomes in --genome_lengths "
                      "which is not contained in manually-detected "
                      "TSS gff files!")
                sys.exit()
    result_folder = os.path.join(args_ops.output_folder,
                                 "optimized_TSSpredator")
    for man in os.listdir(manual_path):
        prefix = man.replace(".gff", "")
        if prefix in strain_lengths:
            length = strain_lengths[prefix]
        elif use_all:
            length = "all"
        else:
            # Not selected: skip before ``length`` is ever read, so an
            # unbound or stale value from another genome is never logged.
            continue
        man_file = os.path.join(manual_path, man)
        # Look the files up afresh for every genome; None marks "not
        # found" so a file of a previous genome is never reused.
        gff_file = next(
            (os.path.join(gff_path, gff) for gff in os.listdir(gff_path)
             if (gff[:-4] == prefix) and (".gff" in gff)),
            None)
        fasta_file = next(
            (os.path.join(fasta_path, fa) for fa in os.listdir(fasta_path)
             if (".".join(fa.split(".")[:-1]) == prefix) and (".fa" in fa)),
            None)
        if gff_file is None:
            log.write("The gff file of {0} is not found.\n".format(prefix))
            print("Error: There is no gff file of {0}!".format(prefix))
            sys.exit()
        if fasta_file is None:
            log.write("The fasta file of {0} is not found.\n".format(prefix))
            print("Error: There is no fasta file of {0}!".format(prefix))
            sys.exit()
        if length == "all":
            length = get_length(fasta_file)
        log.write("The comparing sequence region of {0} is ".format(prefix))
        log.write(str(length) + "\n")
        Helper().check_uni_attributes(gff_file)
        log.write("Running optimize_TSSpredator.py for optimization.\n")
        optimization(wig_path, fasta_file, gff_file, args_ops,
                     man_file, length, prefix, log)
        # NOTE(review): cleanup is done after every run; the original
        # nesting of these two calls could not be verified - confirm.
        Helper().remove_all_content(result_folder, "config", "file")
        Helper().remove_all_content(result_folder, "Master", "dir")
    Helper().remove_tmp_dir(args_ops.wigs)
    Helper().remove_tmp_dir(args_ops.gffs)
    Helper().remove_tmp_dir(args_ops.fastas)
    Helper().remove_tmp_dir(args_ops.manuals)