示例#1
0
 def _compute_intersection_forward_reverse(
         self, prefixs, merge_path, wig_path, merge_wigs, args_term):
     '''Run the per-prefix terminator pipeline, then merge the results.

     For every prefix the method builds the temporary and input file
     paths, calls ``intergenic_seq`` to write the sequence file, runs
     ``self._run_rnafold`` on it, calls ``poly_t`` to write the candidate
     file and finally hands everything to ``detect_coverage``. After the
     loop the GFF files in ``self.outfolder["term"]`` are combined and
     ``self._move_file`` is invoked.

     Args:
         prefixs: iterable of string prefixes used to build file names.
         merge_path: folder containing ``<prefix>.gff``.
         wig_path: folder containing ``<prefix>_forward.wig`` and
             ``<prefix>_reverse.wig``.
         merge_wigs: passed through unchanged to ``detect_coverage``.
         args_term: argument object; ``out_folder``, ``RNAfold_path`` and
             ``gffs`` are read here and the object itself is forwarded
             to ``poly_t`` and ``detect_coverage``.
     '''
     for prefix in prefixs:
         work_dir = args_term.out_folder
         seq_file = os.path.join(work_dir, "inter_seq_" + prefix)
         sec_file = os.path.join(work_dir, "inter_sec_" + prefix)
         transcript_gff = os.path.join(self.tran_path,
                                       prefix + "_transcript.gff")
         annotation_gff = os.path.join(merge_path, prefix + ".gff")

         print("Extracting seq of {0}".format(prefix))
         genome_fasta = os.path.join(self.fasta_path, prefix + ".fa")
         intergenic_seq(genome_fasta, transcript_gff, annotation_gff,
                        seq_file)
         self._run_rnafold(args_term.RNAfold_path, seq_file, sec_file,
                           prefix)

         candidate_file = os.path.join(work_dir,
                                       "term_candidates_" + prefix)
         poly_t(seq_file, sec_file, annotation_gff, transcript_gff,
                candidate_file, args_term)

         print("detection of terminator")
         # Name every remaining path so the long call below stays readable.
         forward_wig = os.path.join(wig_path, prefix + "_forward.wig")
         reverse_wig = os.path.join(wig_path, prefix + "_reverse.wig")
         hp_gff_file = os.path.join(
             self.tmps["hp_path"], "_".join((prefix, self.tmps["hp_gff"])))
         term_gff_file = os.path.join(
             self.outfolder["term"],
             "_".join((prefix, self.suffixs["gff"])))
         raw_table_file = os.path.join(self.tmps["term_table"],
                                       prefix + "_term_raw.csv")
         detect_coverage(
             candidate_file, annotation_gff, transcript_gff, genome_fasta,
             forward_wig, reverse_wig, hp_gff_file, merge_wigs,
             term_gff_file, raw_table_file, args_term)

     term_dir = self.outfolder["term"]
     self.multiparser.combine_gff(args_term.gffs, term_dir, None, "term")
     self._move_file(self.outfolder["term"], self.outfolder["csv"])
示例#2
0
 def _compute_intersection_forward_reverse(
         self, prefixs, merge_path, wig_path, merge_wigs, args_term, log):
     '''Search terminators located in gene converged regions.

     For every prefix the temporary/input paths are built first. When the
     transcript GFF exists, the potential sequences are extracted
     (``intergenic_seq``), folded (``self._run_rnafold``), the secondary
     structure information is extracted (``extract_info_sec``), the
     temporary index file is removed and the terminator candidates are
     written (``poly_t``). ``detect_coverage`` is then called for the
     prefix. After the loop the GFF files in ``self.outfolder["term"]``
     are combined and ``self._move_file`` is invoked.

     Args:
         prefixs: iterable of string prefixes used to build file names.
         merge_path: folder containing ``<prefix>.gff``.
         wig_path: folder containing ``<prefix>_forward.wig`` and
             ``<prefix>_reverse.wig``.
         merge_wigs: passed through unchanged to ``detect_coverage``.
         args_term: argument object; ``out_folder``, ``RNAfold_path`` and
             ``gffs`` are read here and the object itself is forwarded
             to the helper functions.
         log: object with a ``write`` method that receives progress
             messages.
     '''
     log.write("Searching terminators which located in gene converged "
               "region.\n")
     for prefix in prefixs:
         tmp_seq = os.path.join(args_term.out_folder,
                                "_".join(["inter_seq", prefix]))
         tmp_index = os.path.join(args_term.out_folder,
                                  "_".join(["inter_index", prefix]))
         tmp_sec = os.path.join(args_term.out_folder,
                                "_".join(["inter_sec", prefix]))
         tmp_cand = os.path.join(args_term.out_folder,
                                 "_".join(["term_candidates", prefix]))
         tran_file = os.path.join(self.tran_path,
                                  "_".join([prefix, "transcript.gff"]))
         gff_file = os.path.join(merge_path, prefix + ".gff")
         fasta_file = os.path.join(self.fasta_path, prefix + ".fa")
         if os.path.exists(tran_file):
             print("Extracting sequences of {0}".format(prefix))
             log.write("Running get_inter_seq.py to extract the potential "
                       "sequences from {0}.\n".format(prefix))
             intergenic_seq(fasta_file, tran_file, gff_file, tmp_seq,
                            tmp_index, args_term)
             log.write("\t" + tmp_seq + " is generated for storing the "
                       "potential sequences.\n")
             self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec,
                               prefix, log)
             log.write("Running extract_sec_info.py to extract the "
                       "information of secondary structure from "
                       "{0}.\n".format(prefix))
             extract_info_sec(tmp_sec, tmp_seq, tmp_index)
             # The index file is only needed by extract_info_sec.
             os.remove(tmp_index)
             log.write("Running get_polyT.py to detect the "
                       "terminator candidates for {0}.\n".format(prefix))
             poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand,
                    args_term)
             log.write("\t" + tmp_cand + " which temporary stores terminator "
                       "candidates is generated.\n")
         # NOTE(review): detect_coverage runs even when the transcript file
         # is missing, so tmp_cand may not have been written in this
         # iteration -- confirm detect_coverage tolerates that.
         print("Detecting terminators for " + prefix)
         log.write("Running detect_coverage_term.py to gain "
                   "high-confidence terminators for {0}.\n".format(prefix))
         detect_coverage(
             tmp_cand, gff_file, tran_file, fasta_file,
             os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
             os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
             os.path.join(self.tmps["hp_path"], "_".join([
                 prefix, self.tmps["hp_gff"]])), merge_wigs,
             os.path.join(self.outfolder["term"], "_".join([
                 prefix, self.suffixs["gff"]])),
             os.path.join(self.tmps["term_table"], "_".join([
                 prefix, "term_raw.csv"])), args_term)
     self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
                                  None, "term")
     self._move_file(self.outfolder["term"], self.outfolder["csv"])
示例#3
0
 def _compute_intersection_forward_reverse(self, prefixs, merge_path,
                                           wig_path, merge_wigs, args_term):
     '''Search terminators located in gene converged regions.

     For every prefix the temporary/input paths are built first. When the
     transcript GFF exists, ``intergenic_seq``, ``self._run_rnafold``,
     ``extract_info_sec`` and ``poly_t`` are run in that order (the
     temporary index file is removed right after ``extract_info_sec``).
     ``detect_coverage`` is then called for the prefix. After the loop
     the GFF files in ``self.outfolder["term"]`` are combined and
     ``self._move_file`` is invoked.

     Args:
         prefixs: iterable of string prefixes used to build file names.
         merge_path: folder containing ``<prefix>.gff``.
         wig_path: folder containing ``<prefix>_forward.wig`` and
             ``<prefix>_reverse.wig``.
         merge_wigs: passed through unchanged to ``detect_coverage``.
         args_term: argument object; ``out_folder``, ``RNAfold_path`` and
             ``gffs`` are read here and the object itself is forwarded
             to the helper functions.
     '''
     for prefix in prefixs:
         tmp_seq = os.path.join(args_term.out_folder,
                                "_".join(["inter_seq", prefix]))
         tmp_index = os.path.join(args_term.out_folder,
                                  "_".join(["inter_index", prefix]))
         tmp_sec = os.path.join(args_term.out_folder,
                                "_".join(["inter_sec", prefix]))
         tmp_cand = os.path.join(
             args_term.out_folder, "_".join(["term_candidates", prefix]))
         tran_file = os.path.join(self.tran_path,
                                  "_".join([prefix, "transcript.gff"]))
         gff_file = os.path.join(merge_path, prefix + ".gff")
         fasta_file = os.path.join(self.fasta_path, prefix + ".fa")
         if os.path.exists(tran_file):
             print("Extracting sequences of {0}".format(prefix))
             intergenic_seq(fasta_file, tran_file, gff_file, tmp_seq,
                            tmp_index, args_term)
             self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec,
                               prefix)
             extract_info_sec(tmp_sec, tmp_seq, tmp_index)
             # The index file is only needed by extract_info_sec.
             os.remove(tmp_index)
             poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand,
                    args_term)
         # NOTE(review): detect_coverage runs even when the transcript file
         # is missing, so tmp_cand may not have been written in this
         # iteration -- confirm detect_coverage tolerates that.
         print("Detecting terminators for " + prefix)
         detect_coverage(
             tmp_cand, gff_file, tran_file, fasta_file,
             os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
             os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
             os.path.join(self.tmps["hp_path"],
                          "_".join([prefix, self.tmps["hp_gff"]])),
             merge_wigs,
             os.path.join(self.outfolder["term"],
                          "_".join([prefix, self.suffixs["gff"]])),
             os.path.join(self.tmps["term_table"],
                          "_".join([prefix, "term_raw.csv"])), args_term)
     self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
                                  None, "term")
     self._move_file(self.outfolder["term"], self.outfolder["csv"])
示例#4
0
 def _compute_intersection_forward_reverse(self, prefixs, merge_path,
                                           wig_path, merge_wigs, args_term,
                                           log):
     '''Search terminators located in gene converged regions.

     For every prefix the temporary/input paths are built first. When the
     transcript GFF exists, the potential sequences are extracted
     (``intergenic_seq``), folded (``self._run_rnafold``), the secondary
     structure information is extracted (``extract_info_sec``), the
     temporary index file is removed and the terminator candidates are
     written (``poly_t``). ``detect_coverage`` is then called for the
     prefix. After the loop the GFF files in ``self.outfolder["term"]``
     are combined and ``self._move_file`` is invoked.

     Args:
         prefixs: iterable of string prefixes used to build file names.
         merge_path: folder containing ``<prefix>.gff``.
         wig_path: folder containing ``<prefix>_forward.wig`` and
             ``<prefix>_reverse.wig``.
         merge_wigs: passed through unchanged to ``detect_coverage``.
         args_term: argument object; ``out_folder``, ``RNAfold_path`` and
             ``gffs`` are read here and the object itself is forwarded
             to the helper functions.
         log: object with a ``write`` method that receives progress
             messages.
     '''
     log.write("Searching terminators which located in gene converged "
               "region.\n")
     for prefix in prefixs:
         tmp_seq = os.path.join(args_term.out_folder,
                                "_".join(["inter_seq", prefix]))
         tmp_index = os.path.join(args_term.out_folder,
                                  "_".join(["inter_index", prefix]))
         tmp_sec = os.path.join(args_term.out_folder,
                                "_".join(["inter_sec", prefix]))
         tmp_cand = os.path.join(
             args_term.out_folder, "_".join(["term_candidates", prefix]))
         tran_file = os.path.join(self.tran_path,
                                  "_".join([prefix, "transcript.gff"]))
         gff_file = os.path.join(merge_path, prefix + ".gff")
         fasta_file = os.path.join(self.fasta_path, prefix + ".fa")
         if os.path.exists(tran_file):
             print("Extracting sequences of {0}".format(prefix))
             log.write("Running get_inter_seq.py to extract the potential "
                       "sequences from {0}.\n".format(prefix))
             intergenic_seq(fasta_file, tran_file, gff_file, tmp_seq,
                            tmp_index, args_term)
             log.write("\t" + tmp_seq + " is generated for storing the "
                       "potential sequences.\n")
             self._run_rnafold(args_term.RNAfold_path, tmp_seq, tmp_sec,
                               prefix, log)
             log.write(
                 "Running extract_sec_info.py to extract the "
                 "information of secondary structure from {0}.\n".format(
                     prefix))
             extract_info_sec(tmp_sec, tmp_seq, tmp_index)
             # The index file is only needed by extract_info_sec.
             os.remove(tmp_index)
             log.write("Running get_polyT.py to detect the "
                       "terminator candidates for {0}.\n".format(prefix))
             poly_t(tmp_seq, tmp_sec, gff_file, tran_file, tmp_cand,
                    args_term)
             log.write("\t" + tmp_cand +
                       " which temporary stores terminator "
                       "candidates is generated.\n")
         # NOTE(review): detect_coverage runs even when the transcript file
         # is missing, so tmp_cand may not have been written in this
         # iteration -- confirm detect_coverage tolerates that.
         print("Detecting terminators for " + prefix)
         log.write("Running detect_coverage_term.py to gain "
                   "high-confidence terminators for {0}.\n".format(prefix))
         detect_coverage(
             tmp_cand, gff_file, tran_file, fasta_file,
             os.path.join(wig_path, "_".join([prefix, "forward.wig"])),
             os.path.join(wig_path, "_".join([prefix, "reverse.wig"])),
             os.path.join(self.tmps["hp_path"],
                          "_".join([prefix, self.tmps["hp_gff"]])),
             merge_wigs,
             os.path.join(self.outfolder["term"],
                          "_".join([prefix, self.suffixs["gff"]])),
             os.path.join(self.tmps["term_table"],
                          "_".join([prefix, "term_raw.csv"])), args_term)
     self.multiparser.combine_gff(args_term.gffs, self.outfolder["term"],
                                  None, "term")
     self._move_file(self.outfolder["term"], self.outfolder["csv"])