Пример #1
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     """Extract candidate sequences for each annotation and scan them twice.

     For every entry of ``self.gff_path`` whose name ends in ".gff" the
     helpers are invoked in this order: ``extract_potential_rbs``,
     ``self._run_infernal`` ("txt"), ``regenerate_seq``,
     ``self._run_infernal`` ("re_txt"), ``reextract_rbs`` and
     ``modify_table``.  The second table is moved over the first one
     before ``modify_table`` runs.

     Args:
         prefixs: list extended in place with the prefix of each ".gff"
             file encountered.
         args_ribo: option object handed to the helpers; its
             ``output_all`` attribute is read here.

     Returns:
         The ``prefixs`` list that was passed in.
     """
     for gff_name in os.listdir(self.gff_path):
         if not gff_name.endswith(".gff"):
             continue
         # Note: replace() removes every ".gff" occurrence in the name,
         # not only the trailing extension.
         prefix = gff_name.replace(".gff", "")
         candidate_fa = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         print("extracting seq of riboswitch candidates of {0}".format(
             prefix))
         genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff_name)
         tss_gff = os.path.join(self.tss_path, prefix + "_TSS.gff")
         transcript_gff = os.path.join(self.tran_path,
                                       prefix + "_transcript.gff")
         extract_potential_rbs(genome_fa, annotation, tss_gff,
                               transcript_gff, candidate_fa, args_ribo)

         print("pre-scanning of {0}".format(prefix))
         pre_scan = self._run_infernal(args_ribo, candidate_fa, "txt",
                                       prefix)
         regenerated_fa = os.path.join(self.tmp_files["fasta"],
                                       "_".join([prefix, "regenerate.fa"]))
         table = os.path.join(self.tmp_files["table"],
                              "_".join([prefix, self.suffixs["csv"]]))
         regenerate_seq(pre_scan, candidate_fa, table, regenerated_fa)

         print("scanning of {0}".format(prefix))
         final_scan = self._run_infernal(args_ribo, regenerated_fa,
                                         "re_txt", prefix)
         rescan_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["re_csv"]]))
         reextract_rbs(final_scan, table, rescan_table)
         # The table from the second pass takes the place of the first one.
         shutil.move(rescan_table, table)
         modify_table(table, args_ribo.output_all)
     return prefixs
Пример #2
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     """Process each ".gff" file: extract candidates, pre-scan, re-scan.

     The directory ``self.gff_path`` is listed once; every name ending in
     ".gff" goes through ``extract_potential_rbs``, a first
     ``self._run_infernal`` call, ``regenerate_seq``, a second
     ``self._run_infernal`` call, ``reextract_rbs`` and ``modify_table``.

     Args:
         prefixs: list that receives (in place) the prefix derived from
             each ".gff" file name.
         args_ribo: option object forwarded to the helpers;
             ``args_ribo.output_all`` is passed to ``modify_table``.

     Returns:
         The same ``prefixs`` list.
     """
     annotation_names = (name for name in os.listdir(self.gff_path)
                         if name.endswith(".gff"))
     for gff_file in annotation_names:
         # All ".gff" substrings are dropped from the file name here.
         prefix = gff_file.replace(".gff", "")
         seq_pass1 = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         # Step 1: candidate extraction.
         print("extracting seq of riboswitch candidates of {0}".format(
             prefix))
         fasta_in = os.path.join(self.fasta_path, prefix + ".fa")
         gff_in = os.path.join(self.gff_path, gff_file)
         tss_in = os.path.join(self.tss_path, prefix + "_TSS.gff")
         tran_in = os.path.join(self.tran_path, prefix + "_transcript.gff")
         extract_potential_rbs(fasta_in, gff_in, tss_in, tran_in,
                               seq_pass1, args_ribo)

         # Step 2: first scan and sequence regeneration.
         print("pre-scanning of {0}".format(prefix))
         scan_pass1 = self._run_infernal(
             args_ribo, seq_pass1, "txt", prefix)
         seq_pass2 = os.path.join(
             self.tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
         table_pass1 = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["csv"]]))
         regenerate_seq(scan_pass1, seq_pass1, table_pass1, seq_pass2)

         # Step 3: second scan; its table overwrites the first-pass table.
         print("scanning of {0}".format(prefix))
         scan_pass2 = self._run_infernal(
             args_ribo, seq_pass2, "re_txt", prefix)
         table_pass2 = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["re_csv"]]))
         reextract_rbs(scan_pass2, table_pass1, table_pass2)
         shutil.move(table_pass2, table_pass1)
         modify_table(table_pass1, args_ribo.output_all)
     return prefixs
Пример #3
0
 def test_regenerate_seq(self):
     """Check the table and sequence files written by rr.regenerate_seq."""
     folder = self.test_folder
     table_path = os.path.join(folder, "table")
     fasta_path = os.path.join(folder, "seq")
     scan_path = os.path.join(folder, "align")
     candidate_path = os.path.join(folder, "ribo_seq")
     expected_table = (
         "riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t"
         "10\t16\tRF00162\t6.2e-18\t5\t12")
     expected_fasta = (
         ">riboswitch_5|Staphylococcus_aureus_HG003|+|SAOUHSC_00013|14|21"
         "\nATTATTAC")

     # Prepare both input files from the example fixtures, then run.
     gen_file(scan_path, self.example.scan_file)
     gen_file(candidate_path, self.example.seq_file)
     rr.regenerate_seq(scan_path, candidate_path, table_path, fasta_path)

     table_lines = import_data(table_path)
     self.assertEqual("\n".join(table_lines), expected_table)
     fasta_lines = import_data(fasta_path)
     self.assertEqual("\n".join(fasta_lines), expected_fasta)
Пример #4
0
 def test_regenerate_seq(self):
     """Run rr.regenerate_seq on the example files and compare outputs.

     The expected sequence output contains the same FASTA record twice.
     """
     paths = {
         "table": os.path.join(self.test_folder, "table"),
         "seq": os.path.join(self.test_folder, "seq"),
         "align": os.path.join(self.test_folder, "align"),
         "ribo_seq": os.path.join(self.test_folder, "ribo_seq"),
     }
     gen_file(paths["align"], self.example.scan_file)
     gen_file(paths["ribo_seq"], self.example.seq_file)
     rr.regenerate_seq(paths["align"], paths["ribo_seq"],
                       paths["table"], paths["seq"])

     # One tab-separated row is expected in the table.
     row = "\t".join((
         "riboswitch_5", "Staphylococcus_aureus_HG003", "+",
         "SAOUHSC_00013", "10", "16", "RF00162", "6.2e-18", "5", "12"))
     self.assertEqual("\n".join(import_data(paths["table"])), row)

     # The same record is expected twice, separated by a newline.
     record = (">riboswitch_5|Staphylococcus_aureus_HG003|+|"
               "SAOUHSC_00013|14|21\nATTATTAC")
     self.assertEqual("\n".join(import_data(paths["seq"])),
                      "\n".join([record, record]))
Пример #5
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam):
     '''Extract the candidate sequences and scan them in two passes.

     Every name in ``self.gff_path`` ending in ".gff" is processed with
     ``extract_potential_rbs``, ``self._run_cmscan`` ("txt"),
     ``regenerate_seq``, ``self._run_cmscan`` ("re_txt"),
     ``reextract_rbs`` and ``modify_table``, in that order.

     Args:
         prefixs: list extended in place with each file's prefix.
         args_ribo: option object forwarded to the helpers; its
             ``output_all`` attribute is read here.
         tmp_files: mapping whose "fasta" and "table" entries are used as
             the leading path component of the temporary files.
         suffixs: mapping whose "csv" and "re_csv" entries are joined to
             the prefix with "_" to name the tables.
         feature: forwarded unchanged to ``extract_potential_rbs``.
         rfam: forwarded unchanged to ``self._run_cmscan``.

     Returns:
         The ``prefixs`` list that was passed in.
     '''
     for gff_name in os.listdir(self.gff_path):
         if not gff_name.endswith(".gff"):
             continue
         # replace() strips every ".gff" occurrence from the name.
         prefix = gff_name.replace(".gff", "")
         candidate_fa = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         print("Extracting sequences of candidates for {0}".format(prefix))
         genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff_name)
         tss_gff = os.path.join(self.tss_path, prefix + "_TSS.gff")
         transcript_gff = os.path.join(self.tran_path,
                                       prefix + "_transcript.gff")
         extract_potential_rbs(genome_fa, annotation, tss_gff,
                               transcript_gff, candidate_fa, args_ribo,
                               feature)

         print("Pre-scanning of {0}".format(prefix))
         pre_scan = self._run_cmscan(args_ribo, candidate_fa, "txt", prefix,
                                     tmp_files, suffixs, rfam)
         regenerated_fa = os.path.join(tmp_files["fasta"],
                                       "_".join([prefix, "regenerate.fa"]))
         table = os.path.join(tmp_files["table"],
                              "_".join([prefix, suffixs["csv"]]))
         regenerate_seq(pre_scan, candidate_fa, table, regenerated_fa)

         print("Scanning of {0}".format(prefix))
         final_scan = self._run_cmscan(args_ribo, regenerated_fa, "re_txt",
                                       prefix, tmp_files, suffixs, rfam)
         rescan_table = os.path.join(tmp_files["table"],
                                     "_".join([prefix, suffixs["re_csv"]]))
         reextract_rbs(final_scan, table, rescan_table)
         # The second-pass table replaces the first-pass table.
         shutil.move(rescan_table, table)
         modify_table(table, args_ribo.output_all)
     return prefixs
Пример #6
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Extract the candidate sequences and scan them, logging each step.

     Every name in ``self.gff_path`` ending in ".gff" is processed with
     ``extract_potential_rbs``, ``self._run_cmscan`` ("txt"),
     ``regenerate_seq``, ``self._run_cmscan`` ("re_txt"),
     ``reextract_rbs`` and ``modify_table``, in that order.  A message is
     written to ``log`` around each step.

     Args:
         prefixs: list extended in place with each file's prefix.
         args_ribo: option object forwarded to the helpers; its ``cutoff``
             and ``output_all`` attributes are read here.
         tmp_files: mapping whose "fasta" and "table" entries are used as
             the leading path component of the temporary files.
         suffixs: mapping whose "csv" and "re_csv" entries are joined to
             the prefix with "_" to name the tables.
         feature: forwarded unchanged to ``extract_potential_rbs``.
         rfam: forwarded unchanged to ``self._run_cmscan``.
         log: object whose ``write`` method receives the messages; it is
             also passed on to ``self._run_cmscan``.

     Returns:
         The ``prefixs`` list that was passed in.
     '''
     version_note = ("Please make sure the version of Infernal is at "
                     "least 1.1.1.\n")
     for gff_name in os.listdir(self.gff_path):
         if not gff_name.endswith(".gff"):
             continue
         # replace() strips every ".gff" occurrence from the name.
         prefix = gff_name.replace(".gff", "")
         candidate_fa = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         print("Extracting sequences of candidates for {0}".format(prefix))
         # Without a TSS directory, None is handed to the extractor.
         tss_file = (None if self.tss_path is None
                     else os.path.join(self.tss_path, prefix + "_TSS.gff"))
         log.write("Running extract_RBS.py to extract potential "
                   "sequences of riboswitches/RNA thermometers for "
                   "{0}.\n".format(prefix))
         genome_fa = os.path.join(self.fasta_path, prefix + ".fa")
         annotation = os.path.join(self.gff_path, gff_name)
         transcript_gff = os.path.join(self.tran_path,
                                       prefix + "_transcript.gff")
         extract_potential_rbs(genome_fa, annotation, tss_file,
                               transcript_gff, candidate_fa, args_ribo,
                               feature)
         log.write("\t" + candidate_fa + " is temporary generated.\n")

         print("Pre-scanning of {0}".format(prefix))
         log.write("Using Infernal to pre-scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(version_note)
         pre_scan = self._run_cmscan(args_ribo, candidate_fa, "txt", prefix,
                                     tmp_files, suffixs, rfam, log)
         regenerated_fa = os.path.join(tmp_files["fasta"],
                                       "_".join([prefix, "regenerate.fa"]))
         table = os.path.join(tmp_files["table"],
                              "_".join([prefix, suffixs["csv"]]))
         log.write("Running recompute_RBS.py to update the potential "
                   "sequences of riboswitches/RNA thermometers for {0} "
                   "based on the pre-scanning results.\n".format(prefix))
         regenerate_seq(pre_scan, candidate_fa, table, regenerated_fa)
         log.write("\t" + regenerated_fa + " is temporary generated.\n")

         print("Scanning of {0}".format(prefix))
         log.write("Using Infernal to scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(version_note)
         final_scan = self._run_cmscan(args_ribo, regenerated_fa, "re_txt",
                                       prefix, tmp_files, suffixs, rfam,
                                       log)
         rescan_table = os.path.join(tmp_files["table"],
                                     "_".join([prefix, suffixs["re_csv"]]))
         log.write("Running recompute_RBS.py and modify_rbs_table.py "
                   "to generate tables for {0} "
                   "based on the scanning results.\n".format(prefix))
         reextract_rbs(final_scan, table, rescan_table, args_ribo.cutoff)
         # The second-pass table replaces the first-pass table.
         shutil.move(rescan_table, table)
         modify_table(table, args_ribo.output_all)
     return prefixs
Пример #7
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Extract candidate sequences per annotation file and scan them.

     The names in ``self.gff_path`` that end in ".gff" are handled one
     by one: ``extract_potential_rbs`` -> ``self._run_cmscan`` ("txt") ->
     ``regenerate_seq`` -> ``self._run_cmscan`` ("re_txt") ->
     ``reextract_rbs`` -> ``modify_table``.  Messages describing the steps
     are written to ``log``.

     Args:
         prefixs: list that receives (in place) each derived prefix.
         args_ribo: option object forwarded to the helpers;
             ``args_ribo.cutoff`` and ``args_ribo.output_all`` are read.
         tmp_files: mapping providing the "fasta" and "table" path
             components for temporary files.
         suffixs: mapping providing the "csv" and "re_csv" name parts.
         feature: passed through to ``extract_potential_rbs``.
         rfam: passed through to ``self._run_cmscan``.
         log: object with a ``write`` method; also passed through to
             ``self._run_cmscan``.

     Returns:
         The same ``prefixs`` list.
     '''
     gff_names = (name for name in os.listdir(self.gff_path)
                  if name.endswith(".gff"))
     for gff_file in gff_names:
         # All ".gff" substrings are dropped from the file name here.
         prefix = gff_file.replace(".gff", "")
         seq_pass1 = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)

         # Step 1: candidate extraction (TSS input is optional).
         print("Extracting sequences of candidates for {0}".format(prefix))
         tss_file = None
         if self.tss_path is not None:
             tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")
         log.write(
             "Running extract_RBS.py to extract potential "
             "sequences of riboswitches/RNA thermometers for "
             "{0}.\n".format(prefix))
         fasta_in = os.path.join(self.fasta_path, prefix + ".fa")
         gff_in = os.path.join(self.gff_path, gff_file)
         tran_in = os.path.join(self.tran_path, prefix + "_transcript.gff")
         extract_potential_rbs(fasta_in, gff_in, tss_file, tran_in,
                               seq_pass1, args_ribo, feature)
         log.write("\t" + seq_pass1 + " is temporary generated.\n")

         # Step 2: first scan and sequence regeneration.
         print("Pre-scanning of {0}".format(prefix))
         log.write(
             "Using Infernal to pre-scan riboswitches/RNA "
             "thermometers for {0}.\n".format(prefix))
         log.write("Please make sure the version of Infernal is at "
                   "least 1.1.1.\n")
         scan_pass1 = self._run_cmscan(
             args_ribo, seq_pass1, "txt", prefix, tmp_files, suffixs,
             rfam, log)
         seq_pass2 = os.path.join(
             tmp_files["fasta"], "_".join([prefix, "regenerate.fa"]))
         table_pass1 = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
         log.write(
             "Running recompute_RBS.py to update the potential "
             "sequences of riboswitches/RNA thermometers for {0} "
             "based on the pre-scanning results.\n".format(prefix))
         regenerate_seq(scan_pass1, seq_pass1, table_pass1, seq_pass2)
         log.write("\t" + seq_pass2 + " is temporary generated.\n")

         # Step 3: second scan; its table overwrites the first-pass table.
         print("Scanning of {0}".format(prefix))
         log.write(
             "Using Infernal to scan riboswitches/RNA "
             "thermometers for {0}.\n".format(prefix))
         log.write("Please make sure the version of Infernal is at "
                   "least 1.1.1.\n")
         scan_pass2 = self._run_cmscan(
             args_ribo, seq_pass2, "re_txt", prefix, tmp_files, suffixs,
             rfam, log)
         table_pass2 = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
         log.write(
             "Running recompute_RBS.py and modify_rbs_table.py "
             "to generate tables for {0} "
             "based on the scanning results.\n".format(prefix))
         reextract_rbs(scan_pass2, table_pass1, table_pass2,
                       args_ribo.cutoff)
         shutil.move(table_pass2, table_pass1)
         modify_table(table_pass1, args_ribo.output_all)
     return prefixs