Пример #1
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     '''Extract candidate sequences and scan them for every ".gff" file.

     Every entry of self.gff_path whose name ends with ".gff" is processed:
     extract_potential_rbs writes the first sequence file, _run_infernal
     scans it, regenerate_seq builds the second sequence file and the
     first table, _run_infernal scans again, reextract_rbs writes the
     second table, which is then moved over the first table and passed to
     modify_table.

     prefixs -- list extended in place with the file name (minus the
                ".gff" extension) of every processed file.
     args_ribo -- forwarded to the helpers; its output_all attribute is
                  handed to modify_table.

     Returns the same prefixs list.
     '''
     gff_ext = ".gff"
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(gff_ext):
             continue
         # Strip only the trailing extension. str.replace() would also
         # drop any ".gff" found elsewhere in the name and thereby point
         # at the wrong fasta/TSS/transcript files.
         prefix = gff[:-len(gff_ext)]
         first_seq = os.path.join(self.tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)
         print("extracting seq of riboswitch candidates of {0}".format(
             prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff),
             os.path.join(self.tss_path, prefix + "_TSS.gff"),
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             first_seq, args_ribo)
         print("pre-scanning of {0}".format(prefix))
         first_scan_file = self._run_infernal(args_ribo, first_seq,
                                              "txt", prefix)
         sec_seq = os.path.join(self.tmp_files["fasta"],
                                "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["csv"]]))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         print("scanning of {0}".format(prefix))
         sec_scan_file = self._run_infernal(args_ribo, sec_seq,
                                            "re_txt", prefix)
         sec_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["re_csv"]]))
         reextract_rbs(sec_scan_file, first_table, sec_table)
         # The re-extracted table replaces the pre-scan table before the
         # final clean-up step.
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs
Пример #2
0
 def _scan_extract_rfam(self, prefixs, args_ribo):
     '''Run the extract / pre-scan / re-scan pipeline for each ".gff" file.

     For every file name in self.gff_path ending with ".gff" the method
     calls, in order: extract_potential_rbs, self._run_infernal ("txt"),
     regenerate_seq, self._run_infernal ("re_txt"), reextract_rbs, then
     moves the second table over the first one and calls modify_table.

     prefixs -- list that receives, in place, the name of each processed
                file without its ".gff" extension.
     args_ribo -- passed through to the helpers; args_ribo.output_all is
                  given to modify_table.

     Returns prefixs (the same list object that was passed in).
     '''
     extension = ".gff"
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(extension):
             continue
         # Only the trailing extension is removed; replacing every
         # ".gff" in the name would corrupt prefixes such as
         # "x.gff_v2.gff" and the paths derived from them.
         prefix = gff[:-len(extension)]
         fasta_dir = self.tmp_files["fasta"]
         first_seq = os.path.join(fasta_dir, prefix + ".fa")
         prefixs.append(prefix)
         print("extracting seq of riboswitch candidates of {0}".format(
             prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff),
             os.path.join(self.tss_path, prefix + "_TSS.gff"),
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             first_seq, args_ribo)
         print("pre-scanning of {0}".format(prefix))
         first_scan_file = self._run_infernal(args_ribo, first_seq,
                                              "txt", prefix)
         sec_seq = os.path.join(self.tmp_files["fasta"],
                                "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["csv"]]))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         print("scanning of {0}".format(prefix))
         sec_scan_file = self._run_infernal(args_ribo, sec_seq,
                                            "re_txt", prefix)
         sec_table = os.path.join(
             self.tmp_files["table"],
             "_".join([prefix, self.suffixs["re_csv"]]))
         reextract_rbs(sec_scan_file, first_table, sec_table)
         # Overwrite the pre-scan table with the re-extracted one.
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs
Пример #3
0
    def test_modify_table(self):
        result = """#ID\tstrain\tstrand\tassociated_CDS\tstart_genome\tend_genome	Rfam	e_value	start_align	end_align
riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t15948\t16046	RF00162	1.6e-18	1	99
riboswitch_11\tStaphylococcus_aureus_HG003\t-\tSAOUHSC_00007\t27955\t28053	RF00162	1.6e-18	1	99
riboswitch_183\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00372\t377996\t378098	RF00167	2.2e-18	1	103"""
        table = os.path.join(self.test_folder, "test")
        gen_file(table, self.example.ribos)
        mrt.modify_table(table, True)
        data = import_data(table)
        self.assertEqual("\n".join(data), result)
        gen_file(table, self.example.ribos)
        mrt.modify_table(table, False)
        data = import_data(table)
        self.assertEqual("\n".join(data), result)
Пример #4
0
    def test_modify_table(self):
        result = """#ID\tGenome\tStrand\tAssociated_CDS\tStart_genome\tEnd_genome\tRfam\tE_value\tScore\tStart_align\tEnd_align
riboswitch_5\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00013\t15948\t16046\tRF00162\t1.6e-18\t74\t1\t99
riboswitch_11\tStaphylococcus_aureus_HG003\t-\tSAOUHSC_00007\t27955\t28053\tRF00162\t1.6e-18\t74\t1\t99
riboswitch_183\tStaphylococcus_aureus_HG003\t+\tSAOUHSC_00372\t377996\t378098\tRF00167\t2.2e-18\t45\t1\t103"""
        table = os.path.join(self.test_folder, "test")
        gen_file(table, self.example.ribos)
        mrt.modify_table(table, True)
        data = import_data(table)
        self.assertEqual("\n".join(data), result)
        gen_file(table, self.example.ribos)
        mrt.modify_table(table, False)
        data = import_data(table)
        self.assertEqual("\n".join(data), result)
Пример #5
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam):
     '''Extract the candidate sequences and scan them for each ".gff" file.

     Every entry of self.gff_path whose name ends with ".gff" goes through
     extract_potential_rbs, a first self._run_cmscan ("txt"),
     regenerate_seq, a second self._run_cmscan ("re_txt") and
     reextract_rbs; the second table then replaces the first one, which
     is finally passed to modify_table.

     prefixs -- list extended in place with each processed file name
                stripped of its ".gff" extension.
     args_ribo -- forwarded to the helpers; args_ribo.output_all is
                  passed to modify_table.
     tmp_files -- mapping read for its "fasta" and "table" entries, which
                  are used as directories for the generated files.
     suffixs -- mapping read for its "csv" and "re_csv" entries, which
                are used to build the table file names.
     feature -- forwarded unchanged to extract_potential_rbs.
     rfam -- forwarded unchanged to self._run_cmscan.

     Returns the same prefixs list.
     '''
     gff_ext = ".gff"
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(gff_ext):
             continue
         # Remove just the trailing extension; str.replace() would strip
         # every ".gff" in the name and break the derived file paths.
         prefix = gff[:-len(gff_ext)]
         first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)
         print("Extracting sequences of candidates for {0}".format(
             prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff),
             os.path.join(self.tss_path, prefix + "_TSS.gff"),
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             first_seq, args_ribo, feature)
         print("Pre-scanning of {0}".format(prefix))
         first_scan_file = self._run_cmscan(
             args_ribo, first_seq, "txt", prefix, tmp_files,
             suffixs, rfam)
         sec_seq = os.path.join(tmp_files["fasta"],
                                "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         print("Scanning of {0}".format(prefix))
         sec_scan_file = self._run_cmscan(
             args_ribo, sec_seq, "re_txt", prefix, tmp_files,
             suffixs, rfam)
         sec_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
         reextract_rbs(sec_scan_file, first_table, sec_table)
         # The re-extracted table takes the place of the pre-scan table.
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs
Пример #6
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Extract the candidate sequences and scan them for each ".gff" file.

     Every entry of self.gff_path whose name ends with ".gff" goes through
     extract_potential_rbs, a first self._run_cmscan ("txt"),
     regenerate_seq, a second self._run_cmscan ("re_txt") and
     reextract_rbs; the second table then replaces the first one, which
     is finally passed to modify_table. Each step is announced through
     log.write.

     prefixs -- list extended in place with each processed file name
                stripped of its ".gff" extension.
     args_ribo -- forwarded to the helpers; its cutoff attribute goes to
                  reextract_rbs and output_all to modify_table.
     tmp_files -- mapping read for its "fasta" and "table" entries.
     suffixs -- mapping read for its "csv" and "re_csv" entries.
     feature -- forwarded unchanged to extract_potential_rbs.
     rfam -- forwarded unchanged to self._run_cmscan.
     log -- object whose write() method receives the progress messages;
            it is also forwarded to self._run_cmscan.

     Returns the same prefixs list.
     '''
     gff_ext = ".gff"
     infernal_note = ("Please make sure the version of Infernal is at "
                      "least 1.1.1.\n")
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(gff_ext):
             continue
         # Remove just the trailing extension; str.replace() would strip
         # every ".gff" in the name and break the derived file paths.
         prefix = gff[:-len(gff_ext)]
         first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)
         print("Extracting sequences of candidates for {0}".format(
             prefix))
         # The TSS file is optional: None is passed on when no TSS
         # directory is configured.
         if self.tss_path is not None:
             tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")
         else:
             tss_file = None
         log.write("Running extract_RBS.py to extract potential "
                   "sequences of riboswitches/RNA thermometers for "
                   "{0}.\n".format(prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff), tss_file,
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             first_seq, args_ribo, feature)
         log.write("\t" + first_seq + " is temporary generated.\n")
         print("Pre-scanning of {0}".format(prefix))
         log.write("Using Infernal to pre-scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(infernal_note)
         first_scan_file = self._run_cmscan(
             args_ribo, first_seq, "txt", prefix, tmp_files,
             suffixs, rfam, log)
         sec_seq = os.path.join(tmp_files["fasta"],
                                "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["csv"]]))
         log.write("Running recompute_RBS.py to update the potential "
                   "sequences of riboswitches/RNA thermometers for {0} "
                   "based on the pre-scanning results.\n".format(prefix))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         log.write("\t" + sec_seq + " is temporary generated.\n")
         print("Scanning of {0}".format(prefix))
         log.write("Using Infernal to scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(infernal_note)
         sec_scan_file = self._run_cmscan(
             args_ribo, sec_seq, "re_txt", prefix, tmp_files,
             suffixs, rfam, log)
         sec_table = os.path.join(
             tmp_files["table"], "_".join([prefix, suffixs["re_csv"]]))
         log.write("Running recompute_RBS.py and modify_rbs_table.py "
                   "to generate tables for {0} "
                   "based on the scanning results.\n".format(prefix))
         reextract_rbs(sec_scan_file, first_table, sec_table,
                       args_ribo.cutoff)
         # The re-extracted table takes the place of the pre-scan table.
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs
Пример #7
0
 def _scan_extract_rfam(self, prefixs, args_ribo, tmp_files, suffixs,
                        feature, rfam, log):
     '''Run the logged extract / pre-scan / re-scan steps per ".gff" file.

     For every file name in self.gff_path ending with ".gff" the method
     calls, in order: extract_potential_rbs, self._run_cmscan ("txt"),
     regenerate_seq, self._run_cmscan ("re_txt") and reextract_rbs, then
     moves the second table over the first one and calls modify_table.
     A message is written to log before or after each step.

     prefixs -- list that receives, in place, the name of each processed
                file without its ".gff" extension.
     args_ribo -- passed through to the helpers; args_ribo.cutoff is
                  given to reextract_rbs and args_ribo.output_all to
                  modify_table.
     tmp_files -- mapping read for its "fasta" and "table" entries.
     suffixs -- mapping read for its "csv" and "re_csv" entries.
     feature -- passed unchanged to extract_potential_rbs.
     rfam -- passed unchanged to self._run_cmscan.
     log -- object providing write(); also passed to self._run_cmscan.

     Returns prefixs (the same list object that was passed in).
     '''
     extension = ".gff"
     version_note = (
         "Please make sure the version of Infernal is at least 1.1.1.\n"
     )
     for gff in os.listdir(self.gff_path):
         if not gff.endswith(extension):
             continue
         # Only the trailing extension is removed; replacing every
         # ".gff" in the name would corrupt prefixes such as
         # "x.gff_v2.gff" and the paths derived from them.
         prefix = gff[:-len(extension)]
         first_seq = os.path.join(tmp_files["fasta"], prefix + ".fa")
         prefixs.append(prefix)
         print("Extracting sequences of candidates for {0}".format(
             prefix))
         # Without a TSS directory the helper receives None instead of
         # a path.
         if self.tss_path is not None:
             tss_file = os.path.join(self.tss_path, prefix + "_TSS.gff")
         else:
             tss_file = None
         log.write("Running extract_RBS.py to extract potential "
                   "sequences of riboswitches/RNA thermometers for "
                   "{0}.\n".format(prefix))
         extract_potential_rbs(
             os.path.join(self.fasta_path, prefix + ".fa"),
             os.path.join(self.gff_path, gff), tss_file,
             os.path.join(self.tran_path, prefix + "_transcript.gff"),
             first_seq, args_ribo, feature)
         log.write("\t" + first_seq + " is temporary generated.\n")
         print("Pre-scanning of {0}".format(prefix))
         log.write("Using Infernal to pre-scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(version_note)
         first_scan_file = self._run_cmscan(args_ribo, first_seq, "txt",
                                            prefix, tmp_files, suffixs,
                                            rfam, log)
         sec_seq = os.path.join(tmp_files["fasta"],
                                "_".join([prefix, "regenerate.fa"]))
         first_table = os.path.join(tmp_files["table"],
                                    "_".join([prefix, suffixs["csv"]]))
         log.write(
             "Running recompute_RBS.py to update the potential "
             "sequences of riboswitches/RNA thermometers for {0} "
             "based on the pre-scanning results.\n".format(prefix))
         regenerate_seq(first_scan_file, first_seq, first_table, sec_seq)
         log.write("\t" + sec_seq + " is temporary generated.\n")
         print("Scanning of {0}".format(prefix))
         log.write("Using Infernal to scan riboswitches/RNA "
                   "thermometers for {0}.\n".format(prefix))
         log.write(version_note)
         sec_scan_file = self._run_cmscan(args_ribo, sec_seq, "re_txt",
                                          prefix, tmp_files, suffixs,
                                          rfam, log)
         sec_table = os.path.join(tmp_files["table"],
                                  "_".join([prefix, suffixs["re_csv"]]))
         log.write("Running recompute_RBS.py and modify_rbs_table.py "
                   "to generate tables for {0} "
                   "based on the scanning results.\n".format(prefix))
         reextract_rbs(sec_scan_file, first_table, sec_table,
                       args_ribo.cutoff)
         # Overwrite the pre-scan table with the re-extracted one.
         shutil.move(sec_table, first_table)
         modify_table(first_table, args_ribo.output_all)
     return prefixs