示例#1
0
 def __init__(self, args):
     """Store the parsed arguments and build the project helpers.

     When ``args`` carries more than three attributes, ``project_path``
     must exist on disk; otherwise an error is printed and the process
     exits.
     """
     self._args = args
     has_extra_args = len(args.__dict__) > 3
     if has_extra_args and not os.path.exists(args.project_path):
         print("Error: --project_path does not exists!")
         sys.exit()
     self._paths = Paths(args.project_path)
     self.args_container = ArgsContainer()
     self.helper = Helper()
示例#2
0
def utr_derived_srna(args_srna, libs, texs, wig_fs, wig_rs):
    """Detect UTR-derived sRNA candidates and write the GFF and table files.

    Candidate regions are collected from the CDS annotation (start
    terminal, inter-CDS, end terminal), sorted, matched against
    transcripts on the same strain and strand, and then filtered by the
    computed median cutoffs.

    Args:
        args_srna: Options container; ``output_file`` and ``output_table``
            name the files that are written.
        libs: Not used in this function.
        texs: Passed on to ``extend_utr_container``.
        wig_fs: Passed on to ``class_utr``.
        wig_rs: Passed on to ``class_utr``.
    """
    def region_order(region):
        # Sort key: strain, then coordinates, then strand.
        return (region["strain"], region["start"],
                region["end"], region["strand"])

    cdss, tas, tsss, pros, seq = read_data(args_srna)
    out = open(args_srna.output_file, "w")
    out.write("##gff-version 3\n")
    out_t = open(args_srna.output_table, "w")
    inters = []
    get_terminal(cdss, inters, seq, "start")
    get_inter(cdss, inters)
    get_terminal(cdss, inters, seq, "end")
    inters = sorted(inters, key=region_order)
    container = ArgsContainer()
    args_srna = container.extend_utr_container(
        args_srna, cdss, tsss, pros, out, out_t, texs)
    for inter in inters:
        for ta in tas:
            same_strain = inter["strain"] == ta.seq_id
            if not (same_strain and (inter["strand"] == ta.strand)):
                continue
            class_utr(inter, ta, args_srna, wig_fs, wig_rs)
    covers = get_utr_coverage(args_srna.utrs)
    mediandict = set_cutoff(covers, args_srna)
    print_median(args_srna.out_folder, mediandict)
    detect_srna(mediandict, args_srna)
    # The handles are closed through the extended container.
    args_srna.out.close()
    args_srna.out_t.close()
    free_memory([args_srna.srnas, args_srna.utrs, seq, inters, tas, cdss,
                 tas, tsss, pros, covers])
示例#3
0
def intergenic_srna(args_srna, libs, texs, wigs_f, wigs_r, tss_file):
    """Detect intergenic and antisense sRNA candidates.

    Every transcript is compared with the CDS annotation. Transcripts
    that overlap a CDS are skipped unless ``args_srna.in_cds`` is set;
    the others are checked with the intergenic or the antisense cutoff,
    depending on what ``compare_ta_cds`` recorded in ``detects``.

    Args:
        args_srna: Options container; ``output_file`` and ``output_table``
            name the GFF and table files that are written.
        libs: Not used in this function.
        texs: Passed on to ``extend_inter_container``.
        wigs_f: Passed on to ``check_srna_condition``.
        wigs_r: Passed on to ``check_srna_condition``.
        tss_file: TSS file read with ``read_tss``.
    """
    # NOTE(review): both cutoff pairs come from the same call with the same
    # argument - confirm whether the antisense cutoff should differ.
    inter_cutoff_coverage, inter_notex = get_intergenic_antisense_cutoff(
        args_srna)
    anti_cutoff_coverage, anti_notex = get_intergenic_antisense_cutoff(
        args_srna)
    nums, cdss, tas, pros, genes, ncs = read_data(args_srna)
    tsss, num_tss = read_tss(tss_file)
    detects = {"overlap": False, "uni_with_tss": False, "anti": False}
    # Context managers guarantee both outputs are closed even when a
    # candidate check raises.
    with open(args_srna.output_file, "w") as output, \
            open(args_srna.output_table, "w") as out_table:
        output.write("##gff-version 3\n")
        for ta in tas:
            detects["overlap"] = False
            detects["anti"] = False
            compare_ta_cds(cdss, ta, detects)
            if detects["overlap"] and (not args_srna.in_cds):
                continue
            if detects["anti"]:
                cutoff_coverage = anti_cutoff_coverage
                notex = anti_notex
            else:
                cutoff_coverage = inter_cutoff_coverage
                notex = inter_notex
            args_srna = ArgsContainer().extend_inter_container(
                args_srna, tsss, pros, nums, output, out_table, texs, detects,
                cutoff_coverage, notex)
            check_srna_condition(ta, args_srna, cdss, wigs_f, wigs_r)
    free_memory([tsss, tas, pros, genes, cdss])
示例#4
0
 def __init__(self, args_srna):
     """Create the helper objects and derive all working paths.

     Args:
         args_srna: Options container; this method reads ``out_folder``,
             ``tss_folder``, ``pro_folder``, ``sorf_file``, ``fastas``,
             ``trans`` and ``terms``.
     """
     out_folder = args_srna.out_folder
     self.args_container = ArgsContainer()
     self.helper = Helper()
     self.multiparser = Multiparser()
     self.gff_output = os.path.join(out_folder, "gffs")
     self.table_output = os.path.join(out_folder, "tables")
     self.stat_path = os.path.join(out_folder, "statistics")
     self.tss_path = self._check_folder_exist(args_srna.tss_folder)
     self.pro_path = self._check_folder_exist(args_srna.pro_folder)
     self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
     self.fasta_path = os.path.join(args_srna.fastas, "tmp")
     self.tran_path = os.path.join(args_srna.trans, "tmp")
     self.term_path = self._check_folder_exist(args_srna.terms)
     self.merge_wigs = os.path.join(out_folder, "merge_wigs")
     # Prefixes of the per-genome temporary files inside the output folder.
     prefix_names = (
         ("merge", "tmp_merge"),
         ("utr", "tmp_utrsrna"),
         ("normal", "tmp_normal"),
         ("in_cds", "tmp_incds"),
         ("merge_table", "tmp_merge_table"),
         ("utr_table", "tmp_utrsrna_table"),
         ("normal_table", "tmp_normal_table"),
         ("in_cds_table", "tmp_incds_table"),
         ("basic", "tmp_basic"),
         ("energy", "tmp_energy"),
     )
     self.prefixs = {key: os.path.join(out_folder, name)
                     for key, name in prefix_names}
     self.tmps = {
         "nr": os.path.join(out_folder, "tmp_nr"),
         "srna": os.path.join(out_folder, "tmp_sRNA"),
     }
     self.best_table = os.path.join(self.table_output, "best")
     self.all_best = {
         "all_gff": os.path.join(self.gff_output, "all_candidates"),
         "best_gff": os.path.join(self.gff_output, "best"),
         "all_table": os.path.join(self.table_output, "all_candidates"),
         "best_table": os.path.join(self.table_output, "best"),
     }
示例#5
0
 def __init__(self, args_srna):
     """Create the helper objects and derive all working paths.

     Args:
         args_srna: Options container; this method reads ``out_folder``,
             ``tss_folder``, ``pro_folder``, ``sorf_file``, ``fastas``,
             ``trans`` and ``terms``.
     """
     def in_out(name):
         # Path of *name* directly inside the output folder.
         return os.path.join(args_srna.out_folder, name)

     self.args_container = ArgsContainer()
     self.helper = Helper()
     self.multiparser = Multiparser()
     self.gff_output = in_out("gffs")
     self.table_output = in_out("tables")
     self.stat_path = in_out("statistics")
     self.tss_path = self._check_folder_exist(args_srna.tss_folder)
     self.pro_path = self._check_folder_exist(args_srna.pro_folder)
     self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
     self.fasta_path = os.path.join(args_srna.fastas, "tmp")
     self.tran_path = os.path.join(args_srna.trans, "tmp")
     self.term_path = self._check_folder_exist(args_srna.terms)
     self.merge_wigs = in_out("merge_wigs")
     # Prefixes of the per-genome temporary files inside the output folder.
     self.prefixs = {
         "merge": in_out("tmp_merge"),
         "utr": in_out("tmp_utrsrna"),
         "normal": in_out("tmp_normal"),
         "in_cds": in_out("tmp_incds"),
         "merge_table": in_out("tmp_merge_table"),
         "utr_table": in_out("tmp_utrsrna_table"),
         "normal_table": in_out("tmp_normal_table"),
         "in_cds_table": in_out("tmp_incds_table"),
         "basic": in_out("tmp_basic"),
         "energy": in_out("tmp_energy"),
     }
     self.tmps = {"nr": in_out("tmp_nr"), "srna": in_out("tmp_sRNA")}
     self.best_table = os.path.join(self.table_output, "best")
     self.all_best = {
         "all_gff": os.path.join(self.gff_output, "all_candidates"),
         "best_gff": os.path.join(self.gff_output, "best"),
         "all_table": os.path.join(self.table_output, "all_candidates"),
         "best_table": os.path.join(self.table_output, "best"),
     }
示例#6
0
class sRNADetection(object):

    def __init__(self, args_srna):
        """Create the helper objects and derive all working paths.

        Args:
            args_srna: Options container; this method reads ``out_folder``,
                ``tss_folder``, ``pro_folder``, ``sorf_file``, ``fastas``,
                ``trans`` and ``terms``.
        """
        out_folder = args_srna.out_folder
        self.args_container = ArgsContainer()
        self.helper = Helper()
        self.multiparser = Multiparser()
        self.gff_output = os.path.join(out_folder, "gffs")
        self.table_output = os.path.join(out_folder, "tables")
        self.stat_path = os.path.join(out_folder, "statistics")
        self.tss_path = self._check_folder_exist(args_srna.tss_folder)
        self.pro_path = self._check_folder_exist(args_srna.pro_folder)
        self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
        self.fasta_path = os.path.join(args_srna.fastas, "tmp")
        self.tran_path = os.path.join(args_srna.trans, "tmp")
        self.term_path = self._check_folder_exist(args_srna.terms)
        self.merge_wigs = os.path.join(out_folder, "merge_wigs")
        # Prefixes of the per-genome temporary files inside the output
        # folder, keyed by the name the other methods look them up with.
        tmp_names = {
            "merge": "tmp_merge",
            "utr": "tmp_utrsrna",
            "normal": "tmp_normal",
            "in_cds": "tmp_incds",
            "merge_table": "tmp_merge_table",
            "utr_table": "tmp_utrsrna_table",
            "normal_table": "tmp_normal_table",
            "in_cds_table": "tmp_incds_table",
            "basic": "tmp_basic",
            "energy": "tmp_energy",
        }
        self.prefixs = {key: os.path.join(out_folder, name)
                        for key, name in tmp_names.items()}
        self.tmps = {"nr": os.path.join(out_folder, "tmp_nr"),
                     "srna": os.path.join(out_folder, "tmp_sRNA")}
        self.best_table = os.path.join(self.table_output, "best")
        self.all_best = {
            "all_gff": os.path.join(self.gff_output, "all_candidates"),
            "best_gff": os.path.join(self.gff_output, "best"),
            "all_table": os.path.join(self.table_output, "all_candidates"),
            "best_table": os.path.join(self.table_output, "best"),
        }

    def _check_folder_exist(self, folder):
        if folder is not None:
            path = os.path.join(folder, "tmp")
        else:
            path = None
        return path

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_format(self, blast_path, database, type_, db_file, err):
        """Invoke ``makeblastdb`` on *database*, sending stderr to *err*.

        Args:
            blast_path: Folder that contains the ``makeblastdb`` executable.
            database: Input file given to ``-in``.
            type_: Value given to ``-dbtype``.
            db_file: Output name given to ``-out``.
            err: Open file object that receives the tool's stderr.
        """
        makeblastdb = os.path.join(blast_path, "makeblastdb")
        command = [makeblastdb, "-in", database,
                   "-dbtype", type_, "-out", db_file]
        call(command, stderr=err)

    def _formatdb(self, database, type_, out_folder,
                  blast_path, database_type):
        err = open(os.path.join(out_folder, "log.txt"), "w")
        if (database.endswith(".fa")) or (
                database.endswith(".fna")) or (
                database.endswith(".fasta")):
            pass
        else:
            folders = database.split("/")
            filename = folders[-1]
            folder = "/".join(folders[:-1])
            for fasta in os.listdir(folder):
                if (fasta.endswith(".fa")) or (
                        fasta.endswith(".fna")) or (
                        fasta.endswith(".fasta")):
                    if ".".join(fasta.split(".")[:-1]) == filename:
                        database = os.path.join(folder, fasta)
        if database_type == "sRNA":
            change_format(database, "tmp_srna_database")
            os.remove(database)
            shutil.move("tmp_srna_database", database)
        db_file = ".".join(database.split(".")[:-1])
        self._run_format(blast_path, database, type_, db_file, err)
        err.close()

    def _merge_frag_tex_file(self, files, args_srna):
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif (args_srna.frag_wigs is not None):
            shutil.move(files["frag_csv"], files["merge_csv"])
            self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
            os.remove(files["frag_gff"])
        elif (args_srna.tex_wigs is not None):
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])

    def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
        """Run intergenic/antisense sRNA detection for one genome.

        ``intergenic_srna`` is run once per available library type
        (fragmented first, then TEX+/-) and the per-library outputs are
        merged into the ``normal`` GFF and table of *prefix*.

        Args:
            prefix: Genome name (annotation file name without ``.gff``).
            gff: Annotation file name inside ``args_srna.gffs``.
            tran: Transcript file handed to the argument container.
            fuzzy_tss: Handed to the argument container unchanged.
            args_srna: Options container.

        Returns:
            The TSS file path: the one below ``TSS_class`` in the output
            folder when such an entry exists, otherwise the file looked up
            in ``self.tss_path`` (None when "tss" is not in
            ``args_srna.import_info``).
        """
        cutoff_name = "tmp_cutoff_inter"
        if cutoff_name in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, cutoff_name))
        files = dict.fromkeys(["frag_gff", "frag_csv", "tex_gff", "tex_csv",
                               "merge_gff", "merge_csv"])
        tss = None
        if "tss" in args_srna.import_info:
            tss = self.helper.get_correct_file(
                self.tss_path, "_TSS.gff", prefix, None, None)
        pro = None
        if self.pro_path is not None:
            pro = self.helper.get_correct_file(
                self.pro_path, "_processing.gff", prefix, None, None)
        if args_srna.frag_wigs is not None:
            frag_name = "_".join(["tmp_frag", prefix])
            frag_table_name = "_".join(["tmp_frag_table", prefix])
            files["frag_gff"] = os.path.join(args_srna.out_folder, frag_name)
            files["frag_csv"] = os.path.join(args_srna.out_folder,
                                             frag_table_name)
            annotation = os.path.join(args_srna.gffs, gff)
            args_srna = self.args_container.container_intersrna(
                "frag", files, args_srna, prefix, annotation, tran, tss,
                pro, fuzzy_tss)
            intergenic_srna(args_srna)
        if args_srna.tex_wigs is not None:
            tex_name = "_".join(["tmp_tex", prefix])
            tex_table_name = "_".join(["tmp_tex_table", prefix])
            files["tex_gff"] = os.path.join(args_srna.out_folder, tex_name)
            files["tex_csv"] = os.path.join(args_srna.out_folder,
                                            tex_table_name)
            annotation = os.path.join(args_srna.gffs, gff)
            args_srna = self.args_container.container_intersrna(
                "tex", files, args_srna, prefix, annotation, tran, tss,
                pro, fuzzy_tss)
            intergenic_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        tss_class = "TSS_class"
        if tss_class in os.listdir(args_srna.out_folder):
            tss = os.path.join(args_srna.out_folder, tss_class,
                               prefix + "_TSS.gff")
        return tss

    def _run_utrsrna(self, gff, tran, prefix, tss, pro, args_srna):
        """Run UTR-derived sRNA detection for one genome.

        ``utr_derived_srna`` is run once per available library type
        (TEX+/- first, then fragmented), the per-library outputs are merged
        into the ``utr`` GFF and table of *prefix*, and the merged result
        is filtered with ``filter_utr``.

        Args:
            gff: Annotation file name inside ``args_srna.gffs``.
            tran: Transcript file handed to the argument container.
            prefix: Genome name.
            tss: TSS file handed to the argument container.
            pro: Processing-site file (may be None).
            args_srna: Options container.
        """
        median_name = "tmp_median"
        if median_name in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, median_name))
        files = dict.fromkeys(["frag_gff", "frag_csv", "tex_gff", "tex_csv",
                               "merge_gff", "merge_csv"])
        if args_srna.tex_wigs is not None:
            tex_name = "_".join(["tmp_utr_tex", prefix])
            tex_table_name = "_".join(["tmp_utr_tex_table", prefix])
            files["tex_gff"] = os.path.join(args_srna.out_folder, tex_name)
            files["tex_csv"] = os.path.join(args_srna.out_folder,
                                            tex_table_name)
            annotation = os.path.join(args_srna.gffs, gff)
            genome_fasta = os.path.join(self.fasta_path, prefix + ".fa")
            args_srna = self.args_container.container_utrsrna(
                annotation, tran, tss, files, pro, genome_fasta,
                "tex", prefix, args_srna)
            utr_derived_srna(args_srna)
        if args_srna.frag_wigs is not None:
            frag_name = "_".join(["tmp_utr_frag", prefix])
            frag_table_name = "_".join(["tmp_utr_frag_table", prefix])
            files["frag_gff"] = os.path.join(args_srna.out_folder, frag_name)
            files["frag_csv"] = os.path.join(args_srna.out_folder,
                                             frag_table_name)
            annotation = os.path.join(args_srna.gffs, gff)
            genome_fasta = os.path.join(self.fasta_path, prefix + ".fa")
            args_srna = self.args_container.container_utrsrna(
                annotation, tran, tss, files, pro, genome_fasta,
                "frag", prefix, args_srna)
            utr_derived_srna(args_srna)
        files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)

    def _check_necessary_file(self, args_srna):
        if (args_srna.gffs is None) or (args_srna.trans is None) or (
                (args_srna.tex_wigs is None) and (
                args_srna.frag_wigs is None)):
            print("Error: lack required files!!!!")
            sys.exit()
        if args_srna.utr_srna:
            if (args_srna.tss_folder is None):
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            if (args_srna.pro_folder is None):
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may effect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path,
                                         None, "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path,
                                         None, "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path,
                                         None, "sORF")
        if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
           "blast_nr" in args_srna.import_info) or (
           "blast_srna" in args_srna.import_info):
            if args_srna.fastas is None:
                print("Error: lack required fasta files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            self.multiparser.parser_fasta(args_srna.fastas)
            self.multiparser.combine_fasta(args_srna.gffs,
                                           self.fasta_path, None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path,
                                         None, "term")
        else:
            self.term_path = None

    def _run_program(self, args_srna):
        """Run sRNA detection for every ``.gff`` file in ``args_srna.gffs``.

        For each genome the intergenic/antisense detection runs first,
        then (when ``args_srna.utr_srna`` is set and a TSS file is
        available) the UTR-derived detection. Both results are merged,
        filtered with ``filter_frag`` and sorted into the ``basic`` file.

        Returns:
            List of the processed genome names.
        """
        prefixs = []
        tss = None
        for gff in os.listdir(args_srna.gffs):
            if not gff.endswith(".gff"):
                continue
            prefix = gff.replace(".gff", "")
            prefixs.append(prefix)
            print("Running sRNA detection of {0}....".format(prefix))
            tran = self.helper.get_correct_file(
                self.tran_path, "_transcript.gff", prefix, None, None)
            kinds = ("merge", "utr", "normal")
            gffs = {kind: "_".join([self.prefixs[kind], prefix])
                    for kind in kinds}
            csvs = {kind: "_".join([self.prefixs[kind + "_table"], prefix])
                    for kind in kinds}
            tss = self._run_normal(
                prefix, gff, tran, args_srna.fuzzy_tsss["inter"], args_srna)
            if args_srna.utr_srna:
                print("Running UTR derived sRNA detection of {0}".format(
                      prefix))
                if tss is None:
                    tss = self.helper.get_correct_file(
                        self.tss_path, "_TSS.gff", prefix, None, None)
                pro = None
                if self.pro_path is not None:
                    pro = self.helper.get_correct_file(
                        self.pro_path, "_processing.gff",
                        prefix, None, None)
                if tss is not None:
                    self._run_utrsrna(gff, tran, prefix, tss, pro, args_srna)
            self._merge_srna(args_srna, gffs, csvs, prefix,
                             os.path.join(args_srna.gffs, gff), tss)
            filter_frag(csvs["merge"], gffs["merge"])
            basic_file = "_".join([self.prefixs["basic"], prefix])
            self.helper.sort_gff(gffs["merge"], basic_file)
        return prefixs

    def _merge_srna(self, args_srna, gffs, csvs, prefix, gff_file, tss):
        """Merge the intergenic and UTR-derived results of one genome.

        Args:
            args_srna: Options container (``in_cds``, ``cutoff_overlap`` and
                ``wig_path`` are read here).
            gffs: Dict of GFF paths; handed to ``merge_srna_gff``.
            csvs: Dict of table paths; handed to ``merge_srna_table``.
            prefix: Genome name used to build the wiggle file names.
            gff_file: Annotation file of the genome.
            tss: TSS file, passed through to ``merge_srna_table``.
        """
        print("merging data of intergenic and UTR_derived sRNA...")
        merge_srna_gff(gffs, args_srna.in_cds,
                       args_srna.cutoff_overlap, gff_file)
        merged_gff = gffs["merge"]
        forward_wig = os.path.join(args_srna.wig_path,
                                   "_".join([prefix, "forward.wig"]))
        reverse_wig = os.path.join(args_srna.wig_path,
                                   "_".join([prefix, "reverse.wig"]))
        merge_srna_table(merged_gff, csvs, forward_wig, reverse_wig,
                         tss, args_srna)

    def _run_RNAfold(self, seq_file, vienna_path, sec_file):
        os.system(" ".join(["cat", seq_file, "|",
                  os.path.join(vienna_path, "RNAfold"),
                  "-p", ">", sec_file]))

    def _get_seq_sec(self, fasta_path, out_folder, prefix, sec_path,
                     dot_path, vienna_path):
        detect = False
        for fasta in os.listdir(fasta_path):
            if fasta.endswith(".fa") and (
               fasta.replace(".fa", "") == prefix):
                detect = True
                break
        if detect:
            detect = False
            seq_file = os.path.join(out_folder, "_".join(["sRNA_seq", prefix]))
            sec_file = os.path.join(out_folder, "_".join(["sRNA_2d", prefix]))
            self.helper.get_seq("_".join([self.prefixs["basic"], prefix]),
                                os.path.join(fasta_path, fasta), seq_file)
        else:
            print("Error:There is not fasta file of {0}".format(prefix))
            print("please check your imported information")
            sys.exit()
        tmp_path = os.path.join(out_folder, "tmp_srna")
        self.helper.check_make_folder(tmp_path)
        main_path = os.getcwd()
        os.chdir(tmp_path)
        sec_file = os.path.join(main_path, sec_file)
        seq_file = os.path.join(main_path, seq_file)
        tmp_sec_path = os.path.join(main_path, sec_path)
        tmp_dot_path = os.path.join(main_path, dot_path)
        self._run_RNAfold(seq_file, vienna_path, sec_file)
        extract_energy(os.path.join(main_path,
                       "_".join([self.prefixs["basic"], prefix])),
                       sec_file, os.path.join(main_path,
                       "_".join([self.prefixs["energy"], prefix])))
        for ps in os.listdir(os.getcwd()):
            new_ps = ps.replace("|", "_")
            shutil.move(ps, new_ps)
        return {"sec": tmp_sec_path, "dot": tmp_dot_path, "main": main_path,
                "tmp": os.path.join(main_path, tmp_path)}

    def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
        os.system(" ".join([os.path.join(vienna_util, "relplot.pl"),
                  os.path.join(tmp_paths["tmp"], file_),
                  os.path.join(tmp_paths["tmp"], dot_file),
                  ">", os.path.join(tmp_paths["tmp"], rel_file)]))

    def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
        """Convert one PostScript file from the tmp folder to *pdf_file*.

        Args:
            ps2pdf14_path: Path of the converter executable.
            tmp_paths: Dict of working paths; only ``"tmp"`` is used.
            file_: PostScript file name inside ``tmp_paths["tmp"]``.
            pdf_file: Output path, passed to the converter as given.
        """
        ps_source = os.path.join(tmp_paths["tmp"], file_)
        call([ps2pdf14_path, ps_source, pdf_file])

    def _replot_sec_to_pdf(self, vienna_util, tmp_paths,
                           ps2pdf14_path, prefix):
        for file_ in os.listdir(os.getcwd()):
            if file_.endswith("ss.ps"):
                dot_file = file_.replace("ss.ps", "dp.ps")
                rel_file = file_.replace("ss.ps", "rss.ps")
                print("replot {0}".format(file_))
                self._run_replot(vienna_util, tmp_paths, file_,
                                 dot_file, rel_file)
        for file_ in os.listdir(tmp_paths["tmp"]):
            if (file_.endswith("rss.ps")) or (file_.endswith("dp.ps")):
                pdf_file = file_.replace(".ps", ".pdf")
                print("convert {0} to pdf".format(file_))
                self._convert_pdf(ps2pdf14_path, tmp_paths,
                                  file_, pdf_file)
        os.mkdir(os.path.join(tmp_paths["sec"], prefix))
        os.mkdir(os.path.join(tmp_paths["dot"], prefix))
        self.helper.move_all_content(
                tmp_paths["tmp"], os.path.join(tmp_paths["sec"], prefix),
                ["rss.pdf"])
        self.helper.move_all_content(
                tmp_paths["tmp"], os.path.join(tmp_paths["dot"], prefix),
                ["dp.pdf"])

    def _run_mountain(self, vienna_util, tmp_paths, dot_file, out):
        """Run ``mountain.pl`` on one dot plot, writing its stdout to *out*.

        Args:
            vienna_util: Folder that contains ``mountain.pl``.
            tmp_paths: Dict of working paths; only ``"tmp"`` is used.
            dot_file: Dot plot file name inside ``tmp_paths["tmp"]``.
            out: Open file object that receives the script's stdout.
        """
        script = os.path.join(vienna_util, "mountain.pl")
        dot_plot = os.path.join(tmp_paths["tmp"], dot_file)
        call([script, dot_plot], stdout=out)

    def _plot_mountain(self, mountain, moun_path,
                       tmp_paths, prefix, vienna_util):
        if mountain:
            tmp_moun_path = os.path.join(tmp_paths["main"], moun_path)
            os.mkdir(os.path.join(tmp_moun_path, prefix))
            txt_path = os.path.join(tmp_paths["tmp"], "tmp_txt")
            self.helper.check_make_folder(txt_path)
            print("Generating mountain plot of {0}....".format(prefix))
            for dot_file in os.listdir(tmp_paths["tmp"]):
                if dot_file.endswith("dp.ps"):
                    moun_txt = os.path.join(tmp_paths["tmp"], "mountain.txt")
                    out = open(moun_txt, "w")
                    moun_file = dot_file.replace("dp.ps", "mountain.pdf")
                    print("Generating {0}".format(moun_file))
                    self._run_mountain(vienna_util, tmp_paths, dot_file, out)
                    plot_mountain_plot(moun_txt, moun_file)
                    shutil.move(moun_file,
                                os.path.join(tmp_moun_path, prefix, moun_file))
                    out.close()
                    os.remove(moun_txt)

    def _compute_2d_and_energy(self, args_srna, prefixs):
        print("Running energy calculation....")
        moun_path = os.path.join(args_srna.out_folder, "mountain_plot")
        sec_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "sec_plot")
        dot_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "dot_plot")
        self.helper.remove_all_content(sec_path, None, "dir")
        self.helper.remove_all_content(dot_path, None, "dir")
        self.helper.remove_all_content(moun_path, None, "dir")
        for prefix in prefixs:
            tmp_paths = self._get_seq_sec(
                    self.fasta_path, args_srna.out_folder, prefix, sec_path,
                    dot_path, args_srna.vienna_path)
            self._replot_sec_to_pdf(args_srna.vienna_util, tmp_paths,
                                    args_srna.ps2pdf14_path, prefix)
            self._plot_mountain(args_srna.mountain, moun_path, tmp_paths,
                                prefix, args_srna.vienna_util)
            self.helper.remove_all_content(os.getcwd(), ".ps", "file")
            os.chdir(tmp_paths["main"])
            shutil.move("_".join([self.prefixs["energy"], prefix]),
                        "_".join([self.prefixs["basic"], prefix]))
            shutil.rmtree(os.path.join(args_srna.out_folder, "tmp_srna"))

    def _run_blast(self, blast_path, program, database, e, seq_file,
                   blast_file, strand):
        """Run one BLAST search and write the report to *blast_file*.

        Args:
            blast_path: Folder that contains the BLAST executables.
            program: Executable name inside *blast_path*.
            database: Value given to ``-db``.
            e: E-value cutoff; converted with ``str`` for ``-evalue``.
            seq_file: Query file given to ``-query``.
            blast_file: Output file given to ``-out``.
            strand: Value given to ``-strand``.
        """
        executable = os.path.join(blast_path, program)
        command = [executable, "-db", database, "-evalue", str(e),
                   "-strand", strand, "-query", seq_file,
                   "-out", blast_file]
        call(command)

    def _get_strand_fasta(self, seq_file, out_folder):
        tmp_plus = os.path.join(out_folder, "tmp_plus.fa")
        tmp_minus = os.path.join(out_folder, "tmp_minus.fa")
        out_p = open(tmp_plus, "w")
        out_m = open(tmp_minus, "w")
        strand = ""
        with open(seq_file) as sh:
            for line in sh:
                line = line.strip()
                if line.startswith(">"):
                    if line[-1] == "+":
                        out_p.write(line + "\n")
                        strand = "plus"
                    elif line[-1] == "-":
                        out_m.write(line + "\n")
                        strand = "minus"
                else:
                    if strand == "plus":
                        out_p.write(line + "\n")
                    elif strand == "minus":
                        out_m.write(line + "\n")
        out_p.close()
        out_m.close()
        return tmp_plus, tmp_minus

    def _blast(self, database, database_format, data_type, args_srna,
               prefixs, program, database_type, e):
        if (database is None):
            print("Error: No database assigned!")
        else:
            if database_format:
                self._formatdb(database, data_type, args_srna.out_folder,
                               args_srna.blast_path, database_type)
            for prefix in prefixs:
                blast_file = os.path.join(
                        args_srna.out_folder, "blast_result_and_misc",
                        "_".join([database_type, "blast", prefix + ".txt"]))
                srna_file = "_".join([self.prefixs["basic"], prefix])
                out_file = os.path.join(
                        args_srna.out_folder,
                        "_".join(["tmp", database_type, prefix]))
                print("Running Blast of {0}".format(prefix))
                seq_file = os.path.join(
                        args_srna.out_folder, "_".join(["sRNA_seq", prefix]))
                if seq_file not in os.listdir(args_srna.out_folder):
                    self.helper.get_seq(
                            srna_file,
                            os.path.join(self.fasta_path, prefix + ".fa"),
                            seq_file)
                if database_type == "nr":
                    tmp_plus, tmp_minus = self._get_strand_fasta(
                            seq_file, args_srna.out_folder)
                    tmp_blast = os.path.join("tmp_blast.txt")
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    tmp_plus, tmp_blast, "plus")
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    tmp_minus, blast_file, "minus")
                    self.helper.merge_file(tmp_blast, blast_file)
                    os.remove(tmp_blast)
                    os.remove(tmp_plus)
                    os.remove(tmp_minus)
                else:
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    seq_file, blast_file, "both")
                extract_blast(blast_file, srna_file, out_file,
                              out_file + ".csv", database_type)
                shutil.move(out_file, srna_file)

    def _class_srna(self, prefixs, args_srna):
        """Classify the sRNAs of every prefix and write per-class tables.

        For each prefix the ``for_class/<prefix>`` folders below
        ``self.gff_output`` and ``self.table_output`` are prepared,
        ``classify_srna`` is run on the merged sRNA gff file, and one
        table is generated for every file found in the class gff folder.
        """
        info_count = len(args_srna.import_info)
        # NOTE(review): a length cannot be 0 and 1 at the same time, so this
        # early exit never triggers and classification always runs.  The
        # intended condition is unclear; the behaviour is left untouched.
        if (info_count == 1) and (info_count == 0):
            return
        for prefix in prefixs:
            print("classifying sRNA of {0}".format(prefix))
            gff_dir = os.path.join(self.gff_output, "for_class", prefix)
            table_dir = os.path.join(self.table_output, "for_class", prefix)
            # The folders are requested twice, in the same order the
            # helper has always been called.
            for _ in range(2):
                self.helper.check_make_folder(table_dir)
                self.helper.check_make_folder(gff_dir)
            stat_name = "_".join(["stat_sRNA_class", prefix + ".csv"])
            out_stat = os.path.join(self.stat_path, stat_name)
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            classify_srna(all_gff, class_gff=gff_dir, out_stat_file=out_stat,
                          args_srna=args_srna) if False else classify_srna(
                              all_gff, gff_dir, out_stat, args_srna)
            for class_file in os.listdir(gff_dir):
                out_table = os.path.join(
                        table_dir, class_file.replace(".gff", ".csv"))
                gen_srna_table(
                    os.path.join(gff_dir, class_file),
                    "_".join([self.prefixs["merge_table"], prefix]),
                    "_".join([self.tmps["nr"], prefix + ".csv"]),
                    "_".join([self.tmps["srna"], prefix + ".csv"]),
                    args_srna, out_table)

    def _get_best_result(self, prefixs, args_srna):
        """Select the best sRNAs of every prefix and write their table.

        ``gen_best_srna`` reads ``<all_gff>/<prefix>_sRNA.gff`` and writes
        ``<best_gff>/<prefix>_sRNA.gff``; ``gen_srna_table`` then turns
        that gff file into ``<best_table>/<prefix>_sRNA.csv``.
        """
        for prefix in prefixs:
            gff_name = "_".join([prefix, "sRNA.gff"])
            csv_name = "_".join([prefix, "sRNA.csv"])
            best_gff = os.path.join(self.all_best["best_gff"], gff_name)
            best_table = os.path.join(self.all_best["best_table"], csv_name)
            source_gff = os.path.join(self.all_best["all_gff"], gff_name)
            gen_best_srna(source_gff, best_gff, args_srna)
            merge_table = "_".join([self.prefixs["merge_table"], prefix])
            nr_table = "_".join([self.tmps["nr"], prefix + ".csv"])
            srna_table = "_".join([self.tmps["srna"], prefix + ".csv"])
            gen_srna_table(best_gff, merge_table, nr_table, srna_table,
                           args_srna, best_table)

    def _remove_file(self, args_srna):
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "dir")
        self.helper.remove_all_content(args_srna.out_folder, "tmp_", "file")
        self.helper.remove_tmp(args_srna.fastas)
        self.helper.remove_tmp(args_srna.gffs)
        if args_srna.frag_wigs is not None:
            self.helper.remove_tmp(args_srna.frag_wigs)
        if args_srna.tex_wigs is not None:
            self.helper.remove_tmp(args_srna.tex_wigs)
        if (args_srna.frag_wigs is not None) and (
                args_srna.tex_wigs is not None):
            shutil.rmtree(args_srna.merge_wigs)
        self.helper.remove_tmp(args_srna.trans)
        if args_srna.tss_folder is not None:
            self.helper.remove_tmp(args_srna.tss_folder)
        if args_srna.pro_folder is not None:
            self.helper.remove_tmp(args_srna.pro_folder)
        if args_srna.sorf_file is not None:
            self.helper.remove_tmp(args_srna.sorf_file)
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        if self.term_path is not None:
            self.helper.remove_tmp(args_srna.terms)

    def _filter_srna(self, args_srna, prefixs):
        if "sec_str" in args_srna.import_info:
            self._compute_2d_and_energy(args_srna, prefixs)
        if "blast_nr" in args_srna.import_info:
            self._blast(args_srna.nr_database, args_srna.nr_format, "prot",
                        args_srna, prefixs, "blastx", "nr", args_srna.e_nr)
        if "blast_srna" in args_srna.import_info:
            self._blast(args_srna.srna_database, args_srna.srna_format, "nucl",
                        args_srna, prefixs, "blastn", "sRNA", args_srna.e_srna)
        if "sorf" in args_srna.import_info:
            for prefix in prefixs:
                if ("_".join([prefix, "sORF.gff"]) in
                        os.listdir(self.sorf_path)):
                    tmp_srna = os.path.join(args_srna.out_folder,
                                            "".join(["tmp_srna_sorf", prefix]))
                    tmp_sorf = os.path.join(args_srna.out_folder,
                                            "".join(["tmp_sorf_srna", prefix]))
                    srna_sorf_comparison(
                            "_".join([self.prefixs["basic"], prefix]),
                            os.path.join(self.sorf_path,
                                         "_".join([prefix, "sORF.gff"])),
                            tmp_srna, tmp_sorf)
                    os.remove(tmp_sorf)
                    shutil.move(tmp_srna,
                                "_".join([self.prefixs["basic"], prefix]))

    def _import_info_format(self, import_info):
        new_info = []
        for info in import_info:
            info = info.lower()
            new_info.append(info)
        return new_info

    def _gen_table(self, prefixs, args_srna):
        """Generate ``<all_table>/<prefix>_sRNA.csv`` for every prefix.

        The table is built by ``gen_srna_table`` from the merged sRNA gff
        file and the per-prefix merge, nr and sRNA tables.
        """
        for prefix in prefixs:
            csv_suffix = prefix + ".csv"
            out_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            srna_gff = os.path.join(self.all_best["all_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            merge_table = "_".join([self.prefixs["merge_table"], prefix])
            nr_table = "_".join([self.tmps["nr"], csv_suffix])
            srna_table = "_".join([self.tmps["srna"], csv_suffix])
            gen_srna_table(srna_gff, merge_table, nr_table, srna_table,
                           args_srna, out_table)

    def _print_rank_all(self, prefixs):
        """Call ``print_rank_all`` on the all/best tables of every prefix."""
        for prefix in prefixs:
            table_name = "_".join([prefix, "sRNA.csv"])
            print_rank_all(
                os.path.join(self.all_best["all_table"], table_name),
                os.path.join(self.all_best["best_table"], table_name))

    def _filter_min_utr(self, prefixs, min_utr):
        """Apply ``filter_utr`` with ``min_utr`` to every prefix.

        The gff file and the table of the complete sRNA set are passed
        together with the threshold.
        """
        for prefix in prefixs:
            srna_gff = os.path.join(self.all_best["all_gff"],
                                    "_".join([prefix, "sRNA.gff"]))
            srna_table = os.path.join(self.all_best["all_table"],
                                      "_".join([prefix, "sRNA.csv"]))
            filter_utr(srna_gff, srna_table, min_utr)

    def _antisense(self, gffs, prefixs):
        """Run ``srna_antisense`` on the all and best results per prefix.

        Both calls receive ``<gffs>/<prefix>.gff`` as third argument
        (presumably the genome annotation -- confirm against
        ``srna_antisense``).
        """
        for prefix in prefixs:
            gff_name = "_".join([prefix, "sRNA.gff"])
            table_name = "_".join([prefix, "sRNA.csv"])
            annotation = os.path.join(gffs, prefix + ".gff")
            # The complete set is processed first, then the best set.
            for subset in ("all", "best"):
                srna_antisense(
                    os.path.join(self.all_best[subset + "_gff"], gff_name),
                    os.path.join(self.all_best[subset + "_table"],
                                 table_name),
                    annotation)

    def _blast_stat(self, stat_path, srna_tables):
        for srna_table in os.listdir(os.path.join(srna_tables, "best")):
            out_srna_blast = os.path.join(
                    stat_path, "stat_" +
                    srna_table.replace(".csv", "_blast.csv"))
        blast_class(os.path.join(srna_tables, "best", srna_table),
                    out_srna_blast)

    def _compare_term_promoter(self, out_table, prefix, args_srna):
        if ("term" in args_srna.import_info) and (
                self.term_path is not None):
            compare_srna_term(os.path.join(self.all_best["all_gff"],
                              "_".join([prefix, "sRNA.gff"])),
                              out_table, os.path.join(self.term_path,
                              "_".join([prefix, "term.gff"])),
                              args_srna.fuzzy_b, args_srna.fuzzy_a)
        if ("promoter" in args_srna.import_info) and (
                args_srna.promoter_table is not None) and (
                "tss" in args_srna.import_info):
            compare_srna_promoter(os.path.join(self.all_best["all_gff"],
                                  "_".join([prefix, "sRNA.gff"])),
                                  out_table, args_srna)

    def run_srna_detection(self, args_srna):
        """Run the complete sRNA detection workflow.

        Steps, in order: input check, transcript parsing, candidate
        detection, filtering, copying the candidates to the final gff
        folder, terminator/promoter comparison, table generation,
        classification, best-candidate selection, ranking, optional blast
        statistics and finally the clean-up of temporary files.
        """
        self._check_necessary_file(args_srna)
        parser = self.multiparser
        parser.parser_gff(args_srna.trans, "transcript")
        parser.combine_gff(args_srna.gffs, self.tran_path, None, "transcript")
        # Filter names are compared in lower case from here on.
        args_srna.import_info = self._import_info_format(args_srna.import_info)
        prefixs = self._run_program(args_srna)
        self._filter_srna(args_srna, prefixs)
        for prefix in prefixs:
            candidate_gff = "_".join([self.prefixs["basic"], prefix])
            final_gff = os.path.join(self.all_best["all_gff"],
                                     "_".join([prefix, "sRNA.gff"]))
            shutil.copyfile(candidate_gff, final_gff)
            merge_table = "_".join([self.prefixs["merge_table"], prefix])
            self._compare_term_promoter(merge_table, prefix, args_srna)
        self._gen_table(prefixs, args_srna)
        self._class_srna(prefixs, args_srna)
        self._get_best_result(prefixs, args_srna)
        self._print_rank_all(prefixs)
        blast_requested = "blast_srna" in args_srna.import_info
        if blast_requested:
            self._blast_stat(self.stat_path, self.table_output)
        self._remove_file(args_srna)
示例#7
0
class Controller(object):

    """Manage the actions of the subcommands.

    The Controller takes care of providing the arguments, such as path
    names, and the parallel processing of tasks.

    """
    def __init__(self, args):
        """Create an instance."""
        self._args = args
        if (len(args.__dict__) > 3):
            if not os.path.exists(args.project_path):
                print("Error: --project_path does not exists!")
                sys.exit()
        self._paths = Paths(args.project_path)
        self.args_container = ArgsContainer()
        self.helper = Helper()

    def check_folder(self, folders, flags):
        """Exit with an error if a required folder is unset, missing or empty.

        ``flags`` holds the option name reported for the folder at the
        same position in ``folders``.
        """
        for folder, flag in zip(folders, flags):
            if (folder is None) or (not os.path.exists(folder)):
                print("Error: {0} is wrong. Please check it!".format(flag))
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: {0} is a empty folder!".format(flag))
                sys.exit()

    def check_multi_files(self, input_files, flags):
        """Exit with an error if any listed input file does not exist.

        ``input_files`` is a list of file lists (entries may be ``None``
        and are then skipped); ``flags`` names the option of each list.
        """
        if input_files is None:
            return
        for files, flag in zip(input_files, flags):
            if files is None:
                continue
            if not all(os.path.exists(file_) for file_ in files):
                print("Error: Some files in {0} do "
                      "not exist!".format(flag))
                sys.exit()

    def check_parameter(self, paras, names):
        """Exit with an error if any parameter in ``paras`` is ``None``.

        ``names`` holds the option name reported for the parameter at
        the same position.
        """
        for position, para in enumerate(paras):
            if para is not None:
                continue
            print("Error: {0} is wrong. "
                  "Please check it!".format(names[position]))
            sys.exit()

    def check_no_require_folder(self, folders):
        """Validate optional folders.

        ``None`` entries are accepted.  A folder that was assigned must
        exist and must not be empty; otherwise an error is printed and
        the program exits.
        """
        for folder in folders:
            if folder is None:
                continue
            if not os.path.exists(folder):
                print("Error: There is a wrong folder. "
                      "Please check it!")
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: There is a empty folder. "
                      "Please check it!")
                sys.exit()

    def check_execute_file(self, exe):
        """Locate an executable and return the path that should be used.

        Args:
            exe: Name or path of the executable.

        Returns:
            ``exe`` unchanged when it is found relative to one of the
            ``PATH`` folders (this also covers absolute paths), otherwise
            the resolved real path of an existing local file.

        The program exits with an error message when the executable is
        not assigned or cannot be found.
        """
        if exe is not None:
            # os.pathsep keeps the lookup portable; a missing PATH variable
            # is treated like an empty one instead of raising KeyError.
            search_folders = os.environ.get("PATH", "").split(os.pathsep)
            if any(os.path.exists(os.path.join(folder, exe))
                   for folder in search_folders):
                return exe
            if os.path.exists(exe):
                return os.path.realpath(exe)
        print("Error: {0} can't be found!".format(exe))
        print("Please assign the correct path!")
        sys.exit()

    def check_file(self, files, names, require):
        """Exit with an error if a file path is invalid.

        Args:
            files: File paths to check; entries may be ``None``.
            names: Option names reported for the path at the same
                position in ``files``.
            require: If truthy, a ``None`` entry is an error; otherwise
                ``None`` entries are skipped.
        """
        for position, path in enumerate(files):
            if path is None:
                if require:
                    print("Error: {0} is wrong. "
                          "Please check it!".format(names[position]))
                    sys.exit()
                continue
            if not os.path.isfile(path):
                print("Error: There is a wrong path of {0}. "
                      "Please check it!".format(names[position]))
                sys.exit()

    def create_project(self, version):
        """Create the project root, its subfolders and the version file."""
        project_path = self._args.project_path
        project_creator.create_root_folder(project_path)
        root_subfolders = self._paths.required_folders("root")
        project_creator.create_subfolders(root_subfolders)
        project_creator.create_version_file(self._paths.version_path, version)
        message = "Created folder \"%s\" and required subfolders.\n" % (
            self._args.project_path)
        sys.stdout.write(message)

    def get_input(self):
        """Download the requested reference files from ``--ftp_path``.

        Each ``ref_*`` switch that is ``True`` triggers one ``get_file``
        call per listed suffix into the annotation or fasta folder.  When
        ``convert_embl`` is ``True`` the downloaded annotation files are
        finally converted with ``Converter().convert_gbk2embl``.
        """
        print("Running get input files")
        args = self._args
        if args.ftp_path is None:
            print("Error: Please assign the path for downloading the data!")
            sys.exit()
        annotation_folder = self._paths.ref_annotation_folder
        fasta_folder = self._paths.ref_fasta_folder
        self.helper.check_make_folder(annotation_folder)
        self.helper.check_make_folder(fasta_folder)
        # (switch on the arguments, target folder, suffixes to fetch)
        downloads = (
            ("ref_gff", annotation_folder, ("gff", "_genomic.gff.gz")),
            ("ref_fasta", fasta_folder, ("fna", "_genomic.fna.gz")),
            ("ref_gbk", annotation_folder,
             ("gbk", "gbff", "_genomic.gbff.gz")),
            ("ref_ptt", annotation_folder, ("ptt",)),
            ("ref_rnt", annotation_folder, ("rnt",)),
        )
        for switch, folder, suffixes in downloads:
            # Only a literal True enables a download, as before.
            if getattr(args, switch) is True:
                for suffix in suffixes:
                    get_file(args.ftp_path, folder, suffix)
        if args.convert_embl is True:
            annotation_files = os.listdir(annotation_folder)
            if len(annotation_files) == 0:
                sys.stdout.write("No gff files!!\n")
            else:
                Converter().convert_gbk2embl(annotation_folder)

    def get_target_fasta(self):
        """Generate the target fasta files from the mutation table.

        The related fasta files and the mutation table are validated
        first; the actual work is delegated to ``TargetFasta``.
        """
        print("Running update genome fasta")
        args = self._args
        self.check_multi_files([args.related_fasta_files],
                               ["--related_fasta_files"])
        # check_file indexes ``names`` per file, so the option name has to
        # be passed as a list; a bare string reported only "-".
        self.check_file([args.mutation_table], ["--mutation_table"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        target = TargetFasta(self._paths.tar_fasta_folder,
                             args.related_fasta_files)
        target.get_target_fasta(
                args.mutation_table, self._paths.tar_fasta_folder,
                args.related_fasta_files, args.combine_to_one_fasta,
                self._paths.target_base_folder)

    def ratt(self):
        """Run RATT to transfer annotation file from reference to target.

        The transfer type must be one of the values listed below and
        exactly one of the embl / genbank inputs has to be assigned;
        otherwise an error is printed and the program exits.
        """
        print("Running annotation transfer")
        args = self._args
        valid_transfer_types = (
            "Strain", "Assembly", "Species", "Assembly.Repetitive",
            "Strain.Repetitive", "Species.Repetitive", "Multiple", "Free")
        if args.transfer_type not in valid_transfer_types:
            print("Error: please assign correct --transfer_type!")
            sys.exit()
        has_embl = args.related_embl_files is not None
        has_gbk = args.related_gbk_files is not None
        if not (has_embl or has_gbk):
            print("Error: please assign proper embl or genbank files")
            sys.exit()
        elif has_embl and has_gbk:
            print("Error: please choose embl as input or genbank as input")
            sys.exit()
        args.ratt_path = self.check_execute_file(args.ratt_path)
        # The second label now matches the option that is actually checked
        # (it used to read "--closed_fasta_files").
        self.check_multi_files(
                [args.target_fasta_files, args.related_fasta_files],
                ["--target_fasta_files", "--related_fasta_files"])
        self.check_parameter([args.element, args.compare_pair],
                             ["--element", "--compare_pair"])
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        project_creator.create_subfolders(
            self._paths.required_folders("annotation_transfer"))
        args_ratt = self.args_container.container_ratt(
            args.ratt_path, args.element, args.transfer_type,
            args.related_embl_files, args.related_gbk_files,
            args.target_fasta_files, args.related_fasta_files,
            self._paths.ratt_folder, args.convert_to_gff_rnt_ptt,
            self._paths.tar_annotation_folder, args.compare_pair)
        ratt = RATT(args_ratt)
        ratt.annotation_transfer(args_ratt)

    def tsspredator(self):
        """Run TSSpredator to predict TSSs or processing sites.

        ``--program`` selects the mode (case-insensitive): ``tss`` or
        ``ps``.  Any other value prints an error and exits.
        """
        args = self._args
        checked_files = [
            args.fasta_files, args.annotation_files,
            args.compare_overlap_gff, args.manual_files,
            args.compare_transcript_files]
        checked_flags = [
            "--fasta_files", "--annotation_files", "--compare_overlap_gff",
            "--manual_files", "--compare_transcript_files"]
        self.check_multi_files(checked_files, checked_flags)
        self.check_parameter([args.tex_notex_libs, args.condition_names],
                             ["--tex_notex_libs", "--condition_names"])
        args.tsspredator_path = self.check_execute_file(args.tsspredator_path)
        mode = args.program.lower()
        if mode not in ("tss", "ps"):
            print("Error: No such program!")
            sys.exit()
        if mode == "tss":
            print("Running TSS prediction")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        else:
            print("Running processing site prediction")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        # The positional order below is the contract of
        # container_tsspredator; the program name keeps its original case.
        args_tss = self.args_container.container_tsspredator(
            args.tsspredator_path, args.program,
            args.fasta_files, args.annotation_files,
            args.tex_notex_libs, args.condition_names,
            args.height, args.height_reduction,
            args.factor, args.factor_reduction,
            args.base_height, args.enrichment_factor,
            args.processing_factor, args.replicate_tex,
            out_folder, args.validate_gene,
            args.manual_files, args.curated_sequence_length,
            args.compare_transcript_files, args.tolerance,
            args.utr_length, args.cluster,
            args.re_check_orphan,
            args.remove_overlap_feature, args.compare_overlap_gff,
            args.remove_low_expression)
        predictor = TSSpredator(args_tss)
        predictor.run_tsspredator(args_tss)

    def optimize(self):
        """Optimize the TSSpredator parameters.

        ``--program`` selects the mode (case-insensitive): ``tss`` or
        ``ps``.  Any other value prints an error and exits.
        """
        args = self._args
        self.check_multi_files(
                [args.fasta_files, args.annotation_files,
                 args.manual_files],
                ["--fasta_files", "--annotation_files", "--manual_files"])
        args.tsspredator_path = self.check_execute_file(args.tsspredator_path)
        # The label is spelled like the real option name
        # (it used to read "--tex_notex_lib").
        self.check_parameter([args.tex_notex_libs, args.condition_names],
                             ["--tex_notex_libs", "--condition_names"])
        mode = args.program.lower()
        if mode == "tss":
            print("Running optimization of TSS prediction")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif mode == "ps":
            print("Running optimization of processing site prediction")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error: No such program!")
            sys.exit()
        # The positional order below is the contract of container_optimize.
        args_ops = self.args_container.container_optimize(
            args.tsspredator_path, args.fasta_files,
            args.annotation_files,
            args.manual_files, out_folder, args.max_height,
            args.max_height_reduction, args.max_factor,
            args.max_factor_reduction, args.max_base_height,
            args.max_enrichment_factor, args.max_processing_factor,
            args.utr_length, args.tex_notex_libs,
            args.condition_names, args.cluster,
            args.curated_sequence_length, args.parallels,
            args.program, args.replicate_tex,
            args.steps)
        optimize_tss(args_ops)

    def color(self):
        """Color the tracks of the screenshots with ``ColorPNG``.

        The track number, the screenshot folder and the ImageMagick
        convert executable are validated first.
        """
        print("Running png files coloring")
        args = self._args
        # The label is spelled like the real option name
        # (it used to read "--track_numer").
        self.check_parameter([args.track_number], ["--track_number"])
        self.check_folder([args.screenshot_folder], ["--screenshot_folder"])
        args.imagemagick_covert_path = self.check_execute_file(
                args.imagemagick_covert_path)
        painter = ColorPNG()
        painter.generate_color_png(
                args.track_number, args.screenshot_folder,
                args.imagemagick_covert_path)

    def terminator(self):
        """Run the terminator prediction (TransTermHP and coverage based).

        The path of ``transterm`` is mandatory: when it is missing the
        message is printed and the program exits.
        """
        print("Running terminator prediction")
        args = self._args
        if args.transterm_path is None:
            print("Please assign the path of transterm in TransTermHP.")
            # Stop here; continuing would only fail later while the
            # executable paths are checked.
            sys.exit()
        self.check_multi_files(
                [args.fasta_files, args.annotation_files,
                 args.transcript_files, args.srna_files],
                ["--fasta_files", "--annotation_files",
                 "--transcript_files", "--srna_files"])
        for prop in ("transterm_path", "expterm_path", "rnafold_path"):
            setattr(args, prop,
                    self.check_execute_file(getattr(args, prop)))
        project_creator.create_subfolders(
            self._paths.required_folders("terminator"))
        # The positional order below is the contract of
        # container_terminator.
        args_term = self.args_container.container_terminator(
            args.transterm_path, args.expterm_path,
            args.rnafold_path,
            self._paths.transterm_folder, args.fasta_files,
            args.annotation_files, args.transcript_files,
            args.srna_files, args.decrease,
            args.highest_coverage, args.tolerance_detect_coverage,
            args.tolerance_within_transcript,
            args.tolerance_downstream_transcript,
            args.tolerance_within_gene,
            args.tolerance_downstream_gene, self._paths.transtermhp_folder,
            args.tex_notex_libs, args.frag_libs,
            args.tex_notex, args.replicate_tex,
            args.replicate_frag, args.table_best,
            args.min_loop_length, args.max_loop_length,
            args.min_stem_length, args.max_stem_length,
            args.min_u_tail, args.miss_rate,
            args.mutation_u_tail, args.keep_multi_term,
            args.window_size, args.window_shift)
        terminator = Terminator(args_term)
        terminator.run_terminator(args_term)

    def transcript(self):
        """Run the transcript detection.

        The annotation, TSS and terminator files are validated, the
        output folders are created and ``TranscriptDetection`` is run.
        """
        print("Running transcript detection")
        args = self._args
        input_files = [args.annotation_files, args.tss_files,
                       args.terminator_files]
        input_flags = ["--annotation_files", "--tss_files",
                       "--terminator_files"]
        self.check_multi_files(input_files, input_flags)
        project_creator.create_subfolders(
            self._paths.required_folders("transcript"))
        # The positional order below is the contract of
        # container_transcript.
        args_tran = self.args_container.container_transcript(
            args.tex_notex, args.modify_transcript,
            args.length, args.annotation_files,
            args.height, args.width,
            args.tolerance, args.tolerance_coverage,
            args.replicate_tex, args.replicate_frag,
            self._paths.transcript_output_folder,
            args.tss_files, args.tss_tolerance,
            args.tex_notex_libs, args.frag_libs,
            args.compare_feature_genome, args.table_best,
            args.terminator_files, args.terminator_tolerance,
            args.max_length_distribution)
        detector = TranscriptDetection(args_tran)
        detector.run_transcript(args_tran)

    def utr_detection(self):
        """Run the UTR detection.

        The annotation, terminator, transcript and TSS files are
        validated, the output folders are created and ``UTRDetection``
        is run.
        """
        print("Running UTR detection")
        args = self._args
        input_files = [args.annotation_files, args.terminator_files,
                       args.transcript_files, args.tss_files]
        input_flags = ["--annotation_files", "--terminator_files",
                       "--transcript_files", "--tss_files"]
        self.check_multi_files(input_files, input_flags)
        project_creator.create_subfolders(self._paths.required_folders("utr"))
        # The positional order below is the contract of container_utr.
        args_utr = self.args_container.container_utr(
                args.tss_files, args.annotation_files,
                args.transcript_files, args.terminator_files,
                args.terminator_tolerance, self._paths.utr_folder,
                args.tss_source, args.base_5utr,
                args.utr_length, args.base_3utr,
                args.tolerance_3utr, args.tolerance_5utr)
        detector = UTRDetection(args_utr)
        detector.run_utr_detection(args_utr)

    def _check_filter_input(self, files, info, filters):
        if files is None:
            print("Error: The {0} has to be provided "
                  "if \"{1}\" in --filter_info!".format(info, filters))
            sys.exit()

    def _check_database(self, database, flag, info):
        wrong = False
        if database is None:
            wrong = True
        elif not os.path.isfile(database):
            if (os.path.isfile(database + ".fa")) or (
                    os.path.isfile(database + ".fna")) or (
                    os.path.isfile(database + ".fasta")):
                wrong = False
            else:
                wrong = True
        if wrong:
            print("Error: {0} is required if {1} is in --filter_info. "
                  "But the assignment of {0} is empty or wrong. "
                  "Please check the {0} or remove {1} from "
                  "--filter_info!".format(flag, info))
            sys.exit()

    def srna_detection(self):
        """Check the sRNA-detection options, then run the sRNA prediction.

        Steps, in order:

        1. the input file options are handed to ``check_multi_files``;
        2. every entry of --filter_info is checked for the input,
           executables or database it depends on ("none", in any letter
           case, needs nothing; any other unknown entry stops the program);
        3. TSS files are required for UTR-derived sRNA and fasta files for
           the poly-U search;
        4. the output sub-folders are created, all options are packed
           positionally by ``container_srna`` and passed to
           ``sRNADetection``.

        Every failed check prints an error and calls ``sys.exit()``.
        """
        print("Running sRNA prediction")
        args = self._args
        self.check_multi_files(
            [args.annotation_files, args.transcript_files,
             args.fasta_files, args.sorf_files,
             args.terminator_files, args.promoter_tables,
             args.processing_site_files],
            ["--annotation_files", "--transcript_files",
             "--fasta_files", "--sorf_files", "--terminator_files",
             "--promoter_tables", "--processing_site_files"])
        # --filter_info entry -> (option holding its input, name in the error)
        needed_inputs = {
            "sorf": ("sorf_files", "sORF"),
            "term": ("terminator_files", "terminator"),
            "promoter": ("promoter_tables", "Promoter"),
            "tss": ("tss_files", "TSS"),
        }
        for info in args.filter_info:
            if info == "sec_str":
                if not args.compute_sec_structures:
                    print("Error: --compute_sec_structures is not switch on, "
                          "but sec_str is still in --filter_info.")
                    sys.exit()
                self._check_filter_input(
                    args.fasta_files, "fasta file", "sec_str")
                for tool in ("rnafold_path", "relplot_path",
                             "mountain_path"):
                    setattr(args, tool,
                            self.check_execute_file(getattr(args, tool)))
            elif info in ("blast_nr", "blast_srna"):
                for tool in ("blastn_path", "blastx_path",
                             "makeblastdb_path"):
                    setattr(args, tool,
                            self.check_execute_file(getattr(args, tool)))
                if info == "blast_nr":
                    self._check_database(args.nr_database_path,
                                         "--nr_database_path", "blast_nr")
                else:
                    self._check_database(args.srna_database_path,
                                         "--srna_database_path",
                                         "blast_srna")
            elif info in needed_inputs:
                option, label = needed_inputs[info]
                self._check_filter_input(getattr(args, option), label, info)
            elif info.lower() != "none":
                print("Error: Please check the --filter_info, "
                      "invalid value was assigned!")
                sys.exit()
        if args.utr_derived_srna and (args.tss_files is None):
            print("Error: The TSS has to be provided "
                  "if you want to compute UTR-derived sRNA!")
            sys.exit()
        if (args.search_poly_u != 0) and (args.fasta_files is None):
            print("Error: The fasta files have to be provided "
                  "if you want to extend 3'end of sRNA by "
                  "searching poly U tail!")
            sys.exit()
        project_creator.create_subfolders(
            self._paths.required_folders("srna"))
        # The container takes its values positionally; keep this order.
        args_srna = self.args_container.container_srna(
            args.rnafold_path,
            args.relplot_path,
            args.mountain_path,
            args.blastn_path,
            args.blastx_path,
            args.makeblastdb_path,
            self._paths.srna_folder,
            args.utr_derived_srna,
            args.annotation_files,
            args.tss_files,
            args.transcript_files,
            args.tss_intergenic_antisense_tolerance,
            args.tss_5utr_tolerance,
            args.tss_3utr_tolerance,
            args.tss_intercds_tolerance,
            args.filter_info,
            args.processing_site_files,
            args.fasta_files,
            args.mountain_plot,
            args.nr_format,
            args.srna_format,
            args.srna_database_path,
            args.nr_database_path,
            args.cutoff_energy,
            args.parallel_blast,
            args.min_intergenic_tex_coverage,
            args.min_intergenic_notex_coverage,
            args.min_intergenic_fragmented_coverage,
            args.min_complete_5utr_transcript_coverage,
            args.min_antisense_tex_coverage,
            args.min_antisense_notex_coverage,
            args.min_antisense_fragmented_coverage,
            args.min_utr_tex_coverage,
            args.min_utr_notex_coverage,
            args.min_utr_fragmented_coverage,
            args.max_length,
            args.min_length,
            args.tex_notex_libs,
            args.frag_libs,
            args.replicate_tex,
            args.replicate_frag,
            args.tex_notex,
            args.blast_e_nr,
            args.blast_e_srna,
            args.detect_srna_in_cds,
            args.table_best,
            args.decrease_intergenic_antisense,
            args.decrease_utr,
            args.tolerance_intergenic_antisense,
            args.tolerance_utr,
            args.cutoff_nr_hit,
            args.sorf_files,
            args.overlap_percent_cds,
            args.terminator_files,
            args.terminator_tolerance_in_srna,
            args.terminator_tolerance_out_srna,
            args.ignore_hypothetical_protein,
            args.tss_source,
            args.min_all_utr_coverage,
            args.promoter_tables,
            args.ranking_time_promoter,
            args.promoter_names,
            args.compute_sec_structures,
            args.search_poly_u,
            args.min_u_poly_u,
            args.mutation_poly_u)
        detector = sRNADetection(args_srna)
        detector.run_srna_detection(args_srna)

    def sorf_detection(self):
        """Check the input files, then run the sORF prediction.

        The options are packed positionally by ``container_sorf`` and the
        resulting container is passed to ``sORFDetection``.
        """
        print("Running sORF prediction")
        args = self._args
        inputs = (
            ("--transcript_files", args.transcript_files),
            ("--annotation_files", args.annotation_files),
            ("--fasta_files", args.fasta_files),
            ("--srna_files", args.srna_files),
            ("--tss_files", args.tss_files),
        )
        self.check_multi_files([value for _, value in inputs],
                               [flag for flag, _ in inputs])
        project_creator.create_subfolders(
            self._paths.required_folders("sorf"))
        # The container takes its values positionally; keep this order.
        args_sorf = self.args_container.container_sorf(
            self._paths.sorf_folder,
            args.utr_derived_sorf,
            args.transcript_files,
            args.annotation_files,
            args.tss_files,
            args.utr_length,
            args.min_length,
            args.max_length,
            args.cutoff_intergenic_coverage,
            args.cutoff_antisense_coverage,
            args.cutoff_5utr_coverage,
            args.cutoff_3utr_coverage,
            args.cutoff_intercds_coverage,
            args.fasta_files,
            args.tex_notex_libs,
            args.frag_libs,
            args.tex_notex,
            args.replicate_tex,
            args.replicate_frag,
            args.table_best,
            args.srna_files,
            args.start_codon,
            args.stop_codon,
            args.cutoff_base_coverage,
            args.tolerance_rbs,
            args.rbs_not_after_tss,
            args.print_all_combination,
            args.best_no_srna,
            args.best_no_tss,
            args.ignore_hypothetical_protein,
            args.min_rbs_distance,
            args.max_rbs_distance,
            args.tolerance_3end,
            args.tolerance_5end)
        detector = sORFDetection(args_sorf)
        detector.run_sorf_detection(args_sorf)

    def meme(self):
        """Check the promoter-detection options, then run the detection.

        The TSS and fasta inputs are always checked; the annotation files
        are checked only when ``tss_source`` is not set. Depending on
        --program ("meme", "glam2" or "both") the matching executables are
        resolved with ``check_execute_file`` and stored back on the
        arguments. Finally the output sub-folders are created and the
        options are packed positionally by ``container_promoter`` and
        passed to ``MEME``.
        """
        print("Running promoter detection")
        self.check_multi_files(
                [self._args.tss_files, self._args.fasta_files],
                ["--tss_files", "--fasta_files"])
        if not self._args.tss_source:
            self.check_multi_files([self._args.annotation_files],
                                   ["--annotation_files"])
        program = self._args.program
        # Two independent checks: with "both", MEME *and* GLAM2 have to be
        # verified (an if/elif chain would skip GLAM2 in that case).
        if program in ("both", "meme"):
            self._args.meme_path = self.check_execute_file(
                self._args.meme_path)
        if program in ("both", "glam2"):
            self._args.glam2_path = self.check_execute_file(
                self._args.glam2_path)
        project_creator.create_subfolders(
            self._paths.required_folders("promoter"))
        args_pro = self.args_container.container_promoter(
            self._args.meme_path, self._args.glam2_path,
            self._paths.promoter_output_folder, self._args.tex_libs,
            self._args.tss_files, self._args.fasta_files,
            self._args.num_motifs, self._args.nt_before_tss,
            self._args.motif_width, self._args.tss_source,
            self._args.annotation_files, self._args.end_run,
            self._args.combine_all, self._args.e_value,
            self._args.parallels, self._args.program)
        meme = MEME(args_pro)
        meme.run_meme(args_pro)

    def operon(self):
        """Check the input files, then run the operon detection.

        The options are packed positionally by ``container_operon`` and the
        resulting container is passed to ``OperonDetection``.
        """
        print("Running operon detection")
        args = self._args
        inputs = (
            ("--tss_files", args.tss_files),
            ("--annotation_files", args.annotation_files),
            ("--transcript_files", args.transcript_files),
            ("--utr5_files", args.utr5_files),
            ("--utr3_files", args.utr3_files),
            ("--terminator_files", args.terminator_files),
        )
        self.check_multi_files([value for _, value in inputs],
                               [flag for flag, _ in inputs])
        project_creator.create_subfolders(
            self._paths.required_folders("operon"))
        # The container takes its values positionally; keep this order.
        args_op = self.args_container.container_operon(
            args.tss_files,
            args.annotation_files,
            args.transcript_files,
            args.utr5_files,
            args.utr3_files,
            args.terminator_files,
            args.tss_tolerance,
            args.terminator_tolerance,
            args.min_length,
            self._paths.operon_output_folder,
            self._paths.operon_statistics_folder)
        detector = OperonDetection(args_op)
        detector.run_operon(args_op)

    def circrna(self):
        """Check the circRNA options, then run the circular RNA prediction.

        The segemehl executable is only resolved when read files are given;
        testrealign and samtools are always resolved. Resolved paths are
        stored back on the arguments before ``container_circrna`` packs
        them positionally for ``CircRNADetection``.
        """
        print("Running circular RNA prediction")
        args = self._args
        tools = ["testrealign_path", "samtools_path"]
        if args.read_files:
            tools.insert(0, "segemehl_path")
        for tool in tools:
            setattr(args, tool, self.check_execute_file(getattr(args, tool)))
        self.check_multi_files(
            [args.fasta_files, args.annotation_files],
            ["--fasta_files", "--annotation_files"])
        project_creator.create_subfolders(
            self._paths.required_folders("circrna"))
        # The container takes its values positionally; keep this order.
        args_circ = self.args_container.container_circrna(
            args.parallels,
            args.fasta_files,
            args.annotation_files,
            args.bam_files,
            args.read_files,
            self._paths.circrna_stat_folder,
            args.support_reads,
            args.segemehl_path,
            args.testrealign_path,
            args.samtools_path,
            args.start_ratio,
            args.end_ratio,
            args.ignore_hypothetical_protein,
            self._paths.circrna_output_folder)
        detector = CircRNADetection(args_circ)
        detector.run_circrna(args_circ)

    def goterm(self):
        """Check the inputs, then run the GO term mapping.

        The annotation/transcript options go through ``check_multi_files``;
        the UniProt id file and both OBO files are passed to ``check_file``
        with its third argument set to ``True``. The options are then
        packed positionally by ``container_goterm`` for ``GoTermFinding``.
        """
        print("Running GO term mapping")
        args = self._args
        self.check_multi_files(
            [args.annotation_files, args.transcript_files],
            ["--annotation_files", "--transcript_files"])
        required = (
            ("--uniprot_id", args.uniprot_id),
            ("--go.obo", args.go_obo),
            ("--goslim_obo", args.goslim_obo),
        )
        self.check_file([value for _, value in required],
                        [flag for flag, _ in required], True)
        project_creator.create_subfolders(
            self._paths.required_folders("go_term"))
        # The container takes its values positionally; keep this order.
        args_go = self.args_container.container_goterm(
            args.annotation_files,
            self._paths.goterm_output_folder,
            args.uniprot_id,
            args.go_obo,
            args.goslim_obo,
            args.transcript_files)
        finder = GoTermFinding(args_go)
        finder.run_go_term(args_go)

    def srna_target(self):
        """Check the options, then run the sRNA target prediction.

        For each of "RNAup", "RNAplex" and "IntaRNA" found in --program the
        matching executables are resolved with ``check_execute_file`` and
        stored back on the arguments. IntaRNA additionally needs
        ``mode_intarna``; when it is ``None`` an error is printed and
        ``sys.exit()`` is called. The options are then packed positionally
        by ``container_srna_target`` for ``sRNATargetPrediction``.
        """
        print("Running sRNA target prediction")
        args = self._args
        self.check_multi_files(
            [args.fasta_files, args.srna_files, args.annotation_files],
            ["--fasta_files", "--srna_files", "--annotation_files"])
        # program name -> options holding the executables it needs
        executables = (
            ("RNAup", ("rnaup_path",)),
            ("RNAplex", ("rnaplfold_path", "rnaplex_path")),
            ("IntaRNA", ("intarna_path",)),
        )
        for program, tools in executables:
            if program not in args.program:
                continue
            for tool in tools:
                setattr(args, tool,
                        self.check_execute_file(getattr(args, tool)))
        if ("IntaRNA" in args.program) and (args.mode_intarna is None):
            print("Error: --mode_IntaRNA need to be assigned!")
            sys.exit()
        project_creator.create_subfolders(
            self._paths.required_folders("srna_target"))
        # The container takes its values positionally; keep this order.
        args_tar = self.args_container.container_srna_target(
            args.rnaplfold_path,
            args.rnaplex_path,
            args.rnaup_path,
            args.intarna_path,
            args.annotation_files,
            args.fasta_files,
            args.srna_files,
            args.query_srnas,
            args.program,
            args.interaction_length,
            args.window_size_target_rnaplex,
            args.span_target_rnaplex,
            args.window_size_srna_rnaplfold,
            args.span_srna_rnaplfold,
            args.unstructured_region_rnaplex_target,
            args.unstructured_region_rnaplex_srna,
            args.unstructured_region_rnaup,
            args.energy_threshold_rnaplex,
            args.duplex_distance_rnaplex,
            args.top,
            self._paths.starget_output_folder,
            args.parallels_rnaplex,
            args.parallels_rnaup,
            args.parallels_intarna,
            args.continue_rnaup,
            args.slide_window_size_srna_intarna,
            args.max_loop_length_srna_intarna,
            args.slide_window_size_target_intarna,
            args.max_loop_length_target_intarna,
            args.mode_intarna,
            args.potential_target_start,
            args.potential_target_end,
            args.target_feature)
        predictor = sRNATargetPrediction(args_tar)
        predictor.run_srna_target_prediction(args_tar)

    def snp(self):
        """Validate the SNP-calling options, then run SNP/mutation calling.

        ``bam_type`` must be "related_genome" or "reference_genome",
        ``ploidy`` must be "haploid" or "diploid" and ``caller`` must be
        "c" or "m". Any invalid value prints an error and stops the program
        with ``sys.exit()``. Afterwards the bcftools and samtools
        executables are resolved with ``check_execute_file`` and the
        options are packed positionally by ``container_snp`` for
        ``SNPCalling``.
        """
        print("Running SNP/mutations calling")
        self.check_multi_files(
                [self._args.fasta_files],
                ["--fasta_files"])
        if self._args.bam_type not in ("related_genome", "reference_genome"):
            print("Error: Please assign \"related_genome\" or"
                  " \"reference_genome\" to --bam_type!")
            sys.exit()
        if self._args.ploidy not in ("haploid", "diploid"):
            # NOTE(review): the message names --chromosome_type although the
            # value checked is self._args.ploidy; confirm the flag name.
            print("Error: Please assign \"haploid\" or"
                  " \"diploid\" to --chromosome_type!")
            # Abort like the bam_type check does instead of running on.
            sys.exit()
        if self._args.caller not in ("c", "m"):
            print("Error: Please assign \"c\" or"
                  " \"m\" to --caller!")
            # Abort like the bam_type check does instead of running on.
            sys.exit()
        for prop in ("bcftools_path", "samtools_path"):
            setattr(self._args, prop,
                    self.check_execute_file(getattr(self._args, prop)))
        project_creator.create_subfolders(self._paths.required_folders("snp"))
        args_snp = self.args_container.container_snp(
            self._args.samtools_path, self._args.bcftools_path,
            self._args.bam_type,
            self._args.program, self._args.fasta_files,
            self._args.bam_files,
            self._args.quality, self._args.read_depth_range,
            self._paths.snp_output_folder, self._args.indel_fraction,
            self._args.ploidy, self._args.rg_tag, self._args.caller,
            self._args.filter_tag_info, self._args.dp4_cutoff)
        snp = SNPCalling(args_snp)
        snp.run_snp_calling(args_snp)

    def ppi(self):
        """Check the inputs, then retrieve the protein-protein networks.

        The annotation files go through ``check_multi_files`` and the query
        strains / species string through ``check_parameter``. The options
        are packed positionally by ``container_ppi``; ``PPINetwork`` itself
        is created from the PPI output folder.
        """
        print("Running protein-protein interaction networks prediction")
        args = self._args
        self.check_multi_files([args.annotation_files],
                               ["--annotation_files"])
        self.check_parameter(
            [args.query_strains, args.species_string],
            ["--query_strains", "--species_string"])
        project_creator.create_subfolders(
            self._paths.required_folders("ppi_network"))
        # The container takes its values positionally; keep this order.
        args_ppi = self.args_container.container_ppi(
            args.annotation_files,
            args.query_strains,
            args.without_strain_pubmed,
            args.species_string,
            args.score,
            self._paths.ppi_output_folder,
            args.node_size,
            args.query)
        network = PPINetwork(self._paths.ppi_output_folder)
        network.retrieve_ppi_network(args_ppi)

    def sublocal(self):
        """Check the options, then run the subcellular localization step.

        ``bacteria_type`` must be "positive" or "negative"; any other value
        prints an error and calls ``sys.exit()``. The psortb executable is
        resolved with ``check_execute_file`` and the options are packed
        positionally by ``container_sublocal`` for ``SubLocal``.
        """
        print("Running subcellular localization prediction")
        args = self._args
        inputs = (
            ("--annotation_files", args.annotation_files),
            ("--fasta_files", args.fasta_files),
            ("--transcript_files", args.transcript_files),
        )
        self.check_multi_files([value for _, value in inputs],
                               [flag for flag, _ in inputs])
        if args.bacteria_type not in ("positive", "negative"):
            print("Error: Please assign \"positive\" or"
                  " \"negative\" to --bacteria_type!")
            sys.exit()
        args.psortb_path = self.check_execute_file(args.psortb_path)
        project_creator.create_subfolders(
            self._paths.required_folders("subcellular_localization"))
        # The container takes its values positionally; keep this order.
        args_sub = self.args_container.container_sublocal(
            args.psortb_path,
            args.annotation_files,
            args.fasta_files,
            args.bacteria_type,
            args.difference_multi,
            self._paths.sublocal_output_folder,
            args.transcript_files)
        predictor = SubLocal(args_sub)
        predictor.run_sub_local(args_sub)

    def ribos(self):
        """Check the options, then run riboswitch/RNA thermometer prediction.

        --program selects what is predicted: "riboswitch", "thermometer" or
        "both"; any other value prints an error and calls ``sys.exit()``.
        For each selected kind the id file and the Rfam path are passed to
        ``check_file`` and the output sub-folders are created; the output
        folder of a kind that is not selected is passed on as ``None``.
        The cmscan and cmpress executables are resolved with
        ``check_execute_file`` before ``container_ribos`` packs the options
        positionally for ``Ribos``.
        """
        print("Running riboswitch and RNA thermometer prediction")
        self.check_multi_files(
                [self._args.annotation_files, self._args.fasta_files,
                 self._args.tss_files, self._args.transcript_files],
                ["--annotation_files", "--fasta_files", "--tss_files",
                 "--transcript_files"])
        program = self._args.program
        if program not in ("both", "thermometer", "riboswitch"):
            print("Error: Please assign \"thermometer\", \"riboswitch\" "
                  "or \"both\" in --program!")
            sys.exit()
        want_ribos = program in ("both", "riboswitch")
        want_thermo = program in ("both", "thermometer")
        # All input checks run before any folder is created.
        if want_ribos:
            self.check_file(
                [self._args.riboswitch_id_file, self._args.rfam_path],
                ["--riboswitch_id_file", "--rfam_path"], True)
        if want_thermo:
            # Use the same flag name in every mode so the error message
            # always points at --rna_thermometer_id_file.
            self.check_file(
                [self._args.rna_thermometer_id_file, self._args.rfam_path],
                ["--rna_thermometer_id_file", "--rfam_path"], True)
        ribos_path = None
        thermo_path = None
        if want_ribos:
            project_creator.create_subfolders(
                self._paths.required_folders("riboswitch"))
            ribos_path = self._paths.ribos_output_folder
        if want_thermo:
            project_creator.create_subfolders(
                self._paths.required_folders("thermometer"))
            thermo_path = self._paths.thermo_output_folder
        self._args.cmscan_path = self.check_execute_file(
            self._args.cmscan_path)
        self._args.cmpress_path = self.check_execute_file(
            self._args.cmpress_path)
        args_ribo = self.args_container.container_ribos(
            self._args.program, self._args.rna_thermometer_id_file,
            self._args.cmscan_path, self._args.cmpress_path,
            self._args.riboswitch_id_file,
            self._args.annotation_files, self._args.fasta_files,
            self._args.tss_files, self._args.transcript_files,
            self._args.rfam_path, ribos_path,
            thermo_path, self._args.e_value,
            self._args.output_all, self._paths.database_folder,
            self._args.tolerance,
            self._args.tolerance_rbs, self._args.utr_length)
        ribos = Ribos(args_ribo)
        ribos.run_ribos(args_ribo)

    def crispr(self):
        """Check the inputs, then run the CRISPR prediction.

        The CRT executable path is resolved with ``check_execute_file`` and
        stored back on the arguments; the options are then packed
        positionally by ``container_cris`` for ``Crispr``.
        """
        print("Running CRISPR prediction")
        args = self._args
        self.check_multi_files(
            [args.fasta_files, args.annotation_files],
            ["--fasta_files", "--annotation_files"])
        args.crt_path = self.check_execute_file(args.crt_path)
        project_creator.create_subfolders(
            self._paths.required_folders("crispr"))
        # The container takes its values positionally; keep this order.
        args_cris = self.args_container.container_cris(
            args.fasta_files,
            args.annotation_files,
            args.crt_path,
            args.window_size,
            args.min_number_repeats,
            args.min_length_repeat,
            args.Max_length_repeat,
            args.min_length_spacer,
            args.Max_length_spacer,
            self._paths.crispr_output_folder,
            args.ignore_hypothetical_protein)
        detector = Crispr(args_cris)
        detector.run_crispr(args_cris)

    def merge(self):
        """Merge the transcript and other feature files into one gff file.

        The results go to the "merge_all_features" folder below the output
        folder; the merged file path is that folder joined with
        --output_prefix. The transcript file and the other feature files
        are passed to ``check_file`` with its third argument ``False``.
        """
        print("Merging all features to one gff file")
        args = self._args
        target_folder = os.path.join(self._paths.output_folder,
                                     "merge_all_features")
        self.helper.check_make_folder(target_folder)
        candidates = [args.transcript_file] + args.other_features_files
        self.check_file(candidates,
                        ["--transcript_file", "--other_features_files"],
                        False)
        self.check_parameter([args.output_prefix], ["--output_prefix"])
        run_merge(target_folder,
                  args.transcript_file,
                  args.other_features_files,
                  args.terminator_tolerance,
                  args.tss_tolerance,
                  os.path.join(target_folder, args.output_prefix))

    def screen(self):
        """Check the options, then run the screenshot generation.

        The main gff and the fasta file are passed to ``check_file`` with
        its third argument ``True``. Every side gff (surrounding whitespace
        stripped) must be an existing file, --output_folder must be given
        and --present must be "expand", "collapse" or "squish"; otherwise
        an error is printed and ``sys.exit()`` is called. The options are
        then packed positionally by ``container_screen`` for ``Screen``.
        """
        print("Running screenshot generation")
        args = self._args
        self.check_file([args.main_gff, args.fasta_file],
                        ["--main_gff", "--fasta_file"], True)
        if args.side_gffs is not None:
            for side_gff in args.side_gffs:
                if os.path.isfile(side_gff.strip()):
                    continue
                print("Error: The --side_gffs do not exist!")
                sys.exit()
        if args.output_folder is None:
            print("Error: Please assign --output_folder!")
            sys.exit()
        if args.present not in ("expand", "collapse", "squish"):
            print("Error: Please assign \"expand\" or "
                  "\"collapse\" or \"squish\" to --present!")
            sys.exit()
        # The container takes its values positionally; keep this order.
        args_sc = self.args_container.container_screen(
            args.main_gff,
            args.side_gffs,
            args.fasta_file,
            args.height,
            args.tex_notex_libs,
            args.frag_libs,
            args.present,
            args.output_folder)
        shooter = Screen(args_sc)
        shooter.screenshot(args_sc)
示例#8
0
class sRNADetection(object):
    '''detection of sRNA'''
    def __init__(self, args_srna):
        '''Create the helper objects and derive all working paths.

        Output and temporary paths are built below args_srna.out_folder;
        the input paths point at the "tmp" sub-folder of the corresponding
        input folder (None when that optional folder is not given).
        '''
        self.args_container = ArgsContainer()
        self.helper = Helper()
        self.multiparser = Multiparser()
        out_folder = args_srna.out_folder
        self.gff_output = os.path.join(out_folder, "gffs")
        self.table_output = os.path.join(out_folder, "tables")
        self.stat_path = os.path.join(out_folder, "statistics")
        self.tss_path = self._check_folder_exist(args_srna.tss_folder)
        self.pro_path = self._check_folder_exist(args_srna.pro_folder)
        self.sorf_path = self._check_folder_exist(args_srna.sorf_file)
        self.fasta_path = os.path.join(args_srna.fastas, "tmp")
        self.tran_path = os.path.join(args_srna.trans, "tmp")
        self.term_path = self._check_folder_exist(args_srna.terms)
        self.merge_wigs = os.path.join(out_folder, "merge_wigs")
        # key -> name of the temporary file prefix inside the output folder
        prefix_names = (
            ("merge", "tmp_merge"),
            ("utr", "tmp_utrsrna"),
            ("normal", "tmp_normal"),
            ("in_cds", "tmp_incds"),
            ("merge_table", "tmp_merge_table"),
            ("utr_table", "tmp_utrsrna_table"),
            ("normal_table", "tmp_normal_table"),
            ("in_cds_table", "tmp_incds_table"),
            ("basic", "tmp_basic"),
            ("energy", "tmp_energy"),
        )
        self.prefixs = {}
        for key, name in prefix_names:
            self.prefixs[key] = os.path.join(out_folder, name)
        self.tmps = {}
        for key, name in (("nr", "tmp_nr"), ("srna", "tmp_sRNA")):
            self.tmps[key] = os.path.join(out_folder, name)
        self.best_table = os.path.join(self.table_output, "best")
        self.all_best = {}
        for kind, folder in (("gff", self.gff_output),
                             ("table", self.table_output)):
            self.all_best["all_" + kind] = os.path.join(
                folder, "all_candidates")
            self.all_best["best_" + kind] = os.path.join(folder, "best")

    def _check_folder_exist(self, folder):
        if folder is not None:
            path = os.path.join(folder, "tmp")
        else:
            path = None
        return path

    def _check_gff(self, gffs):
        for gff in os.listdir(gffs):
            if gff.endswith(".gff"):
                self.helper.check_uni_attributes(os.path.join(gffs, gff))

    def _run_format(self, blast_path, database, type_, db_file, err):
        call([
            os.path.join(blast_path, "makeblastdb"), "-in", database,
            "-dbtype", type_, "-out", db_file
        ],
             stderr=err)

    def _formatdb(self, database, type_, out_folder, blast_path,
                  database_type):
        err = open(os.path.join(out_folder, "log.txt"), "w")
        if (database.endswith(".fa")) or (database.endswith(".fna")) or (
                database.endswith(".fasta")):
            pass
        else:
            folders = database.split("/")
            filename = folders[-1]
            folder = "/".join(folders[:-1])
            for fasta in os.listdir(folder):
                if (fasta.endswith(".fa")) or (fasta.endswith(".fna")) or (
                        fasta.endswith(".fasta")):
                    if ".".join(fasta.split(".")[:-1]) == filename:
                        database = os.path.join(folder, fasta)
        if database_type == "sRNA":
            change_format(database, "tmp_srna_database")
            os.remove(database)
            shutil.move("tmp_srna_database", database)
        db_file = ".".join(database.split(".")[:-1])
        self._run_format(blast_path, database, type_, db_file, err)
        err.close()

    def _merge_frag_tex_file(self, files, args_srna):
        '''merge the results of fragmented and tex treated libs'''
        if (args_srna.frag_wigs is not None) and (args_srna.tex_wigs
                                                  is not None):
            self.helper.merge_file(files["frag_gff"], files["tex_gff"])
            self.helper.merge_file(files["frag_csv"], files["tex_csv"])
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])
            os.remove(files["frag_csv"])
            os.remove(files["frag_gff"])
            os.remove(files["tex_gff"])
        elif (args_srna.frag_wigs is not None):
            shutil.move(files["frag_csv"], files["merge_csv"])
            self.helper.sort_gff(files["frag_gff"], files["merge_gff"])
            os.remove(files["frag_gff"])
        elif (args_srna.tex_wigs is not None):
            shutil.move(files["tex_csv"], files["merge_csv"])
            self.helper.sort_gff(files["tex_gff"], files["merge_gff"])

    def _read_lib_wig(self, args_srna):
        '''Read the libraries and both strand wiggle files.

        Returns the list [libs, texs, forward wigs, reverse wigs], where
        libs and texs come from read_libs and the wigs from read_wig
        called with "+" and "-".
        '''
        libs, texs = read_libs(args_srna.input_libs, args_srna.wig_folder)
        strand_wigs = [read_wig(wig_file, strand, libs)
                       for wig_file, strand in (
                           (args_srna.wig_f_file, "+"),
                           (args_srna.wig_r_file, "-"))]
        return [libs, texs] + strand_wigs

    def _run_normal(self, prefix, gff, tran, fuzzy_tss, args_srna):
        '''Detect intergenic and antisense sRNA for one genome prefix.

        A leftover "tmp_cutoff_inter" file in the output folder is removed
        first. For each available library type (fragmented first, then
        TEX) the temporary output names are set, the arguments are
        extended through container_intersrna, the wiggle data are read and
        intergenic_srna is run. The per-library results are then merged
        with _merge_frag_tex_file.

        Returns (tss, frag_datas, tex_datas): the TSS file to use and the
        wiggle data of each library type (None for a type not provided).
        '''
        if "tmp_cutoff_inter" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_cutoff_inter"))
        files = dict.fromkeys(("frag_gff", "frag_csv", "tex_gff", "tex_csv",
                               "merge_gff", "merge_csv"))
        tss = None
        if self.tss_path is not None:
            tss = self.helper.get_correct_file(self.tss_path, "_TSS.gff",
                                               prefix, None, None)
        pro = None
        if self.pro_path is not None:
            pro = self.helper.get_correct_file(self.pro_path,
                                               "_processing.gff", prefix,
                                               None, None)
        lib_datas = {"frag": None, "tex": None}
        for lib_type in ("frag", "tex"):
            # args_srna is re-bound inside the loop, so the TEX check sees
            # the container returned for the fragmented libraries.
            if getattr(args_srna, lib_type + "_wigs") is None:
                continue
            files[lib_type + "_gff"] = os.path.join(
                args_srna.out_folder,
                "tmp_{0}_{1}".format(lib_type, prefix))
            files[lib_type + "_csv"] = os.path.join(
                args_srna.out_folder,
                "tmp_{0}_table_{1}".format(lib_type, prefix))
            args_srna = self.args_container.container_intersrna(
                lib_type, files, args_srna, prefix,
                os.path.join(args_srna.gffs, gff), tran, tss, pro, fuzzy_tss)
            datas = self._read_lib_wig(args_srna)
            lib_datas[lib_type] = datas
            intergenic_srna(args_srna, datas[0], datas[1],
                            datas[2], datas[3])
        files["merge_csv"] = "_".join([self.prefixs["normal_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["normal"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        has_tss_class = "TSS_class" in os.listdir(args_srna.out_folder)
        if has_tss_class and (not args_srna.tss_source):
            tss = os.path.join(args_srna.out_folder, "TSS_class",
                               prefix + "_TSS.gff")
        return tss, lib_datas["frag"], lib_datas["tex"]

    def _run_utrsrna(self, gff, tran, prefix, tss, pro, args_srna, frag_datas,
                     tex_datas):
        '''Detect UTR-derived sRNA for one genome.

        ``utr_derived_srna`` is run once per supplied library type
        (TEX+/- first, then fragmented); the outputs are merged and then
        filtered with ``filter_utr`` using ``args_srna.min_utr``.

        Args:
            gff: file name of the annotation gff inside ``args_srna.gffs``.
            tran: transcript gff handed to the argument container.
            prefix: genome name used to build the file names.
            tss: TSS gff handed to the argument container.
            pro: processing-site gff, or None.
            args_srna: argument container of the sRNA subcommand.
            frag_datas: ``[libs, texs, wigs_f, wigs_r]`` of the fragmented
                libraries; only indexed when ``args_srna.frag_wigs`` is set.
            tex_datas: same layout for the TEX+/- libraries; only indexed
                when ``args_srna.tex_wigs`` is set.
        '''
        # Drop a leftover median file so this genome starts clean.
        if "tmp_median" in os.listdir(args_srna.out_folder):
            os.remove(os.path.join(args_srna.out_folder, "tmp_median"))
        # Output paths; an entry stays None when its library type is absent.
        files = {
            "frag_gff": None,
            "frag_csv": None,
            "tex_gff": None,
            "tex_csv": None,
            "merge_gff": None,
            "merge_csv": None
        }
        # TEX+/- libraries.
        if args_srna.tex_wigs is not None:
            files["tex_gff"] = os.path.join(args_srna.out_folder,
                                            "_".join(["tmp_utr_tex", prefix]))
            files["tex_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_tex_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "tex", prefix,
                args_srna)
            utr_derived_srna(args_srna, tex_datas[0], tex_datas[1],
                             tex_datas[2], tex_datas[3])
        # Fragmented libraries.
        if args_srna.frag_wigs is not None:
            files["frag_gff"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag", prefix]))
            files["frag_csv"] = os.path.join(
                args_srna.out_folder, "_".join(["tmp_utr_frag_table", prefix]))
            args_srna = self.args_container.container_utrsrna(
                os.path.join(args_srna.gffs, gff), tran, tss, files, pro,
                os.path.join(self.fasta_path, prefix + ".fa"), "frag", prefix,
                args_srna)
            utr_derived_srna(args_srna, frag_datas[0], frag_datas[1],
                             frag_datas[2], frag_datas[3])
        files["merge_csv"] = "_".join([self.prefixs["utr_table"], prefix])
        files["merge_gff"] = "_".join([self.prefixs["utr"], prefix])
        self._merge_frag_tex_file(files, args_srna)
        filter_utr(files["merge_gff"], files["merge_csv"], args_srna.min_utr)

    def _check_necessary_file(self, args_srna):
        '''Validate the inputs and parse/combine the optional annotations.

        Exits the program when the annotation gffs, the transcripts or
        both wig folders are missing, when UTR-derived detection is
        requested without TSS files, or when fasta files are needed but
        not given. Every supplied optional folder (TSS, processing site,
        sORF, terminator, fasta) is parsed and combined through
        ``self.multiparser``.

        Args:
            args_srna: argument container of the sRNA subcommand.
        '''
        # Annotation, transcripts and at least one wig folder are mandatory.
        if (args_srna.gffs is None) or (args_srna.trans is None) or (
            (args_srna.tex_wigs is None) and (args_srna.frag_wigs is None)):
            print("Error: lack required files!!!!")
            sys.exit()
        if args_srna.utr_srna:
            if (args_srna.tss_folder is None):
                print("Error: lack required TSS files for UTR "
                      "derived sRNA detection!!!!")
                sys.exit()
            # Missing processing sites only trigger a warning.
            if (args_srna.pro_folder is None):
                print("Warning: lack Processing site files for UTR "
                      "derived sRNA detection!!!")
                print("it may effect the results!!!!")
        self._check_gff(args_srna.gffs)
        self._check_gff(args_srna.trans)
        if args_srna.tss_folder is not None:
            self._check_gff(args_srna.tss_folder)
            self.multiparser.parser_gff(args_srna.tss_folder, "TSS")
            self.multiparser.combine_gff(args_srna.gffs, self.tss_path, None,
                                         "TSS")
        if args_srna.pro_folder is not None:
            self._check_gff(args_srna.pro_folder)
            self.multiparser.parser_gff(args_srna.pro_folder, "processing")
            self.multiparser.combine_gff(args_srna.gffs, self.pro_path, None,
                                         "processing")
        if args_srna.sorf_file is not None:
            self._check_gff(args_srna.sorf_file)
            self.multiparser.parser_gff(args_srna.sorf_file, "sORF")
            self.multiparser.combine_gff(args_srna.gffs, self.sorf_path, None,
                                         "sORF")
        # NOTE(review): the fasta check is only reached when import_info is
        # not None, although the utr_srna / nr_database / srna_database
        # clauses do not depend on import_info - confirm this is intended.
        if args_srna.import_info is not None:
            if args_srna.utr_srna or ("sec_str" in args_srna.import_info) or (
                    args_srna.nr_database
                    is not None) or (args_srna.srna_database is not None):
                # NOTE(review): this message mentions UTR-derived detection
                # even when another clause of the condition triggered it.
                if args_srna.fastas is None:
                    print("Error: lack required fasta files for UTR "
                          "derived sRNA detection!!!!")
                    sys.exit()
                self.multiparser.parser_fasta(args_srna.fastas)
                self.multiparser.combine_fasta(args_srna.gffs, self.fasta_path,
                                               None)
        if args_srna.terms is not None:
            self._check_gff(args_srna.terms)
            self.multiparser.parser_gff(args_srna.terms, "term")
            self.multiparser.combine_gff(args_srna.gffs, self.term_path, None,
                                         "term")
        else:
            # Later steps test self.term_path against None to skip the
            # terminator comparison and clean-up.
            self.term_path = None

    def _merge_tex_frag_datas(self, tex_datas, frag_datas):
        if (tex_datas is not None) and (frag_datas is not None):
            for index in [2, 3]:
                for strain, conds in frag_datas[index].items():
                    if strain not in tex_datas[index].keys():
                        tex_datas[index][strain] = conds
                    else:
                        for cond, tracks in conds.items():
                            tex_datas[index][strain][cond] = tracks
        elif (tex_datas is None) and (frag_datas is not None):
            tex_datas = frag_datas
        return tex_datas

    def _run_program(self, args_srna):
        '''Run the sRNA detection for every ".gff" file in ``args_srna.gffs``.

        For each genome: detect intergenic/antisense sRNA, optionally
        detect UTR-derived sRNA, merge both result sets, then filter and
        sort the merged gff.

        Args:
            args_srna: argument container of the sRNA subcommand.

        Returns:
            List of the genome prefixes (file names without ".gff") that
            were processed.
        '''
        prefixs = []
        tss = None
        for gff in os.listdir(args_srna.gffs):
            if gff.endswith(".gff"):
                prefix = gff.replace(".gff", "")
                prefixs.append(prefix)
                print("Running sRNA detection of {0}....".format(prefix))
                tran = self.helper.get_correct_file(self.tran_path,
                                                    "_transcript.gff", prefix,
                                                    None, None)
                # Per-genome gff and table names of the three result sets.
                gffs = {
                    "merge": "_".join([self.prefixs["merge"], prefix]),
                    "utr": "_".join([self.prefixs["utr"], prefix]),
                    "normal": "_".join([self.prefixs["normal"], prefix])
                }
                csvs = {
                    "merge": "_".join([self.prefixs["merge_table"], prefix]),
                    "utr": "_".join([self.prefixs["utr_table"], prefix]),
                    "normal": "_".join([self.prefixs["normal_table"], prefix])
                }
                tss, frag_datas, tex_datas = self._run_normal(
                    prefix, gff, tran, args_srna.fuzzy_tsss["inter"],
                    args_srna)
                if args_srna.utr_srna:
                    print("Running UTR derived sRNA detection of {0}".format(
                        prefix))
                    # Fall back to the TSS input when _run_normal found none.
                    if tss is None:
                        tss = self.helper.get_correct_file(
                            self.tss_path, "_TSS.gff", prefix, None, None)
                    if self.pro_path is not None:
                        pro = self.helper.get_correct_file(
                            self.pro_path, "_processing.gff", prefix, None,
                            None)
                    else:
                        pro = None
                    # Without a TSS file the UTR-derived step is skipped.
                    if tss is not None:
                        self._run_utrsrna(gff, tran, prefix, tss, pro,
                                          args_srna, frag_datas, tex_datas)
                tex_datas = self._merge_tex_frag_datas(tex_datas, frag_datas)
                # Release the name and collect before the merge step.
                del frag_datas
                gc.collect()
                self._merge_srna(args_srna, gffs, csvs, prefix,
                                 os.path.join(args_srna.gffs, gff), tss,
                                 tex_datas)
                del tex_datas
                filter_frag(csvs["merge"], gffs["merge"])
                self.helper.sort_gff(gffs["merge"],
                                     "_".join([self.prefixs["basic"], prefix]))
        return prefixs

    def _merge_srna(self, args_srna, gffs, csvs, prefix, gff_file, tss,
                    tex_datas):
        '''Merge the sRNA gff files and tables of one genome.

        Calls ``merge_srna_gff`` and then ``merge_srna_table``. Positions
        2 and 3 of ``tex_datas`` are passed as the two wig dictionaries.
        '''
        print("merging data of sRNA...")
        in_cds = args_srna.in_cds
        overlap_cutoff = args_srna.cutoff_overlap
        merge_srna_gff(gffs, in_cds, overlap_cutoff, gff_file)
        merged_gff = gffs["merge"]
        wigs_f = tex_datas[2]
        wigs_r = tex_datas[3]
        merge_srna_table(merged_gff, csvs, wigs_f, wigs_r, tss, args_srna)

    def _run_RNAfold(self, seq_file, vienna_path, sec_file):
        os.system(" ".join([
            "cat", seq_file, "|",
            os.path.join(vienna_path, "RNAfold"), "-p", ">", sec_file
        ]))

    def _get_seq_sec(self, fasta_path, out_folder, prefix, sec_path, dot_path,
                     vienna_path):
        '''Fold the sRNA sequences of one genome and extract their energy.

        The working directory is changed to ``<out_folder>/tmp_srna`` and
        is left there on return.

        Args:
            fasta_path: folder searched for ``<prefix>.fa``.
            out_folder: output folder of the sRNA subcommand.
            prefix: genome name.
            sec_path: folder for the secondary-structure plots.
            dot_path: folder for the dot plots.
            vienna_path: folder that contains the RNAfold executable.

        Returns:
            Dict with the keys "sec", "dot", "main" and "tmp": the two
            plot folders, the working directory at call time and the
            temporary folder, the first, second and last joined onto
            that working directory.
        '''
        detect = False
        # Look for the genome fasta; after the break ``fasta`` still holds
        # the matching file name.
        for fasta in os.listdir(fasta_path):
            if fasta.endswith(".fa") and (fasta.replace(".fa", "") == prefix):
                detect = True
                break
        if detect:
            detect = False
            seq_file = os.path.join(out_folder, "_".join(["sRNA_seq", prefix]))
            sec_file = os.path.join(out_folder, "_".join(["sRNA_2d", prefix]))
            self.helper.get_seq("_".join([self.prefixs["basic"], prefix]),
                                os.path.join(fasta_path, fasta), seq_file)
        else:
            print("Error:There is not fasta file of {0}".format(prefix))
            print("please check your imported information")
            sys.exit()
        tmp_path = os.path.join(out_folder, "tmp_srna")
        self.helper.check_make_folder(tmp_path)
        main_path = os.getcwd()
        # Presumably RNAfold writes its plot files into the working
        # directory, hence the chdir - TODO confirm.
        os.chdir(tmp_path)
        # After the chdir every path is joined onto the previous working
        # directory.
        sec_file = os.path.join(main_path, sec_file)
        seq_file = os.path.join(main_path, seq_file)
        tmp_sec_path = os.path.join(main_path, sec_path)
        tmp_dot_path = os.path.join(main_path, dot_path)
        self._run_RNAfold(seq_file, vienna_path, sec_file)
        extract_energy(
            os.path.join(main_path, "_".join([self.prefixs["basic"], prefix])),
            sec_file,
            os.path.join(main_path, "_".join([self.prefixs["energy"],
                                              prefix])))
        # Replace "|" by "_" in the name of every file in the working
        # directory.
        for ps in os.listdir(os.getcwd()):
            new_ps = ps.replace("|", "_")
            shutil.move(ps, new_ps)
        return {
            "sec": tmp_sec_path,
            "dot": tmp_dot_path,
            "main": main_path,
            "tmp": os.path.join(main_path, tmp_path)
        }

    def _run_replot(self, vienna_util, tmp_paths, file_, dot_file, rel_file):
        os.system(" ".join([
            os.path.join(vienna_util, "relplot.pl"),
            os.path.join(tmp_paths["tmp"], file_),
            os.path.join(tmp_paths["tmp"], dot_file), ">",
            os.path.join(tmp_paths["tmp"], rel_file)
        ]))

    def _convert_pdf(self, ps2pdf14_path, tmp_paths, file_, pdf_file):
        '''Run the ps2pdf14 executable on one file of the temporary folder.

        ``pdf_file`` is passed to the executable exactly as given.
        '''
        source_ps = os.path.join(tmp_paths["tmp"], file_)
        command = [ps2pdf14_path, source_ps, pdf_file]
        call(command)

    def _replot_sec_to_pdf(self, vienna_util, tmp_paths, ps2pdf14_path,
                           prefix):
        '''Replot the structure plots, convert them to pdf and store them.

        Every "*ss.ps" file of the working directory is replotted, every
        "*rss.ps" / "*dp.ps" file of the temporary folder is converted to
        pdf, and the pdf files are moved into new per-genome folders
        below ``tmp_paths["sec"]`` and ``tmp_paths["dot"]``.
        '''
        tmp_dir = tmp_paths["tmp"]
        structure_plots = [name for name in os.listdir(os.getcwd())
                           if name.endswith("ss.ps")]
        for file_ in structure_plots:
            print("replot {0}".format(file_))
            self._run_replot(vienna_util, tmp_paths, file_,
                             file_.replace("ss.ps", "dp.ps"),
                             file_.replace("ss.ps", "rss.ps"))
        postscripts = [name for name in os.listdir(tmp_dir)
                       if name.endswith(("rss.ps", "dp.ps"))]
        for file_ in postscripts:
            print("convert {0} to pdf".format(file_))
            self._convert_pdf(ps2pdf14_path, tmp_paths, file_,
                              file_.replace(".ps", ".pdf"))
        sec_dir = os.path.join(tmp_paths["sec"], prefix)
        dot_dir = os.path.join(tmp_paths["dot"], prefix)
        os.mkdir(sec_dir)
        os.mkdir(dot_dir)
        self.helper.move_all_content(tmp_dir, sec_dir, ["rss.pdf"])
        self.helper.move_all_content(tmp_dir, dot_dir, ["dp.pdf"])

    def _run_mountain(self, vienna_util, tmp_paths, dot_file, out):
        '''Run ``mountain.pl`` on one dot plot, writing its stdout to ``out``.'''
        script = os.path.join(vienna_util, "mountain.pl")
        dot_plot = os.path.join(tmp_paths["tmp"], dot_file)
        call([script, dot_plot], stdout=out)

    def _plot_mountain(self, mountain, moun_path, tmp_paths, prefix,
                       vienna_util):
        if mountain:
            tmp_moun_path = os.path.join(tmp_paths["main"], moun_path)
            os.mkdir(os.path.join(tmp_moun_path, prefix))
            txt_path = os.path.join(tmp_paths["tmp"], "tmp_txt")
            self.helper.check_make_folder(txt_path)
            print("Generating mountain plot of {0}....".format(prefix))
            for dot_file in os.listdir(tmp_paths["tmp"]):
                if dot_file.endswith("dp.ps"):
                    moun_txt = os.path.join(tmp_paths["tmp"], "mountain.txt")
                    out = open(moun_txt, "w")
                    moun_file = dot_file.replace("dp.ps", "mountain.pdf")
                    print("Generating {0}".format(moun_file))
                    self._run_mountain(vienna_util, tmp_paths, dot_file, out)
                    plot_mountain_plot(moun_txt, moun_file)
                    shutil.move(moun_file,
                                os.path.join(tmp_moun_path, prefix, moun_file))
                    out.close()
                    os.remove(moun_txt)

    def _compute_2d_and_energy(self, args_srna, prefixs):
        '''Compute secondary structures and folding energy for every genome.

        For each prefix the sequences are folded, the plots are converted
        to pdf, mountain plots are optionally generated and the basic
        sRNA file is replaced by the version produced by the energy step.

        Args:
            args_srna: argument container of the sRNA subcommand.
            prefixs: list of genome names.
        '''
        print("Running energy calculation....")
        moun_path = os.path.join(args_srna.out_folder, "mountain_plot")
        sec_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "sec_plot")
        dot_path = os.path.join(args_srna.out_folder, "sec_structure",
                                "dot_plot")
        # Clear the three plot folders before new plots are written.
        self.helper.remove_all_content(sec_path, None, "dir")
        self.helper.remove_all_content(dot_path, None, "dir")
        self.helper.remove_all_content(moun_path, None, "dir")
        for prefix in prefixs:
            # _get_seq_sec changes the working directory to the temporary
            # folder; the steps below up to os.chdir() run from there.
            tmp_paths = self._get_seq_sec(self.fasta_path,
                                          args_srna.out_folder, prefix,
                                          sec_path, dot_path,
                                          args_srna.vienna_path)
            self._replot_sec_to_pdf(args_srna.vienna_util, tmp_paths,
                                    args_srna.ps2pdf14_path, prefix)
            self._plot_mountain(args_srna.mountain, moun_path, tmp_paths,
                                prefix, args_srna.vienna_util)
            self.helper.remove_all_content(os.getcwd(), ".ps", "file")
            # Back to the original working directory.
            os.chdir(tmp_paths["main"])
            # The file written by the energy step replaces the basic file.
            shutil.move("_".join([self.prefixs["energy"], prefix]),
                        "_".join([self.prefixs["basic"], prefix]))
            shutil.rmtree(os.path.join(args_srna.out_folder, "tmp_srna"))

    def _run_blast(self, blast_path, program, database, e, seq_file,
                   blast_file, strand):
        '''Run one BLAST program as an external process.

        The executable is ``<blast_path>/<program>``; the remaining
        arguments are passed as its -db, -evalue, -strand, -query and
        -out options.
        '''
        command = [os.path.join(blast_path, program)]
        command += ["-db", database]
        command += ["-evalue", str(e)]
        command += ["-strand", strand]
        command += ["-query", seq_file]
        command += ["-out", blast_file]
        call(command)

    def _get_strand_fasta(self, seq_file, out_folder):
        tmp_plus = os.path.join(out_folder, "tmp_plus.fa")
        tmp_minus = os.path.join(out_folder, "tmp_minus.fa")
        out_p = open(tmp_plus, "w")
        out_m = open(tmp_minus, "w")
        strand = ""
        with open(seq_file) as sh:
            for line in sh:
                line = line.strip()
                if line.startswith(">"):
                    if line[-1] == "+":
                        out_p.write(line + "\n")
                        strand = "plus"
                    elif line[-1] == "-":
                        out_m.write(line + "\n")
                        strand = "minus"
                else:
                    if strand == "plus":
                        out_p.write(line + "\n")
                    elif strand == "minus":
                        out_m.write(line + "\n")
        out_p.close()
        out_m.close()
        return tmp_plus, tmp_minus

    def _blast(self, database, database_format, data_type, args_srna, prefixs,
               program, database_type, e):
        '''Blast the sRNA sequences of every genome against one database.

        Args:
            database: path of the database; when None only an error
                message is printed.
            database_format: when truthy the database is formatted first
                with ``_formatdb``.
            data_type: passed to ``_formatdb``.
            args_srna: argument container of the sRNA subcommand.
            prefixs: list of genome names.
            program: name of the BLAST executable inside
                ``args_srna.blast_path``.
            database_type: tag used in the file names; "nr" selects the
                strand-specific search.
            e: e-value cutoff passed to BLAST.
        '''
        if (database is None):
            print("Error: No database assigned!")
        else:
            if database_format:
                self._formatdb(database, data_type, args_srna.out_folder,
                               args_srna.blast_path, database_type)
            for prefix in prefixs:
                blast_file = os.path.join(
                    args_srna.out_folder, "blast_result_and_misc",
                    "_".join([database_type, "blast", prefix + ".txt"]))
                srna_file = "_".join([self.prefixs["basic"], prefix])
                out_file = os.path.join(
                    args_srna.out_folder,
                    "_".join(["tmp", database_type, prefix]))
                print("Running Blast of {0} in {1}".format(prefix, database))
                seq_file = os.path.join(args_srna.out_folder,
                                        "_".join(["sRNA_seq", prefix]))
                # NOTE(review): seq_file is joined with out_folder while
                # os.listdir() yields bare names, so this test looks like
                # it is always true and the sequences are extracted on
                # every call - confirm whether reuse was intended.
                if seq_file not in os.listdir(args_srna.out_folder):
                    self.helper.get_seq(
                        srna_file, os.path.join(self.fasta_path,
                                                prefix + ".fa"), seq_file)
                if database_type == "nr":
                    # Each strand is searched separately; the plus-strand
                    # result goes to a file in the working directory and
                    # is then merged into blast_file.
                    tmp_plus, tmp_minus = self._get_strand_fasta(
                        seq_file, args_srna.out_folder)
                    tmp_blast = os.path.join("tmp_blast.txt")
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    tmp_plus, tmp_blast, "plus")
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    tmp_minus, blast_file, "minus")
                    self.helper.merge_file(tmp_blast, blast_file)
                    os.remove(tmp_blast)
                    os.remove(tmp_plus)
                    os.remove(tmp_minus)
                else:
                    self._run_blast(args_srna.blast_path, program, database, e,
                                    seq_file, blast_file, "both")
                extract_blast(blast_file, srna_file, out_file,
                              out_file + ".csv", database_type)
                # The file written by extract_blast replaces the sRNA file.
                shutil.move(out_file, srna_file)

    def _class_srna(self, prefixs, args_srna):
        '''Classify the sRNA of every genome according to the filters.

        Nothing is done when no filter source (import info, sRNA or nr
        database, sORF, TSS, terminator, promoter table) is available.
        Otherwise a table is generated for every gff file found in the
        class folder after classification.
        '''
        if (args_srna.import_info is None) and (
                args_srna.srna_database is None) and (
                args_srna.nr_database is None) and (
                self.sorf_path is None) and (
                self.tss_path is None) and (
                self.term_path is None) and (
                args_srna.promoter_table is None):
            return
        for prefix in prefixs:
            print("classifying sRNA of {0}".format(prefix))
            class_gff = os.path.join(self.gff_output, "for_class", prefix)
            class_table = os.path.join(self.table_output, "for_class", prefix)
            # Both folders are checked twice, in this order.
            for _ in range(2):
                self.helper.check_make_folder(class_table)
                self.helper.check_make_folder(class_gff)
            stat_name = "_".join(["stat_sRNA_class", prefix + ".csv"])
            out_stat = os.path.join(self.stat_path, stat_name)
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            classify_srna(all_gff, class_gff, out_stat, args_srna)
            for srna in os.listdir(class_gff):
                class_file = os.path.join(class_gff, srna)
                out_table = os.path.join(class_table,
                                         srna.replace(".gff", ".csv"))
                merge_table = "_".join([self.prefixs["merge_table"], prefix])
                nr_table = "_".join([self.tmps["nr"], prefix + ".csv"])
                srna_table = "_".join([self.tmps["srna"], prefix + ".csv"])
                gen_srna_table(class_file, merge_table, nr_table, srna_table,
                               args_srna, out_table, self.term_path)

    def _get_best_result(self, prefixs, args_srna):
        '''Write the best sRNA gff and table of every genome.

        ``gen_best_srna`` produces the best gff from the gff of all
        sRNA; ``gen_srna_table`` then builds the matching table.
        '''
        for prefix in prefixs:
            gff_name = "_".join([prefix, "sRNA.gff"])
            csv_name = "_".join([prefix, "sRNA.csv"])
            best_gff = os.path.join(self.all_best["best_gff"], gff_name)
            best_table = os.path.join(self.all_best["best_table"], csv_name)
            all_gff = os.path.join(self.all_best["all_gff"], gff_name)
            gen_best_srna(all_gff, best_gff, args_srna)
            merge_table = "_".join([self.prefixs["merge_table"], prefix])
            nr_table = "_".join([self.tmps["nr"], prefix + ".csv"])
            srna_table = "_".join([self.tmps["srna"], prefix + ".csv"])
            gen_srna_table(
                os.path.join(self.all_best["best_gff"], gff_name),
                merge_table, nr_table, srna_table, args_srna, best_table,
                self.term_path)

    def _remove_file(self, args_srna):
        '''Remove the temporary files and folders of the sRNA detection.

        Cleans "tmp_" entries of the output folder, the temporary data
        of every supplied input folder, the merged wig folder (only
        when both wig types were given) and the leftover median file.
        '''
        helper = self.helper
        out_dir = args_srna.out_folder
        for kind in ("dir", "file"):
            helper.remove_all_content(out_dir, "tmp_", kind)
        helper.remove_tmp(args_srna.fastas)
        helper.remove_tmp(args_srna.gffs)
        helper.remove_tmp(self.gff_output)
        has_frag = args_srna.frag_wigs is not None
        has_tex = args_srna.tex_wigs is not None
        if has_frag:
            helper.remove_tmp(args_srna.frag_wigs)
        if has_tex:
            helper.remove_tmp(args_srna.tex_wigs)
        if has_frag and has_tex:
            shutil.rmtree(args_srna.merge_wigs)
        helper.remove_tmp(args_srna.trans)
        # Optional annotation inputs, in the original order.
        for attribute in ("tss_folder", "pro_folder", "sorf_file"):
            folder = getattr(args_srna, attribute)
            if folder is not None:
                helper.remove_tmp(folder)
        if "tmp_median" in os.listdir(out_dir):
            os.remove(os.path.join(out_dir, "tmp_median"))
        if self.term_path is not None:
            helper.remove_tmp(args_srna.terms)

    def _filter_srna(self, args_srna, prefixs):
        '''Run the filter steps that were requested for the sRNA.

        In order: secondary structure / energy (when "sec_str" is in the
        import info), blast against the nr database, comparison with
        sORF files, blast against the sRNA database.
        '''
        info = args_srna.import_info
        if (info is not None) and ("sec_str" in info):
            self._compute_2d_and_energy(args_srna, prefixs)
        if args_srna.nr_database is not None:
            self._blast(args_srna.nr_database, args_srna.nr_format, "prot",
                        args_srna, prefixs, "blastx", "nr", args_srna.e_nr)
        if self.sorf_path is not None:
            for prefix in prefixs:
                sorf_name = "_".join([prefix, "sORF.gff"])
                if sorf_name not in os.listdir(self.sorf_path):
                    continue
                out_dir = args_srna.out_folder
                tmp_srna = os.path.join(out_dir,
                                        "".join(["tmp_srna_sorf", prefix]))
                tmp_sorf = os.path.join(out_dir,
                                        "".join(["tmp_sorf_srna", prefix]))
                srna_sorf_comparison(
                    "_".join([self.prefixs["basic"], prefix]),
                    os.path.join(self.sorf_path,
                                 "_".join([prefix, "sORF.gff"])),
                    tmp_srna, tmp_sorf)
                # Only the sRNA side of the comparison is kept.
                os.remove(tmp_sorf)
                shutil.move(tmp_srna,
                            "_".join([self.prefixs["basic"], prefix]))
        if args_srna.srna_database is not None:
            self._blast(args_srna.srna_database, args_srna.srna_format, "nucl",
                        args_srna, prefixs, "blastn", "sRNA", args_srna.e_srna)

    def _import_info_format(self, import_info):
        new_info = []
        for info in import_info:
            info = info.lower()
            new_info.append(info)
        return new_info

    def _gen_table(self, prefixs, args_srna):
        '''Generate the table of all sRNA for every genome.'''
        for prefix in prefixs:
            out_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            merge_table = "_".join([self.prefixs["merge_table"], prefix])
            nr_table = "_".join([self.tmps["nr"], prefix + ".csv"])
            srna_table = "_".join([self.tmps["srna"], prefix + ".csv"])
            gen_srna_table(all_gff, merge_table, nr_table, srna_table,
                           args_srna, out_table, self.term_path)

    def _print_rank_all(self, prefixs):
        '''Call ``print_rank_all`` on the all/best table pair of every genome.'''
        for prefix in prefixs:
            csv_name = "_".join([prefix, "sRNA.csv"])
            all_table = os.path.join(self.all_best["all_table"], csv_name)
            best_table = os.path.join(self.all_best["best_table"], csv_name)
            print_rank_all(all_table, best_table)

    def _filter_min_utr(self, prefixs, min_utr):
        '''Filter out the low expressed UTR-derived sRNA of every genome.'''
        for prefix in prefixs:
            all_gff = os.path.join(self.all_best["all_gff"],
                                   "_".join([prefix, "sRNA.gff"]))
            all_table = os.path.join(self.all_best["all_table"],
                                     "_".join([prefix, "sRNA.csv"]))
            filter_utr(all_gff, all_table, min_utr)

    def _antisense(self, gffs, prefixs):
        '''Run the antisense detection on the all and best sRNA of every genome.

        ``gffs`` is the folder that holds ``<prefix>.gff``.
        '''
        for prefix in prefixs:
            csv_name = "_".join([prefix, "sRNA.csv"])
            gff_name = "_".join([prefix, "sRNA.gff"])
            all_table = os.path.join(self.all_best["all_table"], csv_name)
            best_table = os.path.join(self.all_best["best_table"], csv_name)
            all_gff = os.path.join(self.all_best["all_gff"], gff_name)
            best_gff = os.path.join(self.all_best["best_gff"], gff_name)
            targets = ((all_gff, all_table), (best_gff, best_table))
            for srna_gff, srna_table in targets:
                srna_antisense(srna_gff, srna_table,
                               os.path.join(gffs, prefix + ".gff"))

    def _blast_stat(self, stat_path, srna_tables):
        '''do statistics for blast result'''
        for srna_table in os.listdir(os.path.join(srna_tables, "best")):
            out_srna_blast = os.path.join(
                stat_path, "stat_" + srna_table.replace(".csv", "_blast.csv"))
        blast_class(os.path.join(srna_tables, "best", srna_table),
                    out_srna_blast)

    def _compare_term_promoter(self, out_table, prefix, args_srna):
        '''Compare the sRNA of one genome with terminators and promoters.

        Each comparison runs only when its input is available:
        ``self.term_path`` for terminators and
        ``args_srna.promoter_table`` for promoters.
        '''
        gff_name = "_".join([prefix, "sRNA.gff"])
        if self.term_path is not None:
            srna_gff = os.path.join(self.all_best["all_gff"], gff_name)
            term_gff = os.path.join(self.term_path,
                                    "_".join([prefix, "term.gff"]))
            compare_srna_term(srna_gff, out_table, term_gff,
                              args_srna.fuzzy_b, args_srna.fuzzy_a)
        if args_srna.promoter_table is not None:
            srna_gff = os.path.join(self.all_best["all_gff"], gff_name)
            compare_srna_promoter(srna_gff, out_table, args_srna)

    def run_srna_detection(self, args_srna):
        '''Run the complete sRNA detection workflow.

        Steps: validate the inputs, parse the transcripts, detect the
        sRNA of every genome, apply the requested filters, compare with
        terminators/promoters, generate the tables, classify, pick the
        best candidates, rank them, optionally do the blast statistics
        and finally remove the temporary files.

        Args:
            args_srna: argument container of the sRNA subcommand. Its
                ``import_info`` list is replaced by a lower-cased copy.
        '''
        # Lower-case the import info before anything reads it:
        # _check_necessary_file already tests it for "sec_str".
        if args_srna.import_info is not None:
            args_srna.import_info = self._import_info_format(
                args_srna.import_info)
        self._check_necessary_file(args_srna)
        self.multiparser.parser_gff(args_srna.trans, "transcript")
        self.multiparser.combine_gff(args_srna.gffs, self.tran_path, None,
                                     "transcript")
        prefixs = self._run_program(args_srna)
        self._filter_srna(args_srna, prefixs)
        for prefix in prefixs:
            shutil.copyfile(
                "_".join([self.prefixs["basic"], prefix]),
                os.path.join(self.all_best["all_gff"],
                             "_".join([prefix, "sRNA.gff"])))
            self._compare_term_promoter(
                "_".join([self.prefixs["merge_table"], prefix]), prefix,
                args_srna)
        self._gen_table(prefixs, args_srna)
        self._class_srna(prefixs, args_srna)
        self._get_best_result(prefixs, args_srna)
        self._print_rank_all(prefixs)
        # import_info may be None even when an sRNA database was given;
        # a membership test on None would raise TypeError.
        if (args_srna.srna_database is not None) and (
                args_srna.import_info is not None) and (
                    "blast_srna" in args_srna.import_info):
            self._blast_stat(self.stat_path, self.table_output)
        self._remove_file(args_srna)
示例#9
0
 def __init__(self, args):
     """Keep the arguments and build the path and helper objects."""
     project_root = args.project_path
     self._args = args
     self._paths = Paths(project_root)
     self.args_container = ArgsContainer()
     self.helper = Helper()
示例#10
0
 def __init__(self, args):
     """Keep the arguments and build the path and helper objects."""
     project_root = args.project_path
     self._args = args
     self._paths = Paths(project_root)
     self.args_container = ArgsContainer()
     self.helper = Helper()
示例#11
0
class Controller(object):

    """Manage the actions of the subcommands.

    The Controller takes care of providing the arguments, like path
    names, and the parallel processing of tasks.

    """
    def __init__(self, args):
        """Create an instance.

        Stores the parsed command-line arguments and builds the helper
        objects: a ``Paths`` instance created from ``args.project_path``,
        an ``ArgsContainer`` and a ``Helper``.
        """
        self._args = args
        self._paths = Paths(args.project_path)
        self.args_container = ArgsContainer()
        self.helper = Helper()

    def check_folder(self, folders):
        """Abort unless every entry of *folders* is a non-empty directory.

        Args:
            folders: iterable of directory paths. ``None`` entries are
                errors because every folder checked here is required.

        Raises:
            SystemExit: after printing an error message when an entry is
                ``None``, is not an existing directory, or is an empty
                directory.
        """
        for folder in folders:
            if folder is None:
                print("Error: There is wrong path of folder assigned, "
                      "please check it!!")
                sys.exit()
            # isdir() instead of exists(): a path pointing at a regular
            # file must produce the error message, not reach os.listdir()
            # and die with NotADirectoryError.
            if not os.path.isdir(folder):
                print("Error: There is wrong folder, please check it!!")
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: There is empty folder, "
                      "please check it!!")
                sys.exit()

    def check_parameter(self, paras, names):
        """Exit with an error when a required parameter is missing.

        ``paras`` and ``names`` are parallel sequences: when a value in
        ``paras`` is ``None`` the name at the same position is reported
        and the program exits via ``sys.exit()``.
        """
        for position, value in enumerate(paras):
            if value is not None:
                continue
            message = "Error: {0} is wrong, " "please check it!!"
            print(message.format(names[position]))
            sys.exit()

    def check_no_require_folder(self, folders):
        """Validate optional folders: ``None`` is fine, anything else must
        be a non-empty directory.

        Args:
            folders: iterable of directory paths or ``None`` values.

        Raises:
            SystemExit: after printing an error message when a given
                entry is not an existing directory or is an empty
                directory.
        """
        for folder in folders:
            if folder is None:
                # Optional input that was not supplied.
                continue
            # isdir() instead of exists(): a path pointing at a regular
            # file must produce the error message, not reach os.listdir()
            # and die with NotADirectoryError.
            if not os.path.isdir(folder):
                print("Error: There is wrong folder, "
                      "please check it!!")
                sys.exit()
            if len(os.listdir(folder)) == 0:
                print("Error: There is empty folder, "
                      "please check it!!")
                sys.exit()

    def check_file(self, files, names, require):
        """Check that the given paths point to existing files.

        ``files`` and ``names`` are parallel sequences. A ``None`` entry
        is an error only when ``require`` is true; any entry that is set
        must be an existing regular file. On failure the matching name
        is printed and the program exits via ``sys.exit()``.
        """
        for position, path in enumerate(files):
            if path is None:
                if require:
                    print("Error: {0} is wrong, "
                          "please check it!!".format(names[position]))
                    sys.exit()
                continue
            if os.path.isfile(path):
                continue
            print("Error: There is wrong path of {0}, "
                  "please check it!!".format(names[position]))
            sys.exit()

    def create_project(self, version):
        """Create a new project.

        Creates the root folder, the "root" and "get_target_fasta"
        subfolder sets and the version file, then reports the created
        project path on stdout.
        """
        project_creator.create_root_folder(self._args.project_path)
        for folder_set in ("root", "get_target_fasta"):
            project_creator.create_subfolders(
                self._paths.required_folders(folder_set))
        project_creator.create_version_file(self._paths.version_path, version)
        report = "Created folder \"%s\" and required subfolders.\n"
        sys.stdout.write(report % (self._args.project_path))

    def get_input(self):
        """Download required files from website."""
        print("Running get input files...")
        if self._args.FTP_path is None:
            print("Error: Please assign the path for downloading the data!!")
            sys.exit()
        if self._args.for_target:
            annotation_folder = self._paths.tar_annotation_folder
            fasta_folder = self._paths.tar_fasta_folder
        else:
            annotation_folder = self._paths.ref_annotation_folder
            fasta_folder = self._paths.ref_fasta_folder
        self.helper.check_make_folder(annotation_folder)
        self.helper.check_make_folder(fasta_folder)
        if self._args.ref_gff is True:
            get_file(self._args.FTP_path, annotation_folder,
                     "gff", self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder,
                     "_genomic.gff.gz", self._args.for_target)
        if self._args.ref_fasta is True:
            get_file(self._args.FTP_path, fasta_folder,
                     "fna", self._args.for_target)
            get_file(self._args.FTP_path, fasta_folder,
                     "_genomic.fna.gz", self._args.for_target)
        if self._args.ref_gbk is True:
            get_file(self._args.FTP_path, annotation_folder,
                     "gbk", self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder,
                     "gbff", self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder,
                     "_genomic.gbff.gz", self._args.for_target)
        if self._args.ref_ptt is True:
            get_file(self._args.FTP_path, annotation_folder,
                     "ptt", self._args.for_target)
        if self._args.ref_rnt is True:
            get_file(self._args.FTP_path, annotation_folder,
                     "rnt", self._args.for_target)
        if self._args.convert_embl is True:
            annotation_files = os.listdir(annotation_folder)
            if len(annotation_files) == 0:
                sys.stdout.write("No gbk files!!\n")
            else:
                Converter().convert_gbk2embl(annotation_folder)

    def get_target_fasta(self):
        """Get target fasta"""
        print("Running get target fasta...")
        self.check_parameter([self._args.output_format], ["--output_format"])
        self.check_folder([self._args.ref_fasta_folder])
        self.check_file([self._args.mutation_table], "--mutation_table", True)
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        outputs = self._args.output_format.split(",")
        for output in outputs:
            output = output.strip()
        target = TargetFasta(self._paths.tar_fasta_folder,
                             self._args.ref_fasta_folder)
        target.get_target_fasta(
                self._args.mutation_table, self._paths.tar_fasta_folder,
                self._args.ref_fasta_folder, outputs)

    def ratt(self):
        """Run RATT to transfer annotation file from reference to target."""
        print("Running annotation transfer...")
        if (self._args.transfer_type != "Strain") and (
                self._args.transfer_type != "Assembly") and (
                self._args.transfer_type != "Species") and (
                self._args.transfer_type != "Assembly.Repetitive") and (
                self._args.transfer_type != "Strain.Repetitive") and (
                self._args.transfer_type != "Species.Repetitive") and (
                self._args.transfer_type != "Multiple") and (
                self._args.transfer_type != "Free"):
            print("Error: please assign correct --transfer_type!!")
            sys.exit()
        self.check_folder([self._args.ref_embl_gbk, self._args.target_fasta,
                           self._args.ref_fasta])
        self.check_parameter([self._args.element, self._args.compare_pair],
                             ["--element", "--compare_pair"])
        project_creator.create_subfolders(
            self._paths.required_folders("annotation_transfer"))
        args_ratt = self.args_container.container_ratt(
            self._args.RATT_path, self._args.element, self._args.transfer_type,
            self._args.ref_embl_gbk, self._args.target_fasta,
            self._args.ref_fasta, self._paths.ratt_folder,
            self._args.convert_to_gff_rnt_ptt,
            self._paths.tar_annotation_folder, self._args.compare_pair)
        ratt = RATT(args_ratt)
        ratt.annotation_transfer(args_ratt)

    def tsspredator(self):
        """Run TSSpredator for predicting TSS candidates.

        Validates the input folders, parameters and the optional manual
        file, selects the output folder from
        ``self._args.compute_program`` ("tss" or "processing_site",
        compared case-insensitively), then builds the argument container
        and runs ``TSSpredator``. Any other program name prints an error
        and exits.
        """
        self.check_folder([self._args.fasta_folder,
                           self._args.annotation_folder,
                           self._args.wig_folder])
        self.check_parameter([self._args.lib, self._args.output_prefix],
                             ["--lib", "--output_prefix"])
        self.check_no_require_folder([self._args.compare_transcript_assembly,
                                      self._args.reference_gff_folder])
        self.check_file([self._args.merge_manual], ["--merge_manual"], False)
        if self._args.compute_program.lower() == "tss":
            print("Running TSS prediction...")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.compute_program.lower() == "processing_site":
            print("Running processing site prediction...")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error:No such program!!!!")
            sys.exit()
        # The container is filled positionally; the argument order below
        # must match the signature of container_tsspredator.
        args_tss = self.args_container.container_tsspredator(
            self._args.TSSpredator_path, self._args.compute_program,
            self._args.fasta_folder, self._args.annotation_folder,
            self._args.wig_folder, self._args.lib,
            self._args.output_prefix,
            self._args.height, self._args.height_reduction,
            self._args.factor, self._args.factor_reduction,
            self._args.base_height, self._args.enrichment_factor,
            self._args.processing_factor, self._args.replicate_match,
            out_folder, self._args.statistics,
            self._args.validate_gene, self._args.merge_manual,
            self._args.compare_transcript_assembly, self._args.fuzzy,
            self._args.utr_length, self._args.cluster,
            self._args.length, self._args.re_check_orphan,
            self._args.overlap_feature, self._args.reference_gff_folder,
            self._args.remove_low_expression)
        tsspredator = TSSpredator(args_tss)
        tsspredator.run_tsspredator(args_tss)

    def optimize(self):
        """Optimize the parameters of TSSpredator.

        Validates the inputs, selects the output folder from
        ``self._args.program`` ("tss" or "processing_site", compared
        case-insensitively), then builds the argument container and
        calls ``optimize_tss``. Any other program name prints an error
        and exits.
        """
        # NOTE(review): fasta_file and annotation_file are validated with
        # check_folder (which lists the path as a directory) - confirm
        # that these arguments are really folders.
        self.check_folder([self._args.wig_folder, self._args.fasta_file,
                           self._args.annotation_file])
        self.check_file([self._args.manual],
                        ["--manual"], True)
        self.check_parameter([self._args.strain_name, self._args.lib,
                              self._args.output_prefix],
                             ["--strain_name", "--lib", "--output_prefix"])
        if self._args.program.lower() == "tss":
            print("Running optimization of TSS prediction...")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.program.lower() == "processing_site":
            print("Running optimization of processing site prediction...")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error:No such program!!!!")
            sys.exit()
        # The container is filled positionally; the argument order below
        # must match the signature of container_optimize.
        args_ops = self.args_container.container_optimize(
            self._args.TSSpredator_path, self._args.fasta_file,
            self._args.annotation_file, self._args.wig_folder,
            self._args.manual, out_folder,
            self._args.strain_name, self._args.max_height,
            self._args.max_height_reduction, self._args.max_factor,
            self._args.max_factor_reduction, self._args.max_base_height,
            self._args.max_enrichment_factor, self._args.max_processing_factor,
            self._args.utr_length, self._args.lib,
            self._args.output_prefix, self._args.cluster,
            self._args.length, self._args.core,
            self._args.program, self._args.replicate_match,
            self._args.steps)
        optimize_tss(args_ops)

    def color(self):
        """color the screenshots"""
        print("Running png files coloring...")
        self.check_parameter([self._args.track_number], ["--track_numer"])
        self.check_folder([self._args.screenshot_folder])
        color = ColorPNG()
        color.generate_color_png(
                self._args.track_number, self._args.screenshot_folder,
                self._args.ImageMagick_covert_path)

    def terminator(self):
        """Run TransTermHP for detecting terminators.

        Validates the required and optional input folders, creates the
        "terminator" subfolders, then builds the argument container and
        runs ``Terminator``.
        """
        print("Running terminator prediction...")
        # NOTE(review): a missing TransTermHP_path only prints a message;
        # execution continues afterwards - confirm whether this should
        # abort like the other validation failures.
        if self._args.TransTermHP_path is None:
            print("Please assign the folder where you install TransTermHP.")
        self.check_folder([self._args.fasta_folder,
                           self._args.annotation_folder,
                           self._args.transcript_folder])
        self.check_no_require_folder([self._args.sRNA])
        project_creator.create_subfolders(
            self._paths.required_folders("terminator"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_terminator.
        args_term = self.args_container.container_terminator(
            self._args.TransTermHP_path, self._args.expterm_path,
            self._args.RNAfold_path,
            self._paths.transterm_folder, self._args.fasta_folder,
            self._args.annotation_folder, self._args.transcript_folder,
            self._args.sRNA, self._args.statistics,
            self._args.tex_wig_folder, self._args.frag_wig_folder,
            self._args.decrease, self._args.highest_coverage,
            self._args.fuzzy_detect_coverage,
            self._args.fuzzy_within_transcript,
            self._args.fuzzy_downstream_transcript,
            self._args.fuzzy_within_gene,
            self._args.fuzzy_downstream_gene, self._paths.transtermhp_folder,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.tex_notex, self._args.replicates_tex,
            self._args.replicates_frag, self._args.table_best,
            self._args.min_loop_length, self._args.max_loop_length,
            self._args.min_stem_length,
            self._args.max_stem_length, self._args.min_U_tail_length,
            self._args.miss_rate, self._args.range_U_tail)
        terminator = Terminator(args_term)
        terminator.run_terminator(args_term)

    def transcript(self):
        """Run transcriptome assembly.

        Validates the annotation folder (required) and the optional
        comparison folders, creates the "transcript_assembly"
        subfolders, then builds the argument container and runs
        ``TranscriptAssembly``.
        """
        print("Running transcript assembly...")
        self.check_folder([self._args.annotation_folder])
        self.check_no_require_folder([
            self._args.compare_TSS, self._args.compare_genome_annotation,
            self._args.terminator_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("transcript_assembly"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_transcript.
        args_tran = self.args_container.container_transcript(
            self._args.frag_wig_path, self._args.tex_wig_path,
            self._args.tex_notex,
            self._args.length, self._args.annotation_folder,
            self._args.height, self._args.width,
            self._args.tolerance, self._args.tolerance_coverage,
            self._args.replicates_tex, self._args.replicates_frag,
            self._paths.transcript_assembly_output_folder,
            self._args.compare_TSS, self._args.compare_genome_annotation,
            self._args.TSS_fuzzy, self._args.Tex_treated_libs,
            self._args.fragmented_libs, self._args.compare_feature_genome,
            self._args.table_best, self._args.terminator_folder,
            self._args.fuzzy_term)
        transcript = TranscriptAssembly(args_tran)
        transcript.run_transcript_assembly(args_tran)

    def utr_detection(self):
        """Run UTR detection."""
        print("Running UTR detection...")
        self.check_folder([self._args.annotation_folder,
                           self._args.transcript_assembly_folder,
                           self._args.TSS_folder])
        self.check_no_require_folder([self._args.terminator_folder])
        project_creator.create_subfolders(self._paths.required_folders("utr"))
        args_utr = self.args_container.container_utr(
                self._args.TSS_folder, self._args.annotation_folder,
                self._args.transcript_assembly_folder,
                self._args.terminator_folder,
                self._args.terminator_fuzzy, self._paths.utr_folder,
                self._args.TSS_source, self._args.base_5UTR,
                self._args.UTR_length, self._args.base_3UTR)
        utr = UTRDetection(args_utr)
        utr.run_utr_detection(args_utr)

    def srna_detection(self):
        """Run the sRNA detection.

        Validates the input folders and the optional promoter table,
        creates the "srna" subfolders, then builds the argument
        container and runs ``sRNADetection``. The TSS and processing
        site folders are required only when ``UTR_derived_sRNA`` is set;
        otherwise they are optional.
        """
        print("Running sRNA prediction...")
        self.check_folder([self._args.annotation_folder,
                           self._args.transcript_assembly_folder])
        self.check_no_require_folder([self._args.fasta_folder,
                                      self._args.sORF,
                                      self._args.terminator_folder])
        self.check_file([self._args.promoter_table],
                        ["--promoter_table"], False)
        if self._args.UTR_derived_sRNA:
            self.check_folder([self._args.TSS_folder,
                               self._args.processing_site_folder])
        else:
            self.check_no_require_folder([self._args.TSS_folder,
                                          self._args.processing_site_folder])
        project_creator.create_subfolders(self._paths.required_folders("srna"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_srna.
        args_srna = self.args_container.container_srna(
                self._args.Vienna_folder, self._args.Vienna_utils,
                self._args.blast_plus_folder,
                self._args.ps2pdf14_path, self._paths.srna_folder,
                self._args.UTR_derived_sRNA, self._args.annotation_folder,
                self._args.TSS_folder, self._args.transcript_assembly_folder,
                self._args.TSS_intergenic_fuzzy, self._args.TSS_5UTR_fuzzy,
                self._args.TSS_3UTR_fuzzy, self._args.TSS_interCDS_fuzzy,
                self._args.import_info, self._args.tex_wig_folder,
                self._args.frag_wig_folder, self._args.processing_site_folder,
                self._args.fasta_folder, self._args.mountain_plot,
                self._args.nr_format, self._args.srna_format,
                self._args.sRNA_database_path, self._args.nr_database_path,
                self._args.cutoff_energy,
                self._args.run_intergenic_TEX_coverage,
                self._args.run_intergenic_noTEX_coverage,
                self._args.run_intergenic_fragmented_coverage,
                self._args.run_antisense_TEX_coverage,
                self._args.run_antisense_noTEX_coverage,
                self._args.run_antisense_fragmented_coverage,
                self._args.intergenic_tolerance,
                self._args.run_utr_TEX_coverage,
                self._args.run_utr_noTEX_coverage,
                self._args.run_utr_fragmented_coverage,
                self._args.max_length, self._args.min_length,
                self._args.tex_notex_libs, self._args.frag_libs,
                self._args.replicates_tex, self._args.replicates_frag,
                self._args.tex_notex, self._args.blast_e_nr,
                self._args.blast_e_srna, self._args.detect_sRNA_in_CDS,
                self._args.table_best, self._args.decrease_intergenic,
                self._args.decrease_utr, self._args.fuzzy_intergenic,
                self._args.fuzzy_utr, self._args.cutoff_nr_hit,
                self._args.sORF, self._args.best_with_all_sRNAhit,
                self._args.best_without_sORF_candidate,
                self._args.overlap_percent_CDS,
                self._args.terminator_folder,
                self._args.terminator_fuzzy_in_CDS,
                self._args.terminator_fuzzy_out_CDS,
                self._args.best_with_terminator,
                self._args.ignore_hypothetical_protein, self._args.TSS_source,
                self._args.min_utr_coverage, self._args.promoter_table,
                self._args.best_with_promoter,
                self._args.ranking_time_promoter, self._args.promoter_name)
        srna = sRNADetection(args_srna)
        srna.run_srna_detection(args_srna)

    def sorf_detection(self):
        """Run the sORF detection.

        Validates the required folders (transcript assembly, annotation,
        fasta) and the optional sRNA and TSS folders, creates the "sorf"
        subfolders, then builds the argument container and runs
        ``sORFDetection``.
        """
        print("Running sORF prediction...")
        self.check_folder([self._args.transcript_assembly_folder,
                           self._args.annotation_folder,
                           self._args.fasta_folder])
        self.check_no_require_folder([
            self._args.sRNA_folder, self._args.TSS_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("sorf"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_sorf.
        args_sorf = self.args_container.container_sorf(
            self._paths.sorf_folder, self._args.UTR_derived_sORF,
            self._args.transcript_assembly_folder,
            self._args.annotation_folder,
            self._args.TSS_folder, self._args.utr_length,
            self._args.min_length, self._args.max_length,
            self._args.tex_wig_folder, self._args.frag_wig_folder,
            self._args.cutoff_intergenic_coverage,
            self._args.cutoff_antisense_coverage,
            self._args.cutoff_5utr_coverage,
            self._args.cutoff_3utr_coverage,
            self._args.cutoff_interCDS_coverage,
            self._args.fasta_folder, self._args.tex_notex_libs,
            self._args.frag_libs, self._args.tex_notex,
            self._args.replicates_tex, self._args.replicates_frag,
            self._args.table_best, self._args.sRNA_folder,
            self._args.start_codon, self._args.stop_codon,
            self._args.cutoff_background, self._args.fuzzy_rbs,
            self._args.rbs_not_after_TSS, self._args.print_all_combination,
            self._args.best_no_sRNA, self._args.best_no_TSS,
            self._args.ignore_hypothetical_protein,
            self._args.min_rbs_distance, self._args.max_rbs_distance)
        sorf = sORFDetection(args_sorf)
        sorf.run_sorf_detection(args_sorf)

    def meme(self):
        """promoter detectopn"""
        print("Running promoter detection...")
        self.check_folder([self._args.TSS_folder, self._args.fasta_folder])
        if not self._args.TSS_source:
            self.check_folder([self._args.annotation_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("promoter"))
        args_pro = self.args_container.container_promoter(
            self._args.MEME_path,
            self._paths.promoter_output_folder, self._args.tex_libs,
            self._args.TSS_folder, self._args.fasta_folder,
            self._args.num_motif, self._args.nt_before_TSS,
            self._args.motif_width, self._args.TSS_source,
            self._args.tex_wig_path, self._args.annotation_folder,
            self._args.combine_all, self._args.e_value)
        meme = MEME(args_pro)
        meme.run_meme(args_pro)

    def operon(self):
        """operon detection"""
        print("Running operon detection...")
        self.check_folder([self._args.TSS_folder, self._args.annotation_folder,
                           self._args.transcript_folder,
                           self._args.UTR5_folder, self._args.UTR3_folder])
        self.check_no_require_folder([self._args.term_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("operon"))
        args_op = self.args_container.container_operon(
            self._args.TSS_folder, self._args.annotation_folder,
            self._args.transcript_folder, self._args.UTR5_folder,
            self._args.UTR3_folder, self._args.term_folder,
            self._args.TSS_fuzzy, self._args.term_fuzzy,
            self._args.min_length, self._args.statistics,
            self._paths.operon_output_folder, self._args.combine_gff,
            self._paths.operon_statistics_folder)
        operon = OperonDetection(args_op)
        operon.run_operon(args_op)

    def circrna(self):
        """circRNA detection"""
        print("Running circular RNA prediction...")
        self.check_folder([self._args.fasta_path, self._args.annotation_path])
        self.check_no_require_folder([self._args.tex_bam_path,
                                      self._args.fragmented_bam_path])
        project_creator.create_subfolders(
            self._paths.required_folders("circrna"))
        args_circ = self.args_container.container_circrna(
            self._args.align, self._args.process, self._args.fasta_path,
            self._args.annotation_path, self._args.tex_bam_path,
            self._args.fragmented_bam_path,
            self._paths.read_folder, self._paths.circrna_stat_folder,
            self._args.support_reads,
            self._args.segemehl_folder, self._args.samtools_path,
            self._args.start_ratio, self._args.end_ratio,
            self._args.ignore_hypothetical_protein,
            self._paths.circrna_output_folder)
        circ = CircRNADetection(args_circ)
        circ.run_circrna(args_circ)

    def goterm(self):
        """Go term discovery"""
        print("Running GO term mapping...")
        self.check_folder([self._args.annotation_path])
        self.check_no_require_folder([self._args.transcript_path])
        self.check_file([self._args.UniProt_id, self._args.go_obo,
                         self._args.goslim_obo],
                        ["--UniProt_id", "--go.obo", "--goslim_obo"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("go_term"))
        args_go = self.args_container.container_goterm(
            self._args.annotation_path,
            self._paths.goterm_output_folder, self._args.UniProt_id,
            self._args.go_obo, self._args.goslim_obo,
            self._args.transcript_path)
        goterm = GoTermFinding(args_go)
        goterm.run_go_term(args_go)

    def srna_target(self):
        """Run the sRNA target prediction.

        Validates the fasta, sRNA and annotation folders, creates the
        "srna_target" subfolders, then builds the argument container
        and runs ``sRNATargetPrediction``.
        """
        print("Running sRNA target prediction...")
        self.check_folder([self._args.fasta_path, self._args.sRNA_path,
                           self._args.annotation_path])
        project_creator.create_subfolders(
            self._paths.required_folders("srna_target"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_srna_target.
        args_tar = self.args_container.container_srna_target(
            self._args.Vienna_folder, self._args.annotation_path,
            self._args.fasta_path, self._args.sRNA_path,
            self._args.query_sRNA, self._args.program,
            self._args.interaction_length, self._args.window_size_target,
            self._args.span_target, self._args.window_size_srna,
            self._args.span_srna,
            self._args.unstructured_region_RNAplex_target,
            self._args.unstructured_region_RNAplex_srna,
            self._args.unstructured_region_RNAup, self._args.energy_threshold,
            self._args.duplex_distance, self._args.top,
            self._paths.starget_output_folder, self._args.process_rnaplex,
            self._args.process_rnaup, self._args.continue_rnaup,
            self._args.potential_target_start, self._args.potential_target_end,
            self._args.target_feature)
        srnatarget = sRNATargetPrediction(args_tar)
        srnatarget.run_srna_target_prediction(args_tar)

    def snp(self):
        """SNP transcript detection"""
        print("Running SNP/mutations calling...")
        self.check_folder([self._args.fasta_path])
        if (self._args.bam_type != "target") and (
                self._args.bam_type != "reference"):
            print("Error: please assign \"target\" or"
                  " \"reference\" to --bam_type!!")
            sys.exit()
        if (self._args.ploidy != "haploid") and (
                self._args.ploidy != "diploid"):
            print("Error: please assign \"haploid\" or"
                  " \"diploid\" to --chromosome_type!!")
        project_creator.create_subfolders(self._paths.required_folders("snp"))
        args_snp = self.args_container.container_snp(
            self._args.samtools_path, self._args.bcftools_path,
            self._args.bam_type,
            self._args.program, self._args.fasta_path,
            self._args.tex_bam_path, self._args.frag_bam_path,
            self._args.quality, self._args.read_depth,
            self._paths.snp_output_folder, self._args.indel_fraction,
            self._args.ploidy)
        snp = SNPCalling(args_snp)
        snp.run_snp_calling(args_snp)

    def ppi(self):
        """PPI network retrieve"""
        print("Running protein-protein interaction networks prediction...")
        self.check_folder([self._args.gff_path])
        self.check_parameter([self._args.proteinID_strains,
                              self._args.species_STRING],
                             ["--proteinID_strains", "--species_STRING"])
        project_creator.create_subfolders(
            self._paths.required_folders("ppi_network"))
        args_ppi = self.args_container.container_ppi(
            self._args.gff_path, self._args.proteinID_strains,
            self._args.without_strain_pubmed, self._args.species_STRING,
            self._args.score, self._paths.ppi_output_folder,
            self._args.node_size, self._args.query)
        ppi = PPINetwork(self._paths.ppi_output_folder)
        ppi.retrieve_ppi_network(args_ppi)

    def sublocal(self):
        """Subcellular Localization prediction"""
        print("Running subcellular localization prediction...")
        self.check_folder([self._args.gff_path, self._args.fasta_path])
        self.check_no_require_folder([self._args.transcript_path])
        if (self._args.bacteria_type != "positive") and (
                self._args.bacteria_type != "negative"):
            print("Error: please assign \"positive\" or"
                  " \"negative\" to --bacteria_type!!")
            sys.exit()
        project_creator.create_subfolders(
            self._paths.required_folders("subcellular_localization"))
        args_sub = self.args_container.container_sublocal(
            self._args.Psortb_path, self._args.gff_path,
            self._args.fasta_path, self._args.bacteria_type,
            self._args.difference_multi, self._args.merge_to_gff,
            self._paths.sublocal_output_folder, self._args.transcript_path)
        sublocal = SubLocal(args_sub)
        sublocal.run_sub_local(args_sub)

    def ribos(self):
        """Run the riboswitch prediction.

        Validates the gff, fasta, TSS and transcript folders and the
        required riboswitch-ID and Rfam files, creates the "riboswitch"
        subfolders, then builds the argument container and runs
        ``Ribos``.
        """
        print("Running riboswitch prediction...")
        self.check_folder([self._args.gff_path, self._args.fasta_path,
                           self._args.tss_path, self._args.transcript_path])
        self.check_file([self._args.riboswitch_ID, self._args.Rfam],
                        ["--riboswitch_ID", "--Rfam"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("riboswitch"))
        # The container is filled positionally; the argument order below
        # must match the signature of container_ribos.
        args_ribo = self.args_container.container_ribos(
            self._args.infernal_path, self._args.riboswitch_ID,
            self._args.gff_path, self._args.fasta_path,
            self._args.tss_path, self._args.transcript_path,
            self._args.Rfam, self._paths.ribos_output_folder,
            self._args.e_value,
            self._args.output_all, self._paths.database_folder,
            self._args.fuzzy, self._args.start_codon,
            self._args.min_dist_rbs, self._args.max_dist_rbs,
            self._args.fuzzy_rbs, self._args.UTR_length)
        ribos = Ribos(args_ribo)
        ribos.run_ribos(args_ribo)

    def screen(self):
        """generate screenshot"""
        print("Running screenshot generating...")
        self.check_file([self._args.main_gff, self._args.fasta],
                        ["--main_gff", "--fasta"], True)
        if self._args.side_gffs is not None:
            for gff in (self._args.side_gffs.split(",")):
                gff = gff.strip()
                if not os.path.isfile(gff):
                    print("Error: The --side_gffs no exist!!")
                    sys.exit()
        if self._args.output_folder is None:
            print("Error: please assign --output_folder!!")
            sys.exit()
        if (self._args.present != "expand") and (
                self._args.present != "collapse") and (
                self._args.present != "squish"):
            print("Error: please assign \"expand\" or "
                  "\"collapse\" or \"squish\" to --present!!")
            sys.exit()
        args_sc = self.args_container.container_screen(
            self._args.main_gff, self._args.side_gffs,
            self._args.fasta, self._args.frag_wig_folder,
            self._args.tex_wig_folder, self._args.height,
            self._args.tex_libs, self._args.frag_libs,
            self._args.present, self._args.output_folder)
        screen = Screen(args_sc)
        screen.screenshot(args_sc)
示例#12
0
class Controller(object):
    """Manage the actions of the subcommands.

    The Controller takes care of providing the arguments, like path
    names, and the parallel processing of tasks.

    """
    def __init__(self, args):
        """Keep the parsed arguments and build the shared helper objects.

        args -- argument namespace; its ``project_path`` attribute is
                passed to Paths.
        """
        self._args = args
        project_root = args.project_path
        self._paths = Paths(project_root)
        self.args_container = ArgsContainer()
        self.helper = Helper()

    def check_folder(self, folders):
        """Ensure every required folder is an existing, non-empty directory.

        folders -- iterable of folder paths; a ``None`` entry counts as
                   a missing folder.

        Prints an error and leaves through ``sys.exit()`` on the first
        entry that is None, is not a directory, or contains nothing.
        """
        for folder in folders:
            if folder is None:
                print("Error: There is wrong path of folder assigned, "
                      "please check it!!")
                sys.exit()
            # isdir instead of exists: a regular file given where a folder
            # is expected must be reported here, not crash in os.listdir.
            if not os.path.isdir(folder):
                print("Error: There is wrong folder, please check it!!")
                sys.exit()
            if not os.listdir(folder):
                print("Error: There is empty folder, "
                      "please check it!!")
                sys.exit()

    def check_parameter(self, paras, names):
        """Abort when any required parameter value is None.

        paras -- parameter values to inspect.
        names -- option names, index-aligned with ``paras``; used only
                 in the error message.
        """
        for position, value in enumerate(paras):
            if value is not None:
                continue
            print("Error: {0} is wrong, "
                  "please check it!!".format(names[position]))
            sys.exit()

    def check_no_require_folder(self, folders):
        """Validate optional folders.

        folders -- iterable of folder paths; ``None`` entries are
                   skipped because the folder is optional.

        Every non-None entry must be an existing, non-empty directory;
        otherwise an error is printed and ``sys.exit()`` is called.
        """
        for folder in folders:
            if folder is None:
                continue
            # isdir instead of exists: a regular file given where a folder
            # is expected must be reported here, not crash in os.listdir.
            if not os.path.isdir(folder):
                print("Error: There is wrong folder, " "please check it!!")
                sys.exit()
            if not os.listdir(folder):
                print("Error: There is empty folder, "
                      "please check it!!")
                sys.exit()

    def check_file(self, files, names, require):
        """Check that the given paths point to existing files.

        files   -- file paths to inspect (entries may be None).
        names   -- option names, index-aligned with ``files``; used only
                   in the error messages.
        require -- when true a None entry is an error, otherwise None
                   entries are skipped.

        Prints an error and leaves through ``sys.exit()`` on the first
        failing entry.
        """
        for position, path in enumerate(files):
            if path is None:
                if require:
                    print("Error: {0} is wrong, "
                          "please check it!!".format(names[position]))
                    sys.exit()
                continue
            if os.path.isfile(path):
                continue
            print("Error: There is wrong path of {0}, "
                  "please check it!!".format(names[position]))
            sys.exit()

    def create_project(self, version):
        """Set up the folder layout of a new project.

        version -- value handed to ``project_creator.create_version_file``
                   together with the version path.
        """
        root = self._args.project_path
        project_creator.create_root_folder(root)
        for section in ("root", "get_target_fasta"):
            project_creator.create_subfolders(
                self._paths.required_folders(section))
        project_creator.create_version_file(self._paths.version_path, version)
        message = "Created folder \"%s\" and required subfolders.\n" % (
            self._args.project_path)
        sys.stdout.write(message)

    def get_input(self):
        """Download required files from website."""
        print("Running get input files...")
        if self._args.FTP_path is None:
            print("Error: Please assign the path for downloading the data!!")
            sys.exit()
        if self._args.for_target:
            annotation_folder = self._paths.tar_annotation_folder
            fasta_folder = self._paths.tar_fasta_folder
        else:
            annotation_folder = self._paths.ref_annotation_folder
            fasta_folder = self._paths.ref_fasta_folder
        self.helper.check_make_folder(annotation_folder)
        self.helper.check_make_folder(fasta_folder)
        if self._args.ref_gff is True:
            get_file(self._args.FTP_path, annotation_folder, "gff",
                     self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder, "_genomic.gff.gz",
                     self._args.for_target)
        if self._args.ref_fasta is True:
            get_file(self._args.FTP_path, fasta_folder, "fna",
                     self._args.for_target)
            get_file(self._args.FTP_path, fasta_folder, "_genomic.fna.gz",
                     self._args.for_target)
        if self._args.ref_gbk is True:
            get_file(self._args.FTP_path, annotation_folder, "gbk",
                     self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder, "gbff",
                     self._args.for_target)
            get_file(self._args.FTP_path, annotation_folder,
                     "_genomic.gbff.gz", self._args.for_target)
        if self._args.ref_ptt is True:
            get_file(self._args.FTP_path, annotation_folder, "ptt",
                     self._args.for_target)
        if self._args.ref_rnt is True:
            get_file(self._args.FTP_path, annotation_folder, "rnt",
                     self._args.for_target)
        if self._args.convert_embl is True:
            annotation_files = os.listdir(annotation_folder)
            if len(annotation_files) == 0:
                sys.stdout.write("No gbk files!!\n")
            else:
                Converter().convert_gbk2embl(annotation_folder)

    def get_target_fasta(self):
        """Get target fasta"""
        print("Running get target fasta...")
        self.check_parameter([self._args.output_format], ["--output_format"])
        self.check_folder([self._args.ref_fasta_folder])
        self.check_file([self._args.mutation_table], "--mutation_table", True)
        project_creator.create_subfolders(
            self._paths.required_folders("get_target_fasta"))
        outputs = self._args.output_format.split(",")
        for output in outputs:
            output = output.strip()
        target = TargetFasta(self._paths.tar_fasta_folder,
                             self._args.ref_fasta_folder)
        target.get_target_fasta(self._args.mutation_table,
                                self._paths.tar_fasta_folder,
                                self._args.ref_fasta_folder, outputs)

    def ratt(self):
        """Run RATT to transfer annotation file from reference to target."""
        print("Running annotation transfer...")
        if (self._args.transfer_type != "Strain") and (
                self._args.transfer_type !=
                "Assembly") and (self._args.transfer_type != "Species") and (
                    self._args.transfer_type != "Assembly.Repetitive"
                ) and (self._args.transfer_type != "Strain.Repetitive") and (
                    self._args.transfer_type != "Species.Repetitive") and (
                        self._args.transfer_type !=
                        "Multiple") and (self._args.transfer_type != "Free"):
            print("Error: please assign correct --transfer_type!!")
            sys.exit()
        self.check_folder([
            self._args.ref_embl_gbk, self._args.target_fasta,
            self._args.ref_fasta
        ])
        self.check_parameter([self._args.element, self._args.compare_pair],
                             ["--element", "--compare_pair"])
        project_creator.create_subfolders(
            self._paths.required_folders("annotation_transfer"))
        args_ratt = self.args_container.container_ratt(
            self._args.RATT_path, self._args.element, self._args.transfer_type,
            self._args.ref_embl_gbk, self._args.target_fasta,
            self._args.ref_fasta, self._paths.ratt_folder,
            self._args.convert_to_gff_rnt_ptt,
            self._paths.tar_annotation_folder, self._args.compare_pair)
        ratt = RATT(args_ratt)
        ratt.annotation_transfer(args_ratt)

    def tsspredator(self):
        """Run TSSpredator for predicting TSS candidates."""
        self.check_folder([
            self._args.fasta_folder, self._args.annotation_folder,
            self._args.wig_folder
        ])
        self.check_parameter([self._args.lib, self._args.output_prefix],
                             ["--lib", "--output_prefix"])
        self.check_no_require_folder([
            self._args.compare_transcript_assembly,
            self._args.reference_gff_folder
        ])
        self.check_file([self._args.merge_manual], ["--merge_manual"], False)
        if self._args.compute_program.lower() == "tss":
            print("Running TSS prediction...")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.compute_program.lower() == "processing_site":
            print("Running processing site prediction...")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error:No such program!!!!")
            sys.exit()
        args_tss = self.args_container.container_tsspredator(
            self._args.TSSpredator_path, self._args.compute_program,
            self._args.fasta_folder, self._args.annotation_folder,
            self._args.wig_folder, self._args.lib, self._args.output_prefix,
            self._args.height, self._args.height_reduction, self._args.factor,
            self._args.factor_reduction, self._args.base_height,
            self._args.enrichment_factor, self._args.processing_factor,
            self._args.replicate_match, out_folder, self._args.statistics,
            self._args.validate_gene, self._args.merge_manual,
            self._args.compare_transcript_assembly, self._args.fuzzy,
            self._args.utr_length, self._args.cluster, self._args.length,
            self._args.re_check_orphan, self._args.overlap_feature,
            self._args.reference_gff_folder, self._args.remove_low_expression)
        tsspredator = TSSpredator(args_tss)
        tsspredator.run_tsspredator(args_tss)

    def optimize(self):
        """opimize TSSpredator"""
        self.check_folder([
            self._args.wig_folder, self._args.fasta_file,
            self._args.annotation_file
        ])
        self.check_file([self._args.manual], ["--manual"], True)
        self.check_parameter(
            [self._args.strain_name, self._args.lib, self._args.output_prefix],
            ["--strain_name", "--lib", "--output_prefix"])
        if self._args.program.lower() == "tss":
            print("Running optimization of TSS prediction...")
            project_creator.create_subfolders(
                self._paths.required_folders("TSS"))
            out_folder = self._paths.tsspredator_folder
        elif self._args.program.lower() == "processing_site":
            print("Running optimization of processing site prediction...")
            out_folder = self._paths.processing_site_folder
            project_creator.create_subfolders(
                self._paths.required_folders("processing"))
        else:
            print("Error:No such program!!!!")
            sys.exit()
        args_ops = self.args_container.container_optimize(
            self._args.TSSpredator_path, self._args.fasta_file,
            self._args.annotation_file, self._args.wig_folder,
            self._args.manual, out_folder, self._args.strain_name,
            self._args.max_height, self._args.max_height_reduction,
            self._args.max_factor, self._args.max_factor_reduction,
            self._args.max_base_height, self._args.max_enrichment_factor,
            self._args.max_processing_factor, self._args.utr_length,
            self._args.lib, self._args.output_prefix, self._args.cluster,
            self._args.length, self._args.core, self._args.program,
            self._args.replicate_match, self._args.steps)
        optimize_tss(args_ops)

    def color(self):
        """color the screenshots"""
        print("Running png files coloring...")
        self.check_parameter([self._args.track_number], ["--track_numer"])
        self.check_folder([self._args.screenshot_folder])
        color = ColorPNG()
        color.generate_color_png(self._args.track_number,
                                 self._args.screenshot_folder,
                                 self._args.ImageMagick_covert_path)

    def terminator(self):
        """Run TransTermHP for detecting terminators."""
        print("Running terminator prediction...")
        if self._args.TransTermHP_path is None:
            print("Please assign the folder where you install TransTermHP.")
        self.check_folder([
            self._args.fasta_folder, self._args.annotation_folder,
            self._args.transcript_folder
        ])
        self.check_no_require_folder([self._args.sRNA])
        project_creator.create_subfolders(
            self._paths.required_folders("terminator"))
        args_term = self.args_container.container_terminator(
            self._args.TransTermHP_path, self._args.expterm_path,
            self._args.RNAfold_path, self._paths.transterm_folder,
            self._args.fasta_folder, self._args.annotation_folder,
            self._args.transcript_folder, self._args.sRNA,
            self._args.statistics, self._args.tex_wig_folder,
            self._args.frag_wig_folder, self._args.decrease,
            self._args.highest_coverage, self._args.fuzzy_detect_coverage,
            self._args.fuzzy_within_transcript,
            self._args.fuzzy_downstream_transcript,
            self._args.fuzzy_within_gene, self._args.fuzzy_downstream_gene,
            self._paths.transtermhp_folder, self._args.tex_notex_libs,
            self._args.frag_libs, self._args.tex_notex,
            self._args.replicates_tex, self._args.replicates_frag,
            self._args.table_best, self._args.min_loop_length,
            self._args.max_loop_length, self._args.min_stem_length,
            self._args.max_stem_length, self._args.min_U_tail_length,
            self._args.miss_rate, self._args.range_U_tail)
        terminator = Terminator(args_term)
        terminator.run_terminator(args_term)

    def transcript(self):
        """Run Transcriptome assembly."""
        print("Running transcript assembly...")
        self.check_folder([self._args.annotation_folder])
        self.check_no_require_folder([
            self._args.compare_TSS, self._args.compare_genome_annotation,
            self._args.terminator_folder
        ])
        project_creator.create_subfolders(
            self._paths.required_folders("transcript_assembly"))
        args_tran = self.args_container.container_transcript(
            self._args.frag_wig_path, self._args.tex_wig_path,
            self._args.tex_notex, self._args.length,
            self._args.annotation_folder, self._args.height, self._args.width,
            self._args.tolerance, self._args.tolerance_coverage,
            self._args.replicates_tex, self._args.replicates_frag,
            self._paths.transcript_assembly_output_folder,
            self._args.compare_TSS, self._args.compare_genome_annotation,
            self._args.TSS_fuzzy, self._args.Tex_treated_libs,
            self._args.fragmented_libs, self._args.compare_feature_genome,
            self._args.table_best, self._args.terminator_folder,
            self._args.fuzzy_term)
        transcript = TranscriptAssembly(args_tran)
        transcript.run_transcript_assembly(args_tran)

    def utr_detection(self):
        """Run UTR detection."""
        print("Running UTR detection...")
        self.check_folder([
            self._args.annotation_folder,
            self._args.transcript_assembly_folder, self._args.TSS_folder
        ])
        self.check_no_require_folder([self._args.terminator_folder])
        project_creator.create_subfolders(self._paths.required_folders("utr"))
        args_utr = self.args_container.container_utr(
            self._args.TSS_folder, self._args.annotation_folder,
            self._args.transcript_assembly_folder,
            self._args.terminator_folder, self._args.terminator_fuzzy,
            self._paths.utr_folder, self._args.TSS_source,
            self._args.base_5UTR, self._args.UTR_length, self._args.base_3UTR)
        utr = UTRDetection(args_utr)
        utr.run_utr_detection(args_utr)

    def srna_detection(self):
        """sRNA_detection."""
        print("Running sRNA prediction...")
        self.check_folder([
            self._args.annotation_folder, self._args.transcript_assembly_folder
        ])
        self.check_no_require_folder([
            self._args.fasta_folder, self._args.sORF,
            self._args.terminator_folder
        ])
        self.check_file([self._args.promoter_table], ["--promoter_table"],
                        False)
        if self._args.UTR_derived_sRNA:
            self.check_folder(
                [self._args.TSS_folder, self._args.processing_site_folder])
        else:
            self.check_no_require_folder(
                [self._args.TSS_folder, self._args.processing_site_folder])
        project_creator.create_subfolders(self._paths.required_folders("srna"))
        args_srna = self.args_container.container_srna(
            self._args.Vienna_folder, self._args.Vienna_utils,
            self._args.blast_plus_folder, self._args.ps2pdf14_path,
            self._paths.srna_folder, self._args.UTR_derived_sRNA,
            self._args.annotation_folder, self._args.TSS_folder,
            self._args.transcript_assembly_folder,
            self._args.TSS_intergenic_fuzzy, self._args.TSS_5UTR_fuzzy,
            self._args.TSS_3UTR_fuzzy, self._args.TSS_interCDS_fuzzy,
            self._args.import_info, self._args.tex_wig_folder,
            self._args.frag_wig_folder, self._args.processing_site_folder,
            self._args.fasta_folder, self._args.mountain_plot,
            self._args.nr_format, self._args.srna_format,
            self._args.sRNA_database_path, self._args.nr_database_path,
            self._args.cutoff_energy, self._args.run_intergenic_TEX_coverage,
            self._args.run_intergenic_noTEX_coverage,
            self._args.run_intergenic_fragmented_coverage,
            self._args.run_antisense_TEX_coverage,
            self._args.run_antisense_noTEX_coverage,
            self._args.run_antisense_fragmented_coverage,
            self._args.intergenic_tolerance, self._args.run_utr_TEX_coverage,
            self._args.run_utr_noTEX_coverage,
            self._args.run_utr_fragmented_coverage, self._args.max_length,
            self._args.min_length, self._args.tex_notex_libs,
            self._args.frag_libs, self._args.replicates_tex,
            self._args.replicates_frag, self._args.tex_notex,
            self._args.blast_e_nr, self._args.blast_e_srna,
            self._args.detect_sRNA_in_CDS, self._args.table_best,
            self._args.decrease_intergenic, self._args.decrease_utr,
            self._args.fuzzy_intergenic, self._args.fuzzy_utr,
            self._args.cutoff_nr_hit, self._args.sORF,
            self._args.best_with_all_sRNAhit,
            self._args.best_without_sORF_candidate,
            self._args.overlap_percent_CDS, self._args.terminator_folder,
            self._args.terminator_fuzzy_in_CDS,
            self._args.terminator_fuzzy_out_CDS,
            self._args.best_with_terminator,
            self._args.ignore_hypothetical_protein, self._args.TSS_source,
            self._args.min_utr_coverage, self._args.promoter_table,
            self._args.best_with_promoter, self._args.ranking_time_promoter,
            self._args.promoter_name)
        srna = sRNADetection(args_srna)
        srna.run_srna_detection(args_srna)

    def sorf_detection(self):
        """sORF_detection."""
        print("Running sORF prediction...")
        self.check_folder([
            self._args.transcript_assembly_folder,
            self._args.annotation_folder, self._args.fasta_folder
        ])
        self.check_no_require_folder(
            [self._args.sRNA_folder, self._args.TSS_folder])
        project_creator.create_subfolders(self._paths.required_folders("sorf"))
        args_sorf = self.args_container.container_sorf(
            self._paths.sorf_folder, self._args.UTR_derived_sORF,
            self._args.transcript_assembly_folder,
            self._args.annotation_folder, self._args.TSS_folder,
            self._args.utr_length, self._args.min_length,
            self._args.max_length, self._args.tex_wig_folder,
            self._args.frag_wig_folder, self._args.cutoff_intergenic_coverage,
            self._args.cutoff_antisense_coverage,
            self._args.cutoff_5utr_coverage, self._args.cutoff_3utr_coverage,
            self._args.cutoff_interCDS_coverage, self._args.fasta_folder,
            self._args.tex_notex_libs, self._args.frag_libs,
            self._args.tex_notex, self._args.replicates_tex,
            self._args.replicates_frag, self._args.table_best,
            self._args.sRNA_folder, self._args.start_codon,
            self._args.stop_codon, self._args.cutoff_background,
            self._args.fuzzy_rbs, self._args.rbs_not_after_TSS,
            self._args.print_all_combination, self._args.best_no_sRNA,
            self._args.best_no_TSS, self._args.ignore_hypothetical_protein,
            self._args.min_rbs_distance, self._args.max_rbs_distance)
        sorf = sORFDetection(args_sorf)
        sorf.run_sorf_detection(args_sorf)

    def meme(self):
        """promoter detectopn"""
        print("Running promoter detection...")
        self.check_folder([self._args.TSS_folder, self._args.fasta_folder])
        if not self._args.TSS_source:
            self.check_folder([self._args.annotation_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("promoter"))
        args_pro = self.args_container.container_promoter(
            self._args.MEME_path, self._paths.promoter_output_folder,
            self._args.tex_libs, self._args.TSS_folder,
            self._args.fasta_folder, self._args.num_motif,
            self._args.nt_before_TSS, self._args.motif_width,
            self._args.TSS_source, self._args.tex_wig_path,
            self._args.annotation_folder, self._args.combine_all,
            self._args.e_value)
        meme = MEME(args_pro)
        meme.run_meme(args_pro)

    def operon(self):
        """operon detection"""
        print("Running operon detection...")
        self.check_folder([
            self._args.TSS_folder, self._args.annotation_folder,
            self._args.transcript_folder, self._args.UTR5_folder,
            self._args.UTR3_folder
        ])
        self.check_no_require_folder([self._args.term_folder])
        project_creator.create_subfolders(
            self._paths.required_folders("operon"))
        args_op = self.args_container.container_operon(
            self._args.TSS_folder, self._args.annotation_folder,
            self._args.transcript_folder, self._args.UTR5_folder,
            self._args.UTR3_folder, self._args.term_folder,
            self._args.TSS_fuzzy, self._args.term_fuzzy, self._args.min_length,
            self._args.statistics, self._paths.operon_output_folder,
            self._args.combine_gff, self._paths.operon_statistics_folder)
        operon = OperonDetection(args_op)
        operon.run_operon(args_op)

    def circrna(self):
        """circRNA detection"""
        print("Running circular RNA prediction...")
        self.check_folder([self._args.fasta_path, self._args.annotation_path])
        self.check_no_require_folder(
            [self._args.tex_bam_path, self._args.fragmented_bam_path])
        project_creator.create_subfolders(
            self._paths.required_folders("circrna"))
        args_circ = self.args_container.container_circrna(
            self._args.align, self._args.process, self._args.fasta_path,
            self._args.annotation_path, self._args.tex_bam_path,
            self._args.fragmented_bam_path, self._paths.read_folder,
            self._paths.circrna_stat_folder, self._args.support_reads,
            self._args.segemehl_folder, self._args.samtools_path,
            self._args.start_ratio, self._args.end_ratio,
            self._args.ignore_hypothetical_protein,
            self._paths.circrna_output_folder)
        circ = CircRNADetection(args_circ)
        circ.run_circrna(args_circ)

    def goterm(self):
        """Go term discovery"""
        print("Running GO term mapping...")
        self.check_folder([self._args.annotation_path])
        self.check_no_require_folder([self._args.transcript_path])
        self.check_file(
            [self._args.UniProt_id, self._args.go_obo, self._args.goslim_obo],
            ["--UniProt_id", "--go.obo", "--goslim_obo"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("go_term"))
        args_go = self.args_container.container_goterm(
            self._args.annotation_path, self._paths.goterm_output_folder,
            self._args.UniProt_id, self._args.go_obo, self._args.goslim_obo,
            self._args.transcript_path)
        goterm = GoTermFinding(args_go)
        goterm.run_go_term(args_go)

    def srna_target(self):
        """sRNA target prediction"""
        print("Running sRNA target prediction...")
        self.check_folder([
            self._args.fasta_path, self._args.sRNA_path,
            self._args.annotation_path
        ])
        project_creator.create_subfolders(
            self._paths.required_folders("srna_target"))
        args_tar = self.args_container.container_srna_target(
            self._args.Vienna_folder, self._args.annotation_path,
            self._args.fasta_path, self._args.sRNA_path, self._args.query_sRNA,
            self._args.program, self._args.interaction_length,
            self._args.window_size_target, self._args.span_target,
            self._args.window_size_srna, self._args.span_srna,
            self._args.unstructured_region_RNAplex_target,
            self._args.unstructured_region_RNAplex_srna,
            self._args.unstructured_region_RNAup, self._args.energy_threshold,
            self._args.duplex_distance, self._args.top,
            self._paths.starget_output_folder, self._args.process_rnaplex,
            self._args.process_rnaup, self._args.continue_rnaup,
            self._args.potential_target_start, self._args.potential_target_end,
            self._args.target_feature)
        srnatarget = sRNATargetPrediction(args_tar)
        srnatarget.run_srna_target_prediction(args_tar)

    def snp(self):
        """SNP transcript detection"""
        print("Running SNP/mutations calling...")
        self.check_folder([self._args.fasta_path])
        if (self._args.bam_type != "target") and (self._args.bam_type !=
                                                  "reference"):
            print("Error: please assign \"target\" or"
                  " \"reference\" to --bam_type!!")
            sys.exit()
        if (self._args.ploidy != "haploid") and (self._args.ploidy !=
                                                 "diploid"):
            print("Error: please assign \"haploid\" or"
                  " \"diploid\" to --chromosome_type!!")
        project_creator.create_subfolders(self._paths.required_folders("snp"))
        args_snp = self.args_container.container_snp(
            self._args.samtools_path, self._args.bcftools_path,
            self._args.bam_type, self._args.program, self._args.fasta_path,
            self._args.tex_bam_path, self._args.frag_bam_path,
            self._args.quality, self._args.read_depth,
            self._paths.snp_output_folder, self._args.indel_fraction,
            self._args.ploidy)
        snp = SNPCalling(args_snp)
        snp.run_snp_calling(args_snp)

    def ppi(self):
        """PPI network retrieve"""
        print("Running protein-protein interaction networks prediction...")
        self.check_folder([self._args.gff_path])
        self.check_parameter(
            [self._args.proteinID_strains, self._args.species_STRING],
            ["--proteinID_strains", "--species_STRING"])
        project_creator.create_subfolders(
            self._paths.required_folders("ppi_network"))
        args_ppi = self.args_container.container_ppi(
            self._args.gff_path, self._args.proteinID_strains,
            self._args.without_strain_pubmed, self._args.species_STRING,
            self._args.score, self._paths.ppi_output_folder,
            self._args.node_size, self._args.query)
        ppi = PPINetwork(self._paths.ppi_output_folder)
        ppi.retrieve_ppi_network(args_ppi)

    def sublocal(self):
        """Subcellular Localization prediction"""
        print("Running subcellular localization prediction...")
        self.check_folder([self._args.gff_path, self._args.fasta_path])
        self.check_no_require_folder([self._args.transcript_path])
        if (self._args.bacteria_type !=
                "positive") and (self._args.bacteria_type != "negative"):
            print("Error: please assign \"positive\" or"
                  " \"negative\" to --bacteria_type!!")
            sys.exit()
        project_creator.create_subfolders(
            self._paths.required_folders("subcellular_localization"))
        args_sub = self.args_container.container_sublocal(
            self._args.Psortb_path, self._args.gff_path, self._args.fasta_path,
            self._args.bacteria_type, self._args.difference_multi,
            self._args.merge_to_gff, self._paths.sublocal_output_folder,
            self._args.transcript_path)
        sublocal = SubLocal(args_sub)
        sublocal.run_sub_local(args_sub)

    def ribos(self):
        """riboswitch prediction"""
        print("Running riboswitch prediction...")
        self.check_folder([
            self._args.gff_path, self._args.fasta_path, self._args.tss_path,
            self._args.transcript_path
        ])
        self.check_file([self._args.riboswitch_ID, self._args.Rfam],
                        ["--riboswitch_ID", "--Rfam"], True)
        project_creator.create_subfolders(
            self._paths.required_folders("riboswitch"))
        args_ribo = self.args_container.container_ribos(
            self._args.infernal_path, self._args.riboswitch_ID,
            self._args.gff_path, self._args.fasta_path, self._args.tss_path,
            self._args.transcript_path, self._args.Rfam,
            self._paths.ribos_output_folder, self._args.e_value,
            self._args.output_all, self._paths.database_folder,
            self._args.fuzzy, self._args.start_codon, self._args.min_dist_rbs,
            self._args.max_dist_rbs, self._args.fuzzy_rbs,
            self._args.UTR_length)
        ribos = Ribos(args_ribo)
        ribos.run_ribos(args_ribo)

    def screen(self):
        """generate screenshot"""
        print("Running screenshot generating...")
        self.check_file([self._args.main_gff, self._args.fasta],
                        ["--main_gff", "--fasta"], True)
        if self._args.side_gffs is not None:
            for gff in (self._args.side_gffs.split(",")):
                gff = gff.strip()
                if not os.path.isfile(gff):
                    print("Error: The --side_gffs no exist!!")
                    sys.exit()
        if self._args.output_folder is None:
            print("Error: please assign --output_folder!!")
            sys.exit()
        if (self._args.present != "expand") and (
                self._args.present != "collapse") and (self._args.present !=
                                                       "squish"):
            print("Error: please assign \"expand\" or "
                  "\"collapse\" or \"squish\" to --present!!")
            sys.exit()
        args_sc = self.args_container.container_screen(
            self._args.main_gff, self._args.side_gffs, self._args.fasta,
            self._args.frag_wig_folder, self._args.tex_wig_folder,
            self._args.height, self._args.tex_libs, self._args.frag_libs,
            self._args.present, self._args.output_folder)
        screen = Screen(args_sc)
        screen.screenshot(args_sc)