Пример #1
0
    def mutect_caller(self, control_bam, out_dir):
        """
        Ret:Use GATK Mutect to conduct Variant Discovery Step.

        Runs MuTect with this BAM as tumor and control_bam as normal. The raw
        output goes to "<out_dir>/<samplename>.vcf.tmp"; the lines that do not
        contain 'REJECT' are then written to "<out_dir>/<samplename>.vcf".

        Args:
            control_bam: normal BAM, given as a path string or as a BamFile
                (its ``path`` attribute is used).
            out_dir: output directory; created through create_dir().

        Returns:
            The VcfFile wrapping the filtered VCF, or False when this BAM does
            not exist or the filtered VCF was not produced.
        """
        # Work on a deep copy so set_jdk() is applied to a private config.
        config_dict = copy.deepcopy(self.config_dict)
        config_dict = set_jdk(config_dict, "jdk_17")
        java = config_dict["java"]
        reffa = config_dict["reffa"]
        dbsnp = config_dict["dbsnp"]
        cosmic = config_dict["cosmic"]
        intervals = config_dict["intervals"]
        mutect = config_dict["mutect"]
        extra_option = config_dict["bamfile_mutect_caller_extra"]
        create_dir(out_dir)
        # Resolve a BamFile to its path before it is logged or placed on the
        # command line.
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path
        info("Running Mutect step for %s and %s" % (self.path, control_bam))
        snp_flag = dbsnp != ""
        intervals_flag = intervals != ""
        out_vcf = out_dir + "/" + self.samplename + ".vcf"
        tmp = out_vcf + ".tmp"
        out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
        # A raw output left by an earlier run only needs the REJECT filter.
        if isexist(tmp) and not out_vcf.isexist():
            runcmd("grep -v \'REJECT\' %s > %s" % (tmp, out_vcf.path))
        cmd = "%s -jar %s  -T MuTect -R %s -I:tumor %s -I:normal %s \
                --cosmic %s \
                %s \
                -o %s "\
                % (java, mutect, reffa, self.path, control_bam, cosmic, extra_option, tmp)
        if not self.isexist():
            # NOTE(review): return value unused; presumably called for its
            # effect on the configuration - confirm against set_jdk().
            set_jdk(config_dict, "jdk_18")
            info("Bam File not exists, can not conduct mutect_caller step!")
            return False
        if not out_vcf.isexist() and not isexist(tmp):
            if snp_flag:
                cmd = cmd + " --dbsnp %s" % (dbsnp)
            if intervals_flag:
                cmd = cmd + " --intervals %s" % (intervals)
            cmd = cmd + " &> %s/log/%s.case.Mutect_caller.log" % (os.getcwd(), self.runid)
            # Filter the raw calls only when MuTect itself succeeded.
            cmd = cmd + " && grep -v \'REJECT\' %s > %s" % (tmp, out_vcf.path)
            runcmd(cmd)
            savecmd(cmd, self.samplename)
            if not out_vcf.isexist():
                return False
        else:
            # Output already present: only record the base command.
            savecmd(cmd, self.samplename)
        set_jdk(config_dict, "jdk_18")
        return out_vcf  # VcfFile Class instance
Пример #2
0
    def freebayes_caller(self, out_dir, control_bam=""):
        """
        Ret:Use Freebayes to conduct Variant Discovery Step.

        Without a usable control BAM, freebayes is run on this BAM only and
        writes "<out_dir>/<samplename>.vcf". With a control BAM that exists,
        case and control are called in parallel threads into "<vcf>.case" and
        "<vcf>.control", and the final VCF is produced by calling
        control_filter() on the case VcfFile.

        Args:
            out_dir: output directory; created through create_dir().
            control_bam: optional control BAM, given as a path string or as a
                BamFile; ignored when empty or when the file does not exist.

        Returns:
            The VcfFile for the final VCF, or False when an input BAM or an
            expected output file is missing.
        """
        config_dict = self.config_dict
        freebayes = config_dict["freebayes"]
        reffa = config_dict["reffa"]
        intervals = config_dict["intervals"]
        extra_option = config_dict["bamfile_freebayes_caller_extra"]
        intervals_flag = intervals != ""
        create_dir(out_dir)

        def setcmd(bamfile, out_vcf, backrun=False):
            """Build the freebayes shell command for one BAM."""
            cmd = "%s -f %s %s" % (freebayes, reffa, extra_option)
            if intervals_flag:
                cmd = cmd + " -t %s" % (intervals)
            # The BAM path needs its own separating space, otherwise it is
            # fused with the preceding option.
            cmd = cmd + " " + bamfile + " > " + out_vcf
            if backrun:
                # "&" must terminate the whole command, redirect included.
                cmd = cmd + " &"
            return cmd

        out_vcf = out_dir + "/" + self.samplename + ".vcf"
        out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path

        if control_bam != "" and isexist(control_bam):
            info("Running freebayes_caller step for " + self.path + " and " + control_bam)
            out_case_vcf = VcfFile(out_vcf.path + ".case", self.samplename, config_dict)
            out_control_vcf = VcfFile(out_vcf.path + ".control", self.samplename, config_dict)
            case_cmd = setcmd(self.path, out_case_vcf.path)
            control_cmd = setcmd(control_bam, out_control_vcf.path)
            if not (self.isexist() and isexist(control_bam)):
                info("Bam File not exists, can not conduct freebayes_caller step!")
                return False
            if not out_vcf.isexist():
                # Call only the sides whose intermediate VCF is missing, in
                # parallel.
                threads = []
                for vcf, vcf_cmd in ((out_case_vcf, case_cmd), (out_control_vcf, control_cmd)):
                    if not vcf.isexist():
                        threads.append(threading.Thread(target=runcmd, args=(vcf_cmd,)))
                        savecmd(vcf_cmd, self.samplename)
                for t in threads:
                    t.daemon = True
                    t.start()
                for t in threads:
                    t.join()
                if not out_case_vcf.isexist() or not out_control_vcf.isexist():
                    return False
            else:
                savecmd(case_cmd, self.samplename)
                savecmd(control_cmd, self.samplename)
            out_case_vcf.control_filter(out_control_vcf.path, out_vcf.path)
            if not out_vcf.isexist():
                return False
            return out_vcf  # VcfFile Class instance

        info("Running freebayes_caller step for " + self.path)
        cmd = setcmd(self.path, out_vcf.path)
        if not self.isexist():
            info("Bam File not exists, can not conduct freebayes_caller step!")
            return False
        if not out_vcf.isexist():
            runcmd(cmd)
            savecmd(cmd, self.samplename)
            if not out_vcf.isexist():
                return False
        else:
            savecmd(cmd, self.samplename)
        return out_vcf  # VcfFile Class instance
Пример #3
0
    def pindel_caller(self, out_dir, control_bam=""):
        """
        Ret:Use Pindel to conduct SVs Discovery Step.

        Writes a pindel config file for the case BAM (and one for the control
        BAM), runs pindel, merges its per-type output files and converts the
        merged file to VCF with pindel2vcf4tcga. With a control BAM that
        exists, the case VCF is passed through control_filter(); otherwise the
        case VCF is moved to the final location.

        Args:
            out_dir: output directory; created through create_dir().
            control_bam: optional control BAM, given as a path string or as a
                BamFile; ignored when empty or when the file does not exist.

        Returns:
            The VcfFile for "<out_dir>/<samplename>.vcf", or False when this
            BAM does not exist or the final VCF was not produced.
        """
        config_dict = self.config_dict
        reffa = config_dict["reffa"]
        pindel_dir = config_dict["pindel_dir"]
        thread = config_dict["bamfile_pindel_caller_thread"]
        genome_name = config_dict["bamfile_pindel_genome_name"]
        genome_date = config_dict["bamfile_pindel_genome_date"]
        insertsize = config_dict["bamfile_pindel_insertsize"]
        create_dir(out_dir)

        pindel = pindel_dir + "/pindel"
        pindel2vcf4tcga = pindel_dir + "/pindel2vcf4tcga"

        def _pindelout2vcf(prefix, out_vcf):
            """Merge the pindel outputs sharing ``prefix`` and convert to VCF."""
            # "_BP" was previously listed twice, which concatenated that file
            # into the merged output two times.
            out_type_list = ["_D", "_BP", "_SI", "_INV", "_TD", "_LI"]
            out_fnlist = [prefix + suffix for suffix in out_type_list]
            merged = out_vcf + ".pindelout"
            if not isexist(merged):
                FundementalFile("/dev/null").catmerge(out_fnlist, merged)
            cmd = "%s -p %s -r %s -R %s -d %s -v %s -G -so true" \
                    % (pindel2vcf4tcga, merged, reffa, genome_name, genome_date, out_vcf)
            if not isexist(out_vcf):
                runcmd(cmd)
            savecmd(cmd, self.samplename)

        info("Running Pindel step for " + self.path)
        runed_vcf = VcfFile(out_dir + "/" + self.samplename + ".vcf", self.samplename, config_dict)
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path

        # Config files are closed before pindel is started so their content is
        # fully written when it reads them.
        config_case = out_dir + "/pindel.case.config"
        with open(config_case, "w") as config_casefn:
            config_casefn.write(self.path + "\t" + insertsize + "\t" + self.samplename + "\n")
        out_case = out_dir + "/" + self.samplename + ".case"
        config_control = out_dir + "/pindel.control.config"
        with open(config_control, "w") as config_controlfn:
            config_controlfn.write(control_bam + "\t" + insertsize + "\t" + self.samplename + "\n")
        out_control = out_dir + "/" + self.samplename + ".control"

        if not self.isexist():
            info("Bam File not exists, can not conduct Pindel step!")
            return False

        case_cmd = "%s -f %s -i %s -c ALL --number_of_threads %s -o %s" % (pindel, reffa, config_case, thread, out_case)
        case_cmd = case_cmd + " &> %s/log/%s.case.pindel_caller.log" % (os.getcwd(), self.runid)
        control_cmd = "%s -f %s -i %s -c ALL --number_of_threads %s -o %s" % (pindel, reffa, config_control, thread, out_control)
        control_cmd = control_cmd + " &> %s/log/%s.control.pindel_caller.log" % (os.getcwd(), self.runid)
        has_control = control_bam != "" and isexist(control_bam)

        # The "_D" output is used as the marker that pindel already ran.
        if not isexist(out_case + "_D"):
            runcmd(case_cmd)
        savecmd(case_cmd, self.samplename)
        if has_control:
            if not isexist(out_control + "_D"):
                runcmd(control_cmd)
            savecmd(control_cmd, self.samplename)
            out_case_vcf = VcfFile(out_case + ".vcf", self.samplename, config_dict)
            out_control_vcf = VcfFile(out_control + ".vcf", self.samplename, config_dict)
            _pindelout2vcf(out_case, out_case_vcf.path)
            _pindelout2vcf(out_control, out_control_vcf.path)
            out_case_vcf.control_filter(out_control_vcf.path, runed_vcf.path)
        else:
            out_case_vcf = VcfFile(out_case + ".vcf", self.samplename, config_dict)
            _pindelout2vcf(out_case, out_case_vcf.path)
            out_case_vcf.mv(runed_vcf.path)

        if runed_vcf.isexist():
            return runed_vcf
        # Log before returning; this message used to be unreachable.
        info("Pindel VariantCaller run fail!")
        return False
Пример #4
0
    def lofreq_caller(self, out_dir, control_bam = ""):
        """
        Ret:Use lofreq to conduct Variant Discovery Step.

        With a control BAM that exists, "lofreq somatic" is run with the
        output prefix "<out_dir>/<samplename>_"; the resulting
        "_somatic_final.snvs.vcf(.gz)" and "_somatic_final.indels.vcf(.gz)"
        files are uncompressed and merged into "<out_dir>/<samplename>.vcf".
        Otherwise "lofreq call-parallel" writes that VCF directly.

        Args:
            out_dir: output directory; created through create_dir().
            control_bam: optional control BAM, given as a path string or as a
                BamFile; ignored when empty or when the file does not exist.

        Returns:
            The VcfFile for "<out_dir>/<samplename>.vcf", or False when this
            BAM does not exist or the VCF was not produced.
        """
        config_dict = self.config_dict
        lofreq = config_dict["lofreq"]
        reffa = config_dict["reffa"]
        dbsnp = config_dict["lofreq_dbsnp"]
        intervals = config_dict["intervals"]
        thread = config_dict["bamfile_lofreq_caller_thread"]
        extra_option_germline = config_dict["bamfile_lofreq_caller_extra_germline"]
        extra_option_somatic = config_dict["bamfile_lofreq_caller_extra_somatic"]
        create_dir(out_dir)

        info("Running Lofreq_caller step for " + self.path)
        out_fn = out_dir + "/" + self.samplename + "_"
        out_snp_vcf = out_dir + "/" + self.samplename + "_somatic_final.snvs.vcf"
        out_indel_vcf = out_dir + "/" + self.samplename + "_somatic_final.indels.vcf"
        runed_vcf = out_dir + "/" + self.samplename + ".vcf"
        runed_vcf = VcfFile(runed_vcf, self.samplename, config_dict)
        out_snp_vcf = VcfFile(out_snp_vcf, self.samplename, config_dict, runid = self.runid + ".Lofreq")
        out_indel_vcf = VcfFile(out_indel_vcf, self.samplename, config_dict, runid = self.runid + ".Lofreq")
        out_snp_vcfgz = FundementalFile(out_snp_vcf.path + ".gz")
        out_indel_vcfgz = FundementalFile(out_indel_vcf.path + ".gz")
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path
        has_control = control_bam != "" and isexist(control_bam)
        has_intervals = intervals != "" and isexist(intervals)

        if has_control:
            cmd = "%s somatic -n %s -t %s -f %s -d %s --threads %s --call-indels -o %s %s " \
                % (lofreq, control_bam, self.path, reffa, dbsnp, thread, out_fn, extra_option_somatic)
            if has_intervals:
                cmd = cmd + " -l %s" % (intervals)
        else:
            # Use the output *path*; formatting the VcfFile object itself put
            # its str() on the command line.
            cmd = "%s call-parallel --pp-threads %s -f %s --call-indels -o %s %s " \
                % (lofreq, thread, reffa, runed_vcf.path, extra_option_germline)
            if has_intervals:
                cmd = cmd + " -l %s %s" % (intervals, self.path)
            else:
                cmd = cmd + self.path
        cmd = cmd + " &> %s/log/%s.case.lofreq_caller.log" % (os.getcwd(), self.runid)

        if not self.isexist():
            info("Bam File not exists, can not conduct lofreq_caller step!")
            return False

        if not has_control:
            runcmd(cmd)
            savecmd(cmd, self.samplename)
        else:
            # Reuse outputs of an earlier somatic run where they exist.
            if out_snp_vcfgz.isexist() and not out_snp_vcf.isexist():
                out_snp_vcfgz.gzip_uncompress()
            if out_indel_vcfgz.isexist() and not out_indel_vcf.isexist():
                out_indel_vcfgz.gzip_uncompress()
            # NOTE(review): merge() receives the VcfFile object and the
            # keyword "indelvcf" here, while varscan_caller passes a path and
            # "indel" - confirm against VcfFile.merge().
            if not runed_vcf.isexist() and out_snp_vcf.isexist() and out_indel_vcf.isexist():
                out_snp_vcf.merge(runed_vcf, indelvcf = out_indel_vcf.path)
            if not runed_vcf.isexist():
                runcmd(cmd)
                savecmd(cmd, self.samplename)
                out_snp_vcfgz.gzip_uncompress()
                out_indel_vcfgz.gzip_uncompress()
                out_snp_vcf.merge(runed_vcf, indelvcf = out_indel_vcf.path)

        if runed_vcf.isexist():
            return runed_vcf
        return False
Пример #5
0
    def torrent_caller(self, out_dir, control_bam=""):
        """
        Ret:Use TVC-5.0.3 to conduct Variant Discovery Step.

        Without a usable control BAM, tvc is run into out_dir and its
        "TSVC_variants.vcf" is moved to "<out_dir>/<samplename>.vcf". With a
        control BAM that exists, case and control are called into
        "<out_dir>/case" and "<out_dir>/control" and the final VCF is produced
        by calling control_filter() on the case VcfFile.

        Args:
            out_dir: output directory; created through create_dir().
            control_bam: optional control BAM, given as a path string or as a
                BamFile; ignored when empty or when the file does not exist.

        Returns:
            The VcfFile for "<out_dir>/<samplename>.vcf", or False when an
            input BAM or an expected output file is missing.
        """
        config_dict = self.config_dict
        tvc = config_dict["tvc"]
        reffa = config_dict["reffa"]
        extra_option = config_dict["bamfile_torrent_caller_extra"]
        # Named params_json so the local does not shadow the json module.
        params_json = config_dict["tvc_params_json"]
        create_dir(out_dir)

        runed_vcf = out_dir + "/" + self.samplename + ".vcf"
        # The arguments are separated by commas; the previous
        # "self.samplename. config_dict" was an attribute access.
        runed_vcf = VcfFile(runed_vcf, self.samplename, config_dict)

        def setcmd(bamfile, target_dir, params=""):
            """Build the tvc shell command for one BAM."""
            cmd = "%s -i %s -r %s -o %s %s " \
                      % (tvc, bamfile, reffa, target_dir, extra_option)
            if params != "":
                cmd = cmd + " -p %s" % (params)
            return cmd

        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path

        if control_bam != "" and isexist(control_bam):
            info("Running TorrentVariantCaller step for " + self.path + " and " + control_bam)
            out_case_vcf = VcfFile(out_dir + "/case/TSVC_variants.vcf", self.samplename, config_dict)
            out_control_vcf = VcfFile(out_dir + "/control/TSVC_variants.vcf", self.samplename, config_dict)
            case_cmd = setcmd(self.path, out_case_vcf.dirname, params_json)
            case_cmd = case_cmd + " &> %s/log/%s.case.torrent_caller.log" % (os.getcwd(), self.runid)
            control_cmd = setcmd(control_bam, out_control_vcf.dirname, params_json)
            control_cmd = control_cmd + " &> %s/log/%s.control.torrent_caller.log" % (os.getcwd(), self.runid)
            if not (self.isexist() and isexist(control_bam)):
                info("Bam File not exists, can not conduct TorrentVariantCaller step!")
                return False
            if not runed_vcf.isexist():
                if not out_case_vcf.isexist():
                    runcmd(case_cmd)
                    savecmd(case_cmd, self.samplename)
                if not out_control_vcf.isexist():
                    runcmd(control_cmd)
                    savecmd(control_cmd, self.samplename)
                if not out_case_vcf.isexist() or not out_control_vcf.isexist():
                    return False
            else:
                savecmd(case_cmd, self.samplename)
                savecmd(control_cmd, self.samplename)
            out_case_vcf.control_filter(out_control_vcf.path, runed_vcf.path)
            if not runed_vcf.isexist():
                return False
            return runed_vcf  # VcfFile Class instance

        info("Running TorrentVariantCaller step for " + self.path)
        out_vcf = VcfFile(out_dir + "/TSVC_variants.vcf", self.samplename, config_dict)
        cmd = setcmd(self.path, out_dir, params_json)
        # Pick up the output of an earlier run that was never renamed.
        if out_vcf.isexist():
            out_vcf.mv(runed_vcf.path)
        if not self.isexist():
            info("Bam File not exists, can not conduct TorrentVariantCaller step!")
            return False
        if not runed_vcf.isexist():
            runcmd(cmd)
            savecmd(cmd, self.samplename)
            if not out_vcf.isexist():
                return False
            if not out_vcf.mv(runed_vcf.path):
                return False
        else:
            savecmd(cmd, self.samplename)
        return runed_vcf  # VcfFile Class instance
Пример #6
0
    def varscan_caller(self, out_dir="", control_bam = ""):
        """
        Ret:Use VarScan to conduct Variant Discovery Step.

        With a control BAM that exists, both BAMs are piled up in parallel
        threads and "varscan somatic" is run on the two mpileup files.
        Otherwise this BAM is piled up and "mpileup2snp" / "mpileup2indel" are
        run in parallel threads. The SNP and indel VCFs are then passed
        through varscan2gatkfmt() and merged into "<out_dir>/<samplename>.vcf".

        Args:
            out_dir: output directory; created through create_dir().
            control_bam: optional control BAM, given as a path string or as a
                BamFile.

        Returns:
            The VcfFile for the merged VCF, or False when this BAM does not
            exist or the SNP/indel VCFs were not produced.
        """
        config_dict = self.config_dict
        java = config_dict["java"]
        varscan = config_dict["varscan"]
        java_max_mem = config_dict["java_max_mem"]
        extra_option_somatic = config_dict["bamfile_varscan_caller_extra_somatic"]
        extra_option_germline = config_dict["bamfile_varscan_caller_extra_germline"]
        create_dir(out_dir)
        info("Running Varscan_caller step for " + self.path)
        out_snp_vcf = out_dir + "/" + self.samplename + ".snp.vcf"
        out_snp_vcf = VcfFile(out_snp_vcf, self.samplename, config_dict, runid = self.runid + ".Varscan")
        out_indel_vcf = out_dir + "/" + self.samplename + ".indel.vcf"
        out_indel_vcf = VcfFile(out_indel_vcf, self.samplename, config_dict, runid = self.runid + ".Varscan")
        out_vcf = out_dir + "/" + self.samplename + ".vcf"
        out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
        case_bam = BamFile(self.path, self.samplename, config_dict)
        # Accept a BamFile like the sibling callers do, instead of wrapping
        # the object itself as if it were a path.
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path
        control_bam = BamFile(control_bam, self.samplename, config_dict)
        cmd = ""

        if not self.isexist():
            info("Bam File not exists, can not conduct varscan_caller step!")
            return False

        if not out_vcf.isexist() and (not out_snp_vcf.isexist() or not out_indel_vcf.isexist()):
            case_mpileup_fn = MpileupFile(out_dir + "/" + self.samplename + ".mpileup.case", self.samplename, config_dict)
            control_mpileup_fn = MpileupFile(out_dir + "/" + self.samplename + ".mpileup.control", self.samplename, config_dict)
            if control_bam.path != "" and control_bam.isexist():
                threads = [
                    threading.Thread(target=case_bam.mpileup, args=(case_mpileup_fn.path,)),
                    threading.Thread(target=control_bam.mpileup, args=(control_mpileup_fn.path,)),
                ]
                for t in threads:
                    t.daemon = True
                    t.start()
                for t in threads:
                    t.join()
                # NOTE(review): "--output-vcf" is directly followed by the
                # extra options here, while the germline commands pass
                # "--output-vcf 1" - confirm the configured extra options
                # start with the flag's value.
                cmd = "%s -Xmx%s -jar %s somatic %s %s --output-snp %s --output-indel %s --output-vcf %s"\
                    % (java, java_max_mem, varscan, case_mpileup_fn.path, control_mpileup_fn.path, out_snp_vcf.path, out_indel_vcf.path, extra_option_somatic)
                cmd = cmd + " &> %s/log/%s.case.Varscan_caller.log" % (os.getcwd(), self.runid)
                runcmd(cmd)
                savecmd(cmd, self.samplename)
            else:
                case_bam.mpileup(case_mpileup_fn.path)
                snpcmd = "%s -Xmx%s -jar %s mpileup2snp %s --output-vcf 1 %s > %s"\
                    % (java, java_max_mem, varscan, case_mpileup_fn.path, extra_option_germline, out_snp_vcf.path)
                indelcmd = "%s -Xmx%s -jar %s mpileup2indel %s --output-vcf 1 %s > %s"\
                    % (java, java_max_mem, varscan, case_mpileup_fn.path, extra_option_germline, out_indel_vcf.path)
                snpcmd = snpcmd + " 2> %s/log/%s.case.Varscan_caller_snp.log" % (os.getcwd(), self.runid)
                indelcmd = indelcmd + " 2> %s/log/%s.case.Varscan_caller_indel.log" % (os.getcwd(), self.runid)
                # Pass the callable and its argument separately; calling
                # runcmd(...) inline executed the command immediately and
                # handed its return value to Thread as the target.
                threads = [
                    threading.Thread(target=runcmd, args=(snpcmd,)),
                    threading.Thread(target=runcmd, args=(indelcmd,)),
                ]
                savecmd(snpcmd, self.samplename)
                savecmd(indelcmd, self.samplename)
                for t in threads:
                    t.daemon = True
                    t.start()
                for t in threads:
                    t.join()
            if not out_snp_vcf.isexist() or not out_indel_vcf.isexist():
                return False
        else:
            # Nothing was run on this path, so cmd is still the empty string.
            savecmd(cmd, self.samplename)
        out_snp_vcf.varscan2gatkfmt()
        out_indel_vcf.varscan2gatkfmt()
        out_snp_vcf.merge(out_vcf.path, indel=out_indel_vcf.path)
        return out_vcf  # VcfFile Class instance
Пример #7
0
    def unifiedgenotyper_caller(self, out_dir, control_bam = ""):
        """
        Ret:Use GATK UnifiedGenotyper to conduct Variant Discovery Step.

        Without a usable control BAM, this BAM is called directly into
        "<out_dir>/<samplename>.vcf". With a control BAM that exists, case and
        control are called in parallel threads into "<vcf>.case" and
        "<vcf>.control" and the final VCF is produced by calling
        control_filter() on the case VcfFile.

        Returns the VcfFile for the final VCF, or False when an input BAM or
        an expected output file is missing.
        """
        cfg = self.config_dict
        java = cfg["java"]
        gatk = cfg["gatk"]
        reffa = cfg["reffa"]
        dbsnp = cfg["dbsnp"]
        intervals = cfg["intervals"]
        thread = cfg["bamfile_unifiedgenotyper_caller_thread"]
        extra_option = cfg["bamfile_unifiedgenotyper_caller_extra"]
        tmp_dir = cfg["gatk_tmp_dir"]
        java_max_mem = cfg["java_max_mem"]
        create_dir(out_dir)
        use_dbsnp = dbsnp != ""
        use_intervals = intervals != ""

        def build_cmd(bam_path, vcf_path, backrun=False):
            # Assemble the GATK command line for a single BAM.
            command = "%s -Xmx%s -Djava.io.tmpdir=%s -jar %s -R %s %s -nt %s \
                      -T UnifiedGenotyper \
                      -I %s -o %s "\
                      % (java, java_max_mem, tmp_dir, gatk, reffa, extra_option, thread, bam_path, vcf_path)
            if use_dbsnp:
                command += " --dbsnp %s" % (dbsnp)
            if use_intervals:
                command += " --intervals %s" % (intervals)
            if backrun:
                command += " &"
            return command

        final_vcf = VcfFile(out_dir + "/" + self.samplename + ".vcf", self.samplename, cfg)
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path

        if control_bam == "" or not isexist(control_bam):
            # Single-sample mode.
            info("Running Unifiedgenotyper_caller step for " + self.path)
            single_cmd = build_cmd(self.path, final_vcf.path)
            single_cmd += " &> %s/log/%s.case.Unifiedgenotyper_caller.log" % (os.getcwd(), self.runid)
            if not self.isexist():
                info("Bam File not exists, can not conduct unifiedgenotyper_caller step!")
                return False
            if final_vcf.isexist():
                savecmd(single_cmd, self.samplename)
                return final_vcf  # VcfFile Class instance
            runcmd(single_cmd)
            savecmd(single_cmd, self.samplename)
            if not final_vcf.isexist():
                return False
            return final_vcf  # VcfFile Class instance

        # Case/control mode.
        info("Running Unifiedgenotyper_caller step for " + self.path + " and " + control_bam)
        case_vcf = VcfFile(final_vcf.path + ".case", self.samplename, cfg)
        ctrl_vcf = VcfFile(final_vcf.path + ".control", self.samplename, cfg)
        case_cmd = build_cmd(self.path, case_vcf.path)
        case_cmd += " &> %s/log/%s.case.Unifiedgenotyper_caller.log" % (os.getcwd(), self.runid)
        ctrl_cmd = build_cmd(control_bam, ctrl_vcf.path)
        ctrl_cmd += " &> %s/log/%s.control.Unifiedgenotyper_caller.log" % (os.getcwd(), self.runid)
        if not (self.isexist() and isexist(control_bam)):
            info("Bam File not exists, can not conduct unifiedgenotyper_caller step!")
            return False

        if final_vcf.isexist():
            savecmd(case_cmd, self.samplename)
            savecmd(ctrl_cmd, self.samplename)
        else:
            # Only the sides whose intermediate VCF is missing are (re)run.
            workers = []
            for vcf, command in ((case_vcf, case_cmd), (ctrl_vcf, ctrl_cmd)):
                if not vcf.isexist():
                    workers.append(threading.Thread(target=runcmd, args=(command,)))
                    savecmd(command, self.samplename)
            for worker in workers:
                worker.setDaemon(True)
                worker.start()
            for worker in workers:
                worker.join()
            if not (case_vcf.isexist() and ctrl_vcf.isexist()):
                return False

        case_vcf.control_filter(ctrl_vcf.path, final_vcf.path)
        if not final_vcf.isexist():
            return False
        return final_vcf  # VcfFile Class instance
Пример #8
0
    def haplotype_caller(self, out_dir, control_bam = "", seq_type="dna"):
        """
        Ret:Use GATK HaplotypeCaller to conduct Variant Discovery Step.

        Calls this BAM (together with control_bam when it is given and
        exists) into "<out_dir>/<samplename>.vcf". The DNA extra options are
        used when seq_type is "dna", the RNA extra options otherwise.

        Returns the VcfFile for that VCF, or False when this BAM does not
        exist or the VCF was not produced.
        """
        cfg = self.config_dict
        intervals = cfg["intervals"]
        java = cfg["java"]
        gatk = cfg["gatk"]
        reffa = cfg["reffa"]
        dbsnp = cfg["dbsnp"]
        tmp_dir = cfg["gatk_tmp_dir"]
        extra_option_rna = cfg["bamfile_haplotype_caller_extra_rna"]
        extra_option_dna = cfg["bamfile_haplotype_caller_extra_dna"]
        java_max_mem = cfg["java_max_mem"]
        info("Running Haplotype_caller step for " + self.path)
        use_dbsnp = dbsnp != ""
        use_intervals = intervals != ""
        create_dir(out_dir)
        result_vcf = VcfFile(out_dir + "/" + self.samplename + ".vcf", self.samplename, cfg)
        if isinstance(control_bam, BamFile):
            control_bam = control_bam.path

        # Only the extra options differ between DNA and RNA runs.
        if seq_type == "dna":
            extra_option = extra_option_dna
        else:
            extra_option = extra_option_rna

        if control_bam != "" and isexist(control_bam):
            # Case and control BAM are passed to the same invocation.
            cmd = "%s -Xmx%s -Djava.io.tmpdir=%s \
                      -jar %s -R %s \
                      -T HaplotypeCaller \
                      %s \
                      -I %s -I %s -o %s "\
                % (java, java_max_mem, tmp_dir, gatk, reffa, extra_option, self.path, control_bam, result_vcf.path)
        else:
            cmd = "%s -Xmx%s -Djava.io.tmpdir=%s \
                      -jar %s -R %s \
                      -T HaplotypeCaller \
                      %s \
                      -I %s -o %s"\
                % (java, java_max_mem, tmp_dir, gatk, reffa, extra_option, self.path, result_vcf.path)

        if use_dbsnp:
            cmd += " --dbsnp %s" % (dbsnp)
        if use_intervals:
            cmd += " --intervals %s" % (intervals)
        cmd += " &> %s/log/%s.case.Haplotypecaller.log" % (os.getcwd(), self.runid)

        if not self.isexist():
            info("Bam File not exists, can not conduct haplotype_caller step!")
            return False
        if result_vcf.isexist():
            # Output already present: only record the command.
            savecmd(cmd, self.samplename)
            return result_vcf  # VcfFile Class instance
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not result_vcf.isexist():
            return False
        return result_vcf  # VcfFile Class instance