def mutect_caller(self, control_bam, out_dir):
    """
    Ret:Use GATK Mutect to conduct Variant Discovery Step.

    Runs MuTect with this BAM as ``-I:tumor`` and ``control_bam`` as
    ``-I:normal``. Raw calls go to ``<out_dir>/<samplename>.vcf.tmp``; the
    final ``<out_dir>/<samplename>.vcf`` keeps every line of the raw file
    that does not contain ``REJECT``.

    :param control_bam: normal sample, either a path string or a BamFile.
    :param out_dir: output directory, created through ``create_dir``.
    :return: the VcfFile wrapping the filtered VCF, or False when this BAM
        does not exist or the filtered VCF was not produced.
    """
    # set_jdk works on a deep copy, so self.config_dict itself is not mutated.
    config_dict = copy.deepcopy(self.config_dict)
    config_dict = set_jdk(config_dict, "jdk_17")
    java = config_dict["java"]
    reffa = config_dict["reffa"]
    dbsnp = config_dict["dbsnp"]
    cosmic = config_dict["cosmic"]
    intervals = config_dict["intervals"]
    mutect = config_dict["mutect"]
    extra_option = config_dict["bamfile_mutect_caller_extra"]
    create_dir(out_dir)

    # Reduce the control to a plain path before it is logged or put in a command.
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path
    info("Running Mutect step for %s and %s" % (self.path, control_bam))

    out_vcf = out_dir + "/" + self.samplename + ".vcf"
    tmp = out_vcf + ".tmp"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    filter_cmd = "grep -v 'REJECT' %s > %s" % (tmp, out_vcf.path)

    # A raw call file without a final VCF only needs the filter step.
    if isexist(tmp) and not out_vcf.isexist():
        runcmd(filter_cmd)

    cmd = "%s -jar %s -T MuTect -R %s -I:tumor %s -I:normal %s" \
        % (java, mutect, reffa, self.path, control_bam)
    # Optional resources are only passed when configured (empty string = unset).
    if cosmic != "":
        cmd = cmd + " --cosmic %s" % (cosmic)
    cmd = cmd + " %s -o %s " % (extra_option, tmp)

    if not self.isexist():
        config_dict = set_jdk(config_dict, "jdk_18")
        info("Bam File not exists, can not conduct mutect_caller step!")
        return False

    if not out_vcf.isexist() and not isexist(tmp):
        if dbsnp != "":
            cmd = cmd + " --dbsnp %s" % (dbsnp)
        if intervals != "":
            cmd = cmd + " --intervals %s" % (intervals)
        cmd = cmd + " &> %s/log/%s.case.Mutect_caller.log" % (os.getcwd(), self.runid)
        cmd = cmd + " && " + filter_cmd
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not out_vcf.isexist():
            # NOTE(review): every other exit switches back to jdk_18; done here
            # too for consistency - confirm set_jdk has no side effect that
            # makes this undesirable on failure.
            config_dict = set_jdk(config_dict, "jdk_18")
            return False
    else:
        # Nothing was run this time; the base command is still recorded.
        savecmd(cmd, self.samplename)

    config_dict = set_jdk(config_dict, "jdk_18")
    return out_vcf  # VcfFile Class instance
def freebayes_caller(self, out_dir, control_bam=""):
    """
    Ret:Use Freebayes to conduct Variant Discovery Step.

    Without a usable ``control_bam`` freebayes is run once on this BAM and
    writes ``<out_dir>/<samplename>.vcf``. With a control BAM, the case and
    control samples are called into ``<vcf>.case`` / ``<vcf>.control`` in
    two threads and the final VCF is produced by
    ``out_case_vcf.control_filter``.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile;
        ignored when empty or when the file does not exist.
    :return: the VcfFile for the final VCF, or False when an input BAM is
        missing or an expected output file was not produced.
    """
    config_dict = self.config_dict
    freebayes = config_dict["freebayes"]
    reffa = config_dict["reffa"]
    intervals = config_dict["intervals"]
    extra_option = config_dict["bamfile_freebayes_caller_extra"]
    create_dir(out_dir)
    intervals_flag = intervals != ""

    def setcmd(bamfile, out_vcf, backrun=False):
        """Build the freebayes shell command for one BAM file."""
        cmd = "%s -f %s %s" % (freebayes, reffa, extra_option)
        if intervals_flag:
            cmd = cmd + " -t %s" % (intervals)
        # The explicit space keeps the BAM path from fusing with the
        # intervals path (or the last extra option).
        cmd = cmd + " " + bamfile + " > " + out_vcf
        if backrun:
            # '&' has to terminate the whole command to background it.
            cmd = cmd + " &"
        return cmd

    out_vcf = out_dir + "/" + self.samplename + ".vcf"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path

    if control_bam != "" and isexist(control_bam):
        info("Running freebayes_caller step for " + self.path + " and " + control_bam)
        out_case_vcf = VcfFile(out_vcf.path + ".case", self.samplename, config_dict)
        out_control_vcf = VcfFile(out_vcf.path + ".control", self.samplename, config_dict)
        case_cmd = setcmd(self.path, out_case_vcf.path)
        control_cmd = setcmd(control_bam, out_control_vcf.path)

        if not (self.isexist() and isexist(control_bam)):
            info("Bam File not exists, can not conduct freebayes_caller step!")
            return False

        if not out_vcf.isexist():
            # Only the per-sample VCFs that are still missing are computed.
            workers = []
            for sample_vcf, sample_cmd in ((out_case_vcf, case_cmd),
                                           (out_control_vcf, control_cmd)):
                if not sample_vcf.isexist():
                    worker = threading.Thread(target=runcmd, args=(sample_cmd,))
                    worker.daemon = True
                    workers.append(worker)
                    savecmd(sample_cmd, self.samplename)
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            if not out_case_vcf.isexist() or not out_control_vcf.isexist():
                return False
        else:
            savecmd(case_cmd, self.samplename)
            savecmd(control_cmd, self.samplename)

        out_case_vcf.control_filter(out_control_vcf.path, out_vcf.path)
        if not out_vcf.isexist():
            return False
        return out_vcf  # VcfFile Class instance

    info("Running freebayes_caller step for " + self.path)
    cmd = setcmd(self.path, out_vcf.path)
    if not self.isexist():
        info("Bam File not exists, can not conduct freebayes_caller step!")
        return False
    if not out_vcf.isexist():
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not out_vcf.isexist():
            return False
    else:
        savecmd(cmd, self.samplename)
    return out_vcf  # VcfFile Class instance
def pindel_caller(self, out_dir, control_bam=""):
    """
    Ret:Use Pindel to conduct SVs Discovery Step.

    Writes a pindel config file for this BAM (and for ``control_bam`` when
    it is given and exists), runs pindel, merges the per-type pindel output
    files and converts them with ``pindel2vcf4tcga``. With a control, the
    final VCF comes from ``out_case_vcf.control_filter``; without one the
    case VCF is moved to ``<out_dir>/<samplename>.vcf``.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile.
    :return: the VcfFile for ``<out_dir>/<samplename>.vcf``, or False when
        this BAM is missing or the final VCF was not produced.
    """
    config_dict = self.config_dict
    reffa = config_dict["reffa"]
    pindel_dir = config_dict["pindel_dir"]
    thread = config_dict["bamfile_pindel_caller_thread"]
    genome_name = config_dict["bamfile_pindel_genome_name"]
    genome_date = config_dict["bamfile_pindel_genome_date"]
    insertsize = config_dict["bamfile_pindel_insertsize"]
    create_dir(out_dir)
    pindel = pindel_dir + "/pindel"
    pindel2vcf4tcga = pindel_dir + "/pindel2vcf4tcga"

    def _write_config(config_path, bam_path):
        """Write the one-line pindel config: bam, insert size, sample label."""
        with open(config_path, "w") as handle:
            handle.write("%s\t%s\t%s\n" % (bam_path, insertsize, self.samplename))

    def _pindel_cmd(config_path, out_prefix, role):
        """Build the pindel command for one sample; role is 'case' or 'control'."""
        cmd = "%s -f %s -i %s -c ALL --number_of_threads %s -o %s" \
            % (pindel, reffa, config_path, thread, out_prefix)
        return cmd + " &> %s/log/%s.%s.pindel_caller.log" % (os.getcwd(), self.runid, role)

    def _pindelout2vcf(prefix, out_vcf):
        """Merge the pindel outputs under ``prefix`` and convert them to a VCF."""
        # "_BP" used to be listed twice, so that file was concatenated twice.
        # NOTE(review): the second entry may have been meant to be another
        # pindel output type - confirm.
        out_type_list = ["_D", "_BP", "_SI", "_INV", "_TD", "_LI"]
        out_fnlist = [prefix + suffix for suffix in out_type_list]
        merged = out_vcf + ".pindelout"
        if not isexist(merged):
            FundementalFile("/dev/null").catmerge(out_fnlist, merged)
        cmd = "%s -p %s -r %s -R %s -d %s -v %s -G -so true" \
            % (pindel2vcf4tcga, merged, reffa, genome_name, genome_date, out_vcf)
        if not isexist(out_vcf):
            runcmd(cmd)
            savecmd(cmd, self.samplename)

    info("Running Pindel step for " + self.path)
    runed_vcf = VcfFile(out_dir + "/" + self.samplename + ".vcf", self.samplename, config_dict)
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path

    if not self.isexist():
        info("Bam File not exists, can not conduct Pindel step!")
        return False

    config_case = out_dir + "/pindel.case.config"
    out_case = out_dir + "/" + self.samplename + ".case"
    _write_config(config_case, self.path)
    case_cmd = _pindel_cmd(config_case, out_case, "case")

    # "<prefix>_D" is used as the marker that pindel already ran for a sample.
    if not isexist(out_case + "_D"):
        runcmd(case_cmd)
        savecmd(case_cmd, self.samplename)
    out_case_vcf = VcfFile(out_case + ".vcf", self.samplename, config_dict)

    if control_bam != "" and isexist(control_bam):
        config_control = out_dir + "/pindel.control.config"
        out_control = out_dir + "/" + self.samplename + ".control"
        _write_config(config_control, control_bam)
        control_cmd = _pindel_cmd(config_control, out_control, "control")
        if not isexist(out_control + "_D"):
            runcmd(control_cmd)
            savecmd(control_cmd, self.samplename)
        out_control_vcf = VcfFile(out_control + ".vcf", self.samplename, config_dict)
        _pindelout2vcf(out_case, out_case_vcf.path)
        _pindelout2vcf(out_control, out_control_vcf.path)
        out_case_vcf.control_filter(out_control_vcf.path, runed_vcf.path)
    else:
        _pindelout2vcf(out_case, out_case_vcf.path)
        out_case_vcf.mv(runed_vcf.path)

    if runed_vcf.isexist():
        return runed_vcf
    # This message used to sit after the return and could never be logged.
    info("Pindel VariantCaller run fail!")
    return False
def lofreq_caller(self, out_dir, control_bam=""):
    """
    Ret:Use lofreq to conduct Variant Discovery Step.

    With a usable ``control_bam`` this runs ``lofreq somatic`` (control as
    ``-n``, this BAM as ``-t``), uncompresses the ``_somatic_final`` SNV and
    indel VCFs and merges them into ``<out_dir>/<samplename>.vcf``.
    Otherwise it runs ``lofreq call-parallel`` on this BAM alone, writing
    that VCF directly.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile;
        ignored when empty or when the file does not exist.
    :return: the VcfFile for ``<out_dir>/<samplename>.vcf``, or False when
        this BAM is missing or the VCF was not produced.
    """
    config_dict = self.config_dict
    lofreq = config_dict["lofreq"]
    reffa = config_dict["reffa"]
    dbsnp = config_dict["lofreq_dbsnp"]
    intervals = config_dict["intervals"]
    thread = config_dict["bamfile_lofreq_caller_thread"]
    extra_option_germline = config_dict["bamfile_lofreq_caller_extra_germline"]
    extra_option_somatic = config_dict["bamfile_lofreq_caller_extra_somatic"]
    create_dir(out_dir)
    info("Running Lofreq_caller step for " + self.path)

    out_fn = out_dir + "/" + self.samplename + "_"
    out_snp_vcf = out_dir + "/" + self.samplename + "_somatic_final.snvs.vcf"
    out_indel_vcf = out_dir + "/" + self.samplename + "_somatic_final.indels.vcf"
    runed_vcf = out_dir + "/" + self.samplename + ".vcf"
    runed_vcf = VcfFile(runed_vcf, self.samplename, config_dict)
    out_snp_vcf = VcfFile(out_snp_vcf, self.samplename, config_dict,
                          runid=self.runid + ".Lofreq")
    out_indel_vcf = VcfFile(out_indel_vcf, self.samplename, config_dict,
                            runid=self.runid + ".Lofreq")
    out_snp_vcfgz = FundementalFile(out_snp_vcf.path + ".gz")
    out_indel_vcfgz = FundementalFile(out_indel_vcf.path + ".gz")

    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path
    somatic_mode = control_bam != "" and isexist(control_bam)
    use_intervals = intervals != "" and isexist(intervals)

    if somatic_mode:
        cmd = "%s somatic -n %s -t %s -f %s -d %s --threads %s --call-indels -o %s %s " \
            % (lofreq, control_bam, self.path, reffa, dbsnp, thread, out_fn, extra_option_somatic)
        if use_intervals:
            cmd = cmd + " -l %s" % (intervals)
    else:
        # The output argument must be the VCF path, not the VcfFile wrapper.
        cmd = "%s call-parallel --pp-threads %s -f %s --call-indels -o %s %s " \
            % (lofreq, thread, reffa, runed_vcf.path, extra_option_germline)
        if use_intervals:
            cmd = cmd + " -l %s %s" % (intervals, self.path)
        else:
            cmd = cmd + self.path
    cmd = cmd + " &> %s/log/%s.case.lofreq_caller.log" % (os.getcwd(), self.runid)

    if not self.isexist():
        info("Bam File not exists, can not conduct lofreq_caller step!")
        return False

    if not somatic_mode:
        # Skip the run when the VCF is already there, as the somatic branch does.
        if not runed_vcf.isexist():
            runcmd(cmd)
        savecmd(cmd, self.samplename)
    else:
        # Reuse results of an earlier run before deciding to call again.
        if out_snp_vcfgz.isexist() and not out_snp_vcf.isexist():
            out_snp_vcfgz.gzip_uncompress()
        if out_indel_vcfgz.isexist() and not out_indel_vcf.isexist():
            out_indel_vcfgz.gzip_uncompress()
        # NOTE(review): merge() receives the VcfFile object and the keyword
        # `indelvcf` here, while varscan_caller passes a path and `indel=`;
        # confirm which form VcfFile.merge expects.
        if not runed_vcf.isexist() and out_snp_vcf.isexist() and out_indel_vcf.isexist():
            out_snp_vcf.merge(runed_vcf, indelvcf=out_indel_vcf.path)
        if not runed_vcf.isexist():
            runcmd(cmd)
            savecmd(cmd, self.samplename)
            out_snp_vcfgz.gzip_uncompress()
            out_indel_vcfgz.gzip_uncompress()
            out_snp_vcf.merge(runed_vcf, indelvcf=out_indel_vcf.path)

    if runed_vcf.isexist():
        return runed_vcf
    return False
def torrent_caller(self, out_dir, control_bam=""):
    """
    Ret:Use TVC-5.0.3 to conduct Variant Discovery Step.

    With a usable ``control_bam`` the case and control samples are called
    into ``<out_dir>/case`` and ``<out_dir>/control`` and the final VCF is
    produced by ``out_case_vcf.control_filter``. Otherwise this BAM alone is
    called into ``out_dir`` and ``TSVC_variants.vcf`` is moved to
    ``<out_dir>/<samplename>.vcf``.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile;
        ignored when empty or when the file does not exist.
    :return: the VcfFile for ``<out_dir>/<samplename>.vcf``, or False when
        an input BAM is missing or an expected output was not produced.
    """
    config_dict = self.config_dict
    tvc = config_dict["tvc"]
    reffa = config_dict["reffa"]
    extra_option = config_dict["bamfile_torrent_caller_extra"]
    # Named params_json so the local does not shadow the json module name.
    params_json = config_dict["tvc_params_json"]
    create_dir(out_dir)
    runed_vcf = out_dir + "/" + self.samplename + ".vcf"
    # The arguments were separated by '.' instead of ',' here, which made
    # this an attribute lookup on the sample name.
    runed_vcf = VcfFile(runed_vcf, self.samplename, config_dict)

    def setcmd(bamfile, reffa, out_dir, json="", backrun=False):
        """Build the tvc shell command for one BAM file."""
        cmd = "%s -i %s -r %s -o %s %s " % (tvc, bamfile, reffa, out_dir, extra_option)
        if json != "":
            cmd = cmd + " -p %s" % (json)
        if backrun:
            cmd = cmd + " &"
        return cmd

    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path

    if control_bam != "" and isexist(control_bam):
        info("Running TorrentVariantCaller step for " + self.path + " and " + control_bam)
        out_case_vcf = VcfFile(out_dir + "/case/TSVC_variants.vcf", self.samplename, config_dict)
        out_control_vcf = VcfFile(out_dir + "/control/TSVC_variants.vcf", self.samplename, config_dict)
        case_cmd = setcmd(self.path, reffa, out_case_vcf.dirname, params_json)
        case_cmd = case_cmd + " &> %s/log/%s.case.torrent_caller.log" % (os.getcwd(), self.runid)
        control_cmd = setcmd(control_bam, reffa, out_control_vcf.dirname, params_json)
        control_cmd = control_cmd + " &> %s/log/%s.control.torrent_caller.log" % (os.getcwd(), self.runid)

        if not (self.isexist() and isexist(control_bam)):
            info("Bam File not exists, can not conduct TorrentVariantCaller step!")
            return False

        if not runed_vcf.isexist():
            if not out_case_vcf.isexist():
                runcmd(case_cmd)
                savecmd(case_cmd, self.samplename)
            if not out_control_vcf.isexist():
                runcmd(control_cmd)
                savecmd(control_cmd, self.samplename)
            if not out_case_vcf.isexist() or not out_control_vcf.isexist():
                return False
        else:
            savecmd(case_cmd, self.samplename)
            savecmd(control_cmd, self.samplename)

        out_case_vcf.control_filter(out_control_vcf.path, runed_vcf.path)
        if not runed_vcf.isexist():
            return False
        return runed_vcf  # VcfFile Class instance

    info("Running TorrentVariantCaller step for " + self.path)
    out_vcf = out_dir + "/TSVC_variants.vcf"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    cmd = setcmd(self.path, reffa, out_dir, params_json)
    # A TSVC output file already in out_dir is moved to the final name first.
    if out_vcf.isexist():
        out_vcf.mv(runed_vcf.path)

    if not self.isexist():
        info("Bam File not exists, can not conduct TorrentVariantCaller step!")
        return False

    if not runed_vcf.isexist():
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not out_vcf.isexist():
            return False
        if not out_vcf.mv(runed_vcf.path):
            return False
    else:
        savecmd(cmd, self.samplename)
    return runed_vcf  # VcfFile Class instance
def varscan_caller(self, out_dir="", control_bam=""):
    """
    Ret:Use VarScan to conduct Variant Discovery Step.

    Builds mpileup file(s) and runs VarScan: ``somatic`` when a control BAM
    is given and exists, otherwise ``mpileup2snp`` and ``mpileup2indel`` in
    two threads. The SNP and indel VCFs are passed through
    ``varscan2gatkfmt`` and merged into ``<out_dir>/<samplename>.vcf``.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile.
    :return: the VcfFile for the merged VCF, or False when this BAM is
        missing or the SNP/indel VCFs were not produced.
    """
    config_dict = self.config_dict
    java = config_dict["java"]
    varscan = config_dict["varscan"]
    java_max_mem = config_dict["java_max_mem"]
    extra_option_somatic = config_dict["bamfile_varscan_caller_extra_somatic"]
    extra_option_germline = config_dict["bamfile_varscan_caller_extra_germline"]
    create_dir(out_dir)
    info("Running Varscan_caller step for " + self.path)

    def _run_parallel(jobs):
        """Run (callable, args) pairs in daemon threads and wait for all."""
        workers = []
        for target, args in jobs:
            worker = threading.Thread(target=target, args=args)
            worker.daemon = True
            workers.append(worker)
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    out_snp_vcf = out_dir + "/" + self.samplename + ".snp.vcf"
    out_snp_vcf = VcfFile(out_snp_vcf, self.samplename, config_dict,
                          runid=self.runid + ".Varscan")
    out_indel_vcf = out_dir + "/" + self.samplename + ".indel.vcf"
    out_indel_vcf = VcfFile(out_indel_vcf, self.samplename, config_dict,
                            runid=self.runid + ".Varscan")
    out_vcf = out_dir + "/" + self.samplename + ".vcf"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    case_bam = BamFile(self.path, self.samplename, config_dict)
    # Accept a BamFile as the other callers do, instead of wrapping it twice.
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path
    control_bam = BamFile(control_bam, self.samplename, config_dict)
    cmd = ""

    if not self.isexist():
        info("Bam File not exists, can not conduct varscan_caller step!")
        return False

    if not out_vcf.isexist() and (not out_snp_vcf.isexist() or not out_indel_vcf.isexist()):
        case_mpileup_fn = MpileupFile(out_dir + "/" + self.samplename + ".mpileup.case",
                                      self.samplename, config_dict)
        control_mpileup_fn = MpileupFile(out_dir + "/" + self.samplename + ".mpileup.control",
                                         self.samplename, config_dict)
        if control_bam.path != "" and control_bam.isexist():
            _run_parallel([
                (case_bam.mpileup, (case_mpileup_fn.path,)),
                (control_bam.mpileup, (control_mpileup_fn.path,)),
            ])
            # NOTE(review): extra_option_somatic is what follows --output-vcf,
            # so the configured value presumably starts with "1" - confirm.
            # NOTE(review): the case pileup is passed before the control
            # pileup; confirm against VarScan's expected normal/tumor order.
            cmd = "%s -Xmx%s -jar %s somatic %s %s --output-snp %s --output-indel %s --output-vcf %s" \
                % (java, java_max_mem, varscan, case_mpileup_fn.path, control_mpileup_fn.path,
                   out_snp_vcf.path, out_indel_vcf.path, extra_option_somatic)
            cmd = cmd + " &> %s/log/%s.case.Varscan_caller.log" % (os.getcwd(), self.runid)
            runcmd(cmd)
            savecmd(cmd, self.samplename)
        else:
            case_bam.mpileup(case_mpileup_fn.path)
            snpcmd = "%s -Xmx%s -jar %s mpileup2snp %s --output-vcf 1 %s > %s" \
                % (java, java_max_mem, varscan, case_mpileup_fn.path,
                   extra_option_germline, out_snp_vcf.path)
            indelcmd = "%s -Xmx%s -jar %s mpileup2indel %s --output-vcf 1 %s > %s" \
                % (java, java_max_mem, varscan, case_mpileup_fn.path,
                   extra_option_germline, out_indel_vcf.path)
            snpcmd = snpcmd + " 2> %s/log/%s.case.Varscan_caller_snp.log" % (os.getcwd(), self.runid)
            indelcmd = indelcmd + " 2> %s/log/%s.case.Varscan_caller_indel.log" % (os.getcwd(), self.runid)
            savecmd(snpcmd, self.samplename)
            savecmd(indelcmd, self.samplename)
            # runcmd is passed as the thread target with its argument;
            # calling it in place would run both commands serially before
            # any thread starts.
            _run_parallel([(runcmd, (snpcmd,)), (runcmd, (indelcmd,))])
        if not out_snp_vcf.isexist() or not out_indel_vcf.isexist():
            return False
    else:
        # Nothing was run; cmd is still the empty string at this point.
        savecmd(cmd, self.samplename)

    out_snp_vcf.varscan2gatkfmt()
    out_indel_vcf.varscan2gatkfmt()
    out_snp_vcf.merge(out_vcf.path, indel=out_indel_vcf.path)
    return out_vcf  # VcfFile Class instance
def unifiedgenotyper_caller(self, out_dir, control_bam=""):
    """
    Ret:Use GATK UnifiedGenotyper to conduct Variant Discovery Step.

    Without a usable ``control_bam`` UnifiedGenotyper is run once on this
    BAM and writes ``<out_dir>/<samplename>.vcf``. With a control BAM, the
    case and control samples are called into ``<vcf>.case`` /
    ``<vcf>.control`` in two threads and the final VCF is produced by
    ``out_case_vcf.control_filter``.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile;
        ignored when empty or when the file does not exist.
    :return: the VcfFile for the final VCF, or False when an input BAM is
        missing or an expected output file was not produced.
    """
    config_dict = self.config_dict
    java = config_dict["java"]
    gatk = config_dict["gatk"]
    reffa = config_dict["reffa"]
    dbsnp = config_dict["dbsnp"]
    intervals = config_dict["intervals"]
    thread = config_dict["bamfile_unifiedgenotyper_caller_thread"]
    extra_option = config_dict["bamfile_unifiedgenotyper_caller_extra"]
    tmp_dir = config_dict["gatk_tmp_dir"]
    java_max_mem = config_dict["java_max_mem"]
    create_dir(out_dir)
    snp_flag = dbsnp != ""
    intervals_flag = intervals != ""
    log_tpl = " &> %s/log/%s.%s.Unifiedgenotyper_caller.log"

    def setcmd(bamfile, out_vcf, backrun=False):
        """Build the UnifiedGenotyper shell command for one BAM file."""
        cmd = "%s -Xmx%s -Djava.io.tmpdir=%s -jar %s -R %s %s -nt %s -T UnifiedGenotyper -I %s -o %s " \
            % (java, java_max_mem, tmp_dir, gatk, reffa, extra_option, thread, bamfile, out_vcf)
        # Optional resources are only passed when configured (empty = unset).
        if snp_flag:
            cmd = cmd + " --dbsnp %s" % (dbsnp)
        if intervals_flag:
            cmd = cmd + " --intervals %s" % (intervals)
        if backrun:
            cmd = cmd + " &"
        return cmd

    out_vcf = out_dir + "/" + self.samplename + ".vcf"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path

    if control_bam != "" and isexist(control_bam):
        info("Running Unifiedgenotyper_caller step for " + self.path + " and " + control_bam)
        out_case_vcf = VcfFile(out_vcf.path + ".case", self.samplename, config_dict)
        out_control_vcf = VcfFile(out_vcf.path + ".control", self.samplename, config_dict)
        case_cmd = setcmd(self.path, out_case_vcf.path) \
            + log_tpl % (os.getcwd(), self.runid, "case")
        control_cmd = setcmd(control_bam, out_control_vcf.path) \
            + log_tpl % (os.getcwd(), self.runid, "control")

        if not (self.isexist() and isexist(control_bam)):
            info("Bam File not exists, can not conduct unifiedgenotyper_caller step!")
            return False

        if not out_vcf.isexist():
            # Only the per-sample VCFs that are still missing are computed.
            workers = []
            for sample_vcf, sample_cmd in ((out_case_vcf, case_cmd),
                                           (out_control_vcf, control_cmd)):
                if not sample_vcf.isexist():
                    worker = threading.Thread(target=runcmd, args=(sample_cmd,))
                    # Attribute form replaces the deprecated setDaemon().
                    worker.daemon = True
                    workers.append(worker)
                    savecmd(sample_cmd, self.samplename)
            for worker in workers:
                worker.start()
            for worker in workers:
                worker.join()
            if not out_case_vcf.isexist() or not out_control_vcf.isexist():
                return False
        else:
            savecmd(case_cmd, self.samplename)
            savecmd(control_cmd, self.samplename)

        out_case_vcf.control_filter(out_control_vcf.path, out_vcf.path)
        if not out_vcf.isexist():
            return False
        return out_vcf  # VcfFile Class instance

    info("Running Unifiedgenotyper_caller step for " + self.path)
    cmd = setcmd(self.path, out_vcf.path) + log_tpl % (os.getcwd(), self.runid, "case")
    if not self.isexist():
        info("Bam File not exists, can not conduct unifiedgenotyper_caller step!")
        return False
    if not out_vcf.isexist():
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not out_vcf.isexist():
            return False
    else:
        savecmd(cmd, self.samplename)
    return out_vcf  # VcfFile Class instance
def haplotype_caller(self, out_dir, control_bam="", seq_type="dna"):
    """
    Ret:Use GATK HaplotypeCaller to conduct Variant Discovery Step.

    Runs HaplotypeCaller on this BAM and writes
    ``<out_dir>/<samplename>.vcf``. When ``control_bam`` is given and
    exists, it is passed as a second ``-I`` input to the same run.

    :param out_dir: output directory, created through ``create_dir``.
    :param control_bam: optional control sample, a path string or a BamFile.
    :param seq_type: ``"dna"`` selects the DNA extra options from the
        config; any other value selects the RNA extra options.
    :return: the VcfFile for the output VCF, or False when this BAM is
        missing or the VCF was not produced.
    """
    config_dict = self.config_dict
    intervals = config_dict["intervals"]
    java = config_dict["java"]
    gatk = config_dict["gatk"]
    reffa = config_dict["reffa"]
    dbsnp = config_dict["dbsnp"]
    tmp_dir = config_dict["gatk_tmp_dir"]
    extra_option_rna = config_dict["bamfile_haplotype_caller_extra_rna"]
    extra_option_dna = config_dict["bamfile_haplotype_caller_extra_dna"]
    java_max_mem = config_dict["java_max_mem"]
    info("Running Haplotype_caller step for " + self.path)
    create_dir(out_dir)

    out_vcf = out_dir + "/" + self.samplename + ".vcf"
    out_vcf = VcfFile(out_vcf, self.samplename, config_dict)
    if isinstance(control_bam, BamFile):
        control_bam = control_bam.path

    # The four former command templates differed only in these two parts.
    extra_option = extra_option_dna if seq_type == "dna" else extra_option_rna
    inputs = "-I %s" % (self.path)
    if control_bam != "" and isexist(control_bam):
        inputs = inputs + " -I %s" % (control_bam)

    cmd = "%s -Xmx%s -Djava.io.tmpdir=%s -jar %s -R %s -T HaplotypeCaller %s %s -o %s" \
        % (java, java_max_mem, tmp_dir, gatk, reffa, extra_option, inputs, out_vcf.path)
    # Optional resources are only passed when configured (empty = unset).
    if dbsnp != "":
        cmd = cmd + " --dbsnp %s" % (dbsnp)
    if intervals != "":
        cmd = cmd + " --intervals %s" % (intervals)
    cmd = cmd + " &> %s/log/%s.case.Haplotypecaller.log" % (os.getcwd(), self.runid)

    if not self.isexist():
        info("Bam File not exists, can not conduct haplotype_caller step!")
        return False
    if not out_vcf.isexist():
        runcmd(cmd)
        savecmd(cmd, self.samplename)
        if not out_vcf.isexist():
            return False
    else:
        savecmd(cmd, self.samplename)
    return out_vcf  # VcfFile Class instance