def filterVCF(vcf, SAMPLE_NUM):
    """Run the ``VariantFiltration`` tool on ``vcf`` through ``runCMD``.

    The command is launched via the ``GATK`` executable with ``REF`` as the
    reference. The result is written to ``filtered.vcf.gz_<SAMPLE_NUM>``
    inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    filtered_name = "filtered.vcf.gz_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, filtered_name)
    runCMD([
        GATK, "VariantFiltration",
        "-R", REF,
        "-V", vcf,
        "-O", output_path,
    ])
    return output_path
def splitNCigarReads(bam, SAMPLE_NUM):
    """Run the ``SplitNCigarReads`` tool on ``bam`` through ``runCMD``.

    The command is launched via the ``GATK`` executable with ``REF`` as the
    reference and writes to ``splitted.bam_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    split_name = "splitted.bam_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, split_name)
    gatk_call = [GATK, "SplitNCigarReads"]
    gatk_call += ["-R", REF, "-I", bam, "-O", output_path]
    runCMD(gatk_call)
    return output_path
def haploCaller(bam, SAMPLE_NUM, intervals=("chr1", "chr2")):
    """Run the ``HaplotypeCaller`` tool in GVCF mode on ``bam``.

    The command is launched via the ``GATK`` executable with ``REF`` as the
    reference and ``-ERC GVCF``; output goes to
    ``output.g.vcf.gz_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Args:
        bam: Path of the input alignment file (passed as ``-I``).
        SAMPLE_NUM: Suffix appended to the output file name.
        intervals: Interval names, each passed as its own ``-L`` argument.
            Defaults to ``("chr1", "chr2")``, the values that used to be
            hard-coded. A single string is treated as one interval; an
            empty value or ``None`` omits ``-L`` entirely.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "output.g.vcf.gz_{0}".format(SAMPLE_NUM))
    cmd = [
        GATK, "HaplotypeCaller",
        "-R", REF,
        "-I", bam,
        "-O", output_path,
        "-ERC", "GVCF",
    ]
    if isinstance(intervals, str):
        # A bare string would otherwise be iterated character by character.
        intervals = (intervals,)
    for interval in intervals or ():
        cmd += ["-L", interval]
    runCMD(cmd)
    return output_path
def convertSAMtoFASTQ(usam, SAMPLE_NUM):
    """Run Picard ``SamToFastq`` on ``usam`` through ``runCMD``.

    The tool is started as ``java -jar PICARD SamToFastq`` with ``I=`` set to
    the input and ``FASTQ=`` set to ``sample.fastq_<SAMPLE_NUM>`` inside
    ``OUT_DIR``.

    Returns:
        The path of the FASTQ output file.
    """
    output_path = os.path.join(OUT_DIR, "sample.fastq_{0}".format(SAMPLE_NUM))
    picard_args = [
        "I={0}".format(usam),
        "FASTQ={0}".format(output_path),
    ]
    runCMD(["java", "-jar", PICARD, "SamToFastq"] + picard_args)
    return output_path
def bwa_map(fastq, SAMPLE_NUM):
    """Run ``BWA mem`` on ``fastq`` against ``REF``.

    ``mapped.sam_<SAMPLE_NUM>`` inside ``OUT_DIR`` is opened for writing and
    handed to ``runCMD`` as its second argument (presumably the stream that
    receives the command's output -- confirm against ``runCMD``).

    Args:
        fastq: Path of the FASTQ input passed to ``bwa mem``.
        SAMPLE_NUM: Suffix appended to the output file name.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "mapped.sam_{0}".format(SAMPLE_NUM))
    cmd = [BWA, "mem", REF, fastq]
    # The context manager closes the file even when runCMD raises, which the
    # previous explicit open()/close() pair did not guarantee.
    with open(output_path, "w") as sam_file:
        runCMD(cmd, sam_file)
    return output_path
def revertSAM(ubam, SAMPLE_NUM):
    """Run Picard ``RevertSam`` on ``ubam`` through ``runCMD``.

    The tool is started as ``java -jar PICARD RevertSam`` with ``I=`` set to
    the input and ``O=`` set to ``reverted.bam_<SAMPLE_NUM>`` inside
    ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    reverted_name = "reverted.bam_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, reverted_name)
    picard_call = ["java", "-jar", PICARD, "RevertSam"]
    picard_call.append("I={0}".format(ubam))
    picard_call.append("O={0}".format(output_path))
    runCMD(picard_call)
    return output_path
def convertSAMtoBAM(sam, SAMPLE_NUM):
    """Run ``SAMTOOLS view -bhS`` on ``sam``.

    ``mapped.bam_<SAMPLE_NUM>`` inside ``OUT_DIR`` is opened for writing and
    handed to ``runCMD`` as its second argument (presumably the stream that
    receives the command's output -- confirm against ``runCMD``).

    Args:
        sam: Path of the SAM input passed to ``samtools view``.
        SAMPLE_NUM: Suffix appended to the output file name.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "mapped.bam_{0}".format(SAMPLE_NUM))
    cmd = [SAMTOOLS, "view", "-bhS", sam]
    # The context manager closes the file even when runCMD raises, which the
    # previous explicit open()/close() pair did not guarantee.
    with open(output_path, "w") as bam_file:
        runCMD(cmd, bam_file)
    return output_path
def analyzeCovariates(recal_data, SAMPLE_NUM):
    """Run the ``AnalyzeCovariates`` tool on ``recal_data`` through ``runCMD``.

    The command is launched via the ``GATK`` executable; ``recal_data`` is
    passed as ``-bqsr`` and the plots target (``-plots``) is
    ``AnalyzeCovariates.pdf_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the plots file.
    """
    plots_name = "AnalyzeCovariates.pdf_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, plots_name)
    runCMD([
        GATK, "AnalyzeCovariates",
        "-bqsr", recal_data,
        "-plots", output_path,
    ])
    return output_path
def annotateVCF(bam, vcf, SAMPLE_NUM):
    """Run the ``VariantAnnotator`` tool on ``vcf`` through ``runCMD``.

    The command is launched via the ``GATK`` executable with ``bam`` as
    ``-I``, ``REF`` as the reference, the ``Coverage`` annotation (``-A``)
    and ``DBSNP`` as ``--dbsnp``. Output goes to
    ``annotated.vcf_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    annotated_name = "annotated.vcf_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, annotated_name)
    gatk_call = [GATK, "VariantAnnotator"]
    gatk_call += ["-I", bam, "-R", REF, "-V", vcf, "-O", output_path]
    gatk_call += ["-A", "Coverage", "--dbsnp", DBSNP]
    runCMD(gatk_call)
    return output_path
def sort(bam, SAMPLE_NUM):
    """Run Picard ``SortSam`` on ``bam`` with ``SORT_ORDER=queryname``.

    The tool is started as ``java -jar PICARD SortSam`` through ``runCMD``
    and writes to ``sorted.bam_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "sorted.bam_{0}".format(SAMPLE_NUM))
    picard_args = [
        "I={0}".format(bam),
        "O={0}".format(output_path),
        "SORT_ORDER=queryname",
    ]
    runCMD(["java", "-jar", PICARD, "SortSam"] + picard_args)
    return output_path
def applyBQSR(bam, recal_data, SAMPLE_NUM):
    """Run the ``ApplyBQSR`` tool on ``bam`` through ``runCMD``.

    The command is launched via the ``GATK`` executable with ``REF`` as the
    reference, ``recal_data`` as ``--bqsr-recal-file`` and the flags
    ``--add-output-sam-program-record`` and ``--use-original-qualities``.
    Output goes to ``recalibrated.bam_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    recalibrated_name = "recalibrated.bam_{0}".format(SAMPLE_NUM)
    output_path = os.path.join(OUT_DIR, recalibrated_name)
    runCMD([
        GATK, "ApplyBQSR",
        "-R", REF,
        "-I", bam,
        "--add-output-sam-program-record",
        "--use-original-qualities",
        "-O", output_path,
        "--bqsr-recal-file", recal_data,
    ])
    return output_path
def recalibrateBase(bam, SAMPLE_NUM, intervals=("chr1", "chr2")):
    """Run the ``BaseRecalibrator`` tool on ``bam`` through ``runCMD``.

    The command is launched via the ``GATK`` executable with ``REF`` as the
    reference and ``DBSNP`` as ``--known-sites``; the table is written to
    ``recal_data.table_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Args:
        bam: Path of the input alignment file (passed as ``-I``).
        SAMPLE_NUM: Suffix appended to the output file name.
        intervals: Interval names, each passed as its own ``-L`` argument.
            Defaults to ``("chr1", "chr2")``, the values that used to be
            hard-coded. A single string is treated as one interval; an
            empty value or ``None`` omits ``-L`` entirely.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "recal_data.table_{0}".format(SAMPLE_NUM))
    cmd = [
        GATK, "BaseRecalibrator",
        "-I", bam,
        "-R", REF,
        "--known-sites", DBSNP,
        "-O", output_path,
    ]
    if isinstance(intervals, str):
        # A bare string would otherwise be iterated character by character.
        intervals = (intervals,)
    for interval in intervals or ():
        cmd += ["-L", interval]
    runCMD(cmd)
    return output_path
def mergeBamAlignment(mapped_bam, unmapped_sam, SAMPLE_NUM):
    """Run Picard ``MergeBamAlignment`` through ``runCMD``.

    The tool is started as ``java -jar PICARD MergeBamAlignment`` with
    ``ALIGNED=mapped_bam``, ``UNMAPPED=unmapped_sam`` and ``R=REF``; it
    writes to ``merged.bam_<SAMPLE_NUM>`` inside ``OUT_DIR``.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "merged.bam_{0}".format(SAMPLE_NUM))
    picard_args = [
        "ALIGNED={0}".format(mapped_bam),
        "UNMAPPED={0}".format(unmapped_sam),
        "R={0}".format(REF),
        "O={0}".format(output_path),
    ]
    runCMD(["java", "-jar", PICARD, "MergeBamAlignment"] + picard_args)
    return output_path
def markDuplicates(mapped_bam, SAMPLE_NUM):
    """Run Picard ``MarkDuplicates`` on ``mapped_bam`` through ``runCMD``.

    The tool is started as ``java -jar PICARD MarkDuplicates`` and is given
    two targets inside ``OUT_DIR``: ``marked_duplicates.bam_<SAMPLE_NUM>``
    (``O=``) and ``marked_dup_metrics_<SAMPLE_NUM>`` (``M=``).

    Args:
        mapped_bam: Path of the input file (passed as ``I=``).
        SAMPLE_NUM: Suffix appended to both output file names.

    Returns:
        A ``(bam_path, metrics_path)`` tuple.
    """
    bam_path = os.path.join(
        OUT_DIR, "marked_duplicates.bam_{0}".format(SAMPLE_NUM))
    # The metrics file previously had no sample suffix, so every sample wrote
    # to the same path; it now follows the naming used for all other outputs.
    metrics_path = os.path.join(
        OUT_DIR, "marked_dup_metrics_{0}".format(SAMPLE_NUM))
    cmd = [
        "java", "-jar", PICARD, "MarkDuplicates",
        "I={0}".format(mapped_bam),
        "O={0}".format(bam_path),
        "M={0}".format(metrics_path),
    ]
    runCMD(cmd)
    return bam_path, metrics_path
def run(self, buildType):
    """Run the compile command and report whether it succeeded.

    The command from ``self._formatCompileCMD()`` is executed through
    ``runCMD`` in ``self._getRunDir()``, with the file named by
    ``self._getLogFileName()`` passed as ``pipe``.

    Args:
        buildType: Not used by this method.

    Returns:
        ``True`` when the command's ``"ret_code"`` is 0. ``False`` when
        running it raised an exception or the return code is non-zero; in
        the latter case the command's ``"out"`` is printed through
        ``self._printNiceFailLog``.
    """
    tCompCmd = self._formatCompileCMD()
    try:
        tCompileLog = self._getLogFileName()
        with open(tCompileLog, "w+") as tFile:
            # Compare by value: ``is not`` against a string literal tests
            # object identity and does not reliably detect Windows.
            compileOut = runCMD(tCompCmd, workDir=self._getRunDir(), pipe=tFile,
                                isShell=platform.system() != "Windows")
    except Exception as exc:
        # ``Exception`` (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate.
        log.error("[Error] Can't compile project: {0}".format(exc))
        return False

    log.info("[Info] Compile log saved to: {0}".format(tCompileLog))
    if compileOut["ret_code"] != 0:
        log.error("[Error] {0} Compiler log {1}".format("=" * 20, "=" * 20))
        self._printNiceFailLog(compileOut["out"])
        log.error("[Error] {0}".format("-" * 51))
        return False
    return True
def addOrReplaceReadGroups(bam, SAMPLE_NUM):
    """Run Picard ``AddOrReplaceReadGroups`` on ``bam`` and index the result.

    The tool is started as ``java -jar PICARD AddOrReplaceReadGroups``
    through ``runCMD`` with fixed read-group values and writes to
    ``output_rg.bam_<SAMPLE_NUM>`` inside ``OUT_DIR``. The output file is
    then passed to ``indexBAM``.

    Args:
        bam: Path of the input file (passed as ``I=``).
        SAMPLE_NUM: Suffix appended to the output file name.

    Returns:
        The path of the output file.
    """
    output_path = os.path.join(OUT_DIR, "output_rg.bam_{0}".format(SAMPLE_NUM))
    cmd = [
        "java", "-jar", PICARD, "AddOrReplaceReadGroups",
        "I={0}".format(bam),
        "O={0}".format(output_path),
        "RGID=4",
        "RGLB=lib1",
        "RGPL=ILLUMINA",
        # NOTE(review): "unitl" looks like a typo for "unit1"; left unchanged
        # because it is passed verbatim to Picard -- confirm before editing.
        "RGPU=unitl",
        "RGSM=20",
    ]
    runCMD(cmd)
    # Index the file that was just written; the previous code referenced an
    # undefined name (``output_rg``) here and raised NameError.
    indexBAM(output_path)
    return output_path
def run(self, buildType):
    """Run cmake for ``buildType`` and return the build dictionary.

    The command line is ``"cmake -H."`` followed by the strings from
    ``self._getBuildDirCMD()``, ``self._getGenCMD()`` and
    ``self._getDefsCMD()``. It is executed through ``runCMD`` in
    ``self._getRunDir()``, with the file named by ``self._getLogFile()``
    passed as ``pipe``, after ``setUpEnv(self._getEnvVars())`` was called.

    Args:
        buildType: Forwarded to ``self._setUpBuildInfo``.

    Returns:
        ``self._buildDict`` when the command's ``"ret_code"`` is 0. ``None``
        when running it raised an exception or the return code is non-zero;
        in the latter case the command's ``"out"`` is printed through
        ``self._printNiceFailLog``.
    """
    assert self._rootPath is not None  # check that root path specified
    self._setUpBuildInfo(buildType)
    cmakeCMD = ("cmake -H." + self._getBuildDirCMD() + self._getGenCMD()
                + self._getDefsCMD())
    with open(self._getLogFile(), "w+") as tFile:
        setUpEnv(self._getEnvVars())
        try:
            # Compare by value: ``is not`` against a string literal tests
            # object identity and does not reliably detect Windows.
            tProcessOut = runCMD(cmakeCMD, workDir=self._getRunDir(), pipe=tFile,
                                 isShell=platform.system() != "Windows")
        except Exception as exc:
            # ``Exception`` (not a bare except) so KeyboardInterrupt and
            # SystemExit still propagate.
            log.error("[Error] Can't run cmake: {0} ".format(exc))
            return None
        log.info("[Info] Cmake log saved to: {0}".format(self._getLogFile()))

    if tProcessOut["ret_code"] != 0:
        log.error("[Error] {0} Cmake log {1}".format("=" * 18, "=" * 19))
        self._printNiceFailLog(tProcessOut["out"])
        log.error("[Error] {0}".format("-" * 51))
        return None
    return self._buildDict
def _checkToolVersion(self, binName, matchInfo):
    """Extract a tool's version from its output and validate it.

    Runs ``[binName, matchInfo["args"]]`` through ``runCMD``, splits the
    command's ``"out"`` on newlines, picks the line at index
    ``matchInfo["line"]`` and, after splitting that line on single spaces,
    the word at index ``matchInfo["word"]``.

    Args:
        binName: Name of the executable to query.
        matchInfo: Mapping read for the keys ``"args"``, ``"line"``,
            ``"word"`` and ``"match"``. The extracted word is stored back
            under ``"actual_ver"``.

    Returns:
        ``False`` when the command's ``"ret_code"`` is non-zero or the
        requested line/word does not exist; otherwise the result of
        ``self._isValidVersion(word, matchInfo["match"])``.
    """
    tCmdArgs = [binName, matchInfo["args"]]
    tProcOut = runCMD(tCmdArgs, isShell=platform.system().lower() == "windows")
    if tProcOut["ret_code"] != 0:
        log.error("[Error] Can't get version of: {0}".format(binName))
        return False

    tLines = tProcOut["out"].split("\n")
    tLineIdx = matchInfo["line"]
    # ``<=`` because tLineIdx is used as a 0-based index below; the previous
    # ``<`` let len(tLines) == tLineIdx through and raised IndexError.
    if len(tLines) <= tLineIdx:
        log.error("[Error] Invalid number of lines in output of: {0}".format(binName))
        log.error("[Error] Expected count of lines: {0}".format(tLineIdx))
        return False

    tWords = tLines[tLineIdx].split(" ")
    tWordIdx = matchInfo["word"]
    # Same 0-based bounds check as for the line index.
    if len(tWords) <= tWordIdx:
        log.error("[Error] Invalid count of word ({0} but need {1}): {2}"
                  .format(len(tWords), tWordIdx, binName))
        return False

    tPhrase = tWords[tWordIdx]
    matchInfo["actual_ver"] = tPhrase
    return self._isValidVersion(tPhrase, matchInfo["match"])
def indexBAM(bam):
    """Run ``SAMTOOLS index`` on ``bam`` through ``runCMD``.

    Nothing is returned.
    """
    runCMD([SAMTOOLS, "index", bam])