def phaseVCF(vcfpath, phasevcfpath):
    """Phase a VCF file with Beagle.

    When ``vcfpath`` does not end in ``.vcf.gz`` it is first handed to
    ``gzipFile`` and ``.gz`` is appended to the name used from then on.
    Beagle's stderr is redirected to ``beagle.log`` in the current working
    directory.

    Args:
        vcfpath: Path of the input VCF.
        phasevcfpath: Only the directory part of this path is used; it is
            where the Beagle output prefix is placed.

    Returns:
        The Beagle output prefix without any directory: the input file name
        with its trailing ``.vcf.gz`` replaced by ``_phased``.
    """
    software = params.GetSoftwarePath()
    java_path = software[0]
    beagle_path = software[1]
    print(" ___ phasing vcf file ___ ")

    if not vcfpath.endswith('.vcf.gz'):
        gzipFile(vcfpath)
        vcfpath = vcfpath + '.gz'

    vcffn = os.path.basename(vcfpath)
    outdir = os.path.dirname(phasevcfpath)
    phasevcffn = sub('.vcf.gz$', '_phased', vcffn)

    # os.path.join keeps a bare prefix relative when outdir is empty;
    # "/".join(['', name]) would have pointed at the filesystem root.
    out_prefix = os.path.join(outdir, phasevcffn)

    command = " ".join([
        java_path, "-Xmx4g -jar", beagle_path,
        "gt=" + vcfpath,
        "out=" + out_prefix,
        "2> beagle.log",
    ])
    runCommand(command)
    return phasevcffn
def thinVCF(invcf, outvcf):
    """Thin a VCF with ``vcftools --thin 50 --recode``.

    NOTE: this module defines ``thinVCF`` again further down; Python binds
    the name to the last definition.

    Args:
        invcf: Path of the input VCF (passed to ``--vcf``).
        outvcf: Value passed to vcftools ``--out``.
    """
    # Unpack by name: in the seven-element layout used by the other helpers
    # in this module, position 4 is bedtools, not vcftools.
    _, _, _, _, _, vcftools_path, _ = params.GetSoftwarePath()
    command = " ".join(
        [vcftools_path, "--vcf", invcf, "--thin 50 --out", outvcf, "--recode"])
    runCommand(command)
def removeDupSambamba(bamrepairedfinalsortfn, tmpbams_path=''):
    """Remove duplicate reads from a sorted BAM with ``sambamba markdup``.

    Args:
        bamrepairedfinalsortfn: Input BAM; a trailing ``.sorted.bam`` in the
            name is replaced by ``.marked.bam`` to form the output name.
        tmpbams_path: Accepted but not used by this function.

    Returns:
        The name of the de-duplicated BAM.
    """
    marked_bam = sub('.sorted.bam$', ".marked.bam", bamrepairedfinalsortfn)
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    parts = [
        sambamba, "markdup", "--remove-duplicates",
        "--nthreads", str(4),
        bamrepairedfinalsortfn, marked_bam,
    ]
    runCommand(" ".join(parts))
    return marked_bam
def extractPairedBAMfromROI(inbamfn, bedfn, outbamfn):
    """Extract paired reads overlapping the regions of a BED file.

    Runs ``samtools view -b -f 0x0001 -L <bedfn> <inbamfn>`` and redirects
    its stdout to ``outbamfn`` (SAM flag 0x1: read is paired).

    Args:
        inbamfn: Input BAM path.
        bedfn: BED file given to ``-L``.
        outbamfn: Output BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    parts = [samtools, "view -b -f 0x0001 -L", bedfn, inbamfn, ">", outbamfn]
    runCommand(" ".join(parts))
def extractPairedReadfromROI(inbamfn, bedfn, outbamfn, flag="either"):
    """Select read pairs against a BED file with ``bedtools pairtobed``.

    stdout is redirected to ``outbamfn`` and stderr to ``bedtool.log`` in
    the current working directory.

    Args:
        inbamfn: Input BAM (``-abam``).
        bedfn: BED file (``-b``).
        outbamfn: Output path.
        flag: Value passed to ``-type``; defaults to ``"either"``.
    """
    _, _, _, _, bedtools, _, _ = params.GetSoftwarePath()
    parts = [
        bedtools, "pairtobed -abam", inbamfn,
        "-b", bedfn,
        "-type", flag,
        ">", outbamfn,
        "2> bedtool.log",
    ]
    runCommand(" ".join(parts))
def removeIfEmpty(bamdir, file):
    """Delete a BAM file for which ``samtools view`` prints no record.

    Names that do not end in ``.bam`` are left alone.

    Args:
        bamdir: Directory containing the file.
        file: File name inside ``bamdir``.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    if not file.endswith(".bam"):
        return

    bam_path = "/".join([bamdir, file])
    # The pipeline prints 1 when at least one alignment line exists, else 0.
    probe = " ".join([samtools, "view", bam_path, "| less | head -1 | wc -l"])
    line_count = subprocess.check_output(probe, shell=True)
    if os.path.isfile(bam_path) and int(line_count) == 0:
        os.remove(bam_path)
def splitStrands(inbamfn):
    """Split a BAM into two uncompressed BAMs by SAM flag.

    Two ``samtools view -u -h`` commands are run, each redirected to a file
    named after ``inbamfn`` with its trailing ``.bam`` replaced:

    * ``.read_pos.bam`` <- ``-f 33`` (0x21: paired, mate on reverse strand)
    * ``.read_neg.bam`` <- ``-f 17`` (0x11: paired, read on reverse strand)

    Args:
        inbamfn: Input BAM path.
    """
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    selections = (
        ("33", '.read_pos.bam'),
        ("17", '.read_neg.bam'),
    )
    commands = [
        " ".join([samtools_path, "view -u -h -f", flag, inbamfn, ">",
                  sub('.bam$', suffix, inbamfn)])
        for flag, suffix in selections
    ]
    for command in commands:
        runCommand(command)
def find_unpaired_reads(inbamfn):
    """Write the reads carrying SAM flag 0x4 to ``<name>.unpairedfn.bam``.

    Runs ``samtools view -u -h -f 0x0004`` on ``inbamfn``; the output name
    is ``inbamfn`` with its trailing ``.bam`` replaced.

    NOTE(review): 0x4 is the "segment unmapped" bit in the SAM spec, while
    the function name says "unpaired" -- confirm which is intended.

    Args:
        inbamfn: Input BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    target = sub('.bam$', '.unpairedfn.bam', inbamfn)
    parts = [samtools, "view -u -h -f 0x0004", inbamfn, ">", target]
    runCommand(" ".join(parts))
def sortByName(inbamfn, outbamfn):
    """Sort a BAM by read name with ``sambamba sort -n``.

    Does nothing when ``inbamfn`` is ``None``. The command line is printed
    before it is run.

    Args:
        inbamfn: Input BAM path, or ``None`` to skip.
        outbamfn: Output BAM path (``-o``).
    """
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    if inbamfn is None:
        return
    cmdline = " ".join([sambamba, "sort -n", inbamfn, "-o", outbamfn])
    print(cmdline)
    runCommand(cmdline)
def splitBamByChr(inbamfn, path, chr):
    """Extract one region of a BAM into ``<path>/<chr>.bam``.

    Runs ``samtools view -bh <inbamfn> <chr>`` with stdout redirected to
    the output file. Does nothing when ``chr`` is ``None``.

    Args:
        inbamfn: Input BAM path.
        path: Output directory.
        chr: Region given to samtools; converted with ``str``.
    """
    if chr is None:
        return
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    region = str(chr)
    destination = "/".join([path, region + ".bam"])
    parts = [samtools, "view -bh", inbamfn, region, ">", destination]
    runCommand(" ".join(parts))
def subsample(bamfn1, bamfn2, samplingrate=0.5):
    """Subsample a BAM with ``samtools view -s``.

    Args:
        bamfn1: Input BAM path.
        bamfn2: Output BAM path (stdout of samtools is redirected here).
        samplingrate: Value passed to ``-s``; may be a number or a string.
    """
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    # str.join only accepts strings, so the numeric default must be
    # converted before it is placed on the command line.
    command = " ".join([samtools_path, "view -s", str(samplingrate),
                        "-b", bamfn1, ">", bamfn2])
    runCommand(command)
def extractBAMfromROI_All(inbamfn, bedfn, outbamfn):
    """Extract all reads overlapping the regions of a BED file.

    Runs ``samtools view -b -L <bedfn> <inbamfn>`` with stdout redirected
    to ``outbamfn``.

    Args:
        inbamfn: Input BAM path.
        bedfn: BED file given to ``-L``.
        outbamfn: Output BAM path.
    """
    # Seven-element unpack, matching the other helpers in this module.
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    command = " ".join(
        [samtools_path, "view -b -L", bedfn, inbamfn, ">", outbamfn])
    runCommand(command)
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge every ``*.bam`` found under a directory with ``sambamba merge``.

    The process working directory is changed to ``finalbamdir``. Symlinked
    BAMs are resolved to their real path and each distinct path is merged
    once.

    NOTE: this module defines ``mergeSortBamFiles`` again further down;
    Python binds the name to the last definition.

    Args:
        mergedBamfn: Output BAM path.
        finalbamdir: Directory tree searched for input BAMs.
    """
    # Seven-element unpack, matching the other helpers in this module.
    _, _, _, _, _, _, sambamba_path = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    matches = []
    for root, _dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            path = os.path.join(root, filename)
            if os.path.islink(path):
                path = os.path.realpath(path)
            if path not in matches:
                matches.append(path)

    # Most recently discovered file first, as before.
    inputs = " ".join(reversed(matches))
    runCommand(" ".join([sambamba_path, "merge", mergedBamfn, inputs]))
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge, or sort when there is only one, the BAMs under a directory.

    The process working directory is changed to ``finalbamdir``. Symlinked
    BAMs are resolved to their real path and each distinct path counts
    once.

    * More than one BAM: ``sambamba merge`` into ``mergedBamfn``.
    * Exactly one BAM: it is sorted via ``sortBam`` into
      ``<name>.sort.bam`` and the unsorted file is deleted.
    * No BAM: nothing happens.

    NOTE: this module defines ``mergeSortBamFiles`` again further down;
    Python binds the name to the last definition.

    Args:
        mergedBamfn: Output BAM path for the merge case.
        finalbamdir: Directory tree searched for input BAMs; also passed
            to ``sortBam`` as its temporary directory.
    """
    # Seven-element unpack, matching the other helpers in this module.
    _, _, _, _, _, _, sambamba_path = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    matches = []
    for root, _dirnames, filenames in os.walk(finalbamdir):
        for filename in fnmatch.filter(filenames, '*.bam'):
            path = os.path.join(root, filename)
            if os.path.islink(path):
                path = os.path.realpath(path)
            if path not in matches:
                matches.append(path)

    if len(matches) > 1:
        # Most recently discovered file first, as before.
        inputs = " ".join(reversed(matches))
        runCommand(" ".join([sambamba_path, "merge", mergedBamfn, inputs]))
    elif len(matches) == 1:
        only_bam = matches[0]
        outbam = sub('.bam$', '.sort.bam', only_bam)
        sortBam(only_bam, outbam, finalbamdir)
        os.remove(only_bam)
def getStrands(inbamfn):
    """Split reads by the reverse-strand SAM flag (0x10).

    Two ``samtools view`` commands are run, each redirected to a file named
    after ``inbamfn`` with its trailing ``.bam`` replaced:

    * ``_forward.bam`` <- ``-F 0x10`` (flag not set)
    * ``_reverse.bam`` <- ``-f 0x10`` (flag set)

    NOTE(review): neither ``-b`` nor ``-h`` is passed although the outputs
    are named ``.bam`` -- confirm the intended output format.

    Args:
        inbamfn: Input BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    forward_out = sub('.bam$', '_forward.bam', inbamfn)
    reverse_out = sub('.bam$', '_reverse.bam', inbamfn)
    forward_cmd = " ".join([samtools, "view -F 0x10", inbamfn, ">", forward_out])
    reverse_cmd = " ".join([samtools, "view -f 0x10", inbamfn, ">", reverse_out])
    for cmdline in (forward_cmd, reverse_cmd):
        runCommand(cmdline)
def countReads(inbamfn):
    """Count the alignment records in a BAM via ``samtools view | wc -l``.

    Args:
        inbamfn: Input BAM path.

    Returns:
        The count as a string with all whitespace removed (not an int).
    """
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    cmd = " ".join([samtools_path, "view", inbamfn, "|wc -l"])
    # universal_newlines=True makes communicate() return text, so the
    # "".join below works on Python 3 as well as Python 2.
    out, _err = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE,
                                 stdin=subprocess.PIPE,
                                 shell=True,
                                 universal_newlines=True).communicate()
    return "".join(out.split())
def sortIndexBam(inbamfn, outbamfn):
    """Sort a BAM with sambamba, then index the sorted file.

    Args:
        inbamfn: Input BAM path.
        outbamfn: Sorted output path (``-o``); this is the file indexed.
    """
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    sort_cmd = " ".join([sambamba, "sort", inbamfn, "-o", outbamfn])
    index_cmd = " ".join([sambamba, "index", outbamfn])
    for cmdline in (sort_cmd, index_cmd):
        runCommand(cmdline)
def splitPairAndStrands(inbamfn):
    """Split a BAM into four uncompressed BAMs by mate number and strand.

    Four ``samtools view -u -h -f <flag>`` commands are run, each
    redirected to ``inbamfn`` with its trailing ``.bam`` replaced:

    * ``.read1_pos.bam`` <- 0x0061 (paired, mate reverse, first in pair)
    * ``.read1_neg.bam`` <- 0x0051 (paired, read reverse, first in pair)
    * ``.read2_pos.bam`` <- 0x0091 (paired, read reverse, second in pair)
    * ``.read2_neg.bam`` <- 0x00A1 (paired, mate reverse, second in pair)

    Args:
        inbamfn: Input BAM path.
    """
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    selections = (
        ("0x0061", '.read1_pos.bam'),
        ("0x0051", '.read1_neg.bam'),
        ("0x0091", '.read2_pos.bam'),
        ("0x00A1", '.read2_neg.bam'),
    )
    commands = [
        " ".join([samtools_path, "view -u -h -f", flag, inbamfn, ">",
                  sub('.bam$', suffix, inbamfn)])
        for flag, suffix in selections
    ]
    for command in commands:
        runCommand(command)
def sortBam(inbamfn, outbamfn, tmpbams_path=''):
    """Sort a BAM with ``sambamba sort``.

    Args:
        inbamfn: Input BAM path.
        outbamfn: Output BAM path (``-o``).
        tmpbams_path: Directory for sambamba's temporary files. When empty
            (the default) no ``--tmpdir`` option is passed.
    """
    _, _, _, _, _, _, sambamba_path = params.GetSoftwarePath()
    args = [sambamba_path, "sort", inbamfn, "-o", outbamfn]
    if tmpbams_path:
        # The value must be glued to the option; a space after "--tmpdir="
        # leaves the option empty and turns the path into a stray argument.
        args.append("--tmpdir=" + tmpbams_path)
    runCommand(" ".join(args))
def extract_proper_paired_reads(inbamfn, properfn):
    """Keep properly paired reads with MAPQ >= 30, then delete the input.

    Runs ``samtools view -f 0x03 -bSq 30 <inbamfn>`` with stdout redirected
    to ``properfn``. ``inbamfn`` is removed as soon as ``runCommand``
    returns.

    Args:
        inbamfn: Input BAM path; deleted by this function.
        properfn: Output BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    parts = [samtools, "view -f 0x03 -bSq 30", inbamfn, ">", properfn]
    runCommand(" ".join(parts))
    os.remove(inbamfn)
def getProperPairs(inbamfn, outbamfn):
    """Write the properly paired reads of a BAM as uncompressed BAM.

    Runs ``samtools view -u -h -f 0x0003 <inbamfn>`` with stdout redirected
    to ``outbamfn`` (flags 0x1 paired + 0x2 proper pair).

    Args:
        inbamfn: Input BAM path.
        outbamfn: Output BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    parts = [samtools, "view -u -h -f 0x0003", inbamfn, ">", outbamfn]
    runCommand(" ".join(parts))
def thinVCF(invcf, outvcf):
    """Thin a VCF with ``vcftools --thin 50 --recode``.

    Args:
        invcf: Path of the input VCF (``--vcf``).
        outvcf: Value passed to vcftools ``--out``.
    """
    _, _, _, _, _, vcftools, _ = params.GetSoftwarePath()
    parts = [vcftools, "--vcf", invcf, "--thin 50 --out", outvcf, "--recode"]
    runCommand(" ".join(parts))
def merge_bams(bamfn1, bamfn2, mergefn):
    """Merge two BAM files with ``sambamba merge`` using four threads.

    Args:
        bamfn1: First input BAM.
        bamfn2: Second input BAM.
        mergefn: Output BAM path.
    """
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    parts = [sambamba, "merge", mergefn, bamfn1, bamfn2, "--nthreads", str(4)]
    runCommand(" ".join(parts))
def extractAllReadsfromROI(inbamfn, bedfn, outbamfn):
    """Intersect a BAM with a BED file using ``bedtools intersect``.

    stdout is redirected to ``outbamfn`` and stderr to ``bedtool.log`` in
    the current working directory.

    Args:
        inbamfn: Input BAM (``-abam``).
        bedfn: BED file (``-b``).
        outbamfn: Output path.
    """
    _, _, _, _, bedtools, _, _ = params.GetSoftwarePath()
    parts = [
        bedtools, "intersect -abam", inbamfn,
        "-b", bedfn,
        ">", outbamfn,
        "2> bedtool.log",
    ]
    runCommand(" ".join(parts))
def merge_final(mergefn, finalbamdir):
    """Merge all BAMs in a directory and remove duplicates from the result.

    Steps, in order:

    1. Change the working directory to ``finalbamdir``.
    2. ``sambamba merge <mergefn> *.bam`` (the ``*.bam`` pattern is passed
       through to ``runCommand`` unexpanded).
    3. ``sambamba markdup --remove-duplicates`` from ``mergefn`` into a
       sibling ``.marked.bam`` file.
    4. Replace ``mergefn`` and ``mergefn + '.bai'`` with the marked BAM and
       its ``.bai``.

    Args:
        mergefn: Path of the merged BAM; also the final output path.
        finalbamdir: Directory holding the BAMs to merge.
    """
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    marked_bam = sub('.bam$', ".marked.bam", mergefn)
    os.chdir(finalbamdir)

    threads = str(4)
    merge_cmd = " ".join([sambamba, "merge", mergefn, "*.bam",
                          "--nthreads", threads])
    dedup_cmd = " ".join([sambamba, "markdup", "--remove-duplicates",
                          "--nthreads", threads, mergefn, marked_bam])

    runCommand(merge_cmd)
    print(" ___ removing merged duplicates near breakpoints ___ ")
    runCommand(dedup_cmd)

    # Drop the pre-dedup BAM and its index, then move the marked pair in.
    for stale in (mergefn, mergefn + '.bai'):
        os.remove(stale)
    os.rename(marked_bam, mergefn)
    os.rename(marked_bam + '.bai', mergefn + '.bai')
def dedupBam(inbamfn, outbamfn):
    """Remove duplicate reads with ``samtools rmdup``.

    Args:
        inbamfn: Input BAM path.
        outbamfn: Output BAM path.
    """
    _, _, _, samtools_path, _, _, _ = params.GetSoftwarePath()
    # The name here was misspelled ``samtool_path``, which is not defined.
    command = " ".join([samtools_path, "rmdup", inbamfn, outbamfn])
    runCommand(command)
def mergeSortBamFiles(mergedBamfn, finalbamdir):
    """Merge or sort the BAMs found under a directory.

    The process working directory is changed to ``finalbamdir``. Symlinked
    BAMs are resolved to their real path and each distinct path counts
    once.

    * More than one BAM: ``sambamba merge`` them into ``mergedBamfn``.
    * Exactly one BAM ending in ``GAIN.bam``: merge it with the file in
      ``params.GetSplitBamsPath()`` whose name is the lower-cased file name
      with ``_gain`` removed.
    * Exactly one BAM ending in ``LOSS.bam``: sort it via ``sortBam`` into
      ``<name>.sort.bam`` and delete the unsorted file.
    * Anything else: nothing happens.

    Args:
        mergedBamfn: Output BAM path for the merge cases.
        finalbamdir: Directory tree searched for BAMs; also passed to
            ``sortBam`` as its temporary directory.
    """
    _, _, _, _, _, _, sambamba = params.GetSoftwarePath()
    os.chdir(finalbamdir)

    found = []
    for root, _subdirs, names in os.walk(finalbamdir):
        for name in fnmatch.filter(names, '*.bam'):
            candidate = os.path.join(root, name)
            if os.path.islink(candidate):
                candidate = os.path.realpath(candidate)
            if candidate not in found:
                found.append(candidate)

    # Last-found file first, each path followed by one space.
    inputs = "".join(bam + " " for bam in reversed(found))

    if len(found) > 1:
        runCommand(" ".join([sambamba, "merge", mergedBamfn, inputs,
                             "--nthreads", str(4)]))
        return
    if len(found) != 1:
        return

    only_bam = str(inputs.strip())
    if only_bam.endswith("GAIN.bam"):
        fname = os.path.split(only_bam)[1]
        inbam_original = '/'.join([params.GetSplitBamsPath(),
                                   sub('_gain', '', fname.lower())])
        runCommand(" ".join([sambamba, "merge", mergedBamfn, inputs,
                             inbam_original, "--nthreads", str(4)]))
    elif only_bam.endswith("LOSS.bam"):
        outbam = sub('.bam$', '.sort.bam', only_bam)
        sortBam(inputs, outbam, finalbamdir)
        os.remove(only_bam)
def removeDupPicard(bamrepairedfinalsortfn, tmpbams_path=''):
    """Remove duplicate reads with Picard ``MarkDuplicates``.

    The output BAM and the metrics file are named by replacing a trailing
    ``.re_paired_final.sorted.bam`` in the input name with
    ``.re_paired_final.marked.bam`` and ``.marked_metrics.txt``.

    Args:
        bamrepairedfinalsortfn: Input BAM path.
        tmpbams_path: Accepted but not used by this function.

    Returns:
        The name of the de-duplicated BAM.
    """
    print(" ___ removing repaired duplicates ___ ")
    marked_bam = sub('.re_paired_final.sorted.bam$',
                     ".re_paired_final.marked.bam",
                     bamrepairedfinalsortfn)
    metrics_txt = sub('.re_paired_final.sorted.bam$',
                      ".marked_metrics.txt",
                      bamrepairedfinalsortfn)
    java, _, picard, _, _, _, _ = params.GetSoftwarePath()
    parts = [
        java, "-Xmx8g -jar", picard, "MarkDuplicates",
        "I=" + bamrepairedfinalsortfn,
        "O=" + marked_bam,
        "M=" + metrics_txt,
        "REMOVE_DUPLICATES=true",
        "ASSUME_SORTED=true",
    ]
    runCommand(" ".join(parts))
    return marked_bam
def splitPairs(inbamfn):
    """Split properly paired reads into first-in-pair and second-in-pair BAMs.

    Two ``samtools view -u -h`` commands are run, each redirected to a file
    named after ``inbamfn`` with its trailing ``.bam`` replaced:

    * ``_read1.bam`` <- ``-f 0x0043`` (paired, proper pair, first in pair)
    * ``_read2.bam`` <- ``-f 0x0083`` (paired, proper pair, second in pair)

    Args:
        inbamfn: Input BAM path.
    """
    _, _, _, samtools, _, _, _ = params.GetSoftwarePath()
    first_out = sub('.bam$', '_read1.bam', inbamfn)
    second_out = sub('.bam$', '_read2.bam', inbamfn)
    first_cmd = " ".join([samtools, "view -u -h -f 0x0043", inbamfn, ">", first_out])
    second_cmd = " ".join([samtools, "view -u -h -f 0x0083", inbamfn, ">", second_out])
    for cmdline in (first_cmd, second_cmd):
        runCommand(cmdline)