def run_picard():
    """Process the input alignment file with Picard.

    Depending on the flags in ``steps`` this converts the file format,
    sorts the reads, marks (and, when ``steps.rmdup`` is set, removes)
    duplicates and builds a BAM index.  The global ``infile`` is rebound to
    each intermediate file that gets produced, and the last one is moved
    to ``outfile``.
    """
    global infile
    mem = mem2(argsmem, '-jdict')
    mem['-Djava.io.tmpdir'] = tmpdir
    shellpicard = Shell(subcmd=True, dash='', equal='=').picard(**mem)

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # No processing step requested: a plain format conversion is enough.
        shellpicard.SamFormatConverter(TMP_DIR=tmpdir, I=infile, O=outfile).run()
        return

    if steps.sort:
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        # The Picard tool is called SortSam (the previous "ShortSam" is not
        # a Picard tool name).
        shellpicard.SortSam(TMP_DIR=tmpdir, I=infile, O=bamfile, SO=sortby).run()
        # Only intermediates are deleted, never the original job input.
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.markdup:
        # The duplication metrics file is discarded.
        mfile = "/dev/null"
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        shellpicard.MarkDuplicates(
            REMOVE_DUPLICATES='true' if steps.rmdup else 'false',
            TMP_DIR=tmpdir,
            I=infile,
            O=bamfile,
            M=mfile
        ).run()
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.index:
        shellpicard.BuildBamIndex(TMP_DIR=tmpdir, I=infile, O=outfile + '.bai').run()

    if infile != outfile:
        # Move the last intermediate (and its index, if any) into place.
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)
def run_samtools():
    """Process the input alignment file with samtools.

    Depending on the flags in ``steps`` this converts the input to BAM,
    sorts it, removes duplicates and builds an index.  The global
    ``infile`` is rebound to each intermediate file that gets produced,
    and the last one is moved to ``outfile``.
    """
    global infile

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # No processing step requested: just write the input out as BAM.
        subshell.samtools.view(b=True, o=outfile, O='bam', _=infile).run()
        return

    if steps.sort:
        # NOTE(review): run_strelka strips the last character of
        # mem2(..., 'G'), which suggests mem2 already appends the unit; if
        # so, `mem + 'M'` below doubles it -- confirm against mem2.
        mem = mem2(argsmem, 'M')
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        subshell.samtools.sort(
            m=mem + 'M',
            n=sortby == 'queryname',
            o=bamfile,
            T=tmpdir,
            O='bam',
            _=infile,
            **{'@': nthread}
        ).run()
        # Only intermediates are deleted, never the original job input.
        if infile != {{i.infile | quote}}:
            shell.rm(infile, f=True)
        infile = bamfile

    if steps.markdup or steps.rmdup:
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        # rmdup is a samtools sub-command, so it is dispatched through
        # `subshell.samtools` like view/sort/index.
        subshell.samtools.rmdup(infile, bamfile).run()
        if infile != {{i.infile | quote}}:
            shell.rm(infile, f=True)
        infile = bamfile

    if steps.index:
        # Index the file that currently holds the data (`infile`), and
        # actually execute the command with .run() like every other call.
        subshell.samtools.index(infile, outfile + '.bai').run()

    if infile != outfile:
        # Move the last intermediate (and its index, if any) into place.
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)
def run_gatk():
    """Run indel realignment and base quality recalibration with GATK.

    Four GATK walkers are run in sequence:

    1. ``RealignerTargetCreator`` writes the realignment intervals.
    2. ``IndelRealigner`` realigns ``infile`` around those intervals.
    3. ``BaseRecalibrator`` builds a recalibration table from the
       realigned BAM and ``knownSites``.
    4. ``PrintReads`` writes the realigned BAM to ``outfile``, applying the
       recalibration table.

    Per-walker options are taken from ``params`` (keyed by walker name)
    and then extended with the values computed here.
    """
    mem = mem2(argsmem, '-jdict')
    intfile = path.join(joboutdir, outprefix + '.intervals')
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    gatksh = Shell(equal=' ', dash='-').gatk

    # Step 1: find the intervals that need realignment.
    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(mem.keys())
    gatksh(**rtcparams).run()

    # Step 2: realign reads around the intervals.
    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(mem.keys())
    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    # Step 3: compute the base quality recalibration table.
    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(mem.keys())
    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    # Step 4: write the final BAM with the recalibration applied.
    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    prparams.nct = nthread
    prparams._ = list(mem.keys())
    # Without -BQSR the table computed in step 3 is never used and the
    # output would be an unrecalibrated copy.  A table supplied explicitly
    # through params.PrintReads takes precedence.
    if 'BQSR' not in prparams:
        prparams.BQSR = recaltable
    gatksh(**prparams).run()

    shell.rm(bamfileir, f=True)
    # NOTE(review): the source path has no directory component, unlike the
    # other intermediates which live in joboutdir -- confirm the working
    # directory this is expected to run from.
    shell.mv(outprefix + '.bai', outfile + '.bai')
def run_star():
    """Align the two input read files with STAR.

    Fills in the STAR options on the shared ``params`` object, runs STAR
    with ``outdir`` as the output prefix, and moves the resulting
    ``Aligned.out.<outfmt>`` file to ``outfile`` if STAR produced it.
    """
    params.genomeDir = ref + '.star'
    params.readFilesIn = [infile1, infile2]

    # Choose the command STAR uses to read the inputs, based on the
    # extension of the first read file.
    if infile1.endswith('.gz'):
        reader = "zcat"
    elif infile1.endswith('.bz2'):
        reader = "bzcat"
    else:
        reader = "cat"
    params.readFilesCommand = reader

    params.readNameSeparator = '.'
    params.outFileNamePrefix = outdir + '/'
    params.outSAMtype = [outfmt.upper(), 'Unsorted']

    star = Shell(equal = ' ').star
    star(**params).run()

    aligned = path.join(outdir, "Aligned.out.{}".format(outfmt))
    if not path.isfile(aligned):
        return
    shell.mv(aligned, outfile)
def run_strelka():
    """Call variants with Strelka and place the result at ``outfile``.

    Runs the Strelka configuration command with ``joboutdir`` as run
    directory, executes the ``runWorkflow.py`` found in that directory,
    then moves ``results/variants/genome.S1.vcf.gz`` to ``outfile + '.gz'``
    and gunzips it unless ``gz`` is set.
    """
    # Stage 1: configure the run.
    cfgParams.bam = infile
    cfgParams.referenceFasta = ref
    cfgParams.runDir = joboutdir
    configure = Shell().strelka(**cfgParams)
    configure.run()

    # Stage 2: execute the workflow script from the run directory.
    params.m = 'local'
    params.j = nthread
    # The last character of the mem2() result is dropped before it is
    # passed as the -g value.
    params.g = mem2(mem, 'G')[:-1]
    workflow_script = path.join(joboutdir, 'runWorkflow.py')
    workflow = Shell({'runWorkflow': workflow_script}).runWorkflow(**params)
    workflow.run()

    # Stage 3: put the result where the caller expects it.
    produced = path.join(joboutdir, 'results', 'variants', 'genome.S1.vcf.gz')
    gzipped_out = outfile + '.gz'
    shell.mv(produced, gzipped_out)
    if gz:
        return
    shell.gunzip(gzipped_out)
def vcfIndex(vcf, tabix='tabix'):
    """Make sure a tabix index is available for a VCF file.

    The index is looked for at ``<dir>/<name>.vcf.gz.tbi``, where ``<dir>``
    is the directory of ``vcf`` and ``<name>`` is its base name without a
    trailing ``.gz``, without its last extension and without a leading
    ``[...]`` prefix.  If that file exists nothing is done.  Otherwise,
    depending on what ``gztype`` reports for ``vcf``:

    * ``'bgzip'``: if ``vcf`` is a symlink and an index sits next to its
      target, that index is linked to the expected location; otherwise
      tabix is run on ``vcf``.
    * ``'gzip'``: the file is decompressed to a temporary copy, which is
      bgzipped and indexed; the index is moved to the expected location.
    * anything else: ``vcf`` is bgzipped to ``vcf + '.gz'`` and indexed.

    Args:
        vcf: Path to the VCF file.
        tabix: Name of, or path to, the tabix executable.

    Returns:
        ``vcf``, or ``vcf + '.gz'`` when the input had to be bgzipped.
    """
    import os

    # /path/to/some.vcf    -> some.vcf
    # /path/to/some.vcf.gz -> some.vcf
    if vcf.endswith('.gz'):
        bname = path.basename(vcf[:-3])
    else:
        bname = path.basename(vcf)
    # /path/to/some.vcf -> /path/to
    dname = path.dirname(vcf)
    # some.vcf -> some
    fname = path.splitext(bname)[0]
    # some    -> some
    # [1]some -> some
    rname = fname.split(']', 1)[1] if fname.startswith('[') else fname

    expectedIndex = path.join(dname, rname + '.vcf.gz.tbi')
    if path.isfile(expectedIndex):
        return vcf

    # No index at the expected location: reuse one next to the link target
    # if possible, otherwise create it with tabix.
    tabix_cmd = shell.Shell({'tabix': tabix}).tabix
    gt = gztype(vcf)

    if gt == 'bgzip':
        if path.islink(vcf):
            # os.path has no readlink(); os.readlink() returns the raw
            # link target, which may be relative to the link's directory.
            linkvcf = os.readlink(vcf)
            if not path.isabs(linkvcf):
                linkvcf = path.join(dname, linkvcf)
            if path.isfile(linkvcf + '.tbi'):
                shell.ln_s(linkvcf + '.tbi', expectedIndex)
                return vcf
            # Follow the whole chain of links as a second attempt.
            realvcf = path.realpath(vcf)
            if path.isfile(realvcf + '.tbi'):
                shell.ln_s(realvcf + '.tbi', expectedIndex)
                return vcf
        tabix_cmd(p='vcf', _=vcf).run()
        return vcf

    if gt == 'gzip':
        # NOTE(review): the index is built from the re-compressed temporary
        # copy while the original gzip file is returned, and the temporary
        # '.gz' file is left in place -- confirm this is intended.
        tmpvcf = path.join(dname, bname + '.tmp.vcf')
        shell.gunzip_to(vcf, tmpvcf)
        shell.bgzip(tmpvcf)
        tabix_cmd(p='vcf', _=tmpvcf + '.gz').run()
        shell.mv(tmpvcf + '.gz.tbi', expectedIndex)
        return vcf

    # Not compressed: bgzip to a sibling file and index that.
    shell.bgzip(vcf, c=True, _stdout=vcf + '.gz')
    tabix_cmd(p='vcf', _=vcf + '.gz').run()
    return vcf + '.gz'
def run_sambamba():
    """Process the input alignment file with sambamba.

    Depending on the flags in ``steps`` this converts SAM input to BAM,
    sorts it, marks (and, when ``steps.rmdup`` is set, removes) duplicates
    and builds an index.  The global ``infile`` is rebound to each
    intermediate file that gets produced, and the last one is moved to
    ``outfile``.
    """
    global infile

    if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
        # No processing step requested: just write the input out as BAM.
        subshell.sambamba.view(S=True, f='bam', o=outfile, t=nthread, _=infile).run()
        return

    if infmt == 'sam':
        # SAM input is converted to BAM before any further step runs.
        bamfile = path.join(joboutdir, inprefix + '.s2b.bam')
        subshell.sambamba.view(S=True, f='bam', o=bamfile, t=nthread, _=infile).run()
        infile = bamfile

    if steps.sort:
        if sortby == 'queryname':
            params.n = True
            params.N = True
        bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
        params.m = argsmem
        params.tmpdir = tmpdir
        params.o = bamfile
        params.t = nthread
        params._ = infile
        subshell.sambamba.sort(**params).run()
        # Only intermediates are deleted, never the original job input.
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.markdup:
        bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
        subshell.sambamba.markdup(
            r=steps.rmdup,
            t=nthread,
            tmpdir=tmpdir,
            _=[infile, bamfile]
        ).run()
        if infile != {{i.infile | quote}}:
            shell.rm(f=True, _=infile)
        infile = bamfile

    if steps.index:
        if path.exists(infile + '.bai'):
            # An index is already present next to the current file.
            shell.mv(infile + '.bai', outfile + '.bai')
        else:
            # Execute the command with .run() like every other sambamba
            # call here; the resulting index is moved into place below.
            subshell.sambamba.index(t=nthread, _=[infile, infile + '.bai']).run()

    if infile != outfile:
        # Move the last intermediate (and its index, if any) into place.
        if path.exists(infile + '.bai'):
            shell.mv(infile + '.bai', outfile + '.bai')
        shell.mv(infile, outfile)