Exemplo n.º 1
0
def run_picard():
	"""Convert, sort, de-duplicate and/or index the input alignment with Picard.

	Reads the module-level settings `steps`, `sortby`, `argsmem`, `tmpdir`,
	`joboutdir`, `inprefix` and `outfile`, and rebinds the global `infile`
	to the most recent intermediate BAM as each step completes.

	When no step is requested the input is only converted to `outfile`.
	Otherwise each enabled step writes a new file into `joboutdir`, the file
	it consumed is deleted (unless it is the job's original input), and the
	final intermediate is moved to `outfile`.
	"""
	global infile
	# Rendered by the template engine into the quoted path of the job's
	# original input; that file must never be deleted.
	origfile = {{i.infile | quote}}

	mem = mem2(argsmem, '-jdict')
	mem['-Djava.io.tmpdir'] = tmpdir
	shellpicard = Shell(subcmd = True, dash = '', equal = '=').picard(**mem)

	# Nothing but a format conversion was requested.
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		shellpicard.SamFormatConverter(TMP_DIR = tmpdir, I = infile, O = outfile).run()
		return

	if steps.sort:
		bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
		# Picard's sorting tool is SortSam (there is no "ShortSam" tool).
		shellpicard.SortSam(TMP_DIR = tmpdir, I = infile, O = bamfile, SO = sortby).run()
		if infile != origfile:
			shell.rm(f = True, _ = infile)
		infile = bamfile

	if steps.markdup:
		# The metrics report is not kept.
		mfile = "/dev/null"
		bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
		shellpicard.MarkDuplicates(
			REMOVE_DUPLICATES = 'true' if steps.rmdup else 'false',
			TMP_DIR = tmpdir,
			I = infile,
			O = bamfile,
			M = mfile
		).run()
		if infile != origfile:
			shell.rm(f = True, _ = infile)
		infile = bamfile

	if steps.index:
		shellpicard.BuildBamIndex(TMP_DIR = tmpdir, I = infile, O = outfile + '.bai').run()

	# Publish the last intermediate (and any index sitting next to it).
	if infile != outfile:
		if path.exists(infile + '.bai'):
			shell.mv(infile + '.bai', outfile + '.bai')
		shell.mv(infile, outfile)
Exemplo n.º 2
0
def run_samtools():
	"""Convert, sort, de-duplicate and/or index the input alignment with samtools.

	Reads the module-level settings `steps`, `sortby`, `argsmem`, `nthread`,
	`tmpdir`, `joboutdir`, `inprefix` and `outfile`, and rebinds the global
	`infile` to the most recent intermediate BAM as each step completes.

	When no step is requested the input is only converted to BAM at
	`outfile`. Otherwise each enabled step writes a new file into
	`joboutdir`, the file it consumed is deleted (unless it is the job's
	original input), and the final intermediate is moved to `outfile`.
	"""
	global infile
	# Rendered by the template engine into the quoted path of the job's
	# original input; that file must never be deleted.
	origfile = {{i.infile | quote}}

	# Nothing but a format conversion was requested.
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		subshell.samtools.view(b = True, o = outfile, O = 'bam', _ = infile).run()
		return

	if steps.sort:
		# NOTE(review): confirm mem2(..., 'M') yields a bare number; if it
		# already carries the unit, the suffix below is doubled.
		mem = mem2(argsmem, 'M')
		bamfile = path.join(joboutdir, inprefix + '.sorted.bam')
		subshell.samtools.sort(
			m = mem + 'M',
			n = sortby == 'queryname',
			o = bamfile,
			T = tmpdir,
			O = 'bam',
			_ = infile,
			**{'@': nthread}
		).run()
		if infile != origfile:
			shell.rm(infile, f = True)
		infile = bamfile

	if steps.markdup or steps.rmdup:
		bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
		# rmdup is a samtools sub-command, so it goes through
		# `subshell.samtools` like every other call in this function.
		subshell.samtools.rmdup(infile, bamfile).run()
		if infile != origfile:
			shell.rm(infile, f = True)
		infile = bamfile

	if steps.index:
		# Index the file that exists right now (`infile`); `outfile` is only
		# created by the move below. The command must be .run() to execute.
		subshell.samtools.index(infile, outfile + '.bai').run()

	# Publish the last intermediate (and any index sitting next to it).
	if infile != outfile:
		if path.exists(infile + '.bai'):
			shell.mv(infile + '.bai', outfile + '.bai')
		shell.mv(infile, outfile)
Exemplo n.º 3
0
def run_gatk():
    """Run indel realignment and base-quality recalibration through GATK.

    Four GATK tools are invoked in sequence, each seeded from the matching
    entry of the module-level `params` (an empty Box when absent):

    1. RealignerTargetCreator writes the realignment intervals file.
    2. IndelRealigner writes a realigned BAM using those intervals.
    3. BaseRecalibrator writes a recalibration table from the realigned BAM.
    4. PrintReads writes `outfile` from the realigned BAM, applying the
       recalibration table.

    The realigned intermediate BAM is removed afterwards and the index is
    moved next to `outfile`.
    """
    mem = mem2(argsmem, '-jdict')
    intfile = path.join(joboutdir, outprefix + '.intervals')

    # The temp-dir JVM option is stored as a key; only the keys of `mem` are
    # handed to GATK (as the positional `_` arguments below).
    mem['-Djava.io.tmpdir={}'.format(shell.shquote(tmpdir))] = True
    gatksh = Shell(equal=' ', dash='-').gatk

    # Step 1: find the intervals that need realignment.
    rtcparams = params.get('RealignerTargetCreator', Box())
    rtcparams.T = 'RealignerTargetCreator'
    rtcparams.R = ref
    rtcparams.I = infile
    rtcparams.o = intfile
    rtcparams.nt = nthread
    rtcparams._ = list(mem.keys())
    gatksh(**rtcparams).run()

    # Step 2: realign reads around indels within those intervals.
    bamfileir = path.join(joboutdir, outprefix + '.ir.bam')
    irparams = params.get('IndelRealigner', Box())
    irparams.T = 'IndelRealigner'
    irparams.R = ref
    irparams.I = infile
    irparams.o = bamfileir
    irparams._ = list(mem.keys())
    irparams.targetIntervals = intfile
    gatksh(**irparams).run()

    # Step 3: build the recalibration table from the realigned reads.
    recaltable = path.join(joboutdir, outprefix + '.recaltable')
    brparams = params.get('BaseRecalibrator', Box())
    brparams.T = 'BaseRecalibrator'
    brparams.R = ref
    brparams.I = bamfileir
    brparams.o = recaltable
    brparams.nct = nthread
    brparams._ = list(mem.keys())
    brparams.knownSites = knownSites
    gatksh(**brparams).run()

    # Step 4: write the final BAM.
    prparams = params.get('PrintReads', Box())
    prparams.T = 'PrintReads'
    prparams.R = ref
    prparams.I = bamfileir
    prparams.o = outfile
    # Without BQSR the table computed in step 3 is never applied and the
    # output is just a copy of the realigned BAM. A caller-supplied BQSR
    # value is left untouched.
    if 'BQSR' not in prparams:
        prparams.BQSR = recaltable
    prparams.nct = nthread
    prparams._ = list(mem.keys())
    gatksh(**prparams).run()

    shell.rm(bamfileir, f=True)
    # NOTE(review): this source path is relative to the working directory,
    # whereas the other intermediates are placed in `joboutdir` — confirm
    # where the index is actually written.
    shell.mv(outprefix + '.bai', outfile + '.bai')
Exemplo n.º 4
0
def run_star():
	"""Align the paired input reads with STAR and move the result to `outfile`.

	Fills the module-level `params` with the genome directory, the two input
	files, the command used to read them (chosen from the first file's
	extension), and the output settings, then runs STAR. If STAR produced
	`Aligned.out.<outfmt>` in `outdir`, it is moved to `outfile`.
	"""
	# Pick the reader matching the compression of the first input file.
	if infile1.endswith('.gz'):
		reader = "zcat"
	elif infile1.endswith('.bz2'):
		reader = "bzcat"
	else:
		reader = "cat"

	params.genomeDir         = ref + '.star'
	params.readFilesIn       = [infile1, infile2]
	params.readFilesCommand  = reader
	params.readNameSeparator = '.'
	params.outFileNamePrefix = outdir + '/'
	params.outSAMtype        = [outfmt.upper(), 'Unsorted']

	aligner = Shell(equal = ' ').star(**params)
	aligner.run()

	# Nothing to move when the expected output is absent.
	produced = path.join(outdir, "Aligned.out." + outfmt)
	if not path.isfile(produced):
		return
	shell.mv(produced, outfile)
Exemplo n.º 5
0
def run_strelka():
	"""Configure and run the Strelka workflow, then place its VCF at `outfile`.

	The configuration step is run with the module-level `cfgParams`; the
	generated `runWorkflow.py` in `joboutdir` is then executed with
	`params`. The resulting `genome.S1.vcf.gz` is moved to `outfile + '.gz'`
	and decompressed unless `gz` is truthy.
	"""
	# Step 1: configuration.
	cfgParams.bam            = infile
	cfgParams.referenceFasta = ref
	cfgParams.runDir         = joboutdir
	configure = Shell().strelka(**cfgParams)
	configure.run()

	# Step 2: execute the generated workflow script.
	params.m = 'local'
	params.j = nthread
	# The last character of mem2's result is dropped before it is passed on.
	params.g = mem2(mem, 'G')[:-1]
	workflow = path.join(joboutdir, 'runWorkflow.py')
	Shell({'runWorkflow': workflow}).runWorkflow(**params).run()

	# Step 3: move the result to the requested location.
	gzfile = outfile + '.gz'
	shell.mv(path.join(joboutdir, 'results', 'variants', 'genome.S1.vcf.gz'), gzfile)
	if not gz:
		shell.gunzip(gzfile)
Exemplo n.º 6
0
def vcfIndex(vcf, tabix='tabix'):
    """Make sure a tabix index exists for `vcf` and return the VCF path to use.

    The index is looked for at `<dir>/<name>.vcf.gz.tbi`, where `<name>` is
    the file's base name without its `.vcf`/`.vcf.gz` extension and without
    a leading `[...]` tag. If it is missing:

    - bgzip input: when `vcf` is a symlink, the index sitting next to the
      link target (or next to the fully resolved path) is linked to the
      expected location; otherwise tabix is run on `vcf`.
    - gzip input: the file is decompressed to a temporary VCF, re-compressed
      with bgzip and indexed; the index is moved to the expected location.
    - anything else: the file is bgzipped to `vcf + '.gz'` and indexed.

    Args:
        vcf: Path to the VCF file, compressed or not.
        tabix: Path to (or name of) the tabix executable.

    Returns:
        `vcf` itself, or `vcf + '.gz'` when the input had to be bgzipped.
    """
    import os

    # /path/to/some.vcf    -> some.vcf
    # /path/to/some.vcf.gz -> some.vcf
    bname = path.basename(vcf[:-3] if vcf.endswith('.gz') else vcf)
    # /path/to/some.vcf -> /path/to
    dname = path.dirname(vcf)
    # some.vcf -> some
    fname = path.splitext(bname)[0]
    # some    -> some
    # [1]some -> some
    rname = fname.split(']', 1)[1] if fname.startswith('[') else fname

    expectedIndex = path.join(dname, rname + '.vcf.gz.tbi')
    if path.isfile(expectedIndex):
        return vcf

    tabix_cmd = shell.Shell({'tabix': tabix}).tabix
    gt = gztype(vcf)
    if gt == 'bgzip':
        if path.islink(vcf):
            # os.path has no readlink(); the link target comes from
            # os.readlink(). A relative target is relative to the directory
            # holding the link, not to the current working directory.
            linkvcf = os.readlink(vcf)
            if not path.isabs(linkvcf):
                linkvcf = path.join(dname, linkvcf)
            # Try the direct target first, then the fully resolved path.
            for target in (linkvcf, path.realpath(vcf)):
                if path.isfile(target + '.tbi'):
                    shell.ln_s(target + '.tbi', expectedIndex)
                    return vcf
        # Not a link, or no index found next to the target: build one.
        tabix_cmd(p='vcf', _=vcf).run()
        return vcf

    if gt == 'gzip':
        # NOTE(review): the index is built from a bgzip re-compression while
        # the plain-gzip original is what gets returned — confirm callers
        # can use this index with that file.
        tmpvcf = path.join(dname, bname + '.tmp.vcf')
        shell.gunzip_to(vcf, tmpvcf)
        shell.bgzip(tmpvcf)
        tabix_cmd(p='vcf', _=tmpvcf + '.gz').run()
        shell.mv(tmpvcf + '.gz.tbi', expectedIndex)
        return vcf

    # Uncompressed input: bgzip it next to the original and index that.
    shell.bgzip(vcf, c=True, _stdout=vcf + '.gz')
    tabix_cmd(p='vcf', _=vcf + '.gz').run()
    return vcf + '.gz'
Exemplo n.º 7
0
def run_sambamba():
	"""Convert, sort, de-duplicate and/or index the input alignment with sambamba.

	Reads the module-level settings `steps`, `sortby`, `infmt`, `argsmem`,
	`nthread`, `tmpdir`, `joboutdir`, `inprefix`, `params` and `outfile`,
	and rebinds the global `infile` to the most recent intermediate BAM as
	each step completes.

	When no step is requested the input is only converted to BAM at
	`outfile`. Otherwise SAM input is first converted to BAM, each enabled
	step writes a new file into `joboutdir`, the file it consumed is deleted
	(unless it is the job's original input), and the final intermediate is
	moved to `outfile`.
	"""
	global infile
	# Rendered by the template engine into the quoted path of the job's
	# original input; that file must never be deleted.
	origfile = {{i.infile | quote}}

	# Nothing but a format conversion was requested.
	if not (steps.sort or steps.index or steps.markdup or steps.rmdup):
		subshell.sambamba.view(S = True, f = 'bam', o = outfile, t = nthread, _ = infile).run()
		return

	if infmt == 'sam':
		# The later steps are fed BAM, so convert SAM input up front.
		bamfile = path.join(joboutdir, inprefix + '.s2b.bam')
		subshell.sambamba.view(S = True, f = 'bam', o = bamfile, t = nthread, _ = infile).run()
		infile = bamfile

	if steps.sort:
		if sortby == 'queryname':
			params.n = True
			params.N = True
		bamfile       = path.join(joboutdir, inprefix + '.sorted.bam')
		params.m      = argsmem
		params.tmpdir = tmpdir
		params.o      = bamfile
		params.t      = nthread
		params._      = infile
		subshell.sambamba.sort(**params).run()
		if infile != origfile:
			shell.rm(f = True, _ = infile)
		infile = bamfile

	if steps.markdup:
		bamfile = path.join(joboutdir, inprefix + '.dedup.bam')
		subshell.sambamba.markdup(r = steps.rmdup, t = nthread, tmpdir = tmpdir, _ = [infile, bamfile]).run()
		if infile != origfile:
			shell.rm(f = True, _ = infile)
		infile = bamfile

	if steps.index:
		if path.exists(infile + '.bai'):
			shell.mv(infile + '.bai', outfile + '.bai')
		else:
			# The built command has to be .run() like every other sambamba
			# call here, otherwise no index is produced.
			subshell.sambamba.index(t = nthread, _ = [infile, infile + '.bai']).run()

	# Publish the last intermediate (and any index sitting next to it).
	if infile != outfile:
		if path.exists(infile + '.bai'):
			shell.mv(infile + '.bai', outfile + '.bai')
		shell.mv(infile, outfile)