trim = args.cutSite, minLength = args.minLength ) # Print trim metrics print '\nTrim Metrics:\n\t%s\n\t%s\n\t%s\n\t%s' %( 'total: ' + str(trimMetrics['total']), 'too short: ' + str(trimMetrics['short']), 'read1 trim: ' + str(trimMetrics['trim1']), 'read2 trim: ' + str(trimMetrics['trim2']) ) # Generate align command alignCommand = fastqAlign.bwaMemAlign( index = args.bwaFasta, outFile = args.nameSortBam, read1 = args.outFastq, bwaPath = args.bwa, threads = str(args.threads), markSecondary = True, check = True, nameSort = True ) # Merge commands and run subprocess.check_output(alignCommand, shell = True, stderr=subprocess.STDOUT) ############################################################################### ## Extract aligned pairs ############################################################################### # extract pairs from alignments alignMetrics, pairMetrics = alignedPair.extractPairs( inBam = args.nameSortBam, pairOut = args.outPairs, minMapQ = args.minMapQ,
itertools.izip_longest(read1List, read2List)): # Modify and format read group and library id readgroup = format(readgroup + 1, '03d') # Create file names for alignment alignLog = os.path.join(args['<logfolder>'], 'rg{}.align.log'.format(readgroup)) alignBam = args['<outbam>'][:-4] + '.rg{}.sort.bam'.format(readgroup) # Generate command for alignment alignCommand = fastqAlign.bwaMemAlign( index=os.path.abspath(args['<index>']), outFile=alignBam, read1=read1, read2=read2, bwaPath=pmDict[('bwa', 'path')], threads=args['<threads>'], sampleName=args['--samplename'], libraryID=args['--samplename'], readGroup=readgroup, platform=args['--platform'], markSecondary=True, check=True, samtoolsPath=pmDict[('samtools', 'path')], memory=6, nameSort=False) # Add job to queue alignJobID = jobObject.add(command=alignCommand, processors=args['<threads>'], modules=pmDict[('bwa', 'modules')] + pmDict[('samtools', 'modules')], memory=7, stdout=alignLog, stderr=alignLog)
'dedupbam' : bamPrefix + '_dedup.bam', 'realignbam' : bamPrefix + '_dedup_realign.bam', 'recalbam' : bamPrefix + '_dedup_realign_recal.bam', 'listfile' : logPrefix + '_target.list', 'bsqrfile' : logPrefix + '_bsqr.grp', 'alignlog' : logPrefix + '_align.log', 'deduplog1' : logPrefix + '_dedup_1.log', 'deduplog2' : logPrefix + '_dedup_2.log', 'realignlog' : logPrefix + '_realign.log', 'recallog' : logPrefix + '_recal.log' } # Generate command for alignment alignCommand = fastqAlign.bwaMemAlign( index = args['<index>'], outFile = outfiles['initialbam'], read1 = read1[0], read2 = read2[0], bwaPath = paths['bwa'], threads = args['--threads'], sampleName = args['name'], libraryID = args['prefix'], readGroup = 1, platform = 'ILLUMINA', markSecondary = True, check = True, samtoolsPath = paths['samtools'], memory = 2, nameSort = False ) # Mark duplicates using picard dedupCommand = picard.markDuplicates( inBam = outfiles['initialbam'], outBam = outfiles['dedupbam'], logFile = outfiles['deduplog1'], picardPath = paths['picard'], javaPath = paths['java'], removeDuplicates = True, delete = True ) # Perform local realignment realignCommand = gatk.gatkRealign( inBam = outfiles['dedupbam'], outBam = outfiles['realignbam'], inVcf = args['<indelvcf>'], reference = args['<index>'], javaPath = paths['java'], gatkPath = paths['gatk'], delete = True, threads = 4, listFile = outfiles['listfile']