示例#1
0
    trim = args.cutSite,
    minLength = args.minLength
)
# Report the trimming counters held in trimMetrics ('total', 'short', 'trim1'
# and 'trim2'). The formatted text is handed to print as one parenthesised
# value, so the statement is valid both as a Python 2 print statement and as
# a Python 3 print() call and emits the same text in either case.
print('\nTrim Metrics:\n\t%s\n\t%s\n\t%s\n\t%s' % (
    'total: ' + str(trimMetrics['total']),
    'too short: ' + str(trimMetrics['short']),
    'read1 trim: ' + str(trimMetrics['trim1']),
    'read2 trim: ' + str(trimMetrics['trim2'])
))
# Build the bwa mem invocation for the trimmed reads. Only read1 is supplied
# and a name-sorted BAM is requested.
# NOTE(review): the return value is presumably a shell command string, since
# it is executed with shell=True below - confirm against fastqAlign.
alignCommand = fastqAlign.bwaMemAlign(
    index=args.bwaFasta,
    outFile=args.nameSortBam,
    read1=args.outFastq,
    bwaPath=args.bwa,
    threads=str(args.threads),
    markSecondary=True,
    check=True,
    nameSort=True,
)
# Execute the command through the shell with stderr folded into stdout. The
# captured output is discarded; check_output raises CalledProcessError if the
# command exits with a non-zero status.
subprocess.check_output(
    alignCommand,
    stderr=subprocess.STDOUT,
    shell=True,
)

###############################################################################
## Extract aligned pairs
###############################################################################
# extract pairs from alignments
alignMetrics, pairMetrics = alignedPair.extractPairs(
    inBam = args.nameSortBam,
    pairOut = args.outPairs,
    minMapQ = args.minMapQ,
示例#2
0
     itertools.izip_longest(read1List, read2List)):
 # Shift the read group number by one and zero-pad it to three digits; the
 # resulting label is reused in the file names and the bwa read group below
 readgroup = '{:03d}'.format(readgroup + 1)
 # Per-read-group output BAM: the last four characters of <outbam>
 # (presumably the '.bam' suffix - confirm) are replaced by the group tag
 alignBam = args['<outbam>'][:-4] + '.rg{}.sort.bam'.format(readgroup)
 # Per-read-group log file inside the log folder
 alignLog = os.path.join(
     args['<logfolder>'], 'rg{}.align.log'.format(readgroup))
 # Build the bwa mem command for this read group; the sample name is used
 # for both the sample and the library identifier
 alignCommand = fastqAlign.bwaMemAlign(
     index=os.path.abspath(args['<index>']),
     outFile=alignBam,
     read1=read1,
     read2=read2,
     bwaPath=pmDict[('bwa', 'path')],
     threads=args['<threads>'],
     sampleName=args['--samplename'],
     libraryID=args['--samplename'],
     readGroup=readgroup,
     platform=args['--platform'],
     markSecondary=True,
     check=True,
     samtoolsPath=pmDict[('samtools', 'path')],
     memory=6,
     nameSort=False,
 )
 # Queue the alignment job with the bwa and samtools modules combined;
 # stdout and stderr are both sent to the same per-read-group log
 alignModules = pmDict[('bwa', 'modules')] + pmDict[('samtools', 'modules')]
 alignJobID = jobObject.add(
     command=alignCommand,
     processors=args['<threads>'],
     modules=alignModules,
     memory=7,
     stdout=alignLog,
     stderr=alignLog,
 )
    'dedupbam' : bamPrefix + '_dedup.bam',
    'realignbam' : bamPrefix + '_dedup_realign.bam',
    'recalbam' : bamPrefix + '_dedup_realign_recal.bam',
    'listfile' : logPrefix + '_target.list',
    'bsqrfile' : logPrefix + '_bsqr.grp',
    'alignlog' : logPrefix + '_align.log',
    'deduplog1' : logPrefix + '_dedup_1.log',
    'deduplog2' : logPrefix + '_dedup_2.log',
    'realignlog' : logPrefix + '_realign.log',
    'recallog' : logPrefix + '_recal.log'
}
# Build the bwa mem command for the first read1/read2 entries, written to the
# initial BAM with a fixed read group of 1 on the ILLUMINA platform and
# without name sorting
alignCommand = fastqAlign.bwaMemAlign(
    index=args['<index>'],
    outFile=outfiles['initialbam'],
    read1=read1[0],
    read2=read2[0],
    bwaPath=paths['bwa'],
    threads=args['--threads'],
    sampleName=args['name'],
    libraryID=args['prefix'],
    readGroup=1,
    platform='ILLUMINA',
    markSecondary=True,
    check=True,
    samtoolsPath=paths['samtools'],
    memory=2,
    nameSort=False,
)
# Build the picard duplicate-marking command: reads the initial BAM, writes
# the dedup BAM and logs to the first dedup log, with duplicate removal on.
# NOTE(review): delete=True presumably removes the input BAM afterwards -
# confirm against picard.markDuplicates.
dedupCommand = picard.markDuplicates(
    inBam=outfiles['initialbam'],
    outBam=outfiles['dedupbam'],
    logFile=outfiles['deduplog1'],
    picardPath=paths['picard'],
    javaPath=paths['java'],
    removeDuplicates=True,
    delete=True,
)
# Perform local realignment
realignCommand = gatk.gatkRealign(
    inBam = outfiles['dedupbam'], outBam = outfiles['realignbam'],
    inVcf = args['<indelvcf>'], reference = args['<index>'],
    javaPath = paths['java'], gatkPath = paths['gatk'], delete = True,
    threads = 4, listFile = outfiles['listfile']