################################################################################
## Perform RNASeQC analysis
################################################################################
# Every file used by this step is addressed relative to the sample's Tophat
# directory and, apart from the Tophat BAM itself, named after the sample.
tophatDir = args['tophatSampleDir']
sampleName = args['name']
args['tophatBam'] = os.path.join(tophatDir, 'accepted_hits.bam')
args['mdupBam'] = os.path.join(tophatDir, sampleName + '_tophat_mdup.bam')
args['mdupLog'] = os.path.join(tophatDir, sampleName + '_mdup.log')
args['seqcLog'] = os.path.join(tophatDir, sampleName + '_rnaseqc.log')

# Command that runs picard's duplicate marking on the Tophat BAM and writes
# the result to the "mdup" BAM.
markDupOptions = {
    'inBam': args['tophatBam'],
    'outBam': args['mdupBam'],
    'logFile': args['mdupLog'],
    'removeDuplicates': False,
    'picardPath': paths['picard'],
    'javaPath': paths['java'],
    'delete': True,
}
markDupCommand = picard.markDuplicates(**markDupOptions)

# Command that runs RNASeqC on the duplicate-marked BAM. The FASTA path is
# the bowtie2 index prefix with '.fa' appended.
seqcOptions = {
    'inBam': args['mdupBam'],
    'fasta': args['<bwt2genindex>'] + '.fa',
    'gtf': args['<gtf>'],
    'rRNA': args['<rrna>'],
    'outDir': args['tophatSampleDir'],
    'outPrefix': args['name'],
    'seqcPath': paths['rnaseqc'],
    'javaPath': paths['java'],
    'singleEnd': args['--singleend'],
}
seqcCommand = bamQC.RNASeqC(**seqcOptions)

# Join the two commands with '&&' into a single command string.
seqcComboCommand = '{} && {}'.format(markDupCommand, seqcCommand)
pmDict[('samtools', 'modules')], memory=7, stdout=alignLog, stderr=alignLog) # Dedup individual files and store outputs if args['--dedupindv']: # Create file names dedupLog = os.path.join(args['<logfolder>'], 'rg{}.dedup.log'.format(readgroup)) dedupBam = args['<outbam>'][:-4] + '.rg{}.dedup.bam'.format(readgroup) # Create command for marking duplicates dedupCommand = picard.markDuplicates(inBam=alignBam, outBam=dedupBam, logFile=dedupLog + '1', picardPath=pmDict[('picard', 'path')], javaPath='java', removeDuplicates=False, delete=True, memory=20) # Add command to dictionary dedupJobID = jobObject.add(command=dedupCommand, processors=4, modules=pmDict[('picard', 'modules')], memory=6, stdout=dedupLog + '2', stderr=dedupLog + '2', depend=[alignJobID]) # Store output BAM files and job IDs bamList.append(dedupBam) jobList.append(dedupJobID)
################################################################################
## Perform RNASeQC analysis
################################################################################
# Paths for the RNASeQC step, all inside the sample's Tophat directory.
args['tophatBam'] = os.path.join(args['tophatSampleDir'], 'accepted_hits.bam')
args['mdupBam'] = os.path.join(
    args['tophatSampleDir'], args['name'] + '_tophat_mdup.bam')
args['mdupLog'] = os.path.join(
    args['tophatSampleDir'], args['name'] + '_mdup.log')
args['seqcLog'] = os.path.join(
    args['tophatSampleDir'], args['name'] + '_rnaseqc.log')

# Create command to mark duplicates in the Tophat BAM, writing the
# duplicate-marked BAM that RNASeqC reads below.
markDupCommand = picard.markDuplicates(
    inBam=args['tophatBam'],
    outBam=args['mdupBam'],
    logFile=args['mdupLog'],
    removeDuplicates=False,
    picardPath=paths['picard'],
    javaPath=paths['java'],
    delete=True,
)

# Create command to perform RNASeqC on the duplicate-marked BAM. The FASTA
# path is the bowtie2 index prefix with '.fa' appended.
# NOTE(review): the call was left without its closing parenthesis; it is
# closed here directly after the last visible argument -- confirm that no
# further arguments were intended.
seqcCommand = bamQC.RNASeqC(
    inBam=args['mdupBam'],
    fasta=args['<bwt2genindex>'] + '.fa',
    gtf=args['<gtf>'],
    rRNA=args['<rrna>'],
    outDir=args['tophatSampleDir'],
    outPrefix=args['name'],
    seqcPath=paths['rnaseqc'],
    javaPath=paths['java'],
    singleEnd=args['--singleend'],
)
###############################################################################
## Mark duplicates in Tophat output
###############################################################################
# The input BAM, the duplicate-marked BAM and both logs all live in the
# sample's Tophat directory.
tophatDir = args['tophatSampleDir']
args['tophatBam'] = os.path.join(tophatDir, 'accepted_hits.bam')
args['mdupBam'] = os.path.join(tophatDir, args['name'] + '_tophat2_mdup.bam')
args['mdupLog1'] = os.path.join(tophatDir, args['name'] + '_mdup.log1')
args['mdupLog2'] = os.path.join(tophatDir, args['name'] + '_mdup.log2')

# Build the picard command; it is handed the first log file.
markDupOptions = dict(
    inBam=args['tophatBam'],
    outBam=args['mdupBam'],
    logFile=args['mdupLog1'],
    removeDuplicates=False,
    picardPath=pmDict[('picard', 'path')],
    delete=True,
    memory=10,
)
markDupCommand = picard.markDuplicates(**markDupOptions)

# Queue the command behind the Tophat2 alignment job. The job's stdout and
# stderr both go to the second log file.
markDupJobID = slurmJobs.add(
    command=markDupCommand,
    stdout=args['mdupLog2'],
    stderr=args['mdupLog2'],
    depend=[tophat2AlignJobID],
    memory=12,
    modules=pmDict[('picard', 'modules')],
)
###############################################################################
modules=pmDict[('bowtie2', 'modules')] + pmDict[('samtools', 'modules')], depend=trimJobList, stderr=bowtie2Log) # Delete trim files if generated if args['--trim']: rmCommand = 'rm {}'.format(' '.join(read1List + read2List)) slurmJobs.add(rmCommand, depend=qcJobList + [bowtie2JobID]) # Mark duplicates in BAM files dedupLog = args['<logprefix>'] + '.dedup.log' dedupCommand = picard.markDuplicates(inBam=unmarkedBAM, outBam=args['<outbam>'], logFile=dedupLog + '1', picardPath=pmDict[('picard', 'path')], javaPath='java', removeDuplicates=False, delete=True, memory=30) # Add job to queue dedupJobID = slurmJobs.add(command=dedupCommand, processors=6, memory=6, stdout=dedupLog + '2', stderr=dedupLog + '2', modules=pmDict[('picard', 'modules')], depend=[bowtie2JobID]) #Submit jobs slurmJobs.submit(verbose=True, check_sub=False)
'realignlog' : logPrefix + '_realign.log', 'recallog' : logPrefix + '_recal.log' } # Generate command for alignment alignCommand = fastqAlign.bwaMemAlign( index = args['<index>'], outFile = outfiles['initialbam'], read1 = read1[0], read2 = read2[0], bwaPath = paths['bwa'], threads = args['--threads'], sampleName = args['name'], libraryID = args['prefix'], readGroup = 1, platform = 'ILLUMINA', markSecondary = True, check = True, samtoolsPath = paths['samtools'], memory = 2, nameSort = False ) # Mark duplicates using picard dedupCommand = picard.markDuplicates( inBam = outfiles['initialbam'], outBam = outfiles['dedupbam'], logFile = outfiles['deduplog1'], picardPath = paths['picard'], javaPath = paths['java'], removeDuplicates = True, delete = True ) # Perform local realignment realignCommand = gatk.gatkRealign( inBam = outfiles['dedupbam'], outBam = outfiles['realignbam'], inVcf = args['<indelvcf>'], reference = args['<index>'], javaPath = paths['java'], gatkPath = paths['gatk'], delete = True, threads = 4, listFile = outfiles['listfile'] ) recalCommand = gatk.bsqr( inBam = outfiles['realignbam'], outBam = outfiles['recalbam'], inVcf = args['<snpvcf>'], reference = args['<index>'], bsqrTable = outfiles['bsqrfile'], javaPath = paths['java'], gatkPath = paths['gatk'], delete = True )