def processSample(sampleName, sampleNameDict):
    '''Run the per-library workers of one sample, then merge and filter.

    One process is started for every key of sampleNameDict.  Each worker
    runs processSampleFromLibarary with the key, its value and a shared
    manager list (presumably the worker appends the BAM it produced --
    confirm in processSampleFromLibarary).  Once every worker has been
    joined, the collected BAMs are merged, filtered with samtools and,
    when RMDUP is set, handed to picard_rmdup.
    '''
    # Keep the manager referenced: a manager process is shut down once the
    # Manager object is garbage collected, which would invalidate the proxy.
    manager = Manager()
    libraryBamFileList = manager.list()

    # start one worker per library
    workers = []
    for libraryID, libraryInfo in sampleNameDict.items():
        worker = Process(
            name=libraryID,
            target=processSampleFromLibarary,
            args=(libraryID, libraryInfo, libraryBamFileList),
        )
        worker.start()
        workers.append(worker)

    # block until every library has finished
    for worker in workers:
        worker.join()

    # ---- 2.1 post mapping: all outputs live next to the first BAM ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])

    def scriptFor(step):
        # path of the shell script written for one pipeline step
        return bamFileDir + step + sampleName + '.sh'

    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)

    # merge the BAMs collected from the workers into one file
    mergeCommand = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(mergeCommand, scriptFor('/picard_mergebam_'), run=_run)

    # ---- 3. filter bam ----
    # Only the mapping-quality cut-off (-q 10) is applied here; there is no
    # -f 2 proper-pair filter even though the output keeps the "_properly"
    # name.
    # NOTE(review): -S is samtools' legacy "input is SAM" switch while the
    # input here is the merged BAM -- confirm the installed samtools
    # ignores it.
    filterCommand = 'samtools view -Sb -h -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    NGSTools.writeCommands(filterCommand, scriptFor('/filterBam_'), run=_run)

    # ---- 4. remove duplicates ----
    if RMDUP:
        rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(rmdupCommand, scriptFor('/picard_rmdup_'), run=_run)
        # switch to the ".rmdup.bam" name (presumably the file picard_rmdup
        # writes -- confirm in NGSTools)
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)
def processSample(sampleName, sampleNameDict):
    '''Pipeline for one sample: map libraries, merge, rmdup, call SNPs.

    sampleName     -- name used in output file and script names.
    sampleNameDict -- mapping whose keys identify the libraries of this
                      sample; key and value are both passed to
                      processSampleFromLibarary.

    One process per library is started and joined.  The BAMs collected in
    the shared list are merged with picard; when RMDUP is set duplicates
    are removed, and when MPILEUP is set bcftools call/filter scripts are
    written under <args.outDir>/SNP/<sampleName>.

    Raises IndexError if no worker added a BAM to the shared list.
    '''
    # process communication: list shared with the worker processes
    mng = Manager()
    libraryBamFileList = mng.list()

    # start one worker process per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(
            name=sampleID,
            target=processSampleFromLibarary,
            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList),
        )
        Processer.start()
        record.append(Processer)

    # wait for every worker
    for proc in record:
        proc.join()

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # The samtools filter step that used to write "<sample>_properly.bam" is
    # disabled, so no such file is created.  Downstream steps must therefore
    # start from the merged BAM, not from the never-written filtered one.
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir + '/picard_mergebam_' + sampleName + '.sh', run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir + '/picard_rmdup_' + sampleName + '.sh', run=_run)
        # Derive the de-duplicated name from the BAM that was actually
        # de-duplicated (presumably picard_rmdup writes "<input>.rmdup.bam"
        # -- confirm in NGSTools).  The dot is escaped so only a real
        # ".bam" suffix is rewritten.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', mergedBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_call_' + sampleName + '.sh', run=_run)

        # str.replace() treats "vcf$" as literal text, which left outVcf
        # identical to rawVcf; a regex substitution is needed to rewrite
        # the trailing "vcf" so the filter does not target its own input.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_filter_' + sampleName + '.sh', run=_run)
def processSample(sampleName, sampleNameDict):
    '''Run the per-library workers of one sample, merge, and extract methylation.

    One process is started for every key of sampleNameDict.  Each worker
    runs processSampleFromLibarary with the key, its value and a shared
    manager list (presumably the worker appends the BAM it produced --
    confirm in processSampleFromLibarary).  After all workers are joined
    the collected BAMs are merged into "<sampleName>_final.bam"; RMDUP
    and Methylation_extractor switch the two optional steps on.
    '''
    # Keep the manager referenced: a manager process is shut down once the
    # Manager object is garbage collected, which would invalidate the proxy.
    manager = Manager()
    libraryBamFileList = manager.list()

    # start one worker per library
    workers = []
    for libraryID, libraryInfo in sampleNameDict.items():
        worker = Process(
            name=libraryID,
            target=processSampleFromLibarary,
            args=(libraryID, libraryInfo, libraryBamFileList),
        )
        worker.start()
        workers.append(worker)

    # block until every library has finished
    for worker in workers:
        worker.join()

    def scriptFor(step):
        # step scripts are written under the run's output directory
        return args.outDir + step + sampleName + '.sh'

    # ---- 2.1 post mapping: the merged BAM sits next to the first BAM ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    mergeCommand = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg)
    NGSTools.writeCommands(mergeCommand, scriptFor('/mapping/picard_mergebam_'), run=_run)

    # ---- 3. remove duplicates ----
    if RMDUP:
        rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(rmdupCommand, scriptFor('/mapping/picard_rmdup_'), run=_run)
        # switch to the ".rmdup.bam" name (presumably the file picard_rmdup
        # writes -- confirm in NGSTools)
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    # ---- 4. methylation extraction on the final BAM ----
    if Methylation_extractor:
        extractorPrefix = bamFileDir + '/' + sampleName
        NGSTools.methylation_extractor(finalBamFilePath, extractorPrefix, cfg)
def processSample(sampleName, sampleNameDict):
    '''Run the per-library workers of one sample, then merge and filter.

    One process is started for every key of sampleNameDict.  Each worker
    runs processSampleFromLibarary with the key, its value and a shared
    manager list (presumably the worker appends the BAM it produced --
    confirm in processSampleFromLibarary).  Once every worker has been
    joined, the collected BAMs are merged, filtered with samtools
    (-f 2 -q 10) and, when RMDUP is set, handed to picard_rmdup.
    '''
    # Keep the manager referenced: a manager process is shut down once the
    # Manager object is garbage collected, which would invalidate the proxy.
    manager = Manager()
    libraryBamFileList = manager.list()

    # start one worker per library
    workers = []
    for libraryID, libraryInfo in sampleNameDict.items():
        worker = Process(
            name=libraryID,
            target=processSampleFromLibarary,
            args=(libraryID, libraryInfo, libraryBamFileList),
        )
        worker.start()
        workers.append(worker)

    # block until every library has finished
    for worker in workers:
        worker.join()

    # ---- 2.1 post mapping: all outputs live next to the first BAM ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])

    def scriptFor(step):
        # path of the shell script written for one pipeline step
        return bamFileDir + step + sampleName + '.sh'

    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)

    # merge the BAMs collected from the workers into one file
    mergeCommand = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(mergeCommand, scriptFor('/picard_mergebam_'), run=_run)

    # ---- 3. filter bam ----
    # -f 2 requires the proper-pair flag bit, -q 10 is the mapping-quality
    # cut-off.
    # NOTE(review): -S is samtools' legacy "input is SAM" switch while the
    # input here is the merged BAM -- confirm the installed samtools
    # ignores it.
    filterCommand = 'samtools view -Sb -h -f 2 -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
    NGSTools.writeCommands(filterCommand, scriptFor('/filterBam_'), run=_run)

    # ---- 4. remove duplicates ----
    if RMDUP:
        rmdupCommand = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(rmdupCommand, scriptFor('/picard_rmdup_'), run=_run)
        # switch to the ".rmdup.bam" name (presumably the file picard_rmdup
        # writes -- confirm in NGSTools)
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)
def processSample(sampleName, sampleNameDict):
    '''Map all libraries of a sample in parallel, merge them, extract methylation.

    A worker process running processSampleFromLibarary is launched for
    each key of sampleNameDict and receives the key, its value and a
    shared manager list (presumably filled with the BAM paths the workers
    produce -- confirm in processSampleFromLibarary).  The collected BAMs
    are merged into "<sampleName>_final.bam".  Duplicate removal runs only
    when RMDUP is set, and methylation extraction only when
    Methylation_extractor is set.
    '''
    # Keep the manager referenced: a manager process is shut down once the
    # Manager object is garbage collected, which would invalidate the proxy.
    sharedState = Manager()
    libraryBamFileList = sharedState.list()

    # launch the per-library workers
    running = []
    for libName, libData in sampleNameDict.items():
        job = Process(
            name=libName,
            target=processSampleFromLibarary,
            args=(libName, libData, libraryBamFileList),
        )
        job.start()
        running.append(job)

    # wait for all of them
    for job in running:
        job.join()

    def mappingScript(stem):
        # script file for one step, under <outDir>/mapping
        return args.outDir + stem + sampleName + '.sh'

    # ---- 2.1 post mapping ----
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)

    # merge the BAMs collected from the workers into the final BAM
    NGSTools.writeCommands(
        NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg),
        mappingScript('/mapping/picard_mergebam_'),
        run=_run,
    )

    # ---- 3. remove duplicates ----
    if RMDUP:
        NGSTools.writeCommands(
            NGSTools.picard_rmdup(finalBamFilePath, True, cfg),
            mappingScript('/mapping/picard_rmdup_'),
            run=_run,
        )
        # continue with the ".rmdup.bam" name (presumably the file
        # picard_rmdup writes -- confirm in NGSTools)
        finalBamFilePath = re.sub(r'.bam$', '.rmdup.bam', finalBamFilePath)

    # ---- 4. methylation extraction on the final BAM ----
    if Methylation_extractor:
        NGSTools.methylation_extractor(finalBamFilePath, bamFileDir + '/' + sampleName, cfg)
# GATK HC (in RNA-seq mode) call variations outdir = os.path.join(args.outDir, 'variation') NGSTools._mkdir(outdir) for condition in set(finalBam.values()): bams = [] for bam in finalBam.keys(): if finalBam[bam] == condition: bams.append(bam) out = os.path.join(outdir, condition) NGSTools._mkdir(out) mergedBam = '%s/%s.bam' % (out, condition) script = NGSTools.picard_merge(bams, mergedBam, cfg) NGSTools.writeCommands(script, '%s/picard_merge_%s.sh' % (out, condition), False) script = NGSTools.GATK_HC(mergedBam, cfg, out, condition) NGSTools.writeCommands(script, '%s/gatk_HC_%s.sh' % (out, condition), False) rawVcf = '%s/%s.raw.snps.indels.vcf' % (out, condition) fltVcf = '%s/%s.flt.snps.indels.vcf' % (out, condition) script = NGSTools.GATK_filter(mergedBam, rawVcf, fltVcf, cfg) NGSTools.writeCommands(script,
# GATK HC (in RNA-seq mode) variant calling, one merged BAM per condition.
# finalBam is read here as a mapping of BAM path -> condition label.
outdir = os.path.join(args.outDir, 'variation')
NGSTools._mkdir(outdir)

for condition in set(finalBam.values()):
    # all BAMs whose entry in finalBam carries this condition
    bams = [bam for bam, label in finalBam.items() if label == condition]

    # per-condition working directory
    out = os.path.join(outdir, condition)
    NGSTools._mkdir(out)

    # file names used by the three steps below
    mergedBam = '%s/%s.bam' % (out, condition)
    rawVcf = '%s/%s.raw.snps.indels.vcf' % (out, condition)
    fltVcf = '%s/%s.flt.snps.indels.vcf' % (out, condition)

    # step 1: merge the condition's BAMs
    script = NGSTools.picard_merge(bams, mergedBam, cfg)
    NGSTools.writeCommands(script, '%s/picard_merge_%s.sh' % (out, condition), False)

    # step 2: GATK HC on the merged BAM
    script = NGSTools.GATK_HC(mergedBam, cfg, out, condition)
    NGSTools.writeCommands(script, '%s/gatk_HC_%s.sh' % (out, condition), False)

    # step 3: filter the raw calls into the "flt" VCF
    script = NGSTools.GATK_filter(mergedBam, rawVcf, fltVcf, cfg)
    NGSTools.writeCommands(script, '%s/gatk_filter_%s.sh' % (out, condition), False)
def processSample(sampleName, sampleNameDict):
    '''Pipeline for one sample: map libraries, merge, rmdup, call SNPs.

    sampleName     -- name used in output file and script names.
    sampleNameDict -- mapping whose keys identify the libraries of this
                      sample; key and value are both passed to
                      processSampleFromLibarary.

    One process per library is started and joined.  The BAMs collected in
    the shared list are merged with picard; when RMDUP is set duplicates
    are removed, and when MPILEUP is set bcftools call/filter scripts are
    written under <args.outDir>/SNP/<sampleName>.

    Raises IndexError if no worker added a BAM to the shared list.
    '''
    # process communication: list shared with the worker processes
    mng = Manager()
    libraryBamFileList = mng.list()

    # start one worker process per library
    record = []
    for sampleID in sampleNameDict:
        Processer = Process(
            name=sampleID,
            target=processSampleFromLibarary,
            args=(sampleID, sampleNameDict[sampleID], libraryBamFileList),
        )
        Processer.start()
        record.append(Processer)

    # wait for every worker
    for proc in record:
        proc.join()

    ###################### 2.1 post mapping ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # No filtering step is active, so the merged BAM is also the final BAM
    # unless duplicates are removed below.
    finalBamFilePath = mergedBamFilePath

    # merge the bam from each lane to one final bam
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command, bamFileDir + '/picard_mergebam_' + sampleName + '.sh', run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(command, bamFileDir + '/picard_rmdup_' + sampleName + '.sh', run=_run)
        # Continue with the ".rmdup.bam" name (presumably the file
        # picard_rmdup writes -- confirm in NGSTools).  The dot is escaped
        # so only a real ".bam" suffix is rewritten.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_call_' + sampleName + '.sh', run=_run)

        # str.replace() treats "vcf$" as literal text, which left outVcf
        # identical to rawVcf; a regex substitution is needed to rewrite
        # the trailing "vcf" so the filter does not target its own input.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command, SNP_out + '/bcftools_filter_' + sampleName + '.sh', run=_run)