Пример #1
0
def processSample(sampleName, sampleNameDict):
    '''Run the per-sample pipeline: libraries -> merge -> filter -> rmdup.

    Every key of ``sampleNameDict`` is handed, together with its value and a
    manager-backed list shared by all workers, to
    ``processSampleFromLibarary`` running in its own process.  Once all
    workers have been joined, the BAM paths found in the shared list are
    merged into ``<sampleName>_merged.bam``, filtered with
    ``samtools view -q 10`` into ``<sampleName>_properly.bam`` and, when the
    module-level ``RMDUP`` flag is set, passed to ``NGSTools.picard_rmdup``.

    Each step is written as a shell script next to the BAM files through
    ``NGSTools.writeCommands``; the module-level ``_run`` value is forwarded
    as its ``run`` argument.

    Args:
        sampleName: Sample label used in the BAM and script file names.
        sampleNameDict: Mapping keyed by library ID; each key and its value
            are forwarded to ``processSampleFromLibarary``.

    Raises:
        IndexError: If the shared BAM list is still empty after all workers
            have finished (same exception type as before, clearer message).
    '''

    # Shared, process-safe list; presumably each worker appends the BAM path
    # it produced -- verify against processSampleFromLibarary.
    mng = Manager()
    libraryBamFileList = mng.list()

    # One worker process per library.
    workers = []
    for sampleID in sampleNameDict:
        worker = Process(name=sampleID,
                         target=processSampleFromLibarary,
                         args=(
                             sampleID,
                             sampleNameDict[sampleID],
                             libraryBamFileList,
                         ))
        worker.start()
        workers.append(worker)

    # Block until every library has been processed.
    for worker in workers:
        worker.join()

    # Fail with an explicit message instead of an anonymous IndexError from
    # the [0] lookup below.
    if len(libraryBamFileList) == 0:
        raise IndexError("library BAM list is empty for sample %r; "
                         "nothing to merge" % (sampleName,))

    ######################	2.1 post mapping  ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

    # Merge the per-library BAMs into one BAM.
    # os.path.join (rather than '+') keeps the script path relative when
    # bamFileDir is empty instead of pointing at the filesystem root.
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command,
                           os.path.join(bamFileDir,
                                        'picard_mergebam_%s.sh' % sampleName),
                           run=_run)

    ###################### 3. filter bam  ######################
    # MAPQ >= 10 filter; the stricter "-f 2" variant is intentionally not
    # used in this version.
    command = 'samtools view -Sb -h -q 10 %s > %s ' % (mergedBamFilePath,
                                                       finalBamFilePath)
    NGSTools.writeCommands(command,
                           os.path.join(bamFileDir,
                                        'filterBam_%s.sh' % sampleName),
                           run=_run)

    ###################### 4. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(command,
                               os.path.join(bamFileDir,
                                            'picard_rmdup_%s.sh' % sampleName),
                               run=_run)

        # Escape the dot so only a literal ".bam" suffix is rewritten.
        # NOTE(review): the updated path is not used further in the visible
        # code -- confirm whether it should be returned.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)
Пример #2
0
def processSample(sampleName, sampleNameDict):
	'''Run the per-sample pipeline: libraries -> merge -> rmdup -> SNP calling.

	Every key of ``sampleNameDict`` is handed, together with its value and a
	manager-backed list shared by all workers, to
	``processSampleFromLibarary`` running in its own process.  Once all
	workers have been joined, the BAM paths found in the shared list are
	merged into ``<sampleName>_merged.bam``.  When the module-level ``RMDUP``
	flag is set the merged BAM goes through ``NGSTools.picard_rmdup``; when
	``MPILEUP`` is set, variants are called and filtered with bcftools under
	``<args.outDir>/SNP/<sampleName>``.

	Each step is written as a shell script through
	``NGSTools.writeCommands``; the module-level ``_run`` value is forwarded
	as its ``run`` argument.

	Args:
		sampleName: Sample label used in the BAM, VCF and script file names.
		sampleNameDict: Mapping keyed by library ID; each key and its value
			are forwarded to ``processSampleFromLibarary``.

	Raises:
		IndexError: If the shared BAM list is still empty after all workers
			have finished (same exception type as before, clearer message).
	'''

	# Shared, process-safe list; presumably each worker appends the BAM path
	# it produced -- verify against processSampleFromLibarary.
	mng = Manager()
	libraryBamFileList = mng.list()

	# One worker process per library.
	workers = []
	for sampleID in sampleNameDict:
		worker = Process(name=sampleID,
						 target=processSampleFromLibarary,
						 args=(sampleID, sampleNameDict[sampleID], libraryBamFileList, ))
		worker.start()
		workers.append(worker)

	# Block until every library has been processed.
	for worker in workers:
		worker.join()

	# Fail with an explicit message instead of an anonymous IndexError from
	# the [0] lookup below.
	if len(libraryBamFileList) == 0:
		raise IndexError("library BAM list is empty for sample %r; "
						 "nothing to merge" % (sampleName,))

	######################	2.1 post mapping  ####################
	bamFileDir = os.path.dirname(libraryBamFileList[0])
	mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
	# The samtools filter step (2.2) is disabled in this version, so no
	# "<sample>_properly.bam" is ever written; the merged BAM is the final BAM.
	finalBamFilePath = mergedBamFilePath

	# Merge the BAM from each lane into one BAM.
	# os.path.join (rather than '+') keeps the script path relative when
	# bamFileDir is empty instead of pointing at the filesystem root.
	command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
	NGSTools.writeCommands(command, os.path.join(bamFileDir, 'picard_mergebam_%s.sh' % sampleName), run=_run)

	###################### 3. remove duplicates ##################
	if RMDUP:
		command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
		NGSTools.writeCommands(command, os.path.join(bamFileDir, 'picard_rmdup_%s.sh' % sampleName), run=_run)

		# Escape the dot so only a literal ".bam" suffix is rewritten.
		# Assumes picard_rmdup writes "<input>.rmdup.bam" -- TODO confirm.
		finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

	####################### 4. call SNP ######################
	if MPILEUP:
		SNP_out = os.path.join(args.outDir, "SNP", sampleName)
		NGSTools._mkdir(SNP_out)

		command, rawVcf = NGSTools.bcftools_call(finalBamFilePath, cfg, outdir=SNP_out, sampleName=sampleName)
		NGSTools.writeCommands(command, os.path.join(SNP_out, 'bcftools_call_%s.sh' % sampleName), run=_run)

		# str.replace() treats "vcf$" as a literal substring and never matched,
		# which made the filtered VCF path identical to the raw one.  Use a
		# real end-anchored regex instead.
		outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
		command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
		NGSTools.writeCommands(command, os.path.join(SNP_out, 'bcftools_filter_%s.sh' % sampleName), run=_run)
Пример #3
0
def processSample(sampleName, sampleNameDict):
    '''Run the per-sample pipeline: libraries -> merge -> rmdup -> methylation.

    Every key of ``sampleNameDict`` is handed, together with its value and a
    manager-backed list shared by all workers, to
    ``processSampleFromLibarary`` running in its own process.  Once all
    workers have been joined, the BAM paths found in the shared list are
    merged into ``<sampleName>_final.bam``.  When the module-level ``RMDUP``
    flag is set the merged BAM goes through ``NGSTools.picard_rmdup``; when
    ``Methylation_extractor`` is set, ``NGSTools.methylation_extractor`` is
    called on the final BAM.

    The merge and rmdup steps are written as shell scripts under
    ``<args.outDir>/mapping`` through ``NGSTools.writeCommands``; the
    module-level ``_run`` value is forwarded as its ``run`` argument.

    Args:
        sampleName: Sample label used in the BAM and script file names.
        sampleNameDict: Mapping keyed by library ID; each key and its value
            are forwarded to ``processSampleFromLibarary``.

    Raises:
        IndexError: If the shared BAM list is still empty after all workers
            have finished (same exception type as before, clearer message).
    '''

    # Shared, process-safe list; presumably each worker appends the BAM path
    # it produced -- verify against processSampleFromLibarary.
    mng = Manager()
    libraryBamFileList = mng.list()

    # One worker process per library.
    workers = []
    for sampleID in sampleNameDict:
        worker = Process(name=sampleID,
                         target=processSampleFromLibarary,
                         args=(
                             sampleID,
                             sampleNameDict[sampleID],
                             libraryBamFileList,
                         ))
        worker.start()
        workers.append(worker)

    # Block until every library has been processed.
    for worker in workers:
        worker.join()

    # Fail with an explicit message instead of an anonymous IndexError from
    # the [0] lookup below.
    if len(libraryBamFileList) == 0:
        raise IndexError("library BAM list is empty for sample %r; "
                         "nothing to merge" % (sampleName,))

    ######################	2.1 post mapping  ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)
    # os.path.join keeps the script directory relative when args.outDir is
    # empty instead of resolving to "/mapping".
    scriptDir = os.path.join(args.outDir, 'mapping')

    # Merge the per-library BAMs into one final BAM.
    command = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg)
    NGSTools.writeCommands(command,
                           os.path.join(scriptDir,
                                        'picard_mergebam_%s.sh' % sampleName),
                           run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
        NGSTools.writeCommands(command,
                               os.path.join(scriptDir,
                                            'picard_rmdup_%s.sh' % sampleName),
                               run=_run)

        # Escape the dot so only a literal ".bam" suffix is rewritten.
        # Assumes picard_rmdup writes "<input>.rmdup.bam" -- TODO confirm.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ##################### 4. DMR calling ##################
    if Methylation_extractor:
        NGSTools.methylation_extractor(finalBamFilePath,
                                       os.path.join(bamFileDir, sampleName),
                                       cfg)
Пример #4
0
def processSample(sampleName, sampleNameDict):
	'''Run the per-sample pipeline: libraries -> merge -> filter -> rmdup.

	Every key of ``sampleNameDict`` is handed, together with its value and a
	manager-backed list shared by all workers, to
	``processSampleFromLibarary`` running in its own process.  Once all
	workers have been joined, the BAM paths found in the shared list are
	merged into ``<sampleName>_merged.bam``, filtered with
	``samtools view -f 2 -q 10`` into ``<sampleName>_properly.bam`` and, when
	the module-level ``RMDUP`` flag is set, passed to
	``NGSTools.picard_rmdup``.

	Each step is written as a shell script next to the BAM files through
	``NGSTools.writeCommands``; the module-level ``_run`` value is forwarded
	as its ``run`` argument.

	Args:
		sampleName: Sample label used in the BAM and script file names.
		sampleNameDict: Mapping keyed by library ID; each key and its value
			are forwarded to ``processSampleFromLibarary``.

	Raises:
		IndexError: If the shared BAM list is still empty after all workers
			have finished (same exception type as before, clearer message).
	'''

	# Shared, process-safe list; presumably each worker appends the BAM path
	# it produced -- verify against processSampleFromLibarary.
	mng = Manager()
	libraryBamFileList = mng.list()

	# One worker process per library.
	workers = []
	for sampleID in sampleNameDict:
		worker = Process(name=sampleID,
						 target=processSampleFromLibarary,
						 args=(sampleID, sampleNameDict[sampleID], libraryBamFileList, ))
		worker.start()
		workers.append(worker)

	# Block until every library has been processed.
	for worker in workers:
		worker.join()

	# Fail with an explicit message instead of an anonymous IndexError from
	# the [0] lookup below.
	if len(libraryBamFileList) == 0:
		raise IndexError("library BAM list is empty for sample %r; "
						 "nothing to merge" % (sampleName,))

	######################	2.1 post mapping  ####################
	bamFileDir = os.path.dirname(libraryBamFileList[0])
	finalBamFilePath = os.path.join(bamFileDir, "%s_properly.bam" % sampleName)
	mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)

	# Merge the per-library BAMs into one BAM.
	# os.path.join (rather than '+') keeps the script path relative when
	# bamFileDir is empty instead of pointing at the filesystem root.
	command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
	NGSTools.writeCommands(command, os.path.join(bamFileDir, 'picard_mergebam_%s.sh' % sampleName), run=_run)

	###################### 3. filter bam  ######################
	# Keep reads with SAM flag 0x2 set (-f 2) and MAPQ >= 10 (-q 10).
	command = 'samtools view -Sb -h -f 2 -q 10 %s > %s ' % (mergedBamFilePath, finalBamFilePath)
	NGSTools.writeCommands(command, os.path.join(bamFileDir, 'filterBam_%s.sh' % sampleName), run=_run)

	###################### 4. remove duplicates ##################
	if RMDUP:
		command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
		NGSTools.writeCommands(command, os.path.join(bamFileDir, 'picard_rmdup_%s.sh' % sampleName), run=_run)

		# Escape the dot so only a literal ".bam" suffix is rewritten.
		# NOTE(review): the updated path is not used further in the visible
		# code -- confirm whether it should be returned.
		finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)
Пример #5
0
def processSample(sampleName, sampleNameDict):
	'''Run the per-sample pipeline: libraries -> merge -> rmdup -> methylation.

	Every key of ``sampleNameDict`` is handed, together with its value and a
	manager-backed list shared by all workers, to
	``processSampleFromLibarary`` running in its own process.  Once all
	workers have been joined, the BAM paths found in the shared list are
	merged into ``<sampleName>_final.bam``.  When the module-level ``RMDUP``
	flag is set the merged BAM goes through ``NGSTools.picard_rmdup``; when
	``Methylation_extractor`` is set, ``NGSTools.methylation_extractor`` is
	called on the final BAM.

	The merge and rmdup steps are written as shell scripts under
	``<args.outDir>/mapping`` through ``NGSTools.writeCommands``; the
	module-level ``_run`` value is forwarded as its ``run`` argument.

	Args:
		sampleName: Sample label used in the BAM and script file names.
		sampleNameDict: Mapping keyed by library ID; each key and its value
			are forwarded to ``processSampleFromLibarary``.

	Raises:
		IndexError: If the shared BAM list is still empty after all workers
			have finished (same exception type as before, clearer message).
	'''

	# Shared, process-safe list; presumably each worker appends the BAM path
	# it produced -- verify against processSampleFromLibarary.
	mng = Manager()
	libraryBamFileList = mng.list()

	# One worker process per library.
	workers = []
	for sampleID in sampleNameDict:
		worker = Process(name=sampleID,
						 target=processSampleFromLibarary,
						 args=(sampleID, sampleNameDict[sampleID], libraryBamFileList, ))
		worker.start()
		workers.append(worker)

	# Block until every library has been processed.
	for worker in workers:
		worker.join()

	# Fail with an explicit message instead of an anonymous IndexError from
	# the [0] lookup below.
	if len(libraryBamFileList) == 0:
		raise IndexError("library BAM list is empty for sample %r; "
						 "nothing to merge" % (sampleName,))

	######################	2.1 post mapping  ####################
	bamFileDir = os.path.dirname(libraryBamFileList[0])
	finalBamFilePath = os.path.join(bamFileDir, "%s_final.bam" % sampleName)
	# os.path.join keeps the script directory relative when args.outDir is
	# empty instead of resolving to "/mapping".
	scriptDir = os.path.join(args.outDir, 'mapping')

	# Merge the per-library BAMs into one final BAM.
	command = NGSTools.picard_merge(libraryBamFileList, finalBamFilePath, cfg)
	NGSTools.writeCommands(command, os.path.join(scriptDir, 'picard_mergebam_%s.sh' % sampleName), run=_run)

	###################### 3. remove duplicates ##################
	if RMDUP:
		command = NGSTools.picard_rmdup(finalBamFilePath, True, cfg)
		NGSTools.writeCommands(command, os.path.join(scriptDir, 'picard_rmdup_%s.sh' % sampleName), run=_run)

		# Escape the dot so only a literal ".bam" suffix is rewritten.
		# Assumes picard_rmdup writes "<input>.rmdup.bam" -- TODO confirm.
		finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

	##################### 4. DMR calling ##################
	if Methylation_extractor:
		NGSTools.methylation_extractor(finalBamFilePath, os.path.join(bamFileDir, sampleName), cfg)
Пример #6
0
    # GATK HC (in RNA-seq mode) call variations
    outdir = os.path.join(args.outDir, 'variation')
    NGSTools._mkdir(outdir)

    for condition in set(finalBam.values()):
        bams = []

        for bam in finalBam.keys():
            if finalBam[bam] == condition:
                bams.append(bam)

        out = os.path.join(outdir, condition)
        NGSTools._mkdir(out)

        mergedBam = '%s/%s.bam' % (out, condition)
        script = NGSTools.picard_merge(bams, mergedBam, cfg)

        NGSTools.writeCommands(script,
                               '%s/picard_merge_%s.sh' % (out, condition),
                               False)

        script = NGSTools.GATK_HC(mergedBam, cfg, out, condition)

        NGSTools.writeCommands(script, '%s/gatk_HC_%s.sh' % (out, condition),
                               False)

        rawVcf = '%s/%s.raw.snps.indels.vcf' % (out, condition)
        fltVcf = '%s/%s.flt.snps.indels.vcf' % (out, condition)
        script = NGSTools.GATK_filter(mergedBam, rawVcf, fltVcf, cfg)

        NGSTools.writeCommands(script,
Пример #7
0
	# Call variants per condition with GATK HaplotypeCaller (RNA-seq mode).
	# For every distinct condition: merge its BAMs, write a HaplotypeCaller
	# script and a variant-filter script. All output goes under
	# <args.outDir>/variation/<condition>.
	outdir = os.path.join(args.outDir, 'variation')
	NGSTools._mkdir(outdir)

	# finalBam is used as a mapping of BAM path -> condition label: its values
	# are the conditions and its keys are the BAMs collected per condition.
	for condition in set(finalBam.values()):
		bams = []

		# Collect every BAM that belongs to the current condition.
		for bam in finalBam.keys():
			if finalBam[bam] == condition:
				bams.append(bam)

		# One sub-directory per condition.
		out = os.path.join(outdir, condition)
		NGSTools._mkdir(out)

		# Step 1: merge the condition's BAMs into <out>/<condition>.bam.
		mergedBam = '%s/%s.bam' % (out, condition)
		script = NGSTools.picard_merge(bams, mergedBam, cfg)

		# NOTE(review): the third positional argument (False) is presumably the
		# "run" flag, i.e. the script is only written, not executed -- confirm
		# against NGSTools.writeCommands.
		NGSTools.writeCommands(script, '%s/picard_merge_%s.sh' % (out, condition), False)


		# Step 2: HaplotypeCaller on the merged BAM.
		script = NGSTools.GATK_HC(mergedBam, cfg, out, condition)

		NGSTools.writeCommands(script, '%s/gatk_HC_%s.sh' % (out, condition), False)	


		# Step 3: filter the raw calls.
		# NOTE(review): rawVcf is assumed to be the file GATK_HC writes -- the
		# name is rebuilt here rather than returned by GATK_HC; verify they agree.
		rawVcf = '%s/%s.raw.snps.indels.vcf' % (out, condition)
		fltVcf = '%s/%s.flt.snps.indels.vcf' % (out, condition)
		script = NGSTools.GATK_filter(mergedBam, rawVcf, fltVcf, cfg)

		NGSTools.writeCommands(script, '%s/gatk_filter_%s.sh' % (out, condition), False) 
Пример #8
0
def processSample(sampleName, sampleNameDict):
    '''Run the per-sample pipeline: libraries -> merge -> rmdup -> SNP calling.

    Every key of ``sampleNameDict`` is handed, together with its value and a
    manager-backed list shared by all workers, to
    ``processSampleFromLibarary`` running in its own process.  Once all
    workers have been joined, the BAM paths found in the shared list are
    merged into ``<sampleName>_merged.bam``.  When the module-level ``RMDUP``
    flag is set the merged BAM goes through ``NGSTools.picard_rmdup``; when
    ``MPILEUP`` is set, variants are called and filtered with bcftools under
    ``<args.outDir>/SNP/<sampleName>``.

    Each step is written as a shell script through
    ``NGSTools.writeCommands``; the module-level ``_run`` value is forwarded
    as its ``run`` argument.

    Args:
        sampleName: Sample label used in the BAM, VCF and script file names.
        sampleNameDict: Mapping keyed by library ID; each key and its value
            are forwarded to ``processSampleFromLibarary``.

    Raises:
        IndexError: If the shared BAM list is still empty after all workers
            have finished (same exception type as before, clearer message).
    '''

    # Shared, process-safe list; presumably each worker appends the BAM path
    # it produced -- verify against processSampleFromLibarary.
    mng = Manager()
    libraryBamFileList = mng.list()

    # One worker process per library.
    workers = []
    for sampleID in sampleNameDict:
        worker = Process(name=sampleID,
                         target=processSampleFromLibarary,
                         args=(
                             sampleID,
                             sampleNameDict[sampleID],
                             libraryBamFileList,
                         ))
        worker.start()
        workers.append(worker)

    # Block until every library has been processed.
    for worker in workers:
        worker.join()

    # Fail with an explicit message instead of an anonymous IndexError from
    # the [0] lookup below.
    if len(libraryBamFileList) == 0:
        raise IndexError("library BAM list is empty for sample %r; "
                         "nothing to merge" % (sampleName,))

    ######################	2.1 post mapping  ####################
    bamFileDir = os.path.dirname(libraryBamFileList[0])
    mergedBamFilePath = os.path.join(bamFileDir, "%s_merged.bam" % sampleName)
    # The samtools filter step (2.2) is disabled in this version, so the
    # merged BAM is also the final BAM unless duplicates are removed below.
    finalBamFilePath = mergedBamFilePath

    # Merge the BAM from each lane into one BAM.
    # os.path.join (rather than '+') keeps the script path relative when
    # bamFileDir is empty instead of pointing at the filesystem root.
    command = NGSTools.picard_merge(libraryBamFileList, mergedBamFilePath, cfg)
    NGSTools.writeCommands(command,
                           os.path.join(bamFileDir,
                                        'picard_mergebam_%s.sh' % sampleName),
                           run=_run)

    ###################### 3. remove duplicates ##################
    if RMDUP:
        command = NGSTools.picard_rmdup(mergedBamFilePath, True, cfg)
        NGSTools.writeCommands(command,
                               os.path.join(bamFileDir,
                                            'picard_rmdup_%s.sh' % sampleName),
                               run=_run)

        # Escape the dot so only a literal ".bam" suffix is rewritten.
        # Assumes picard_rmdup writes "<input>.rmdup.bam" -- TODO confirm.
        finalBamFilePath = re.sub(r'\.bam$', '.rmdup.bam', finalBamFilePath)

    ####################### 4. call SNP ######################
    if MPILEUP:
        SNP_out = os.path.join(args.outDir, "SNP", sampleName)
        NGSTools._mkdir(SNP_out)

        command, rawVcf = NGSTools.bcftools_call(finalBamFilePath,
                                                 cfg,
                                                 outdir=SNP_out,
                                                 sampleName=sampleName)
        NGSTools.writeCommands(command,
                               os.path.join(SNP_out,
                                            'bcftools_call_%s.sh' % sampleName),
                               run=_run)

        # str.replace() treats "vcf$" as a literal substring and never
        # matched, which made the filtered VCF path identical to the raw one.
        # Use a real end-anchored regex instead.
        outVcf = re.sub(r'vcf$', 'flt.vcf', rawVcf)
        command = NGSTools.bcftools_filter(rawVcf, outVcf, cfg)
        NGSTools.writeCommands(command,
                               os.path.join(SNP_out,
                                            'bcftools_filter_%s.sh' % sampleName),
                               run=_run)