示例#1
0
    def saveRead(self, readToSave):
        '''
        Write a read to the output file and count it in self.statistics.

        The choice-type and parent-of-origin tags are read from the read itself via
        MergeImprove.getTag. The read is written even when the tags are missing or do
        not fit the layout of self.statistics; in that case only a diagnostic is printed.
        @param readToSave - the read that should be added to the output; None is ignored
        '''
        if readToSave is None:
            return

        #save the read
        self.outputFile.write(readToSave)

        choice = MergeImprove.getTag(readToSave, MergeImprove.CHOICE_TYPE_TAG)
        parent = MergeImprove.getTag(readToSave,
                                     MergeImprove.PARENT_OF_ORIGIN_TAG)

        #TODO: remove this print error
        if parent is None or choice is None:
            print('ERROR:' + str(readToSave))
        elif (choice in self.statistics and
              0 <= parent < len(self.statistics[choice])):
            #the lower bound keeps a negative tag from wrapping around to the wrong counter
            self.statistics[choice][parent] += 1
            self.statistics['tot'][parent] += 1
        else:
            print('Poorly structured tag:')
            print(self.statistics)
            print(readToSave)

        if MergeImprove.verbosity:
            #%s formatting copes with an int or missing parent tag, which plain
            #string concatenation would reject with a TypeError
            logger.info('Saving from %s: %s', parent, readToSave)
示例#2
0
 def saveRead(self, readToSave):
     '''
     Write a read to the output file and count it in self.statistics.

     The choice-type and parent-of-origin tags are read from the read itself via
     MergeImprove.getTag. The read is written even when the tags are missing or do
     not fit the layout of self.statistics; in that case only a diagnostic is printed.
     @param readToSave - the read that should be added to the output; None is ignored
     '''
     if readToSave is None:
         return

     #save the read
     self.outputFile.write(readToSave)

     choice = MergeImprove.getTag(readToSave, MergeImprove.CHOICE_TYPE_TAG)
     parent = MergeImprove.getTag(readToSave, MergeImprove.PARENT_OF_ORIGIN_TAG)

     #TODO: remove this print error
     if parent is None or choice is None:
         print('ERROR:'+str(readToSave))
     elif choice in self.statistics and 0 <= parent < len(self.statistics[choice]):
         #the lower bound keeps a negative tag from wrapping around to the wrong counter
         self.statistics[choice][parent] += 1
         self.statistics['tot'][parent] += 1
     else:
         print('Poorly structured tag:')
         print(self.statistics)
         print(readToSave)

     if MergeImprove.verbosity:
         #%s formatting copes with an int or missing parent tag, which plain
         #string concatenation would reject with a TypeError
         logger.info('Saving from %s: %s', parent, readToSave)
示例#3
0
    def saveRandomPair(self, possiblePairs):
        '''
        Save one or all of a list of candidate pairs.

        A lone candidate is saved with its tags untouched. With several candidates,
        either one pair is drawn at random and tagged 'R' (self.isRandomFilter set),
        or every pair is tagged 'K' and saved.
        @param possiblePairs - a list of possible pairs that could be saved; each pair is indexed as [0] and [1]. An empty list saves nothing.
        '''
        if len(possiblePairs) == 0:
            #nothing to save; indexing element 0 below would raise IndexError
            return

        if len(possiblePairs) == 1:
            #single candidate: keep whatever choice tag it already carries
            pairsToSave = possiblePairs
            choiceTag = None
        elif self.isRandomFilter:
            #pick a random pair
            rv = random.randint(0, len(possiblePairs) - 1)
            pairsToSave = [possiblePairs[rv]]
            choiceTag = 'R'
        else:
            #keep every candidate
            pairsToSave = possiblePairs
            choiceTag = 'K'

        for pair in pairsToSave:
            if choiceTag is not None:
                MergeImprove.setTag(pair[0], MergeImprove.CHOICE_TYPE_TAG,
                                    choiceTag)
                MergeImprove.setTag(pair[1], MergeImprove.CHOICE_TYPE_TAG,
                                    choiceTag)

            #save the pair
            self.saveRead(pair[0])
            self.saveRead(pair[1])
示例#4
0
 def saveRandomPair(self, possiblePairs):
     '''
     Save one or all of a list of candidate pairs.

     A lone candidate is saved with its tags untouched. With several candidates,
     either one pair is drawn at random and tagged 'R' (self.isRandomFilter set),
     or every pair is tagged 'K' and saved.
     @param possiblePairs - a list of possible pairs that could be saved; each pair is indexed as [0] and [1]. An empty list saves nothing.
     '''
     if len(possiblePairs) == 0:
         #nothing to save; indexing element 0 below would raise IndexError
         return

     if len(possiblePairs) == 1:
         #single candidate: keep whatever choice tag it already carries
         pairsToSave = possiblePairs
         choiceTag = None
     elif self.isRandomFilter:
         #pick a random pair
         rv = random.randint(0, len(possiblePairs)-1)
         pairsToSave = [possiblePairs[rv]]
         choiceTag = 'R'
     else:
         #keep every candidate
         pairsToSave = possiblePairs
         choiceTag = 'K'

     for pair in pairsToSave:
         if choiceTag is not None:
             MergeImprove.setTag(pair[0], MergeImprove.CHOICE_TYPE_TAG, choiceTag)
             MergeImprove.setTag(pair[1], MergeImprove.CHOICE_TYPE_TAG, choiceTag)

         #save the pair
         self.saveRead(pair[0])
         self.saveRead(pair[1])
示例#5
0
 def saveRandomSingle(self, possibleSingles):
     '''
     Save one or all of a list of candidate unpaired reads.

     A lone candidate is saved with its tags untouched. With several candidates,
     either one read is drawn at random and tagged 'R' (self.isRandomFilter set),
     or every read is tagged 'K' and saved.
     @param possibleSingles - a list of possible singles that could be saved; an empty list saves nothing
     '''
     if len(possibleSingles) == 0:
         #nothing to save; indexing element 0 below would raise IndexError
         return

     if len(possibleSingles) == 1:
         #single candidate: keep whatever choice tag it already carries
         singlesToSave = possibleSingles
         choiceTag = None
     elif self.isRandomFilter:
         #pick a random single
         rv = random.randint(0, len(possibleSingles)-1)
         singlesToSave = [possibleSingles[rv]]
         choiceTag = 'R'
     else:
         #keep every candidate
         singlesToSave = possibleSingles
         choiceTag = 'K'

     for single in singlesToSave:
         if choiceTag is not None:
             MergeImprove.setTag(single, MergeImprove.CHOICE_TYPE_TAG, choiceTag)
         self.saveRead(single)
示例#6
0
 def saveRandomSingle(self, possibleSingles):
     '''
     Save one or all of a list of candidate unpaired reads.

     A lone candidate is saved with its tags untouched. With several candidates,
     either one read is drawn at random and tagged 'R' (self.isRandomFilter set),
     or every read is tagged 'K' and saved.
     @param possibleSingles - a list of possible singles that could be saved; an empty list saves nothing
     '''
     if len(possibleSingles) == 0:
         #nothing to save; indexing element 0 below would raise IndexError
         return

     if len(possibleSingles) == 1:
         #single candidate: keep whatever choice tag it already carries
         singlesToSave = possibleSingles
         choiceTag = None
     elif self.isRandomFilter:
         #pick a random single
         rv = random.randint(0, len(possibleSingles) - 1)
         singlesToSave = [possibleSingles[rv]]
         choiceTag = 'R'
     else:
         #keep every candidate
         singlesToSave = possibleSingles
         choiceTag = 'K'

     for single in singlesToSave:
         if choiceTag is not None:
             MergeImprove.setTag(single, MergeImprove.CHOICE_TYPE_TAG,
                                 choiceTag)
         self.saveRead(single)
示例#7
0
    def __init__(self, sharedDict, resultsQueue, outputFilename,
                 isRandomFilter, outHeader, workerID, keepAll, numInputs):
        '''
        Initialise the worker process: module logger, counters and per-worker file names.
        @param sharedDict - a dictionary full of multiprocessing.Arrays
        @param resultsQueue - the shared queue for storing statistics on the alignments from each worker
        @param outputFilename - the overall output filename where the final merge will be, worker won't use this unless a single process scenario
        @param isRandomFilter - boolean determining whether we random or union at the end
        @param outHeader - the header to use in any output files
        @param workerID - integer ID, 0 means master
        @param keepAll - stored unchanged as self.keepAll
        @param numInputs - number of inputs; every statistics list holds 2**numInputs counters
        '''
        global logger
        logger = MergeImprove.getLogger()

        #the Process base class must be initialised before any attribute is set
        multiprocessing.Process.__init__(self)

        #plain copies of the constructor arguments
        self.sharedDict = sharedDict
        self.resultsQueue = resultsQueue
        self.baseOutputFN = outputFilename
        self.outHeader = outHeader
        self.isRandomFilter = isRandomFilter
        self.workerID = workerID
        self.keepAll = keepAll

        #always get these stats: one list of zeroed counters per choice type, plus a total
        maxPO = 2**numInputs
        self.statistics = dict(
            (choiceType, [0] * maxPO)
            for choiceType in ('K', 'U', 'Q', 'P', 'R', 'tot'))

        #101 zeroed counters, indices 0 through 100
        self.percentageChoice = [0] * 101

        #both temp files share the worker-specific prefix
        workerPrefix = outputFilename + '.tmp' + str(workerID) + '.bam'
        self.pileupTempFN = workerPrefix + '.pileup_tmp.bam'
        self.outputFN = workerPrefix + '.pileup_complete.bam'
示例#8
0
 def __init__(self, sharedDict, resultsQueue, outputFilename, isRandomFilter, outHeader, workerID, keepAll, numInputs):
     '''
     Initialise the worker process: module logger, counters and per-worker file names.
     @param sharedDict - a dictionary full of multiprocessing.Arrays
     @param resultsQueue - the shared queue for storing statistics on the alignments from each worker
     @param outputFilename - the overall output filename where the final merge will be, worker won't use this unless a single process scenario
     @param isRandomFilter - boolean determining whether we random or union at the end
     @param outHeader - the header to use in any output files
     @param workerID - integer ID, 0 means master
     @param keepAll - stored unchanged as self.keepAll
     @param numInputs - number of inputs; every statistics list holds 2**numInputs counters
     '''
     global logger
     logger = MergeImprove.getLogger()

     #the Process base class must be initialised before any attribute is set
     multiprocessing.Process.__init__(self)

     #plain copies of the constructor arguments
     self.sharedDict = sharedDict
     self.resultsQueue = resultsQueue
     self.baseOutputFN = outputFilename
     self.outHeader = outHeader
     self.isRandomFilter = isRandomFilter
     self.workerID = workerID
     self.keepAll = keepAll

     #always get these stats: one list of zeroed counters per choice type, plus a total
     maxPO = 2**numInputs
     self.statistics = dict((choiceType, [0] * maxPO) for choiceType in ('K', 'U', 'Q', 'P', 'R', 'tot'))

     #101 zeroed counters, indices 0 through 100
     self.percentageChoice = [0] * 101

     #both temp files share the worker-specific prefix
     workerPrefix = outputFilename+'.tmp'+str(workerID)+'.bam'
     self.pileupTempFN = workerPrefix+'.pileup_tmp.bam'
     self.outputFN = workerPrefix+'.pileup_complete.bam'
示例#9
0
    def handlePostPileupMerge(self, reads):
        '''
        Keep the alignment(s) with the highest average pileup among same-named reads.

        The reads are grouped by MergeImprove.pairReads. When any pairs come back only
        the pairs are scored, pooling both mates; otherwise the singles are scored
        separately per end, keyed on the first-segment flag. A unique winner is tagged
        'P' and its share of the summed averages is counted in self.percentageChoice.
        The list of best candidates, ties included, is then handed to saveRandomPair or
        saveRandomSingle.
        @param reads - a set of reads with the same name to be compared using pileup
        '''
        pairs, singles = MergeImprove.pairReads(reads,
                                                MergeImprove.PILEUP_HI_TAG)

        if len(pairs) > 0:
            totalAvg = 0
            topAvg = -1
            topPairs = []

            for pair in pairs:
                tot1, bases1 = self.calcPileupStats(pair[0])
                tot2, bases2 = self.calcPileupStats(pair[1])

                #pooled average over both mates
                avgPileup = float(tot1 + tot2) / (bases1 + bases2)
                totalAvg += avgPileup

                if avgPileup > topAvg:
                    #new best: restart the candidate list with this pair
                    topAvg = avgPileup
                    topPairs = [pair]
                elif avgPileup == topAvg:
                    #tie with the current best
                    topPairs.append(pair)

            #stats are only recorded for an unambiguous winner
            if len(topPairs) == 1:
                winner = topPairs[0]
                MergeImprove.setTag(winner[0], MergeImprove.CHOICE_TYPE_TAG,
                                    'P')
                MergeImprove.setTag(winner[1], MergeImprove.CHOICE_TYPE_TAG,
                                    'P')

                #a zero best average goes to bucket 0, which also avoids dividing by a zero sum
                if topAvg == 0:
                    bucket = 0
                else:
                    bucket = int(100 * topAvg / totalAvg)
                #two reads in the pair, so count twice
                self.percentageChoice[bucket] += 2

            #save one of the best pileup pairs
            self.saveRandomPair(topPairs)

        else:
            #no pairs: score the singles, each end on its own
            topAvgByEnd = {}
            topReadsByEnd = {}
            totalAvgByEnd = {False: 0, True: 0}

            for read in singles:
                isFirst = MergeImprove.isFlagSet(
                    read.flag, MergeImprove.FIRST_SEGMENT_FLAG)
                if isFirst not in topAvgByEnd:
                    #first read seen for this end; -1 is overwritten by the comparison below
                    topAvgByEnd[isFirst] = -1
                    topReadsByEnd[isFirst] = []

                tot, bases = self.calcPileupStats(read)
                avgPileup = float(tot) / bases
                totalAvgByEnd[isFirst] += avgPileup

                if avgPileup > topAvgByEnd[isFirst]:
                    topAvgByEnd[isFirst] = avgPileup
                    topReadsByEnd[isFirst] = [read]
                elif avgPileup == topAvgByEnd[isFirst]:
                    topReadsByEnd[isFirst].append(read)

            #save the best from each end
            for end in topReadsByEnd:
                candidates = topReadsByEnd[end]

                if len(candidates) == 1:
                    MergeImprove.setTag(candidates[0],
                                        MergeImprove.CHOICE_TYPE_TAG, 'P')
                    if topAvgByEnd[end] == 0:
                        bucket = 0
                    else:
                        bucket = int(100 * topAvgByEnd[end] /
                                     totalAvgByEnd[end])
                    self.percentageChoice[bucket] += 1

                self.saveRandomSingle(candidates)
示例#10
0
    def handlePostPileupMerge(self, reads):
        '''
        Keep the alignment(s) with the highest average pileup among same-named reads.

        The reads are grouped by MergeImprove.pairReads. When any pairs come back only
        the pairs are scored, pooling both mates; otherwise the singles are scored
        separately per end, keyed on the first-segment flag. A unique winner is tagged
        'P' and its share of the summed averages is counted in self.percentageChoice.
        The list of best candidates, ties included, is then handed to saveRandomPair or
        saveRandomSingle.
        @param reads - a set of reads with the same name to be compared using pileup
        '''
        pairs, singles = MergeImprove.pairReads(reads, MergeImprove.PILEUP_HI_TAG)

        if len(pairs) > 0:
            totalAvg = 0
            topAvg = -1
            topPairs = []

            for pair in pairs:
                tot1, bases1 = self.calcPileupStats(pair[0])
                tot2, bases2 = self.calcPileupStats(pair[1])

                #pooled average over both mates
                avgPileup = float(tot1+tot2)/(bases1+bases2)
                totalAvg += avgPileup

                if avgPileup > topAvg:
                    #new best: restart the candidate list with this pair
                    topAvg = avgPileup
                    topPairs = [pair]
                elif avgPileup == topAvg:
                    #tie with the current best
                    topPairs.append(pair)

            #stats are only recorded for an unambiguous winner
            if len(topPairs) == 1:
                winner = topPairs[0]
                MergeImprove.setTag(winner[0], MergeImprove.CHOICE_TYPE_TAG, 'P')
                MergeImprove.setTag(winner[1], MergeImprove.CHOICE_TYPE_TAG, 'P')

                #a zero best average goes to bucket 0, which also avoids dividing by a zero sum
                if topAvg == 0:
                    bucket = 0
                else:
                    bucket = int(100*topAvg/totalAvg)
                #two reads in the pair, so count twice
                self.percentageChoice[bucket] += 2

            #save one of the best pileup pairs
            self.saveRandomPair(topPairs)

        else:
            #no pairs: score the singles, each end on its own
            topAvgByEnd = {}
            topReadsByEnd = {}
            totalAvgByEnd = {False: 0, True: 0}

            for read in singles:
                isFirst = MergeImprove.isFlagSet(read.flag, MergeImprove.FIRST_SEGMENT_FLAG)
                if isFirst not in topAvgByEnd:
                    #first read seen for this end; -1 is overwritten by the comparison below
                    topAvgByEnd[isFirst] = -1
                    topReadsByEnd[isFirst] = []

                tot, bases = self.calcPileupStats(read)
                avgPileup = float(tot)/bases
                totalAvgByEnd[isFirst] += avgPileup

                if avgPileup > topAvgByEnd[isFirst]:
                    topAvgByEnd[isFirst] = avgPileup
                    topReadsByEnd[isFirst] = [read]
                elif avgPileup == topAvgByEnd[isFirst]:
                    topReadsByEnd[isFirst].append(read)

            #save the best from each end
            for end in topReadsByEnd:
                candidates = topReadsByEnd[end]

                if len(candidates) == 1:
                    MergeImprove.setTag(candidates[0], MergeImprove.CHOICE_TYPE_TAG, 'P')
                    if topAvgByEnd[end] == 0:
                        bucket = 0
                    else:
                        bucket = int(100*topAvgByEnd[end]/totalAvgByEnd[end])
                    self.percentageChoice[bucket] += 1

                self.saveRandomSingle(candidates)