def saveRandomPair(self, possiblePairs):
    '''
    Save one pair, or every pair, out of a list of candidate pairs.

    A lone candidate is saved as it is and its choice-type tag is not modified.
    With several candidates the outcome depends on self.isRandomFilter: when it is
    set, one pair is picked at random, both of its reads are tagged 'R' and saved;
    otherwise both reads of every pair are tagged 'K' and saved.
    An empty list saves nothing.
    @param possiblePairs - a list of possible pairs that could be saved, each pair is indexed as [0] and [1]
    '''
    #nothing to choose from, so nothing to save (this used to raise an IndexError)
    if not possiblePairs:
        return

    if len(possiblePairs) == 1:
        #only one candidate, save the pair without touching its choice tag
        self.saveRead(possiblePairs[0][0])
        self.saveRead(possiblePairs[0][1])
        return

    if self.isRandomFilter:
        #pick a random pair
        keptPairs = [random.choice(possiblePairs)]
        choiceType = 'R'
    else:
        #no random filtering, every candidate is kept
        keptPairs = possiblePairs
        choiceType = 'K'

    for pair in keptPairs:
        #tag both reads before either one is saved
        MergeImprove.setTag(pair[0], MergeImprove.CHOICE_TYPE_TAG, choiceType)
        MergeImprove.setTag(pair[1], MergeImprove.CHOICE_TYPE_TAG, choiceType)
        self.saveRead(pair[0])
        self.saveRead(pair[1])
def saveRandomPair(self, possiblePairs):
    '''
    Save one pair, or every pair, out of a list of candidate pairs.

    A lone candidate is saved as it is and its choice-type tag is not modified.
    With several candidates the outcome depends on self.isRandomFilter: when it is
    set, one pair is picked at random, both of its reads are tagged 'R' and saved;
    otherwise both reads of every pair are tagged 'K' and saved.
    An empty list saves nothing.
    @param possiblePairs - a list of possible pairs that could be saved, each pair is indexed as [0] and [1]
    '''
    #nothing to choose from, so nothing to save (this used to raise an IndexError)
    if not possiblePairs:
        return

    if len(possiblePairs) == 1:
        #only one candidate, save the pair without touching its choice tag
        self.saveRead(possiblePairs[0][0])
        self.saveRead(possiblePairs[0][1])
        return

    if self.isRandomFilter:
        #pick a random pair
        keptPairs = [random.choice(possiblePairs)]
        choiceType = 'R'
    else:
        #no random filtering, every candidate is kept
        keptPairs = possiblePairs
        choiceType = 'K'

    for pair in keptPairs:
        #tag both reads before either one is saved
        MergeImprove.setTag(pair[0], MergeImprove.CHOICE_TYPE_TAG, choiceType)
        MergeImprove.setTag(pair[1], MergeImprove.CHOICE_TYPE_TAG, choiceType)
        self.saveRead(pair[0])
        self.saveRead(pair[1])
def saveRandomSingle(self, possibleSingles):
    '''
    Save one read, or every read, out of a list of candidate unpaired reads.

    A lone candidate is saved as it is and its choice-type tag is not modified.
    With several candidates the outcome depends on self.isRandomFilter: when it is
    set, one read is picked at random, tagged 'R' and saved; otherwise every read
    is tagged 'K' and saved.
    An empty list saves nothing.
    @param possibleSingles - a list of possible singles that could be saved
    '''
    #nothing to choose from, so nothing to save (this used to raise an IndexError)
    if not possibleSingles:
        return

    if len(possibleSingles) == 1:
        #only one candidate, save it without touching its choice tag
        self.saveRead(possibleSingles[0])
        return

    if self.isRandomFilter:
        #pick a random single
        keptSingles = [random.choice(possibleSingles)]
        choiceType = 'R'
    else:
        #no random filtering, every candidate is kept
        keptSingles = possibleSingles
        choiceType = 'K'

    for single in keptSingles:
        MergeImprove.setTag(single, MergeImprove.CHOICE_TYPE_TAG, choiceType)
        self.saveRead(single)
def saveRandomSingle(self, possibleSingles):
    '''
    Save one read, or every read, out of a list of candidate unpaired reads.

    A lone candidate is saved as it is and its choice-type tag is not modified.
    With several candidates the outcome depends on self.isRandomFilter: when it is
    set, one read is picked at random, tagged 'R' and saved; otherwise every read
    is tagged 'K' and saved.
    An empty list saves nothing.
    @param possibleSingles - a list of possible singles that could be saved
    '''
    #nothing to choose from, so nothing to save (this used to raise an IndexError)
    if not possibleSingles:
        return

    if len(possibleSingles) == 1:
        #only one candidate, save it without touching its choice tag
        self.saveRead(possibleSingles[0])
        return

    if self.isRandomFilter:
        #pick a random single
        keptSingles = [random.choice(possibleSingles)]
        choiceType = 'R'
    else:
        #no random filtering, every candidate is kept
        keptSingles = possibleSingles
        choiceType = 'K'

    for single in keptSingles:
        MergeImprove.setTag(single, MergeImprove.CHOICE_TYPE_TAG, choiceType)
        self.saveRead(single)
def handlePostPileupMerge(self, reads):
    '''
    This function compares a group of alignments by average pileup and saves the best one(s).

    The reads are split into pairs and singles by MergeImprove.pairReads. When there is at least
    one pair, only the pairs are compared; otherwise the singles are compared separately for each
    end (first segment or not). The average pileup of a candidate is total / bases as returned by
    self.calcPileupStats. Every candidate tied for the highest average is handed to
    saveRandomPair / saveRandomSingle. A unique best candidate is tagged 'P' and its share of the
    summed averages (as an integer percentage) is counted in self.percentageChoice.
    @param reads - a set of reads with the same name to be compared using pileup
    '''
    #NOTE(review): this method was restored from a copy that had lost its indentation; the
    #percentageChoice updates are assumed to sit inside the "unique best" branches - confirm
    avgSum = 0

    [pairs, singles] = MergeImprove.pairReads(reads, MergeImprove.PILEUP_HI_TAG)

    if len(pairs) != 0:
        bestAvgPileup = -1
        bestPairs = []

        for pair in pairs:
            [tot1, bases1] = self.calcPileupStats(pair[0])
            [tot2, bases2] = self.calcPileupStats(pair[1])

            avgPileup = float(tot1 + tot2) / (bases1 + bases2)
            avgSum += avgPileup

            #a strictly better average drops the earlier candidates, an equal one joins them
            if avgPileup > bestAvgPileup:
                bestAvgPileup = avgPileup
                bestPairs = []

            if avgPileup == bestAvgPileup:
                bestPairs.append(pair)

        #stats
        if len(bestPairs) == 1:
            MergeImprove.setTag(bestPairs[0][0], MergeImprove.CHOICE_TYPE_TAG, 'P')
            MergeImprove.setTag(bestPairs[0][1], MergeImprove.CHOICE_TYPE_TAG, 'P')

            #save pileup stats, a best of 0 goes to bucket 0 so avgSum == 0 is never divided by
            if bestAvgPileup == 0:
                self.percentageChoice[0] += 2
            else:
                self.percentageChoice[int(100 * bestAvgPileup / avgSum)] += 2

        #save one of the best pileup pairs
        self.saveRandomPair(bestPairs)

    else:
        #do this over singles, tracked separately per end
        bestAvgPileup = {}
        bestReads = {}
        avgSum = {False: 0, True: 0}

        for read in singles:
            #if there's nothing yet for this sequence, set its best as -1 so it gets overwritten below
            isFirst = MergeImprove.isFlagSet(read.flag, MergeImprove.FIRST_SEGMENT_FLAG)
            #"in" instead of dict.has_key, which no longer exists in Python 3
            if isFirst not in bestAvgPileup:
                bestAvgPileup[isFirst] = -1
                bestReads[isFirst] = []

            #get the pileup calculation
            [tot, bases] = self.calcPileupStats(read)
            avgPileup = float(tot) / bases
            avgSum[isFirst] += avgPileup

            #if it's better, keep it
            if avgPileup > bestAvgPileup[isFirst]:
                bestAvgPileup[isFirst] = avgPileup
                bestReads[isFirst] = []

            if avgPileup == bestAvgPileup[isFirst]:
                bestReads[isFirst].append(read)

        #save the best from each end
        for end in bestReads:
            brs = bestReads[end]
            if len(brs) == 1:
                MergeImprove.setTag(brs[0], MergeImprove.CHOICE_TYPE_TAG, 'P')

                if bestAvgPileup[end] == 0:
                    self.percentageChoice[0] += 1
                else:
                    self.percentageChoice[int(100 * bestAvgPileup[end] / avgSum[end])] += 1

            self.saveRandomSingle(brs)
def handlePostPileupMerge(self, reads):
    '''
    This function compares a group of alignments by average pileup and saves the best one(s).

    The reads are split into pairs and singles by MergeImprove.pairReads. When there is at least
    one pair, only the pairs are compared; otherwise the singles are compared separately for each
    end (first segment or not). The average pileup of a candidate is total / bases as returned by
    self.calcPileupStats. Every candidate tied for the highest average is handed to
    saveRandomPair / saveRandomSingle. A unique best candidate is tagged 'P' and its share of the
    summed averages (as an integer percentage) is counted in self.percentageChoice.
    @param reads - a set of reads with the same name to be compared using pileup
    '''
    #NOTE(review): this method was restored from a copy that had lost its indentation; the
    #percentageChoice updates are assumed to sit inside the "unique best" branches - confirm
    avgSum = 0

    [pairs, singles] = MergeImprove.pairReads(reads, MergeImprove.PILEUP_HI_TAG)

    if len(pairs) != 0:
        bestAvgPileup = -1
        bestPairs = []

        for pair in pairs:
            [tot1, bases1] = self.calcPileupStats(pair[0])
            [tot2, bases2] = self.calcPileupStats(pair[1])

            avgPileup = float(tot1 + tot2) / (bases1 + bases2)
            avgSum += avgPileup

            #a strictly better average drops the earlier candidates, an equal one joins them
            if avgPileup > bestAvgPileup:
                bestAvgPileup = avgPileup
                bestPairs = []

            if avgPileup == bestAvgPileup:
                bestPairs.append(pair)

        #stats
        if len(bestPairs) == 1:
            MergeImprove.setTag(bestPairs[0][0], MergeImprove.CHOICE_TYPE_TAG, 'P')
            MergeImprove.setTag(bestPairs[0][1], MergeImprove.CHOICE_TYPE_TAG, 'P')

            #save pileup stats, a best of 0 goes to bucket 0 so avgSum == 0 is never divided by
            if bestAvgPileup == 0:
                self.percentageChoice[0] += 2
            else:
                self.percentageChoice[int(100 * bestAvgPileup / avgSum)] += 2

        #save one of the best pileup pairs
        self.saveRandomPair(bestPairs)

    else:
        #do this over singles, tracked separately per end
        bestAvgPileup = {}
        bestReads = {}
        avgSum = {False: 0, True: 0}

        for read in singles:
            #if there's nothing yet for this sequence, set its best as -1 so it gets overwritten below
            isFirst = MergeImprove.isFlagSet(read.flag, MergeImprove.FIRST_SEGMENT_FLAG)
            #"in" instead of dict.has_key, which no longer exists in Python 3
            if isFirst not in bestAvgPileup:
                bestAvgPileup[isFirst] = -1
                bestReads[isFirst] = []

            #get the pileup calculation
            [tot, bases] = self.calcPileupStats(read)
            avgPileup = float(tot) / bases
            avgSum[isFirst] += avgPileup

            #if it's better, keep it
            if avgPileup > bestAvgPileup[isFirst]:
                bestAvgPileup[isFirst] = avgPileup
                bestReads[isFirst] = []

            if avgPileup == bestAvgPileup[isFirst]:
                bestReads[isFirst].append(read)

        #save the best from each end
        for end in bestReads:
            brs = bestReads[end]
            if len(brs) == 1:
                MergeImprove.setTag(brs[0], MergeImprove.CHOICE_TYPE_TAG, 'P')

                if bestAvgPileup[end] == 0:
                    self.percentageChoice[0] += 1
                else:
                    self.percentageChoice[int(100 * bestAvgPileup[end] / avgSum[end])] += 1

            self.saveRandomSingle(brs)