Пример #1
0
 def testBooleanWhenNotEmpty(self):
     """
     The queue must test True when it is not empty.
     """
     pq = PriorityQueue()
     pq.add(4)
     self.assertTrue(pq)
Пример #2
0
 def testLengthAfterAdd(self):
     """
     A queue holding a single item must report a length of one.
     """
     queue = PriorityQueue()
     queue.add(3)
     self.assertEqual(1, len(queue))
Пример #3
0
 def testContains(self):
     """
     Membership testing (via __contains__) must find an added item and
     must not find an item that was never added.
     """
     queue = PriorityQueue()
     queue.add('hey')
     self.assertTrue('hey' in queue)
     self.assertFalse('hi' in queue)
Пример #4
0
 def testLengthAfterDoubleAdd(self):
     """
     Adding the same item twice must leave the queue with a length of one.
     """
     queue = PriorityQueue()
     for _ in range(2):
         queue.add(3)
     self.assertEqual(1, len(queue))
Пример #5
0
 def testPopRaisesOnEmpty(self):
     """
     Popping from an empty queue must raise a KeyError with the expected
     message.
     """
     queue = PriorityQueue()
     assertRaisesRegex(self, KeyError, 'pop from an empty priority queue',
                       queue.pop)
Пример #6
0
 def testLowestPriorityRaisesOnEmpty(self):
     """
     Asking an empty queue for its lowest priority must raise a KeyError
     with the expected message.
     """
     queue = PriorityQueue()
     assertRaisesRegex(self, KeyError, 'peek on an empty priority queue',
                       queue.lowestPriority)
Пример #7
0
 def testPop(self):
     """
     Popping must return the item that was added with the lowest priority.
     """
     queue = PriorityQueue()
     for item, priority in (('hey', 9), ('you', 7), ('two', 8)):
         queue.add(item, priority)
     self.assertEqual('you', queue.pop())
Пример #8
0
 def testLowestPriority(self):
     """
     The lowestPriority method must report the smallest priority in the
     queue and must leave the queue's length unchanged.
     """
     queue = PriorityQueue()
     for item, priority in (('hey', 9), ('you', 7), ('two', 8)):
         queue.add(item, priority)
     self.assertEqual(7, queue.lowestPriority())
     self.assertEqual(3, len(queue))
Пример #9
0
 def testRemove(self):
     """
     Removing the only item must leave a queue that tests false and has a
     length of zero.
     """
     queue = PriorityQueue()
     queue.add('hey')
     queue.remove('hey')
     self.assertFalse(queue)
     self.assertEqual(0, len(queue))
Пример #10
0
    def analyzeReferenceId(self, referenceId, alignmentFile, outputDir):
        """
        Analyze the given reference id in the given alignment file (if an
        alignment to the reference id is present).

        A consensus is seeded with the reference bases. Reads are then taken
        from a priority queue, lowest priority first. A read whose mismatch
        fraction is not above C{self.cutoff} is written to the consensus SAM
        file and its bases at its significant offsets are incorporated into
        the consensus; any other read is written to the non-consensus SAM
        file. Reads with no significant offsets are written to both files.
        Finally the reference bases are removed from the consensus again and
        the result is written to C{outputDir} as a text report and as FASTA.

        @param referenceId: The C{str} id of the reference sequence to analyze.
        @param alignmentFile: The C{str} name of an alignment file.
        @param outputDir: The C{str} name of the output directory.
        @return: C{None} if C{referenceId} is not present in C{alignmentFile}
            or if no significant offsets are found. Else, a C{dict} containing
            the significant offsets and the consensus sequence that best
            matches C{referenceId}.
        """
        analysis = self.initialReferenceIdAnalysis(referenceId, alignmentFile,
                                                   outputDir)

        if not analysis:
            return None

        (genomeLength, alignedReads, readCountAtOffset, baseCountAtOffset,
         readsAtOffset, significantOffsets, samFilter,
         paddedSAM) = analysis

        insignificantOffsets = set(
            range(genomeLength)) - set(significantOffsets)

        reference = self.referenceGenomes[referenceId]
        referenceSequence = reference.sequence

        # Seed the consensus with the reference bases. They are removed
        # again once all reads have been processed.
        consensus = []
        for base in referenceSequence:
            ob = OffsetBases()
            ob.incorporateBase(base)
            consensus.append(ob)

        readQueue = PriorityQueue()
        self.updatePriorityQueue(readQueue, alignedReads, consensus,
                                 significantOffsets)

        consensusFilename = join(outputDir, 'reference-consensus.sam')
        nonConsensusFilename = join(outputDir, 'reference-non-consensus.sam')
        self.report('    Writing consensus SAM to', consensusFilename)
        self.report('    Writing non-consensus SAM to', nonConsensusFilename)

        # The input alignment file is only needed as a template for the
        # headers of the two output files.
        with samfile(alignmentFile) as sam:
            consensusAlignment = AlignmentFile(consensusFilename,
                                               mode='w',
                                               template=sam)
            nonConsensusAlignment = AlignmentFile(nonConsensusFilename,
                                                  mode='w',
                                                  template=sam)

        # Make sure both output files are closed even if something goes
        # wrong while reads are being processed.
        try:
            # Reads with no significant offsets get written to both output
            # files.
            readsWithNoSignificantOffsetsCount = 0
            for read in alignedReads:
                if not read.significantOffsets:
                    readsWithNoSignificantOffsetsCount += 1
                    consensusAlignment.write(read.alignment)
                    nonConsensusAlignment.write(read.alignment)

                    for offset in insignificantOffsets:
                        base = read.base(offset)
                        if base is not None:
                            consensus[offset].incorporateBase(base)

            self.report(
                '    %d read%s did not overlap any significant offsets' %
                (readsWithNoSignificantOffsetsCount,
                 s(readsWithNoSignificantOffsetsCount)))

            readsMatchingConsensusCount = readsNotMatchingConsensusCount = 0
            cutoff = self.cutoff
            while readQueue:
                # Peek at the priority before popping, because pop only
                # returns the read.
                mismatchFraction, _ = readQueue.lowestPriority()
                read = readQueue.pop()
                if mismatchFraction <= cutoff:
                    # We want this read. Incorporate it into the consensus.
                    readsMatchingConsensusCount += 1
                    consensusAlignment.write(read.alignment)
                    affectedReads = set()
                    for offset in read.significantOffsets:
                        readBase = read.base(offset)
                        consensus[offset].incorporateBase(readBase)
                        for readAtOffset in readsAtOffset[offset]:
                            if readAtOffset in readQueue:
                                affectedReads.add(readAtOffset)
                    # The consensus changed at this read's offsets, so the
                    # still-queued reads covering those offsets are passed
                    # back to the queue update.
                    self.updatePriorityQueue(readQueue, affectedReads,
                                             consensus, significantOffsets)
                else:
                    readsNotMatchingConsensusCount += 1
                    nonConsensusAlignment.write(read.alignment)
        finally:
            consensusAlignment.close()
            nonConsensusAlignment.close()

        self.report(
            '    %d read%s matched the consensus, %d did not.' %
            (readsMatchingConsensusCount, s(readsMatchingConsensusCount),
             readsNotMatchingConsensusCount))

        # Remove the reference bases from the consensus.
        for offset, base in enumerate(referenceSequence):
            consensus[offset].unincorporateBase(base)

        consensusInfoFilename = join(outputDir, 'reference-consensus.txt')
        self.report('    Writing consensus info to', consensusInfoFilename)

        with open(consensusInfoFilename, 'w') as fp:
            consensusSequence = []
            for offset in range(genomeLength):
                # Take a copy of the commonest set because we may pop from
                # it below.
                commonest = set(consensus[offset].commonest)
                referenceBase = referenceSequence[offset]

                if len(commonest) > 1:
                    nucleotides = ' Nucleotides: %s' % (
                        consensus[offset].baseCountsToStr())
                else:
                    nucleotides = ''

                if referenceBase in commonest:
                    consensusBase = referenceBase
                elif commonest:
                    # Either there is a single commonest base that differs
                    # from the reference, or there is a draw (in which the
                    # reference base is not included and so cannot be used
                    # to break the draw). In both cases take a base from
                    # the set.
                    consensusBase = commonest.pop()
                else:
                    # Nothing in the included reads covers this offset.
                    consensusBase = '-'

                consensusSequence.append(consensusBase)

                mismatch = '' if referenceBase == consensusBase else (
                    ' Mismatch (reference has %s)' % referenceBase)

                print('%d: %s%s%s' %
                      (offset + 1, consensusBase, mismatch, nucleotides),
                      file=fp)

        consensusRead = Read('gready-consensus-%s' % referenceId,
                             ''.join(consensusSequence))
        consensusFilename = join(outputDir, 'reference-consensus.fasta')
        self.report('    Writing gready consensus info to', consensusFilename)
        Reads([consensusRead]).save(consensusFilename)

        return {
            'consensusRead': consensusRead,
            'significantOffsets': significantOffsets,
        }
Пример #11
0
class DistanceCache(object):
    """
    Maintain a set of distances between objects, with lazy evaluation and
    removal from the set.

    @param distFunc: A function that computes the distance between two objects.
    """
    def __init__(self, distFunc):
        self._distFunc = distFunc
        # Symmetric two-level mapping: self._distances[a][b] and
        # self._distances[b][a] hold the same distance.
        self._distances = defaultdict(dict)
        # Object pairs (as produced by _key), prioritized by distance.
        self._pq = PriorityQueue()

    def distance(self, a, b):
        """
        Find the distance between a pair of objects.

        @param a: An immutable object.
        @param b: An immutable object.
        @raise KeyError: If no distance between C{a} and C{b} is stored.
        @return: The distance between C{a} and C{b}, according to the distance
            function passed to __init__.
        """
        # Check for 'a' explicitly. Indexing the defaultdict with an unknown
        # object would silently create an empty entry for it, which would
        # then look like a known object to add() and remove().
        if a not in self._distances:
            raise KeyError(a)
        return self._distances[a][b]

    def add(self, a):
        """
        Add an object, computing its distance to every object already
        present.

        @param a: An immutable object.
        """
        assert a not in self._distances

        if self._distances:
            # Iterate over a copy of the keys because the assignment below
            # adds 'a' to self._distances.
            for b in list(self._distances):
                d = self._distFunc(a, b)
                self._distances[b][a] = self._distances[a][b] = d
                self._pq.add(_key(a, b), d)
        else:
            # This is the first element, so it has no distances to
            # anything.  Mention it to create its distance dictionary so it
            # will be found when subsequent elements are added.
            self._distances[a]

    def lowestDistance(self):
        """
        Get the lowest distance between any two clusters.

        @return: A C{float} distance, or C{None} if the distance priority
            queue is empty.
        """
        try:
            return self._pq.lowestPriority()
        except KeyError:
            return None

    def pop(self):
        """
        Pop the lowest distance cluster pair.

        @raise KeyError: If the distance priority queue is empty.
        @return: A 2-C{tuple} of C{int} cluster numbers.
        """
        return self._pq.pop()

    def __contains__(self, pair):
        """
        Test if a pair has a computed distance (useful for testing).

        @param pair: A 2-tuple of objects.
        @return: A Boolean indicating membership.
        """
        a, b = pair[0], pair[1]
        # Distances are stored symmetrically, so looking in one direction
        # is enough. Use get() so that the lookup does not create an entry
        # in the defaultdict for an unknown object.
        return b in self._distances.get(a, ())

    def remove(self, a):
        """
        Remove an object and all distances involving it.

        @param a: An object.
        @raise KeyError: If more than one pair involving C{a} is missing
            from the distance priority queue, or if C{a} has no stored
            distances.
        """
        errorCount = 0
        for b in self._distances:
            if b != a:
                try:
                    self._pq.remove(_key(a, b))
                except KeyError:
                    # We allow one KeyError since 'a' has likely just been
                    # popped as part of the lowest scoring pair.
                    errorCount += 1
                    if errorCount > 1:
                        raise
                del self._distances[b][a]

        del self._distances[a]
Пример #12
0
 def __init__(self, distFunc):
     """
     Start with an empty priority queue and no stored distances.

     @param distFunc: The distance function, saved for later use.
     """
     self._pq = PriorityQueue()
     self._distances = defaultdict(dict)
     self._distFunc = distFunc
Пример #13
0
 def testBooleanWhenEmpty(self):
     """
     A newly created (empty) queue must test False.
     """
     queue = PriorityQueue()
     self.assertFalse(queue)
Пример #14
0
 def testEmptyLength(self):
     """
     A newly created (empty) queue must have a length of zero.
     """
     queue = PriorityQueue()
     self.assertEqual(0, len(queue))