def testInvalidatesEmpty(self):
    """
    Asking which titles were invalidated by a title that has not
    invalidated anything must produce an empty list.
    """
    readSetFilter = ReadSetFilter(0.5)
    invalidated = readSetFilter.invalidates("title1")
    self.assertEqual([], invalidated)
def testInvalidatesEmpty(self):
    """
    A title that has not invalidated any other title must have an empty
    list of invalidated titles.
    """
    noneInvalidated = ReadSetFilter(0.5).invalidates('title1')
    self.assertEqual([], noneInvalidated)
def testDifferentSet(self):
    """
    Once a filter has seen one read set, a second set that is totally
    different from it must be accepted (C{accept} returns C{True}).
    """
    readSetFilter = ReadSetFilter(1.0)
    firstSet = self.makeTitleAlignments(0)
    readSetFilter.accept("title1", firstSet)
    differentSet = self.makeTitleAlignments(1)
    accepted = readSetFilter.accept("title2", differentSet)
    self.assertTrue(accepted)
def testRepeatTitle(self):
    """
    Offering a title a second time, after it was already seen in an
    accepted read set, must raise C{AssertionError}.
    """
    readSetFilter = ReadSetFilter(0.5)
    readSetFilter.accept("title1", self.makeTitleAlignments(0, 1, 2, 3, 4))
    # Build the argument outside the guarded region so only the repeated
    # accept call itself is checked for the error.
    repeated = self.makeTitleAlignments()
    with self.assertRaises(AssertionError):
        readSetFilter.accept("title1", repeated)
def testDifferentSet(self):
    """
    A read set that is totally different from the one the filter has
    already seen must be accepted, i.e., C{accept} must return C{True}.
    """
    seenFilter = ReadSetFilter(1.0)
    seenFilter.accept('title1', self.makeTitleAlignments(0))
    newAlignments = self.makeTitleAlignments(1)
    self.assertTrue(seenFilter.accept('title2', newAlignments))
def testFirstUse(self):
    """
    The very first read set offered to an unused filter must be accepted
    (C{accept} returns C{True}).
    """
    firstAlignments = self.makeTitleAlignments()
    unusedFilter = ReadSetFilter(0.9)
    accepted = unusedFilter.accept('title1', firstAlignments)
    self.assertTrue(accepted)
def testFirstUse(self):
    """
    A read set filter that has not been used before must return C{True}
    when asked to accept its first read set.
    """
    alignments = self.makeTitleAlignments()
    readSetFilter = ReadSetFilter(0.9)
    self.assertTrue(readSetFilter.accept("title1", alignments))
def testRepeatTitle(self):
    """
    A title that was already seen (in an accepted read set) must not be
    offered again: doing so must raise C{AssertionError}.
    """
    readSetFilter = ReadSetFilter(0.5)
    initialReads = self.makeTitleAlignments(0, 1, 2, 3, 4)
    readSetFilter.accept('title1', initialReads)
    # The second read set is created before entering the assertion
    # context so that only the accept call is expected to fail.
    secondReads = self.makeTitleAlignments()
    with self.assertRaises(AssertionError):
        readSetFilter.accept('title1', secondReads)
def testDuplicateSingleReadZeroThreshold(self):
    """
    With a C{minNew} threshold of zero, a read set identical to one the
    filter has already seen must still be accepted (C{True}).
    """
    zeroThresholdFilter = ReadSetFilter(0.0)
    zeroThresholdFilter.accept('title1', self.makeTitleAlignments(0))
    duplicate = self.makeTitleAlignments(0)
    accepted = zeroThresholdFilter.accept('title2', duplicate)
    self.assertTrue(accepted)
def testDuplicateSingleRead(self):
    """
    With a non-zero C{minNew} threshold, a read set identical to one the
    filter has already seen must be rejected (C{accept} returns C{False}).
    """
    readSetFilter = ReadSetFilter(0.9)
    original = self.makeTitleAlignments(0)
    readSetFilter.accept('title1', original)
    duplicate = self.makeTitleAlignments(0)
    accepted = readSetFilter.accept('title2', duplicate)
    self.assertFalse(accepted)
def testThresholdRoundsUp(self):
    """
    The number of new reads needed for acceptance must be rounded up.
    """
    readSetFilter = ReadSetFilter(0.5)
    readSetFilter.accept("title1", self.makeTitleAlignments(0, 1, 2, 3, 4))
    # A read set of size three needs two differing reads to pass. This
    # one is built with only one argument (6) not used for title1, so it
    # must be rejected.
    candidate = self.makeTitleAlignments(0, 1, 6)
    accepted = readSetFilter.accept("title2", candidate)
    self.assertFalse(accepted)
def testInsufficientlyDifferent(self):
    """
    After a filter has seen several read sets, a new set that is not
    different enough must be rejected (C{accept} returns C{False}).
    """
    readSetFilter = ReadSetFilter(0.5)
    # Feed the filter two earlier read sets, in order.
    earlier = (
        ("title1", (0, 1, 2, 3, 4)),
        ("title2", (5, 6, 7, 8, 9)),
    )
    for title, reads in earlier:
        readSetFilter.accept(title, self.makeTitleAlignments(*reads))
    candidate = self.makeTitleAlignments(0, 1, 2, 11)
    self.assertFalse(readSetFilter.accept("title3", candidate))
def testDuplicateSingleReadZeroThreshold(self):
    """
    If the C{minNew} threshold is zero, offering the exact read set the
    filter has already seen must result in C{True}.
    """
    readSetFilter = ReadSetFilter(0.0)
    firstCopy = self.makeTitleAlignments(0)
    readSetFilter.accept("title1", firstCopy)
    secondCopy = self.makeTitleAlignments(0)
    self.assertTrue(readSetFilter.accept("title2", secondCopy))
def testInsufficientlyDifferent(self):
    """
    A new read set that is insufficiently different must be rejected
    (C{False}) by a filter that has already seen several read sets.
    """
    readSetFilter = ReadSetFilter(0.5)
    firstSeen = self.makeTitleAlignments(0, 1, 2, 3, 4)
    readSetFilter.accept('title1', firstSeen)
    secondSeen = self.makeTitleAlignments(5, 6, 7, 8, 9)
    readSetFilter.accept('title2', secondSeen)
    tooSimilar = self.makeTitleAlignments(0, 1, 2, 11)
    accepted = readSetFilter.accept('title3', tooSimilar)
    self.assertFalse(accepted)
def testThresholdRoundsUp(self):
    """
    When deciding on acceptance, the required number of new reads must
    be rounded up.
    """
    halfNewFilter = ReadSetFilter(0.5)
    seen = self.makeTitleAlignments(0, 1, 2, 3, 4)
    halfNewFilter.accept('title1', seen)
    # If we pass a read set of size three, two of the reads will need to
    # be different.
    sizeThree = self.makeTitleAlignments(0, 1, 6)
    self.assertFalse(halfNewFilter.accept('title2', sizeThree))
def testSufficientlyDifferent(self):
    """
    After a filter has seen several read sets, a new set that is
    sufficiently different must be accepted (C{accept} returns C{True}).
    """
    readSetFilter = ReadSetFilter(0.5)
    firstSeen = self.makeTitleAlignments(0, 1, 2, 3, 4)
    readSetFilter.accept('title1', firstSeen)
    secondSeen = self.makeTitleAlignments(5, 6, 7, 8, 9)
    readSetFilter.accept('title2', secondSeen)
    candidate = self.makeTitleAlignments(0, 1, 2, 5, 6, 7)
    accepted = readSetFilter.accept('title3', candidate)
    self.assertTrue(accepted)
def testInvalidates(self):
    """
    The titles invalidated by an earlier title's read set must be
    retrievable, as a list, via C{invalidates}.
    """
    readSetFilter = ReadSetFilter(0.5)
    # Offer the titles in order. title2 and title4 are built with the
    # same argument as title1, title3 with a different one.
    offered = (
        ('title1', 0),
        ('title2', 0),
        ('title3', 1),
        ('title4', 0),
    )
    for title, read in offered:
        readSetFilter.accept(title, self.makeTitleAlignments(read))
    invalidated = readSetFilter.invalidates('title1')
    self.assertEqual(['title2', 'title4'], invalidated)
def testInvalidates(self):
    """
    It must be possible to ask a filter for the list of titles that an
    earlier title's read set invalidated.
    """
    readSetFilter = ReadSetFilter(0.5)
    sharedWithFirst = self.makeTitleAlignments(0)
    readSetFilter.accept("title1", sharedWithFirst)
    sharedWithSecond = self.makeTitleAlignments(0)
    readSetFilter.accept("title2", sharedWithSecond)
    unrelated = self.makeTitleAlignments(1)
    readSetFilter.accept("title3", unrelated)
    sharedWithFourth = self.makeTitleAlignments(0)
    readSetFilter.accept("title4", sharedWithFourth)
    expected = ["title2", "title4"]
    self.assertEqual(expected, readSetFilter.invalidates("title1"))
def filter(self, minMatchingReads=None, minMedianScore=None,
           withScoreBetterThan=None, minNewReads=None, minCoverage=None,
           maxTitles=None, sortOn='maxScore'):
    """
    Produce a new L{TitlesAlignments} holding only those titles in self
    that pass every requested check. A check whose argument is C{None}
    is skipped.

    @param minMatchingReads: titles matched by fewer reads than this are
        rejected.
    @param minMedianScore: titles whose median score is less than this
        value are rejected. Both scores are wrapped in C{self.scoreClass}
        before being compared.
    @param withScoreBetterThan: titles for which
        C{hasScoreBetterThan(withScoreBetterThan)} is false are rejected.
    @param minNewReads: The C{float} fraction of its reads by which a new
        title's read set must differ from the read sets of all previously
        seen titles in order to be accepted. If self already has a
        C{readSetFilter}, that filter is reused and this value is only
        used to decide whether the check is made.
    @param minCoverage: The C{float} minimum fraction of the title
        sequence that must be matched by at least one read.
    @param maxTitles: A non-negative C{int} maximum number of titles to
        keep. If self holds more titles than this, the titles are
        examined in the order given by C{sortTitles(sortOn)} and
        filtering stops once this many have been kept.
    @param sortOn: A C{str} attribute to sort on, passed to
        C{sortTitles}. Only used when self holds more than C{maxTitles}
        titles.
    @raise ValueError: if C{maxTitles} is less than zero. (An unknown
        C{sortOn} value is presumably reported by C{sortTitles}, which is
        only called when sorting is needed.)
    @return: A new L{TitlesAlignments} instance containing only the
        titles that passed.
    """
    # The read set filter is only consulted when a newness check was
    # asked for. It is created on first need and stored on self.
    newnessFilter = None
    if minNewReads is not None:
        if self.readSetFilter is None:
            self.readSetFilter = ReadSetFilter(minNewReads)
        newnessFilter = self.readSetFilter

    # The new instance is always given self's read set filter (which
    # may be None), whether or not newness is checked in this call.
    filtered = TitlesAlignments(
        self.readsAlignments, self.scoreClass, self.readSetFilter,
        importReadsAlignmentsTitles=False)

    limited = maxTitles is not None

    if limited and len(self) > maxTitles:
        if maxTitles < 0:
            raise ValueError('maxTitles (%r) cannot be negative.' %
                             maxTitles)
        # Too many titles. Walk them in sorted order so the loop below
        # can stop once enough have been kept. Truncating the sorted
        # list here would be wrong: some of the leading titles may be
        # rejected below, leaving fewer titles than should be returned.
        candidates = self.sortTitles(sortOn)
    else:
        candidates = self.keys()

    for title in candidates:
        # Check the limit before doing anything else, as it may be zero.
        if limited and len(filtered) == maxTitles:
            break

        alignments = self[title]

        if minMatchingReads is not None:
            if alignments.readCount() < minMatchingReads:
                continue

        if minMedianScore is not None:
            # Wrap both values in the score class of this data set so
            # the comparison does not need to know whether numerically
            # greater scores are considered better.
            median = self.scoreClass(alignments.medianScore())
            threshold = self.scoreClass(minMedianScore)
            if median < threshold:
                continue

        if withScoreBetterThan is not None:
            if not alignments.hasScoreBetterThan(withScoreBetterThan):
                continue

        if minCoverage is not None:
            if alignments.coverage() < minCoverage:
                continue

        # Keep this check last: the filter is only offered titles that
        # passed every other check.
        if newnessFilter:
            if not newnessFilter.accept(title, alignments):
                continue

        filtered.addTitle(title, alignments)

    return filtered