def testRepeatedFilter_MinStartThenMaxstop(self):
    """
    Applying filter twice (first with minStart, then with maxStop) must
    leave a single read, read1, with two alignments.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        # Two separate filter calls, each with its own parameter.
        alignments.filter(minStart=9000)
        alignments.filter(maxStop=12000)
        hits = list(alignments)
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read1', onlyHit.read.id)
        self.assertEqual(2, len(onlyHit))
def testClearFilter(self):
    """
    After clearFilter is called, all three read alignments must be
    delivered again, even though earlier filters reduced them to one.
    """
    def makeBZ2(_):
        return BZ2([PARAMS, RECORD0, RECORD1, RECORD2])

    with patch.object(bz2, 'BZ2File') as mockBZ2File:
        mockBZ2File.side_effect = makeBZ2
        alignments = LightReadsAlignments('file.json.bz2', DB)
        # Unfiltered: all three reads are present.
        self.assertEqual(3, len(list(alignments)))
        alignments.filter(minSequenceLen=14)
        alignments.filter(maxSequenceLen=16)
        alignments.filter(scoreCutoff=0.05)
        filtered = list(alignments)
        self.assertEqual(1, len(filtered))
        # Clearing the filter must restore the original three.
        alignments.clearFilter()
        self.assertEqual(3, len(list(alignments)))
def testReadIdNoMatches(self):
    """
    A read id regex that matches none of the read ids must produce an
    empty result.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(readIdRegex='blah'))
        self.assertEqual(0, len(hits))
def testLimitZero(self):
    """
    A limit of zero passed to the L{LightReadsAlignments} filter must
    result in nothing being delivered.
    """
    data = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(limit=0))
        self.assertEqual(0, len(hits))
def testTitleByNegativeRegexMatchesAll(self):
    """
    A negative title regex that matches every alignment must remove them
    all, giving an empty result.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(negativeTitleRegex='pox'))
        self.assertEqual(0, len(hits))
def testReadIdCaseSensitive(self):
    """
    Read id regex filtering must be case sensitive: an upper case
    pattern must not match any read.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(readIdRegex='^READ0$'))
        self.assertEqual(0, len(hits))
def testTitleByRegexCaseInvariant(self):
    """
    Title regex filtering must ignore case: a mixed case pattern must
    still select read0 and its Squirrelpox alignment.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(titleRegex='sqUIRRel'))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read0', onlyHit.read.id)
        self.assertEqual(SQUIRRELPOX.id, onlyHit[0].subjectTitle)
def testLimitOne(self):
    """
    A limit of one passed to the L{LightReadsAlignments} filter must
    result in just the first read (read0) being delivered.
    """
    data = PARAMS + RECORD0 + RECORD1
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(limit=1))
        self.assertEqual(1, len(hits))
        self.assertEqual('read0', hits[0].read.id)
def testNoResultNoFilteringArgs(self):
    """
    When the input holds only parameters (no records), calling the
    L{LightReadsAlignments} filter with no arguments must yield nothing.
    """
    with patch.object(builtins, 'open', mockOpen(read_data=PARAMS)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter())
        self.assertEqual(0, len(hits))
def testMinTitleSequenceLengthNoHits(self):
    """
    Filtering on a minimum hit sequence length that no sequence reaches
    must produce an empty result.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(minSequenceLen=1000000))
        self.assertEqual(0, len(hits))
def testReadIdAnchored(self):
    """
    A read id regex anchored at both start and end must select exactly
    the read whose id it matches (read0).
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(readIdRegex='^read0$'))
        self.assertEqual(1, len(hits))
        self.assertEqual('read0', hits[0].read.id)
def testMaxStopNoHits(self):
    """
    Filtering on a maximum offset in the hit sequence that no HSP
    satisfies must produce an empty result.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(maxStop=100))
        self.assertEqual(0, len(hits))
def testReadId(self):
    """
    A read id regex must select the reads whose ids it matches; here
    read1 and read2, in that order.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(readIdRegex='read[12]'))
        self.assertEqual(2, len(hits))
        self.assertEqual('read1', hits[0].read.id)
        self.assertEqual('read2', hits[1].read.id)
def testOneAlignmentPerRead(self):
    """
    When the L{LightReadsAlignments} filter is given
    oneAlignmentPerRead=True, the read must come back with a single
    alignment (here, the Squirrelpox one).
    """
    data = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(oneAlignmentPerRead=True))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual(1, len(onlyHit))
        self.assertEqual(SQUIRRELPOX.id, onlyHit[0].subjectTitle)
def testTitleByRegexOneAlignments(self):
    """
    A title regex that matches only some of a read's alignments must
    still deliver that read (read1), led by the matching Mummypox
    alignment.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(titleRegex='Mummy'))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read1', onlyHit.read.id)
        self.assertEqual(MUMMYPOX.id, onlyHit[0].subjectTitle)
def testMinStartAndMaxstop(self):
    """
    Passing both a minimum and a maximum offset in the hit sequence to
    a single filter call must leave read1 with two alignments.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(minStart=9000, maxStop=12000))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read1', onlyHit.read.id)
        self.assertEqual(2, len(onlyHit))
def testOneHitNoFilteringArgs(self):
    """
    With one record in the input, calling the L{LightReadsAlignments}
    filter with no arguments must yield that one read (read0).
    """
    data = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter())
        self.assertEqual(1, len(hits))
        self.assertEqual('read0', hits[0].read.id)
def testMaxStop(self):
    """
    Filtering on a maximum offset in the hit sequence must keep only
    the qualifying alignment: read2 against Cowpox.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(maxStop=1500))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read2', onlyHit.read.id)
        self.assertEqual(1, len(onlyHit))
        self.assertEqual(COWPOX.id, onlyHit[0].subjectTitle)
def testMaxTitleSequenceLengthNoHits(self):
    """
    Filtering on a maximum hit sequence length one less than the Cowpox
    sequence length must produce an empty result.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        tooShort = len(COWPOX.sequence) - 1
        hits = list(alignments.filter(maxSequenceLen=tooShort))
        self.assertEqual(0, len(hits))
def testMinTitleSequenceLength(self):
    """
    Filtering on a minimum hit sequence length equal to the Mummypox
    sequence length must keep only READ1's Mummypox alignment.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        minLength = len(MUMMYPOX.sequence)
        hits = list(alignments.filter(minSequenceLen=minLength))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual(READ1.id, onlyHit.read.id)
        self.assertEqual(1, len(onlyHit))
        self.assertEqual(MUMMYPOX.id, onlyHit[0].subjectTitle)
def testMinStart(self):
    """
    Filtering on a minimum offset in the hit sequence must keep only
    the qualifying alignment: read0 against Squirrelpox.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(minStart=15300))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read0', onlyHit.read.id)
        self.assertEqual(1, len(onlyHit))
        self.assertEqual('Squirrelpox virus 1296/99',
                         onlyHit[0].subjectTitle)
def testTitleByRegexMatchingAllWithBlacklist(self):
    """
    A title regex that matches every alignment must keep them all apart
    from blacklisted titles; with Squirrelpox and Catpox blacklisted,
    read1 and read2 remain.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        excluded = [SQUIRRELPOX.id, CATPOX.id]
        hits = list(alignments.filter(titleRegex='pox',
                                      blacklist=excluded))
        self.assertEqual(2, len(hits))
        self.assertEqual('read1', hits[0].read.id)
        self.assertEqual('read2', hits[1].read.id)
def testTitleByNegativeRegexOneAlignment(self):
    """
    A negative title regex that rules out only some of a read's
    alignments must drop just those alignments: all three reads remain
    and read1 keeps only its Monkeypox alignment.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        hits = list(alignments.filter(negativeTitleRegex='Mummy'))
        self.assertEqual(3, len(hits))
        secondHit = hits[1]
        self.assertEqual('read1', secondHit.read.id)
        self.assertEqual(1, len(secondHit))
        self.assertEqual(MONKEYPOX.id, secondHit[0].subjectTitle)
def testScoreCutoffRemovesEntireAlignment(self):
    """
    When the L{LightReadsAlignments} filter is given a scoreCutoff that
    leaves an alignment with no HSPs, that alignment must be removed
    entirely. With a cutoff just below the read0 Squirrelpox score, only
    the Squirrelpox alignment is expected to remain.
    """
    data = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        cutoff = READ0_SQUIRRELPOX_SCORE - 0.01
        hits = list(alignments.filter(scoreCutoff=cutoff))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual(1, len(onlyHit))
        self.assertEqual(SQUIRRELPOX.id, onlyHit[0].subjectTitle)
def testTitleByNegativeRegexMatchingAllWithWhitelist(self):
    """
    A negative title regex that matches every alignment must remove
    them all apart from whitelisted titles; with Squirrelpox
    whitelisted, only read0's Squirrelpox alignment remains.
    """
    data = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=data)):
        alignments = LightReadsAlignments('file.json', DB)
        keptTitle = SQUIRRELPOX.id
        hits = list(alignments.filter(negativeTitleRegex='pox',
                                      whitelist=[keptTitle]))
        self.assertEqual(1, len(hits))
        onlyHit = hits[0]
        self.assertEqual('read0', onlyHit.read.id)
        self.assertEqual(1, len(onlyHit))
        self.assertEqual(keptTitle, onlyHit[0].subjectTitle)