Пример #1
0
def main(recordFilenames, fastaFilename, title, xRange, bitRange):
    """
    Print reads that match in a specified X-axis and bit score range.

    One line is printed to standard output for every HSP of C{title} that
    overlaps C{xRange} and whose score lies within C{bitRange}. If C{title}
    is not present in the filtered alignments, a message is written to
    standard error and the process exits with status 3.

    @param recordFilenames: A C{list} of C{str} file names containing the
        results of a BLAST run, in JSON format.
    @param fastaFilename: The C{str} name of the FASTA file that was originally
        BLASTed.
    @param title: The C{str} title of the subject sequence, as output by BLAST.
    @param xRange: A (start, end) list of C{int}s, giving an X-axis range or
        C{None} if the entire X axis range should be printed.
    @param bitRange: A (start, end) list of C{int}s, giving a bit score range
        or C{None} if the entire bit score range should be printed.
    """
    reads = FastaReads(fastaFilename)
    blastReadsAlignments = BlastReadsAlignments(reads, recordFilenames)
    # Whitelist the wanted title and give a negative regex that matches any
    # title. NOTE(review): presumably whitelisted titles bypass the negative
    # regex, leaving only the wanted title - confirm against filter().
    filtered = blastReadsAlignments.filter(whitelist=set([title]),
                                           negativeTitleRegex='.')
    titlesAlignments = TitlesAlignments(filtered)

    if title not in titlesAlignments:
        # Diagnostics go to stderr so that they cannot be mistaken for (or
        # piped along with) the per-HSP output written to stdout below.
        sys.stderr.write('%s: Title %r not found in BLAST output\n' %
                         (sys.argv[0], title))
        sys.exit(3)

    for titleAlignment in titlesAlignments[title]:
        for hsp in titleAlignment.hsps:
            # An HSP matches the X range if it overlaps it at all, and
            # matches the bit range if its score is within it (inclusive).
            if ((xRange is None or (xRange[0] <= hsp.subjectEnd and
                                    xRange[1] >= hsp.subjectStart)) and
                (bitRange is None or (bitRange[0] <= hsp.score.score <=
                                      bitRange[1]))):
                print(('query: %s, start: %d, end: %d, score: %d' % (
                       titleAlignment.read.id, hsp.subjectStart,
                       hsp.subjectEnd, hsp.score.score)))
Пример #2
0
def main(recordFilenames, fastaFilename, title, xRange, bitRange):
    """
    Print reads that match in a specified X-axis and bit score range.

    Each HSP of C{title} that passes both range checks produces one line on
    standard output. When C{title} is absent from the filtered alignments,
    an error is reported on standard error and the process exits with
    status 3.

    @param recordFilenames: A C{list} of C{str} file names containing the
        results of a BLAST run, in JSON format.
    @param fastaFilename: The C{str} name of the FASTA file that was originally
        BLASTed.
    @param title: The C{str} title of the subject sequence, as output by BLAST.
    @param xRange: A (start, end) list of C{int}s, giving an X-axis range or
        C{None} if the entire X axis range should be printed.
    @param bitRange: A (start, end) list of C{int}s, giving a bit score range
        or C{None} if the entire bit score range should be printed.
    """
    reads = FastaReads(fastaFilename)
    blastReadsAlignments = BlastReadsAlignments(reads, recordFilenames)
    filtered = blastReadsAlignments.filter(whitelist=set([title]),
                                           negativeTitleRegex='.')
    titlesAlignments = TitlesAlignments(filtered)

    if title not in titlesAlignments:
        # Report the failure on stderr, keeping stdout reserved for the
        # 'query: ...' result lines so that callers can safely parse it.
        sys.stderr.write('%s: Title %r not found in BLAST output\n' %
                         (sys.argv[0], title))
        sys.exit(3)

    for titleAlignment in titlesAlignments[title]:
        for hsp in titleAlignment.hsps:
            # Skip HSPs that do not overlap the requested X range.
            if xRange is not None and not (
                    xRange[0] <= hsp.subjectEnd
                    and xRange[1] >= hsp.subjectStart):
                continue
            # Skip HSPs whose score is outside the (inclusive) bit range.
            if bitRange is not None and not (
                    bitRange[0] <= hsp.score.score <= bitRange[1]):
                continue
            print(('query: %s, start: %d, end: %d, score: %d' %
                   (titleAlignment.read.id, hsp.subjectStart,
                    hsp.subjectEnd, hsp.score.score)))
Пример #3
0
 def testCoverageIncludesSome(self):
     """
     Filtering with minCoverage must keep only the expected titles when
     just some of the titles have sufficient coverage.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         # The expected titles follow from the HSP coverage given in
         # sample_data.py; the coverage has to be worked out by hand to
         # see which titles reach the threshold used here.
         result = titlesAlignments.filter(minCoverage=0.0011)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expected, sorted(result.keys()))
Пример #4
0
 def testCoverageIncludesAll(self):
     """
     Filtering with minCoverage must keep every title when all of the
     titles have sufficient coverage.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(minCoverage=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(result.keys()))
Пример #5
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when passed a 0.0 value for
     minNewReads, i.e., a value that considers any read set sufficiently
     novel.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(minNewReads=0.0)
         # No title is expected to be dropped.
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(result.keys()))
Пример #6
0
 def testMedianScore_Bits(self):
     """
     Sorting titles on median score must work for bit scores, with ties
     broken by a secondary sort on title.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3,
                        RECORD4))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3', 'id4'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         # The trailing comments give the median score of each title.
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 25
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 20
         ]
         self.assertEqual(expected,
                          titlesAlignments.sortTitles('medianScore'))
Пример #7
0
 def testTitle(self):
     """
     Sorting titles on 'title' must return them in title order.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json',
                                  scoreClass=LowerIsBetterScore))
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, titlesAlignments.sortTitles('title'))
Пример #8
0
    def testTitleCollection(self):
        """
        When a title occurs in the alignments of more than one read, the
        data from each of those reads must be collected under that title.
        """
        jsonLines = ''.join(
            dumps(record) + '\n' for record in (PARAMS, RECORD2, RECORD3))
        with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads = Reads()
            reads.add(read2)
            reads.add(read3)
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, 'file.json'))

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(30000, titleAlignments.subjectLength)
            self.assertEqual(2, len(titleAlignments))

            # The alignments must appear in read order, and the first HSP
            # of each must have a score of 20.
            for index, expectedRead in enumerate((read2, read3)):
                titleAlignment = titleAlignments[index]
                self.assertEqual(expectedRead, titleAlignment.read)
                self.assertEqual(HSP(20), titleAlignment.hsps[0])
Пример #9
0
    def testExpectedTitleDetails(self):
        """
        Each TitleAlignments instance held by a TitlesAlignments instance
        must have the expected attributes.
        """
        jsonLines = dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'
        with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
            read = Read('id0', 'A' * 70)
            reads = Reads()
            reads.add(read)
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, 'file.json'))

            # (title, subject length, score of the first HSP) for each of
            # the two titles checked.
            expectations = (
                ('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99', 37000, 20),
                ('gi|887699|gb|DQ37780 Squirrelpox virus 55', 38000, 25),
            )
            for title, subjectLength, score in expectations:
                titleAlignments = titlesAlignments[title]
                self.assertEqual(title, titleAlignments.subjectTitle)
                self.assertEqual(subjectLength,
                                 titleAlignments.subjectLength)
                self.assertEqual(1, len(titleAlignments))
                self.assertEqual(read, titleAlignments[0].read)
                self.assertEqual(HSP(score), titleAlignments[0].hsps[0])
Пример #10
0
 def testMaxScore_EValue(self):
     """
     Sorting titles on max score must work when the scores are e-values,
     including a secondary sort on title.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json',
                                  scoreClass=LowerIsBetterScore))
         # Scores are compared using LowerIsBetterScore, so the expected
         # order runs from the smallest e-value to the largest (the
         # trailing comments give the e-value for each title).
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 1e-6
         ]
         self.assertEqual(expected, titlesAlignments.sortTitles('maxScore'))
Пример #11
0
 def testUnknown(self):
     """
     Asking sortTitles to sort on an unknown attribute must raise
     C{ValueError}.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=paramsOnly)):
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(Reads(), 'file.json'))
         self.assertRaises(ValueError, titlesAlignments.sortTitles, 'xxx')
Пример #12
0
 def testEmpty(self):
     """
     titleCounts must return an empty dictionary when it is passed a
     readsAlignments that has no alignments.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=paramsOnly)):
         emptyAlignments = BlastReadsAlignments(Reads(), 'file.json')
         self.assertEqual({}, titleCounts(emptyAlignments))
Пример #13
0
 def testEmpty(self):
     """
     Sorting must return the empty list when there are no titles.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=paramsOnly)):
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(Reads(), 'file.json'))
         self.assertEqual([], titlesAlignments.sortTitles('title'))
Пример #14
0
 def testEmpty(self):
     """
     A TitlesAlignments instance built from an empty readsAlignments
     instance must have no titles.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=paramsOnly)):
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(Reads(), 'file.json'))
         titles = list(titlesAlignments.keys())
         self.assertEqual([], titles)
Пример #15
0
 def testDuplicatedTitle(self):
     """
     When the alignments for several reads share a title, titleCounts
     must report the correct count for that title.
     """
     jsonLines = ''.join(
         dumps(record) + '\n' for record in (PARAMS, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         expected = {'gi|887699|gb|DQ37780 Cowpox virus 15': 2}
         self.assertEqual(expected, titleCounts(readsAlignments))
Пример #16
0
 def testAddTitle(self):
     """
     Calling addTitle must add the given title to the TitlesAlignments
     instance.
     """
     jsonLines = dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         newTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
         newTitleAlignments = TitleAlignments(newTitle, 55)
         # The title must be absent beforehand and present afterwards.
         self.assertTrue(newTitle not in titlesAlignments)
         titlesAlignments.addTitle(newTitle, newTitleAlignments)
         self.assertTrue(newTitle in titlesAlignments)
Пример #17
0
 def testMaxTitlesZero(self):
     """
     Filtering with maxTitles set to zero must produce an empty result.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(maxTitles=0, sortOn='maxScore')
         self.assertEqual(0, len(result))
Пример #18
0
 def testMaxTitlesNegative(self):
     """
     The filter function must raise a ValueError if maxTitles is less than
     zero.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # A raw string keeps the regex backslashes literal without
         # relying on Python's tolerance of invalid escape sequences
         # (which triggers a warning on recent Python versions).
         error = r'^maxTitles \(-1\) cannot be negative\.$'
         six.assertRaisesRegex(self,
                               ValueError,
                               error,
                               titlesAlignments.filter,
                               maxTitles=-1)
def check(fastaFile, jsonFiles):
    """
    Check for simple consistency between the FASTA file and the JSON files.

    Note that some checking is already performed by the BlastReadsAlignments
    class. That includes checking the number of reads matches the number of
    BLAST records and that read ids and BLAST record read ids match.

    The checks are made with explicit C{raise} statements rather than
    C{assert}, so that they are still performed when Python is run with
    optimization enabled (C{-O}), which strips C{assert} statements.

    @param fastaFile: The C{str} name of a FASTA-containing file.
    @param jsonFiles: A C{list} of names of our BLAST JSON. These may
        be compressed (as bz2).
    @raise AssertionError: If the query of an HSP (ignoring gaps) is longer
        than the corresponding FASTA sequence, or if the read offsets of an
        HSP are decreasing or extend beyond the FASTA sequence length.
    """
    reads = FastaReads(fastaFile)
    readsAlignments = BlastReadsAlignments(reads, jsonFiles)
    for index, readAlignments in enumerate(readsAlignments):

        # Check that all the alignments in the BLAST JSON do not have query
        # sequences or query offsets that are greater than the length of
        # the sequence given in the FASTA file.
        fastaLen = len(readAlignments.read)
        for readAlignment in readAlignments:
            for hsp in readAlignment.hsps:
                # The FASTA sequence should be at least as long as the
                # query in the JSON BLAST record (minus any gaps).
                if not (fastaLen >=
                        len(hsp.query) - hsp.query.count('-')):
                    raise AssertionError(
                        'record %d: FASTA len %d < HSP query len %d.\n'
                        'FASTA: %s\nQuery match: %s' % (
                            index, fastaLen, len(hsp.query),
                            readAlignments.read.sequence, hsp.query))
                # The FASTA sequence length should be at least as large as
                # either of the query offsets mentioned in the JSON BLAST
                # record. That's because readStart and readEnd are offsets
                # into the read - so they can't be bigger than the read
                # length.
                if not (fastaLen >= hsp.readEnd >= hsp.readStart):
                    raise AssertionError(
                        'record %d: FASTA len %d not greater than both read '
                        'offsets (%d - %d), or read offsets are '
                        'non-increasing. '
                        'FASTA: %s\nQuery match: %s' % (
                            index, fastaLen, hsp.readStart, hsp.readEnd,
                            readAlignments.read.sequence, hsp.query))
Пример #20
0
 def testAddTitleRepeat(self):
     """
     The addTitle function must raise a C{KeyError} if an attempt is made
     to add a pre-existing title to a TitlesAlignments instance.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         title = 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
         titleAlignments = TitleAlignments(title, 55)
         # Raw strings keep the regex escapes (\| and \.) literal without
         # relying on Python's tolerance of invalid escape sequences
         # (which triggers a warning on recent Python versions).
         error = (r"Title 'gi\|887699\|gb\|DQ37780 Squirrelpox virus "
                  r"1296/99' already present in TitlesAlignments instance\.")
         six.assertRaisesRegex(self, KeyError, error,
                               titlesAlignments.addTitle, title,
                               titleAlignments)
Пример #21
0
 def testCoverageExcludesAll(self):
     """
     Filtering with minCoverage must leave no titles when none of the
     titles has sufficient coverage.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(minCoverage=0.1)
         self.assertEqual(0, len(result))
Пример #22
0
 def testHsps(self):
     """
     The hsps method must yield every HSP of every title in a
     TitlesAlignments instance.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         # Compare in sorted order, since only the collection of HSPs
         # matters here, not the order in which they are yielded.
         expected = sorted([HSP(20), HSP(25), HSP(20), HSP(20), HSP(20)])
         found = sorted(list(titlesAlignments.hsps()))
         self.assertEqual(expected, found)
Пример #23
0
 def testUnknownSortOn(self):
     """
     The filter function must raise a ValueError if the passed sortOn
     value isn't recognized.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # Raw strings keep the regex escape (\.) literal without relying
         # on Python's tolerance of invalid escape sequences (which
         # triggers a warning on recent Python versions).
         error = (r'^Sort attribute must be one of "length", "maxScore", '
                  r'"medianScore", "readCount", "title"\.$')
         six.assertRaisesRegex(self,
                               ValueError,
                               error,
                               titlesAlignments.filter,
                               maxTitles=0,
                               sortOn='unknown')
Пример #24
0
 def testExpectedTitles(self):
     """
     A TitlesAlignments instance must contain the expected titles.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(titlesAlignments.keys()))
Пример #25
0
 def testMinMatchingReads(self):
     """
     The filter function must work correctly when passed a value for
     minMatchingReads.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(minMatchingReads=2)
         # Only one title is expected to survive the filter.
         expected = ['gi|887699|gb|DQ37780 Cowpox virus 15']
         self.assertEqual(expected, list(result.keys()))
Пример #26
0
 def testWithScoreBetterThan_Bits(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan and the scores are bit scores.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(withScoreBetterThan=24)
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
         self.assertEqual(expected, list(result.keys()))
Пример #27
0
 def testMaxTitlesOne(self):
     """
     Filtering with maxTitles set to one must return just the best title.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(maxTitles=1, sortOn='maxScore')
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
         self.assertEqual(expected, sorted(result.keys()))
Пример #28
0
    def testTwoJSONInputsWithSubjectInCommon(self):
        """
        When L{BlastReadsAlignments} is given two JSON files that have a
        matched subject in common and a TitlesAlignments is built from it,
        the shared title must hold information from both reads, including
        the correct HSP scores.
        """
        # Records still waiting to be served. The first call to the patched
        # open gets RECORD2 and every later call gets RECORD4.
        pending = [RECORD2]

        def openSideEffect(_ignoredFilename, **kwargs):
            record = pending.pop() if pending else RECORD4
            return File([dumps(PARAMS) + '\n', dumps(record) + '\n'])

        title = 'gi|887699|gb|DQ37780 Cowpox virus 15'

        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = openSideEffect
            reads = Reads()
            for readId in ('id2', 'id4'):
                reads.add(Read(readId, 'A' * 70))
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, ['file1.json', 'file2.json']))
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(4, titleAlignments.hspCount())
            self.assertEqual('id2', titleAlignments[0].read.id)
            self.assertEqual('id4', titleAlignments[1].read.id)
            # The first matching read has one HSP.
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])
            # The second matching read has three HSPs.
            secondHsps = titleAlignments[1].hsps
            self.assertEqual(HSP(10), secondHsps[0])
            self.assertEqual(HSP(5), secondHsps[1])
            self.assertEqual(HSP(3), secondHsps[2])
Пример #29
0
 def testThreeRecords(self):
     """
     titleCounts must return the correct title counts when it is passed
     the alignments for three reads.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         # Every title is expected to be counted exactly once.
         expected = {
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99': 1,
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.': 1,
             'gi|887699|gb|DQ37780 Cowpox virus 15': 1,
             'gi|887699|gb|DQ37780 Monkeypox virus 456': 1,
             'gi|887699|gb|DQ37780 Squirrelpox virus 55': 1,
         }
         self.assertEqual(expected, titleCounts(readsAlignments))
Пример #30
0
 def testMaxTitlesTwoSortOnLength(self):
     """
     Filtering with maxTitles set to 2 and sortOn set to 'length' must
     return the two titles whose sequences are the longest.
     """
     jsonLines = ''.join(
         dumps(record) + '\n'
         for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titlesAlignments = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         result = titlesAlignments.filter(maxTitles=2, sortOn='length')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(result.keys()))
Пример #31
0
    def testReadSetFilterStrict(self):
        """
        The filter function must work correctly when passed a 1.0 value for
        minNewReads.
        """
        jsonLines = ''.join(
            dumps(record) + '\n'
            for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
        with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
            reads = Reads()
            for readId in ('id0', 'id1', 'id2', 'id3'):
                reads.add(Read(readId, 'A' * 70))
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, 'file.json'))
            result = titlesAlignments.filter(minNewReads=1.0)

            mummypox = 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
            monkeypox = 'gi|887699|gb|DQ37780 Monkeypox virus 456'

            # One of these two titles invalidates the other, but which one
            # survives depends on Python's dict walking order. So try both
            # directions and make sure that exactly one of them holds.
            survivorCount = 0
            for kept, dropped in ((mummypox, monkeypox),
                                  (monkeypox, mummypox)):
                if kept in result:
                    self.assertTrue(
                        dropped in result.readSetFilter.invalidates(kept))
                    survivorCount += 1

            self.assertEqual(1, survivorCount)
Пример #32
0
    jsonFiles = list(chain.from_iterable(args.json))
    whitelist = (
        set(chain.from_iterable(args.whitelist)) if args.whitelist else None)
    blacklist = (
        set(chain.from_iterable(args.blacklist)) if args.blacklist else None)

    # TODO: Add a --readClass command-line option in case we want to
    # process FASTA containing AA sequences.
    if args.fasta:
        reads = FastaReads(list(chain.from_iterable(args.fasta)))
    else:
        reads = FastqReads(list(chain.from_iterable(args.fastq)))

    if args.matcher == 'blast':
        from dark.blast.alignments import BlastReadsAlignments
        readsAlignments = BlastReadsAlignments(reads, jsonFiles)
    else:
        # Must be 'diamond' (due to parser.add_argument 'choices' argument).
        if (args.diamondDatabaseFastaFilename is None and
                args.diamondSqliteDatabaseFilename is None):
            print('Either --diamondDatabaseFastaFilename or '
                  '--diamondSqliteDatabaseFilename must be used with '
                  '--matcher diamond.', file=sys.stderr)
            sys.exit(1)
        elif not (args.diamondDatabaseFastaFilename is None or
                  args.diamondSqliteDatabaseFilename is None):
            print('--diamondDatabaseFastaFilename and '
                  '--diamondSqliteDatabaseFilename cannot both be used with '
                  '--matcher diamond.', file=sys.stderr)
            sys.exit(1)