Exemplo n.º 1
0
 def testCoverageIncludesAll(self):
     """
     The coverage function must return an titlesAlignments instance with
     all titles if all its titles has sufficient coverage.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minCoverage=0.0)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result))
Exemplo n.º 2
0
 def testTitle(self):
     """
     Sorting on title must work.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('title')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ], result)
Exemplo n.º 3
0
 def testMaxScore_Bits(self):
     """
     Sorting on max score must work when scores are bit scores, including a
     secondary sort on title.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch('__builtin__.open', mockOpener, create=True):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
         ], result)
Exemplo n.º 4
0
    def testFromThreeSequences(self):
        """
        If three sequences with no features are used to create an NJTree
        instance, the instance must 1) have a distance matrix that is zero
        on the diagonal and ones elsewhere, 2) save the labels, and 3) produce
        a simple tree with three children.
        """
        sequences = Reads()
        sequences.add(AARead('id1', 'A'))
        sequences.add(AARead('id2', 'A'))
        sequences.add(AARead('id3', 'A'))
        labels = ['x', 'y', 'z']
        njtree = NJTree.fromSequences(labels, sequences,
                                      landmarks=['AlphaHelix'])
        self.assertTrue(np.array_equal(
            [
                [0, 1, 1],
                [1, 0, 1],
                [1, 1, 0],
            ],
            njtree.distance))

        self.assertIs(labels, njtree.labels)
        self.assertEqual(['x:0.5;\n', 'y:0.5;\n', 'z:0.5;\n'],
                         sorted(str(child) for child in njtree.tree.children))
Exemplo n.º 5
0
 def testMaxTitlesTwoSortOnLength(self):
     """
     The filter function must return the two titles whose sequences are the
     longest when maxTitles is 2 and sortOn is 'length'.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(maxTitles=2, sortOn='length')
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result.keys()))
Exemplo n.º 6
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function work correctly when passed a value for
     withScoreBetterThan when using e values.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(withScoreBetterThan=1e-10)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             ],
             list(result.keys()))
Exemplo n.º 7
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch('__builtin__.open', mockOpener, create=True):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('length')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
         ], result)
Exemplo n.º 8
0
 def testCoverageIncludesSome(self):
     """
     The coverage function must return an titlesAlignments instance with
     only the expected titles if only some of its titles have sufficient
     coverage.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         result = titlesAlignments.filter(minCoverage=0.0003)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             ],
             sorted(result))
Exemplo n.º 9
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function work correctly when passed a value for
     withScoreBetterThan when using e values.
     """
     mockOpener = mockOpen(
         read_data=(dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
                    dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
                    dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads,
             'file.json',
             'database.fasta',
             scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(withScoreBetterThan=1e-10)
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         ], list(result.keys()))
Exemplo n.º 10
0
    def testExpectedTitleDetails(self):
        """
        An instance of TitleAlignments in a TitlesAlignments instance must
        have the expected attributes.
        """
        mockOpener = mockOpen(read_data=(
            dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read = Read('id0', 'A' * 70)
            reads.add(read)
            readsAlignments = BlastReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(37000, titleAlignments.subjectLength)
            self.assertEqual(1, len(titleAlignments))
            self.assertEqual(read, titleAlignments[0].read)
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])

            title = 'gi|887699|gb|DQ37780 Squirrelpox virus 55'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(38000, titleAlignments.subjectLength)
            self.assertEqual(1, len(titleAlignments))
            self.assertEqual(read, titleAlignments[0].read)
            self.assertEqual(HSP(25), titleAlignments[0].hsps[0])
Exemplo n.º 11
0
 def testLengthOne(self):
     """
     A FASTA list with just one item gets de-duped to the same one item.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id', 'GGG')])
Exemplo n.º 12
0
    def testTitleCollection(self):
        """
        A title that occurs in the alignments of multiple reads must have
        the data from both reads collected properly.
        """
        mockOpener = mock_open(read_data=(
            dumps(PARAMS) + '\n' + dumps(RECORD2) + '\n' +
            dumps(RECORD3) + '\n'))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads.add(read2)
            reads.add(read3)
            readsAlignments = DiamondReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(30000, titleAlignments.subjectLength)
            self.assertEqual(2, len(titleAlignments))

            self.assertEqual(read2, titleAlignments[0].read)
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])

            self.assertEqual(read3, titleAlignments[1].read)
            self.assertEqual(HSP(20), titleAlignments[1].hsps[0])
Exemplo n.º 13
0
 def testTabSeparatedSummary(self):
     """
     The summary function must return the correct result.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'f.json', 'db')
         titlesAlignments = TitlesAlignments(readsAlignments)
         summary = titlesAlignments.tabSeparatedSummary(sortOn='title')
         expected = (
             '0.000297\t'
             '20.000000\t'
             '20.000000\t'
             '1\t'
             '1\t'
             '37000\t'
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
             '\n'
             '0.000289\t'
             '25.000000\t'
             '25.000000\t'
             '1\t'
             '1\t'
             '38000\t'
             'gi|887699|gb|DQ37780 Squirrelpox virus 55')
         self.assertEqual(expected, summary)
Exemplo n.º 14
0
 def testTitle(self):
     """
     Sorting on title must work.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('title')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ], result)
Exemplo n.º 15
0
 def testMaxMatchingReads(self):
     """
     The filter function must work correctly when passed a value for
     maxMatchingReads.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(maxMatchingReads=1)
         # Cowpox virus 15 is not in the results as it is matched by two
         # reads.
         self.assertEqual(
             sorted([
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
             ]),
             sorted(result))
Exemplo n.º 16
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when passed a 0.0 value for
     minNewReads, i.e. that considers any read set sufficiently novel.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minNewReads=0.0)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result))
Exemplo n.º 17
0
    def testTwoByTwoAsDict(self):
        """
        If affinityMatrix is called with two reads and the database has two
        subjects, and a dict result is requested, the result must be as
        expected.
        """
        reads = Reads()
        reads.add(AARead('id1', 'FRRRFRRRFAAAFRRRFRRRF'))
        reads.add(AARead('id2', 'FRRRFRRRFAAAFRRRFRRRF'))
        subjects = Reads()
        subjects.add(AARead('id3', 'FRRRFRRRFAAAFRRRFRRRF'))
        subjects.add(AARead('id4', 'FRRRFRRRFAAAFRRRFRRRF'))

        matrix = affinityMatrix(reads, landmarks=['AlphaHelix'],
                                subjects=subjects, computeDiagonal=True,
                                returnDict=True)
        self.assertEqual(
            {
                'id1': {
                    'id3': 1.0,
                    'id4': 1.0,
                },
                'id2': {
                    'id3': 1.0,
                    'id4': 1.0,
                },
            },
            matrix)
Exemplo n.º 18
0
 def testFilterWithNoArguments(self):
     """
     The filter function must return a TitlesAlignments instance with all
     the titles of the original when called with no arguments.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter()
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result.keys()))
Exemplo n.º 19
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function work correctly when passed a 0.0 value for
     minNewReads, i.e. that considers any read set sufficiently novel.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minNewReads=0.0)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result.keys()))
Exemplo n.º 20
0
 def testMinMedianScore_EValue(self):
     """
     The filter function work correctly when passed a value for
     minMedianScore when using e values.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minMedianScore=1e-9)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result.keys()))
Exemplo n.º 21
0
 def testCoverageIncludesAll(self):
     """
     The coverage function must return an titlesAlignments instance with
     all titles if all its titles has sufficient coverage.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minCoverage=0.0)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result.keys()))
Exemplo n.º 22
0
 def testCoverageIncludesSome(self):
     """
     The coverage function must return an titlesAlignments instance with
     only the expected titles if only some of its titles have sufficient
     coverage.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         result = titlesAlignments.filter(minCoverage=0.0011)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             ],
             sorted(result.keys()))
Exemplo n.º 23
0
 def testMaxScore_EValue(self):
     """
     Sorting on max score must work when scores are e values, including a
     secondary sort on title.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         # self.assertEqual([
         #     'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         #     'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
         #     'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
         #     'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
         #     'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
         # ], result)
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ], result)
Exemplo n.º 24
0
 def testTabSeparatedSummary(self):
     """
     The summary function must return the correct result.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'f.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         summary = titlesAlignments.tabSeparatedSummary(sortOn='title')
         expected = (
             '0.000297\t'
             '20.000000\t'
             '20.000000\t'
             '1\t'
             '1\t'
             '37000\t'
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
             '\n'
             '0.000289\t'
             '25.000000\t'
             '25.000000\t'
             '1\t'
             '1\t'
             '38000\t'
             'gi|887699|gb|DQ37780 Squirrelpox virus 55')
         self.assertEqual(expected, summary)
Exemplo n.º 25
0
    def add(self, virusTitle, sampleName):
        """
        Add a a virus title, sample name combination and get its FASTA file
        name. Write the FASTA file if it does not already exist.

        @param virusTitle: A C{str} virus title.
        @param sampleName: A C{str} sample name.
        @return: A C{str} FASTA file name holding all the reads (without
            duplicates) from the sample that matched the proteins in the given
            virus.
        """
        virusIndex = self._viruses.setdefault(virusTitle, len(self._viruses))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))

        try:
            return self._fastaFilenames[(virusIndex, sampleIndex)]
        except KeyError:
            result = Reads()
            for proteinMatch in self._proteinGrouper.virusTitles[
                    virusTitle][sampleName]:
                for read in FastaReads(proteinMatch['fastaFilename'],
                                       checkAlphabet=0):
                    result.add(read)
            saveFilename = join(
                proteinMatch['outDir'],
                'virus-%d-sample-%d.fasta' % (virusIndex, sampleIndex))
            result.filter(removeDuplicates=True).save(saveFilename)
            self._fastaFilenames[(virusIndex, sampleIndex)] = saveFilename
            return saveFilename
Exemplo n.º 26
0
 def testFilterWithNoArguments(self):
     """
     The filter function must return a TitlesAlignments instance with all
     the titles of the original when called with no arguments.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter()
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Cowpox virus 15',
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result))
Exemplo n.º 27
0
 def testMaxScore_Bits(self):
     """
     Sorting on max score must work when scores are bit scores, including a
     secondary sort on title.
     """
     mockOpener = mockOpen(
         read_data=(dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
                    dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
                    dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 25
                 'gi|887699|gb|DQ37780 Cowpox virus 15',  # 20
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 20
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 20
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
             ],
             result)
Exemplo n.º 28
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when passed a value for
     minMedianScore when using e values.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minMedianScore=1e-9)
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             sorted(result))
Exemplo n.º 29
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.
     """
     mockOpener = mockOpen(
         read_data=(dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
                    dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
                    dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('length')
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 38000
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 35000
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 35000
                 'gi|887699|gb|DQ37780 Cowpox virus 15',  # 30000
             ],
             result)
Exemplo n.º 30
0
    def add(self, pathogenName, sampleName):
        """
        Add a (pathogen name, sample name) combination and get its FASTA/FASTQ
        file name and unique read count. Write the FASTA/FASTQ file if it does
        not already exist. Save the unique read count into
        C{self._proteinGrouper}.

        @param pathogenName: A C{str} pathogen name.
        @param sampleName: A C{str} sample name.
        @return: A C{str} giving the FASTA/FASTQ file name holding all the
            reads (without duplicates, by id) from the sample that matched the
            proteins in the given pathogen.
        """
        pathogenIndex = self._pathogens.setdefault(pathogenName,
                                                   len(self._pathogens))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))

        try:
            return self._readsFilenames[(pathogenIndex, sampleIndex)]
        except KeyError:
            reads = Reads()
            for proteinMatch in self._proteinGrouper.pathogenNames[
                    pathogenName][sampleName]['proteins'].values():
                for read in self._readsClass(proteinMatch['readsFilename']):
                    reads.add(read)
            saveFilename = join(
                proteinMatch['outDir'], 'pathogen-%d-sample-%d.%s' %
                (pathogenIndex, sampleIndex, self._format))
            reads.filter(removeDuplicatesById=True)
            nReads = reads.save(saveFilename, format_=self._format)
            # Save the unique read count into self._proteinGrouper
            self._proteinGrouper.pathogenNames[pathogenName][sampleName][
                'uniqueReadCount'] = nReads
            self._readsFilenames[(pathogenIndex, sampleIndex)] = saveFilename
            return saveFilename
Exemplo n.º 31
0
    def testTitleCollection(self):
        """
        A title that occurs in the alignments of multiple reads must have
        the data from both reads collected properly.
        """
        mockOpener = mockOpen(read_data=(
            dumps(PARAMS) + '\n' + dumps(RECORD2) + '\n' +
            dumps(RECORD3) + '\n'))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads.add(read2)
            reads.add(read3)
            readsAlignments = BlastReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(30000, titleAlignments.subjectLength)
            self.assertEqual(2, len(titleAlignments))

            self.assertEqual(read2, titleAlignments[0].read)
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])

            self.assertEqual(read3, titleAlignments[1].read)
            self.assertEqual(HSP(20), titleAlignments[1].hsps[0])
Exemplo n.º 32
0
 def testMaxScore_EValue(self):
     """
     Sorting on max score must work when scores are e values, including a
     secondary sort on title.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         # self.assertEqual([
         #     'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         #     'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
         #     'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
         #     'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
         #     'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
         # ], result)
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ], result)
Exemplo n.º 33
0
    def testExpectedTitleDetails(self):
        """
        An instance of TitleAlignments in a TitlesAlignments instance must
        have the expected attributes.
        """
        mockOpener = mock_open(read_data=(
            dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read = Read('id0', 'A' * 70)
            reads.add(read)
            readsAlignments = DiamondReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(37000, titleAlignments.subjectLength)
            self.assertEqual(1, len(titleAlignments))
            self.assertEqual(read, titleAlignments[0].read)
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])

            title = 'gi|887699|gb|DQ37780 Squirrelpox virus 55'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(38000, titleAlignments.subjectLength)
            self.assertEqual(1, len(titleAlignments))
            self.assertEqual(read, titleAlignments[0].read)
            self.assertEqual(HSP(25), titleAlignments[0].hsps[0])
Exemplo n.º 34
0
 def testLengthOne(self):
     """
     A FASTA list with just one item gets de-duped to the same one item.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id', 'GGG')])
Exemplo n.º 35
0
 def testRemovalOfIdenticalSequences(self):
     """
     A list with 2 copies of the same seq is de-duped to have 1 copy.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     reads.add(Read('id', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id', 'GGG')])
Exemplo n.º 36
0
 def testRemovalOfIdenticalSequences(self):
     """
     A list with 2 copies of the same seq is de-duped to have 1 copy.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     reads.add(Read('id', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id', 'GGG')])
Exemplo n.º 37
0
 def testSummary(self):
     """
     The summary function must return the correct result.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         self.assertEqual(
             [
                 {
                     'bestScore': 20.0,
                     'coverage': 0.00031428571428571427,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 35000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Monkeypox virus 456'),
                 },
                 {
                     'bestScore': 20.0,
                     'coverage': 0.00031428571428571427,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 35000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'),
                 },
                 {
                     'bestScore': 20.0,
                     'coverage': 0.0002972972972972973,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 37000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
                 },
                 {
                     'bestScore': 25.0,
                     'coverage': 0.00028947368421052634,
                     'hspCount': 1,
                     'medianScore': 25.0,
                     'readCount': 1,
                     'subjectLength': 38000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
                 },
             ],
             list(titlesAlignments.summary(sortOn='title')))
Exemplo n.º 38
0
 def testManuallyAddedReadsLength(self):
     """
     A Reads instance with reads added manually must have the correct
     length.
     """
     reads = Reads()
     reads.add(Read('id1', 'AT'))
     reads.add(Read('id2', 'AC'))
     self.assertEqual(2, len(reads))
Exemplo n.º 39
0
 def testRemovalOfIdenticalSequencesWithDifferingIds(self):
     """
     A list with 2 copies of the same seq is de-duped to have 1 copy,
     including when the read ids differ.
     """
     reads = Reads()
     reads.add(Read('id1', 'GGG'))
     reads.add(Read('id2', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id1', 'GGG')])
Exemplo n.º 40
0
 def testSummary(self):
     """
     The summary function must return the correct result.
     """
     mockOpener = mock_open(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         self.assertEqual(
             [
                 {
                     'bestScore': 20.0,
                     'coverage': 0.00031428571428571427,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 35000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Monkeypox virus 456'),
                 },
                 {
                     'bestScore': 20.0,
                     'coverage': 0.00031428571428571427,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 35000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'),
                 },
                 {
                     'bestScore': 20.0,
                     'coverage': 0.0002972972972972973,
                     'hspCount': 1,
                     'medianScore': 20.0,
                     'readCount': 1,
                     'subjectLength': 37000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
                 },
                 {
                     'bestScore': 25.0,
                     'coverage': 0.00028947368421052634,
                     'hspCount': 1,
                     'medianScore': 25.0,
                     'readCount': 1,
                     'subjectLength': 38000,
                     'subjectTitle': (
                         'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
                 },
             ],
             list(titlesAlignments.summary(sortOn='title')))
Exemplo n.º 41
0
 def testRemovalOfIdenticalSequencesWithDifferingIds(self):
     """
     A list with 2 copies of the same seq is de-duped to have 1 copy,
     including when the read ids differ.
     """
     reads = Reads()
     reads.add(Read('id1', 'GGG'))
     reads.add(Read('id2', 'GGG'))
     self.assertEqual(list(dedupFasta(reads)), [Read('id1', 'GGG')])
Exemplo n.º 42
0
 def testAlignmentPanel(self):
     """
     The alignmentPanel function must work properly.
     """
     findParams = FindParameters(significanceMethod='HashFraction',
                                 binScoreMethod='FeatureAAScore')
     reads = Reads()
     reads.add(GOLV)
     reads.add(AKAV)
     alignmentPanel(reads, reads, findParams=findParams, showFigure=False)
Exemplo n.º 43
0
 def testCorrectNumberOfSequences(self):
     """
     The right number of strings must be made (one for each sequence).
     """
     reads = Reads()
     reads.add(MONKEYPOX)
     reads.add(MUMMYPOX)
     hashesString = HashesString(reads, 0.0, landmarks=['AlphaHelix'])
     result = len(hashesString.hashString)
     self.assertEqual(2, result)
Exemplo n.º 44
0
 def testManuallyAddedReads(self):
     """
     A Reads instance with reads added manually must be able to be listed.
     """
     reads = Reads()
     read1 = Read('id1', 'AT')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     self.assertEqual([read1, read2], list(reads))
Exemplo n.º 45
0
 def testDefaultLabel(self):
     """
     The ClusterAnalysis initializer must assign the default label to any
     read without a corresponding label.
     """
     reads = Reads()
     reads.add(AARead('id', 'MMM'))
     ca = ClusterAnalysis(reads, {}, defaultLabel=5,
                          landmarks=['AlphaHelix'])
     self.assertEqual([5], ca.trueLabels)
Exemplo n.º 46
0
 def testMissingLabelNoDefault(self):
     """
     The ClusterAnalysis initializer must raise ValueError if a read is
     given without a corresponding label and there is no default label.
     """
     reads = Reads()
     reads.add(AARead('id', 'MMM'))
     error = "Read 'id' has no corresponding label"
     six.assertRaisesRegex(self, ValueError, error, ClusterAnalysis, reads,
                           {}, landmarks=['AlphaHelix'])
Exemplo n.º 47
0
    def reads(self):
        """
        Find the set of reads matching this title.

        @return: An instance of C{dark.reads.Reads}.
        """
        reads = Reads()
        for alignment in self:
            reads.add(alignment.read)
        return reads
Exemplo n.º 48
0
 def testOneReadAffinity(self):
     """
     The ClusterAnalysis initializer must assign a 1.0 affinity to a single
     read.
     """
     reads = Reads()
     reads.add(AARead('id', 'MMM'))
     ca = ClusterAnalysis(reads, {}, defaultLabel=5,
                          landmarks=['AlphaHelix'])
     self.assertEqual([[1.0]], ca.affinity)
Exemplo n.º 49
0
    def reads(self):
        """
        Find the set of reads matching this title.

        @return: An instance of C{dark.reads.Reads}.
        """
        reads = Reads()
        for alignment in self:
            reads.add(alignment.read)
        return reads
Exemplo n.º 50
0
 def testFilterOnMaxLength(self):
     """
     Filtering on maximal length must work.
     """
     reads = Reads()
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads.add(read1)
     reads.add(read2)
     result = reads.filter(maxLength=3)
     self.assertEqual([read2], list(result))
Exemplo n.º 51
0
 def testOneReadClusterCount(self):
     """
     The KMeansAnalysis class must find one cluster when given one read,
     and set the value of nClusters to 1.
     """
     reads = Reads()
     reads.add(AARead('id1', 'FRRRFRRRF'))
     km = KMeansAnalysis(reads, {}, defaultLabel=0,
                         landmarks=['AlphaHelix'])
     km.cluster(1)
     self.assertEqual(1, km.nClusters)
Exemplo n.º 52
0
 def testOneSequenceSpecificDiagonalValue(self):
     """
     If affinityMatrix is called with a single read and a specific
     diagonal value, that diagonal value must be in the result.
     """
     reads = Reads()
     read = AARead('id1', 'AAA')
     reads.add(read)
     matrix = affinityMatrix(reads, landmarks=['AlphaHelix'],
                             diagonalValue=2.0)
     self.assertEqual([[2.0]], matrix)
Exemplo n.º 53
0
 def testPlotHistogramLinesMustRun(self):
     """
     The plotHistogramLines function must run properly.
     """
     findParams = FindParameters(significanceMethod='HashFraction',
                                 binScoreMethod='FeatureAAScore')
     reads = Reads()
     reads.add(GOLV)
     reads.add(AKAV)
     reads.add(BUNV)
     plotHistogramLines(reads, findParams=findParams)
Exemplo n.º 54
0
 def testFilterOnLengthNothingMatches(self):
     """
     When filtering on length, no reads should be returned if none of them
     satisfy the length requirements.
     """
     reads = Reads()
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads.add(read1)
     reads.add(read2)
     result = reads.filter(minLength=10, maxLength=15)
     self.assertEqual([], list(result))
Exemplo n.º 55
0
 def testSaveWithUnknownFormat(self):
     """
     A Reads instance must raise ValueError if asked to save in an unknown
     format.
     """
     reads = Reads()
     read1 = Read('id1', 'AT', '!!')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     error = "Save format must be either 'fasta' or 'fastq'\\."
     self.assertRaisesRegexp(ValueError, error, reads.save, 'file', 'xxx')
Exemplo n.º 56
0
 def testSaveAsFASTQFailsOnReadWithNoQuality(self):
     """
     A Reads instance must raise a ValueError if asked to save in FASTQ
     format and there is a read with no quality present.
     """
     reads = Reads()
     read1 = Read('id1', 'AT', '!!')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     error = "Read 'id2' has no quality information"
     self.assertRaisesRegexp(ValueError, error, reads.save, 'file', 'fastq')
Exemplo n.º 57
0
 def testFilterWithMinLengthEqualToMaxLength(self):
     """
     When filtering on length, a read should be returned if its length
     equals a passed minimum and maximum length.
     """
     reads = Reads()
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads.add(read1)
     reads.add(read2)
     result = reads.filter(minLength=4, maxLength=4)
     self.assertEqual([read1], list(result))
Exemplo n.º 58
0
 def testFilterOnLengthEverythingMatches(self):
     """
     When filtering on length, all reads should be returned if they all
     satisfy the length requirements.
     """
     reads = Reads()
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads.add(read1)
     reads.add(read2)
     result = reads.filter(minLength=2, maxLength=5)
     self.assertEqual([read1, read2], list(result))
Exemplo n.º 59
0
 def testSaveToFileDescriptor(self):
     """
     A Reads instance must save to a file-like object if not passed a string
     filename.
     """
     reads = Reads()
     read1 = Read('id1', 'AT')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     fp = StringIO()
     reads.save(fp)
     self.assertEqual('>id1\nAT\n>id2\nAC\n', fp.getvalue())
Exemplo n.º 60
0
 def testMaxTitlesNegative(self):
     """
     The filter function must raise a ValueError if maxTitles is less than
     zero.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         error = '^maxTitles \(-1\) cannot be negative\.$'
         six.assertRaisesRegex(self, ValueError, error,
                               titlesAlignments.filter, maxTitles=-1)