Пример #1
0
    def testTitleCollection(self):
        """
        When one title appears in the alignments of several reads, the
        TitleAlignments for that title must hold the data from each of
        those reads.
        """
        records = (PARAMS, RECORD2, RECORD3)
        jsonLines = ''.join(dumps(record) + '\n' for record in records)
        with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads = Reads()
            for read in (read2, read3):
                reads.add(read)
            readsAlignments = BlastReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            collected = titlesAlignments[title]
            self.assertEqual(title, collected.subjectTitle)
            self.assertEqual(30000, collected.subjectLength)
            self.assertEqual(2, len(collected))

            # The alignments must appear in the order the reads were added,
            # and the first HSP of each must equal HSP(20).
            for index, expectedRead in enumerate((read2, read3)):
                self.assertEqual(expectedRead, collected[index].read)
                self.assertEqual(HSP(20), collected[index].hsps[0])
Пример #2
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must return a TitlesAlignments
     instance that holds every title of the original.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter()
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #3
0
 def testMaxScore_Bits(self):
     """
     Sorting on max score must work when scores are bit scores, including a
     secondary sort on title.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     # Patch open on the builtins module object (as the other tests here
     # do) rather than via the '__builtin__' module name, which only exists
     # on Python 2.
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         # The trailing comments give each title's max score; ties are
         # expected to be broken by title.
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
         ], result)
Пример #4
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when passed a value for
     minMedianScore when using e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minMedianScore=1e-9)
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #5
0
 def testCoverageIncludesAll(self):
     """
     Filtering on minCoverage must return a TitlesAlignments instance with
     all titles if all of its titles have sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minCoverage=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #6
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan when using e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(withScoreBetterThan=1e-10)
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
         self.assertEqual(expected, list(filtered.keys()))
Пример #7
0
    def add(self, virusTitle, sampleName):
        """
        Add a virus title, sample name combination and get its FASTA file
        name. Write the FASTA file if it does not already exist.

        @param virusTitle: A C{str} virus title.
        @param sampleName: A C{str} sample name.
        @raise ValueError: If there are no protein matches for the given
            virus title and sample name, in which case there is no output
            directory in which to write the FASTA file.
        @return: A C{str} FASTA file name holding all the reads (without
            duplicates) from the sample that matched the proteins in the given
            virus.
        """
        # Titles and sample names are given small integer indices (in order
        # of first appearance) which are used to build the file name.
        virusIndex = self._viruses.setdefault(virusTitle, len(self._viruses))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))
        key = (virusIndex, sampleIndex)

        try:
            return self._fastaFilenames[key]
        except KeyError:
            pass

        proteinMatches = list(
            self._proteinGrouper.virusTitles[virusTitle][sampleName])
        if not proteinMatches:
            # Without at least one match there is no 'outDir' to save
            # into. Fail with an explicit error instead of an
            # UnboundLocalError from the loop variable.
            raise ValueError(
                'No protein matches found for virus title %r and sample '
                'name %r.' % (virusTitle, sampleName))

        result = Reads()
        for proteinMatch in proteinMatches:
            for read in FastaReads(proteinMatch['fastaFilename'],
                                   checkAlphabet=0):
                result.add(read)

        # The output directory is taken from the last protein match.
        saveFilename = join(
            proteinMatches[-1]['outDir'],
            'virus-%d-sample-%d.fasta' % (virusIndex, sampleIndex))
        result.filter(removeDuplicates=True).save(saveFilename)
        self._fastaFilenames[key] = saveFilename
        return saveFilename
Пример #8
0
 def testTitle(self):
     """
     Sorting titles on title must work.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('title')
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, ordered)
Пример #9
0
 def testMaxScore_EValue(self):
     """
     Sorting on max score must work when scores are e values, including a
     secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('maxScore')
         # With LowerIsBetterScore the expected order runs from the
         # smallest e value to the largest (see the trailing comments).
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expected, ordered)
Пример #10
0
    def testExpectedTitleDetails(self):
        """
        Each TitleAlignments instance held by a TitlesAlignments instance
        must have the expected attributes.
        """
        jsonLines = dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'
        with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
            read = Read('id0', 'A' * 70)
            reads = Reads()
            reads.add(read)
            readsAlignments = BlastReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            # (title, expected subject length, argument of the expected HSP)
            expectations = (
                ('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 37000, 20),
                ('gi|887699|gb|DQ37780 Squirrelpox virus 55',
                 38000, 25),
            )
            for title, subjectLength, hspArg in expectations:
                titleAlignments = titlesAlignments[title]
                self.assertEqual(title, titleAlignments.subjectTitle)
                self.assertEqual(subjectLength,
                                 titleAlignments.subjectLength)
                self.assertEqual(1, len(titleAlignments))
                self.assertEqual(read, titleAlignments[0].read)
                self.assertEqual(HSP(hspArg), titleAlignments[0].hsps[0])
Пример #11
0
 def testLengthOne(self):
     """
     De-duping a FASTA list that holds a single item must give back that
     same single item.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     deduped = list(dedupFasta(reads))
     self.assertEqual(deduped, [Read('id', 'GGG')])
Пример #12
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     # Patch open on the builtins module object (as the other tests here
     # do) rather than via the '__builtin__' module name, which only exists
     # on Python 2.
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('length')
         # The trailing comments give each subject's length; longest comes
         # first and equal lengths are expected to be ordered by title.
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
         ], result)
Пример #13
0
 def testMaxTitlesTwoSortOnLength(self):
     """
     With maxTitles set to 2 and sortOn set to 'length', the filter
     function must return the two titles with the longest sequences.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(maxTitles=2, sortOn='length')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #14
0
 def testTabSeparatedSummary(self):
     """
     The tabSeparatedSummary function must return the correct result.
     """
     jsonLines = dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'f.json', 'db')
         titlesAlignments = TitlesAlignments(readsAlignments)
         summary = titlesAlignments.tabSeparatedSummary(sortOn='title')
         # One row per title: fields are joined by TABs and rows by
         # newlines, with no trailing newline.
         rows = (
             ('0.000297', '20.000000', '20.000000', '1', '1', '37000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
             ('0.000289', '25.000000', '25.000000', '1', '1', '38000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
         )
         expected = '\n'.join('\t'.join(fields) for fields in rows)
         self.assertEqual(expected, summary)
Пример #15
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when passed a 0.0 value for
     minNewReads, i.e., one that considers any read set sufficiently novel.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minNewReads=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #16
0
 def testCoverageIncludesSome(self):
     """
     Filtering on minCoverage must return a TitlesAlignments instance with
     only the expected titles if only some of its titles have sufficient
     coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         filtered = titlesAlignments.filter(minCoverage=0.0011)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Пример #17
0
 def testRemovalOfIdenticalSequences(self):
     """
     De-duping a list holding 2 copies of the same sequence must leave
     just 1 copy.
     """
     reads = Reads()
     for _ in range(2):
         reads.add(Read('id', 'GGG'))
     deduped = list(dedupFasta(reads))
     self.assertEqual(deduped, [Read('id', 'GGG')])
Пример #18
0
 def __init__(self, _files, readClass=SSAARead, upperCase=False):
     """
     Store the input file(s) and read options, then initialize the base
     class.

     @param _files: Either a single value or a C{list} / C{tuple} of
         values. A single value is wrapped in a one-element list.
     @param readClass: Stored as given (defaults to C{SSAARead}).
     @param upperCase: Stored as given (defaults to C{False}).
     """
     if isinstance(_files, (list, tuple)):
         self._files = _files
     else:
         self._files = [_files]
     self._readClass = readClass
     self._upperCase = upperCase
     # The zero-argument form of super() is only available on Python 3,
     # so the base class initializer is called explicitly otherwise.
     if not PY3:
         Reads.__init__(self)
     else:
         super().__init__()
Пример #19
0
 def testSummary(self):
     """
     The summary function must return the correct result.
     """
     records = (PARAMS, RECORD0, RECORD1)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # (score, coverage, subject length, subject title) per title, in
         # title order. Every title has one read, one HSP, and a median
         # score equal to its best score.
         details = (
             (20.0, 0.00031428571428571427, 35000,
              'gi|887699|gb|DQ37780 Monkeypox virus 456'),
             (20.0, 0.00031428571428571427, 35000,
              'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'),
             (20.0, 0.0002972972972972973, 37000,
              'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
             (25.0, 0.00028947368421052634, 38000,
              'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
         )
         expected = [
             {
                 'bestScore': score,
                 'coverage': coverage,
                 'hspCount': 1,
                 'medianScore': score,
                 'readCount': 1,
                 'subjectLength': subjectLength,
                 'subjectTitle': subjectTitle,
             }
             for score, coverage, subjectLength, subjectTitle in details
         ]
         self.assertEqual(
             expected, list(titlesAlignments.summary(sortOn='title')))
Пример #20
0
 def testManuallyAddedReadsLength(self):
     """
     The length of a Reads instance must equal the number of reads that
     were added to it manually.
     """
     reads = Reads()
     for readId, sequence in (('id1', 'AT'), ('id2', 'AC')):
         reads.add(Read(readId, sequence))
     self.assertEqual(2, len(reads))
Пример #21
0
 def testRemovalOfIdenticalSequencesWithDifferingIds(self):
     """
     De-duping a list holding 2 copies of the same sequence must leave
     just 1 copy, even when the read ids differ.
     """
     reads = Reads()
     for readId in ('id1', 'id2'):
         reads.add(Read(readId, 'GGG'))
     deduped = list(dedupFasta(reads))
     # The expected survivor is the read that was added first.
     self.assertEqual(deduped, [Read('id1', 'GGG')])
Пример #22
0
    def reads(self):
        """
        Collect the read of every alignment for this title.

        @return: An instance of C{dark.reads.Reads} to which the read of
            each alignment in C{self} has been added, in iteration order.
        """
        matched = Reads()
        addRead = matched.add
        for alignment in self:
            addRead(alignment.read)
        return matched
Пример #23
0
 def testManuallyAddedReads(self):
     """
     Listing a Reads instance must give the manually added reads, in the
     order in which they were added.
     """
     read1 = Read('id1', 'AT')
     read2 = Read('id2', 'AC')
     reads = Reads()
     for read in (read1, read2):
         reads.add(read)
     self.assertEqual([read1, read2], list(reads))
Пример #24
0
 def testFilterOnMaxLength(self):
     """
     Filtering on maximal length must return only the reads that are not
     longer than the given length.
     """
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads = Reads()
     for read in (read1, read2):
         reads.add(read)
     filtered = reads.filter(maxLength=3)
     self.assertEqual([read2], list(filtered))
Пример #25
0
 def testFilterWithMinLengthEqualToMaxLength(self):
     """
     When the minimum and maximum lengths passed to filter are equal, a
     read of exactly that length must be returned.
     """
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads = Reads()
     for read in (read1, read2):
         reads.add(read)
     filtered = reads.filter(minLength=4, maxLength=4)
     self.assertEqual([read1], list(filtered))
Пример #26
0
 def testFilterOnLengthEverythingMatches(self):
     """
     Filtering on length must return all reads when every one of them
     satisfies the length requirements.
     """
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads = Reads()
     for read in (read1, read2):
         reads.add(read)
     filtered = reads.filter(minLength=2, maxLength=5)
     self.assertEqual([read1, read2], list(filtered))
Пример #27
0
 def testFilterOnLengthNothingMatches(self):
     """
     Filtering on length must return no reads when none of them satisfies
     the length requirements.
     """
     read1 = Read('id1', 'ATCG')
     read2 = Read('id2', 'ACG')
     reads = Reads()
     for read in (read1, read2):
         reads.add(read)
     filtered = reads.filter(minLength=10, maxLength=15)
     self.assertEqual([], list(filtered))
Пример #28
0
 def testSaveWithUnknownFormat(self):
     """
     A Reads instance must raise ValueError if asked to save in an unknown
     format.
     """
     reads = Reads()
     read1 = Read('id1', 'AT', '!!')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     # The error is used as a regular expression, so the final period is
     # escaped to match literally.
     error = "Save format must be either 'fasta' or 'fastq'\\."
     # six.assertRaisesRegex is used (as elsewhere in these tests) because
     # the assertRaisesRegexp alias is deprecated and was removed from
     # unittest in Python 3.12.
     six.assertRaisesRegex(self, ValueError, error,
                           reads.save, 'file', 'xxx')
Пример #29
0
 def testSaveAsFASTQFailsOnReadWithNoQuality(self):
     """
     A Reads instance must raise a ValueError if asked to save in FASTQ
     format and there is a read with no quality present.
     """
     reads = Reads()
     # read1 is created with a third argument ('!!'), read2 without one.
     # The expected error names read2.
     read1 = Read('id1', 'AT', '!!')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     error = "Read 'id2' has no quality information"
     # six.assertRaisesRegex is used (as elsewhere in these tests) because
     # the assertRaisesRegexp alias is deprecated and was removed from
     # unittest in Python 3.12.
     six.assertRaisesRegex(self, ValueError, error,
                           reads.save, 'file', 'fastq')
Пример #30
0
 def testSaveToFileDescriptor(self):
     """
     When given something other than a string filename, a Reads instance
     must save to it as a file-like object.
     """
     reads = Reads()
     for readId, sequence in (('id1', 'AT'), ('id2', 'AC')):
         reads.add(Read(readId, sequence))
     fp = StringIO()
     reads.save(fp)
     written = fp.getvalue()
     self.assertEqual('>id1\nAT\n>id2\nAC\n', written)
Пример #31
0
 def testUnknown(self):
     """
     Asking to sort on an unknown attribute must raise C{ValueError}.
     """
     jsonLines = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         self.assertRaises(ValueError, titlesAlignments.sortTitles, 'xxx')
Пример #32
0
 def testEmpty(self):
     """
     titleCounts must return an empty dictionary when passed an empty
     readsAlignments.
     """
     jsonLines = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         self.assertEqual({}, titleCounts(readsAlignments))
Пример #33
0
 def testAddTitleRepeat(self):
     """
     The addTitle function must raise a C{KeyError} if an attempt is made
     to add a pre-existing title to a TitlesAlignments instance.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         title = 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
         titleAlignments = TitleAlignments(title, 55)
         # Raw strings keep the regex escapes (\| and \.) intact without
         # relying on invalid string escape sequences, which newer Python
         # versions warn about.
         error = (r"Title 'gi\|887699\|gb\|DQ37780 Squirrelpox virus "
                  r"1296/99' already present in TitlesAlignments instance\.")
         six.assertRaisesRegex(self, KeyError, error,
                               titlesAlignments.addTitle, title,
                               titleAlignments)
Пример #34
0
 def testUnknownSortOn(self):
     """
     The filter function must raise a ValueError if the passed sortOn
     value isn't recognized.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # Raw strings keep the regex escape (\.) intact without relying on
         # an invalid string escape sequence, which newer Python versions
         # warn about.
         error = (r'^Sort attribute must be one of "length", "maxScore", '
                  r'"medianScore", "readCount", "title"\.$')
         six.assertRaisesRegex(self,
                               ValueError,
                               error,
                               titlesAlignments.filter,
                               maxTitles=0,
                               sortOn='unknown')
Пример #35
0
    def testHeterogeneousReadsTwoDifferences(self):
        """
        heterogeneousSites must return dictionaries with two entries, as
        expected, if the reads given differ at two sites.
        """
        read = Read('id', 'ACCG')
        reads = Reads([read, Read('id2', 'TCCC')])

        # The two reads differ at offsets 0 and 3.
        expected = (
            {0: {'A': 1, 'T': 1}, 3: {'G': 1, 'C': 1}},
            {
                0: {'A': ['id'], 'T': ['id2']},
                3: {'C': ['id2'], 'G': ['id']},
            },
            [0, 3],
        )
        self.assertEqual(expected, heterogeneousSites(reads, len(read), 1))
Пример #36
0
 def testEmpty(self):
     """
     Sorting must return the empty list when there are no titles.
     """
     jsonLines = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('title')
         self.assertEqual([], ordered)
Пример #37
0
 def testRecombinantFile(self):
     """
     The recombinantFile method must produce the expected string.
     """
     sequences = (
         ('id1', 'A' * 200 + 'G' * 200),
         ('id2', 'A' * 400),
         ('id3', 'G' * 400),
     )
     reads = Reads([Read(readId, seq) for readId, seq in sequences])
     self.ra.run(reads)
     expected = join(self.ra.tmpDir, _OUTPUT_PREFIX + '.3s.rec')
     self.assertEqual(expected, self.ra.recombinantFile())
Пример #38
0
 def testEmpty(self):
     """
     A TitlesAlignments instance built from an empty readsAlignments
     instance must have no titles.
     """
     jsonLines = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         titles = list(titlesAlignments.keys())
         self.assertEqual([], titles)
Пример #39
0
 def testRunThresholdString(self):
     """
     The run function must return an exit status of 0 when run with a
     non-default (str) threshold argument.
     """
     sequences = (
         ('id1', 'A' * 200 + 'G' * 200),
         ('id2', 'A' * 400),
         ('id3', 'G' * 400),
     )
     reads = Reads([Read(readId, seq) for readId, seq in sequences])
     completed = self.ra.run(reads, t='0.0')
     self.assertEqual(0, completed.returncode)
Пример #40
0
    def testHeterogeneousReadsFractionHigh(self):
        """
        heterogeneousSites must return dictionaries with one entry, as
        expected, if the reads given differ and are less homogeneous than
        specified by the homogeneity cutoff fraction.
        """
        read = Read('id', 'ACCG')
        reads = Reads([read, Read('id2', 'ACCC'), Read('id3', 'ACCC')])

        # The reads only differ at offset 3, where 2 of the 3 have a 'C'.
        expected = (
            {3: {'C': 2, 'G': 1}},
            {3: {'G': ['id'], 'C': ['id2', 'id3']}},
            [3],
        )
        self.assertEqual(expected,
                         heterogeneousSites(reads, len(read), 0.7))
Пример #41
0
    def testTwoJSONInputsWithSubjectInCommon(self):
        """
        When L{BlastReadsAlignments} is given two JSON files that have a
        matched subject in common and a TitlesAlignments is made from it,
        the title in the TitlesAlignments must have information from both
        reads, including the correct HSP scores.
        """
        class SideEffect(object):
            # Stands in for open(): the first call yields a file holding
            # RECORD2, every later call yields one holding RECORD4.
            def __init__(self):
                self.first = True

            def sideEffect(self, _ignoredFilename, **kwargs):
                record = RECORD2 if self.first else RECORD4
                self.first = False
                return File([dumps(PARAMS) + '\n', dumps(record) + '\n'])

        title = 'gi|887699|gb|DQ37780 Cowpox virus 15'

        sideEffect = SideEffect()
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect.sideEffect
            reads = Reads()
            for readId in ('id2', 'id4'):
                reads.add(Read(readId, 'A' * 70))
            readsAlignments = BlastReadsAlignments(
                reads, ['file1.json', 'file2.json'])
            titlesAlignments = TitlesAlignments(readsAlignments)
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(4, titleAlignments.hspCount())
            self.assertEqual('id2', titleAlignments[0].read.id)
            self.assertEqual('id4', titleAlignments[1].read.id)
            # First matching read has one HSP.
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])
            # Second matching read has three HSPs.
            for index, hspArg in enumerate((10, 5, 3)):
                self.assertEqual(HSP(hspArg),
                                 titleAlignments[1].hsps[index])
Пример #42
0
 def testMaxScore_EValue(self):
     """
     Sorting on max score must work when scores are e values (a
     lower-is-better score class), including a secondary sort on title.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(readsAlignments).sortTitles(
             'maxScore')
         # Expected order runs from the lowest e value to the highest.
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expected, sortedTitles)
Пример #43
0
    def testFromThreeSequences(self):
        """
        An NJTree instance created from three sequences with no features
        must 1) have a distance matrix with zeros on the diagonal and ones
        elsewhere, 2) keep the very labels list it was given, and 3) produce
        a simple tree with three children.
        """
        sequences = Reads()
        for readId in ('id1', 'id2', 'id3'):
            sequences.add(AARead(readId, 'A'))
        labels = ['x', 'y', 'z']
        njtree = NJTree.fromSequences(labels, sequences,
                                      landmarks=['AlphaHelix'])

        expectedDistance = [
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 0],
        ]
        self.assertTrue(np.array_equal(expectedDistance, njtree.distance))

        # Identity, not just equality: the labels object itself is saved.
        self.assertIs(labels, njtree.labels)
        childStrings = sorted(map(str, njtree.tree.children))
        self.assertEqual(['x:0.5;\n', 'y:0.5;\n', 'z:0.5;\n'], childStrings)
Пример #44
0
 def testMaxScore_Bits(self):
     """
     Sorting on max score must work when scores are bit scores, including
     a secondary sort on title.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         sortedTitles = TitlesAlignments(readsAlignments).sortTitles(
             'maxScore')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 25
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 20
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
         ]
         self.assertEqual(expected, sortedTitles)
Пример #45
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan when using e values.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(readsAlignments).filter(
             withScoreBetterThan=1e-10)
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
         self.assertEqual(expected, list(filtered.keys()))
Пример #46
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(readsAlignments).sortTitles(
             'length')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 30000
         ]
         self.assertEqual(expected, sortedTitles)
Пример #47
0
 def testMaxMatchingReads(self):
     """
     The filter function must work correctly when passed a value for
     maxMatchingReads.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(readsAlignments).filter(
             maxMatchingReads=1)
         # Cowpox virus 15 is matched by two reads, so it must be absent
         # from the results. The expected titles are listed in sorted
         # order.
         expected = [
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Пример #48
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when passed a value for
     minMedianScore when using e values.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(readsAlignments).filter(
             minMedianScore=1e-9)
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Пример #49
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when passed a 0.0 value for
     minNewReads, i.e. one that considers any read set sufficiently novel.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(readsAlignments).filter(
             minNewReads=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Пример #50
0
 def testCoverageIncludesSome(self):
     """
     Filtering on minCoverage must return a TitlesAlignments instance
     holding only the expected titles when just some of the titles have
     sufficient coverage.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To see why this threshold gives the result below, look at the
         # HSP coverage in sample_data.py and work out the coverage by
         # hand.
         filtered = titlesAlignments.filter(minCoverage=0.0003)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expected, sorted(filtered))
Пример #51
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must return a TitlesAlignments
     instance that has all the titles of the original.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         unfiltered = TitlesAlignments(readsAlignments).filter()
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(unfiltered))
Пример #52
0
 def testTitle(self):
     """
     Sorting on title must work.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(readsAlignments).sortTitles(
             'title')
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sortedTitles)
Пример #53
0
 def testCoverageIncludesAll(self):
     """
     Filtering on minCoverage must return a TitlesAlignments instance
     holding all titles when all of its titles have sufficient coverage.
     """
     jsonLines = ''.join(
         dumps(obj) + '\n'
         for obj in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(readsAlignments).filter(
             minCoverage=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Пример #54
0
 def testRemoveOutput(self, rmtreeMock):
     """
     After a run, calling removeOutput must result in exactly one call to
     the rmtree mock, with the temporary directory as its argument.
     """
     sequences = [
         Read('id1', 'A' * 200 + 'G' * 200),
         Read('id2', 'A' * 400),
         Read('id3', 'G' * 400),
     ]
     analysis = self.ra
     analysis.run(Reads(sequences))
     analysis.removeOutput()
     rmtreeMock.assert_called_once_with(analysis.tmpDir)
Пример #55
0
    def testHeterogeneousReadsFractionLowWithOneDifference(self):
        """
        Given four reads that differ only at offset 0 (two 'A' and two
        'T') and a homogeneity cutoff fraction of 0.6, heterogeneousSites
        must return results with a single entry, for that site.
        """
        first = Read('id', 'ACCG')
        others = [
            Read('id2', 'TCCG'),
            Read('id3', 'TCCG'),
            Read('id4', 'ACCG'),
        ]
        reads = Reads([first] + others)

        expected = (
            {0: {'A': 2, 'T': 2}},
            {0: {'A': ['id', 'id4'], 'T': ['id2', 'id3']}},
            [0],
        )
        self.assertEqual(expected,
                         heterogeneousSites(reads, len(first), 0.6))
Пример #56
0
 def testExpectedAttrs(self):
     """
     A ReadsAlignments instance must expose the reads and params objects
     it was given, and have HigherIsBetterScore as its score class when
     none is specified.
     """
     reads = Reads()
     params = dict(application='app name')
     alignments = ReadsAlignments(reads, params)
     self.assertIs(alignments.reads, reads)
     self.assertEqual('app name', alignments.params['application'])
     self.assertIs(params, alignments.params)
     self.assertIs(HigherIsBetterScore, alignments.scoreClass)
Пример #57
0
 def testTwoByThreeWithRepeatedQueryAndSubjectIds(self):
     """
     Calling affinityMatrix with two reads against three subjects must
     give a 2x3 matrix, and repeated query and subject ids must not cause
     a problem (as they would if affinityMatrix were called with
     returnDict=True).
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'
     reads = Reads()
     for queryId in ('id1', 'id1'):
         reads.add(AARead(queryId, sequence))
     subjects = Reads()
     for subjectId in ('id2', 'id3', 'id3'):
         subjects.add(AARead(subjectId, sequence))
     matrix = affinityMatrix(reads, landmarks=['AlphaHelix'],
                             subjects=subjects, computeDiagonal=True)
     expected = [
         [1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0],
     ]
     self.assertEqual(expected, matrix)
Пример #58
0
 def testPopulationNotAllowed(self):
     """
     A ValueError must be raised if a subjects keyword is passed to a
     specifier created with allowPopulation=False.
     """
     emptySubjects = Reads()
     specifier = DatabaseSpecifier(allowPopulation=False)
     six.assertRaisesRegex(
         self, ValueError, '^Database population is not enabled.$',
         specifier.getDatabaseFromKeywords, subjects=emptySubjects)
Пример #59
0
 def testIdenticalMatrixIsReturnedOnRepeatedRequest(self):
     """
     Asking an AffinityMatrices instance for the same parameter set twice
     must return the identical affinity matrix object both times.
     """
     testParameters = {
         'dbParams': DatabaseParameters(),
         'findParams': FindParameters(),
     }
     am = AffinityMatrices(Reads(), parameterSets={'test': testParameters},
                           returnDict=True)
     firstRequest = am['test']
     secondRequest = am['test']
     self.assertIs(firstRequest, secondRequest)
Пример #60
0
    def testPopulationFromInMemoryAndFastaFile(self):
        """
        When both the subjects and databaseFasta keywords are passed, the
        returned database must contain all the in-memory subjects as well
        as all those read from the file.
        """
        subject1 = AARead('id1', 'FFF')
        subject2 = AARead('id2', 'RRR')
        subjects = Reads()
        for subject in (subject1, subject2):
            subjects.add(subject)

        fastaText = '>id3\nFFFF\n>id4\nRRRR'
        with patch.object(builtins, 'open', mockOpen(read_data=fastaText)):
            db = DatabaseSpecifier().getDatabaseFromKeywords(
                subjects=subjects, databaseFasta='file.fasta')

        # The database is queried after open() has been restored.
        found = {subject.read for subject in db.getSubjects()}
        expected = {
            subject1,
            subject2,
            AARead('id3', 'FFFF'),
            AARead('id4', 'RRRR'),
        }
        self.assertEqual(expected, found)