Пример #1
0
    def testDictLookupGzipDataWithBGZsuffix(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when the file added to the index is BGZF-compressed and its name ends
        in .bgz.
        """
        class FakeBgzfOpen(object):
            """
            Replacement for bgzf.open that serves a small in-memory BGZF file
            and tolerates at most two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def _compressedReader(self):
                # Compress the FASTA bytes into memory, then wrap the
                # compressed bytes in a fresh buffer for reading.
                compressed = BytesIO()
                bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
                bgzfWriter.write(b'>id0\nAC\n')
                bgzfWriter.flush()
                source = BytesIO(compressed.getvalue())
                # Presumably BgzfReader inspects the mode of the file object
                # it is given - TODO confirm.
                source.mode = 'rb'
                return bgzf.BgzfReader(fileobj=source)

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls > 1:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta.bgz', filename)
                self.calls += 1
                return self._compressedReader()

        fakeOpen = FakeBgzfOpen(self)
        with patch.object(bgzf, 'open') as mockOpen:
            mockOpen.side_effect = fakeOpen.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta.bgz')
            expected = DNARead('id0', 'AC')
            self.assertEqual(expected, index['id0'])
            index.close()
Пример #2
0
    def testDictLookupWithTwoFiles(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected reads
        when the index holds sequences added from two different files.
        """
        class ScriptedOpen(object):
            """
            Replacement for open() that serves canned FASTA text for a fixed
            series of calls, checking the file name requested each time.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                fileOne = ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n')
                fileTwo = ('filename2.fasta', '>seq3\nAAACCC\n')
                # Which file each successive call (0 to 4) must open.
                schedule = {0: fileOne, 1: fileTwo, 2: fileOne, 3: fileOne,
                            4: fileTwo}
                if self.calls not in schedule:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                expectedName, fasta = schedule[self.calls]
                self.testCase.assertEqual(expectedName, filename)
                self.calls += 1
                return StringIO(fasta)

        scripted = ScriptedOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = scripted.sideEffect
            index = SqliteIndex(':memory:')
            for name in ('filename1.fasta', 'filename2.fasta'):
                index.addFile(name)
            expectedReads = (('id1', 'ACTG'),
                             ('id2', 'AACCTTGG'),
                             ('seq3', 'AAACCC'))
            for readId, sequence in expectedReads:
                self.assertEqual(DNARead(readId, sequence), index[readId])
            index.close()
Пример #3
0
    def testDictLookupSpecificReadClass(self):
        """
        Dictionary-style lookup (__getitem__) must return a read of the class
        that was passed to the index via its readClass argument.
        """
        class FakeOpen(object):
            """
            Replacement for open() that serves the same FASTA text for up to
            two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls not in (0, 1):
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta', filename)
                self.calls += 1
                return StringIO('>id1\nMM\n>id2\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:', readClass=AARead)
            index.addFile('filename.fasta')
            read = index['id1']
            self.assertTrue(isinstance(read, AARead))
            self.assertEqual(AARead('id1', 'MM'), read)
            index.close()
Пример #4
0
    def testDictLookupWithFastaDirectory(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when the index is given a FASTA base directory.

        The first call to open must be for the file name as passed to
        addFile; the second must be for that file's base name joined onto
        the FASTA directory.
        """
        class FakeOpen(object):
            """
            Replacement for open() that checks the requested file name on
            each of the two permitted calls and serves canned FASTA text.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls == 0:
                    expectedName = '/tmp/f.fasta'
                elif self.calls == 1:
                    expectedName = os.path.join('/usr/local/fasta', 'f.fasta')
                else:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual(expectedName, filename)
                self.calls += 1
                return StringIO('>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:', fastaDirectory='/usr/local/fasta')
            index.addFile('/tmp/f.fasta')
            expected = DNARead('id1', 'ACTGCCCCGGG')
            self.assertEqual(expected, index['id1'])
            index.close()
Пример #5
0
    def testFindWithTwoFiles(self):
        """
        The _find method must return the expected (file name, offset) pair
        for each sequence when sequences were added from two files.
        """
        class ScriptedOpen(object):
            """
            Replacement for open() that serves one canned FASTA text per
            file, in order, and allows exactly two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                # Entry i is the (file name, content) expected on call i.
                script = (
                    ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n'),
                    ('filename2.fasta', '>sequence3\nAAACCC\n'),
                )
                if self.calls >= len(script):
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                expectedName, fasta = script[self.calls]
                self.testCase.assertEqual(expectedName, filename)
                self.calls += 1
                return StringIO(fasta)

        scripted = ScriptedOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = scripted.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename1.fasta')
            index.addFile('filename2.fasta')
            expectedLocations = (
                ('id1', ('filename1.fasta', 5)),
                ('id2', ('filename1.fasta', 15)),
                ('sequence3', ('filename2.fasta', 11)),
            )
            for sequenceId, location in expectedLocations:
                self.assertEqual(location, index._find(sequenceId))
            index.close()
Пример #6
0
    def testAddDuplicateFile(self):
        """
        Adding a file name that is already in the index must raise a
        ValueError.

        The file is first added with addFile (which must return 2); the same
        name is then passed to the internal _addFilename method, which must
        reject it as a duplicate.
        """
        class FakeOpen(object):
            """
            Replacement for open() that serves canned FASTA text exactly once.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls != 0:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta', filename)
                self.calls += 1
                return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:')
            added = index.addFile('filename.fasta')
            self.assertEqual(2, added)
            pattern = "^Duplicate file name: 'filename\\.fasta'$"
            assertRaisesRegex(
                self, ValueError, pattern, index._addFilename,
                'filename.fasta')
            index.close()
Пример #7
0
    def testDictLookupSequenceMiddleOfThree(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when its sequence is split over several lines and it is the second
        of three sequences in the input file.
        """
        class FakeOpen(object):
            """
            Replacement for open() that serves the same three-sequence FASTA
            text for up to two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls >= 2:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta', filename)
                self.calls += 1
                fasta = '>id1\nACTG\nCCCC\n>id2\nAACCTG\nAAA\n>id3\nAAA\n'
                return StringIO(fasta)

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta')
            expected = DNARead('id2', 'AACCTGAAA')
            self.assertEqual(expected, index['id2'])
            index.close()
Пример #8
0
    def testDictLookupWithTwoFiles(self):
        """
        Looking reads up by id (via __getitem__) must give back the expected
        reads after sequences from two files have been added to the index.
        """
        class OpenStub(object):
            """
            Stand-in for open(): each permitted call is checked against the
            file name expected at that point and answered with canned FASTA.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                one = ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n')
                two = ('filename2.fasta', '>seq3\nAAACCC\n')
                # Position i holds what call number i is expected to open.
                order = (one, two, one, one, two)
                if self.seen < len(order):
                    wanted, text = order[self.seen]
                    self.case.assertEqual(wanted, filename)
                    self.seen += 1
                    return StringIO(text)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = OpenStub(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename1.fasta')
            index.addFile('filename2.fasta')
            first = DNARead('id1', 'ACTG')
            self.assertEqual(first, index['id1'])
            second = DNARead('id2', 'AACCTTGG')
            self.assertEqual(second, index['id2'])
            third = DNARead('seq3', 'AAACCC')
            self.assertEqual(third, index['seq3'])
            index.close()
Пример #9
0
    def testDictLookupWithFastaDirectory(self):
        """
        Looking a read up by id (via __getitem__) must give back the expected
        read when the index was created with a FASTA base directory.

        The stubbed open() checks that the first call uses the name given to
        addFile and that the second uses the file's base name joined onto the
        FASTA directory.
        """
        class OpenStub(object):
            """
            Stand-in for open() that verifies the file name of each of two
            permitted calls and answers with canned FASTA text.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                callNumber = self.seen
                if callNumber > 1:
                    return self.case.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                if callNumber == 0:
                    self.case.assertEqual('/tmp/f.fasta', filename)
                else:
                    self.case.assertEqual(
                        os.path.join('/usr/local/fasta', 'f.fasta'), filename)
                self.seen = callNumber + 1
                text = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'
                return StringIO(text)

        stub = OpenStub(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:', fastaDirectory='/usr/local/fasta')
            index.addFile('/tmp/f.fasta')
            found = index['id1']
            self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), found)
            index.close()
Пример #10
0
    def testAddDuplicateFile(self):
        """
        Adding a file name that is already in the index must raise a
        ValueError.

        The file is first added with addFile (which must return 2); the same
        name is then passed to the internal _addFilename method, which must
        reject it as a duplicate.
        """
        class Open(object):
            """
            Replacement for open() that serves canned FASTA text exactly once
            and fails the test on any further call.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('filename.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        # assertRaisesRegexp is a deprecated alias that recent Python versions
        # no longer provide. Prefer assertRaisesRegex and fall back to the old
        # name only where the new one does not exist.
        raisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                       self.assertRaisesRegexp)

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            self.assertEqual(2, index.addFile('filename.fasta'))
            # The dot is escaped so that it matches only a literal '.'.
            error = "^Duplicate file name: 'filename\\.fasta'$"
            raisesRegex(ValueError, error, index._addFilename,
                        'filename.fasta')
            index.close()
Пример #11
0
    def testAddOneFile(self):
        """
        Adding one FASTA file holding two sequences to a new index must make
        addFile return 2.
        """
        class FakeOpen(object):
            """
            Replacement for open() that serves canned FASTA text exactly once.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls != 0:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta', filename)
                self.calls += 1
                return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:')
            added = index.addFile('filename.fasta')
            self.assertEqual(2, added)
            index.close()
Пример #12
0
    def testAddFilesWithDuplicateSequence(self):
        """
        If a sequence id occurs in more than one FASTA file, a ValueError must
        be raised when the second file is added.
        """
        class Open(object):
            """
            Replacement for open() that serves one canned FASTA text per
            file, in order, and fails the test on a third call.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('filename1.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                elif self.count == 1:
                    self.test.assertEqual('filename2.fasta', filename)
                    self.count += 1
                    # Repeats id2, which the first file already supplied.
                    return StringIO('>id2\nAAACCC\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        # assertRaisesRegexp is a deprecated alias that recent Python versions
        # no longer provide. Prefer assertRaisesRegex and fall back to the old
        # name only where the new one does not exist.
        raisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                       self.assertRaisesRegexp)

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename1.fasta')
            # Backslashes are doubled: '\.' is not a valid string escape and
            # triggers a warning in a non-raw string literal.
            error = ("^FASTA sequence id 'id2', found in file "
                     "'filename2\\.fasta', was previously added from file "
                     "'filename1\\.fasta'\\.$")
            raisesRegex(ValueError, error, index.addFile, 'filename2.fasta')
            index.close()
Пример #13
0
    def testDictLookupSequenceCrossesNewlines(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected read
        when its sequence is split over several lines of the input file, with
        a mix of LF and CRLF line endings.
        """
        class FakeOpen(object):
            """
            Replacement for open() that serves the same FASTA text for up to
            two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls not in (0, 1):
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta', filename)
                self.calls += 1
                fasta = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'
                return StringIO(fasta)

        opener = FakeOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = opener.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta')
            expected = DNARead('id1', 'ACTGCCCCGGG')
            self.assertEqual(expected, index['id1'])
            index.close()
Пример #14
0
    def testDictLookupSpecificReadClass(self):
        """
        Looking a read up by id (via __getitem__) must give back an instance
        of the read class the index was created with.
        """
        class OpenStub(object):
            """
            Stand-in for open() that answers up to two calls with the same
            canned FASTA text.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.seen < 2:
                    self.case.assertEqual('filename.fasta', filename)
                    self.seen += 1
                    text = '>id1\nMM\n>id2\n'
                    return StringIO(text)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = OpenStub(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:', readClass=AARead)
            index.addFile('filename.fasta')
            found = index['id1']
            isExpectedClass = isinstance(found, AARead)
            self.assertTrue(isExpectedClass)
            self.assertEqual(AARead('id1', 'MM'), found)
            index.close()
Пример #15
0
    def testDictLookupGzipDataWithBGZsuffix(self):
        """
        Looking a read up by id (via __getitem__) must give back the expected
        read when the file added to the index is in BGZF format and its name
        has a .bgz suffix.
        """
        class BgzfOpenStub(object):
            """
            Stand-in for bgzf.open that answers up to two calls with a reader
            over freshly compressed in-memory FASTA data.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.seen < 2:
                    self.case.assertEqual('filename.fasta.bgz', filename)
                    self.seen += 1
                    # Write the FASTA bytes through a BGZF writer into memory.
                    sink = BytesIO()
                    out = bgzf.BgzfWriter(fileobj=sink)
                    out.write(b'>id0\nAC\n')
                    out.flush()
                    # Re-wrap the compressed bytes for reading. The mode
                    # attribute is presumably consulted by BgzfReader - TODO
                    # confirm.
                    packed = BytesIO(sink.getvalue())
                    packed.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=packed)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = BgzfOpenStub(self)
        with patch.object(bgzf, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta.bgz')
            found = index['id0']
            self.assertEqual(DNARead('id0', 'AC'), found)
            index.close()
Пример #16
0
    def testAddFilesWithDuplicateSequence(self):
        """
        Adding a second FASTA file that contains a sequence id already added
        from an earlier file must raise a ValueError.
        """
        class ScriptedOpen(object):
            """
            Replacement for open() that serves one canned FASTA text per
            file, in order, and allows exactly two calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def sideEffect(self, filename, *args, **kwargs):
                # Entry i is the (file name, content) expected on call i. The
                # second file repeats id2 from the first.
                script = (
                    ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n'),
                    ('filename2.fasta', '>id2\nAAACCC\n'),
                )
                if self.calls >= len(script):
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                expectedName, fasta = script[self.calls]
                self.testCase.assertEqual(expectedName, filename)
                self.calls += 1
                return StringIO(fasta)

        scripted = ScriptedOpen(self)
        with patch.object(builtins, 'open') as mockOpen:
            mockOpen.side_effect = scripted.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename1.fasta')
            pattern = (
                "^FASTA sequence id 'id2', found in file 'filename2\\.fasta', "
                "was previously added from file 'filename1\\.fasta'\\.$")
            assertRaisesRegex(
                self, ValueError, pattern, index.addFile, 'filename2.fasta')
            index.close()
Пример #17
0
    def testFindWithTwoFiles(self):
        """
        After sequences from two files have been added, _find must report the
        expected file name and offset for each sequence id.
        """
        class OpenStub(object):
            """
            Stand-in for open() that answers the first call with the content
            of the first file and the second call with that of the second.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                contents = {
                    0: ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n'),
                    1: ('filename2.fasta', '>sequence3\nAAACCC\n'),
                }
                if self.seen in contents:
                    wanted, text = contents[self.seen]
                    self.case.assertEqual(wanted, filename)
                    self.seen += 1
                    return StringIO(text)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = OpenStub(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:')
            for name in ('filename1.fasta', 'filename2.fasta'):
                index.addFile(name)
            where = index._find('id1')
            self.assertEqual(('filename1.fasta', 5), where)
            where = index._find('id2')
            self.assertEqual(('filename1.fasta', 15), where)
            where = index._find('sequence3')
            self.assertEqual(('filename2.fasta', 11), where)
            index.close()
Пример #18
0
    def testAddOneFile(self):
        """
        Build an index from a single file: addFile must return 2 for a file
        that contains two sequences.
        """
        class OpenStub(object):
            """
            Stand-in for open() that answers a single call with canned FASTA
            text.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.seen < 1:
                    self.case.assertEqual('filename.fasta', filename)
                    self.seen += 1
                    text = '>id1\nACTG\n>id2\nAACCTTGG\n'
                    return StringIO(text)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = OpenStub(self)
        with patch.object(builtins, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:')
            sequenceCount = index.addFile('filename.fasta')
            self.assertEqual(2, sequenceCount)
            index.close()
Пример #19
0
 def testGetFileNumber(self):
     """
     The internal _getFileNumber method must return the number that
     _addFilename returned for the same file name.
     """
     name = 'filename.fasta'
     index = SqliteIndex(':memory:')
     assigned = index._addFilename(name)
     self.assertEqual(1, assigned)
     lookedUp = index._getFileNumber(name)
     self.assertEqual(1, lookedUp)
     index.close()
Пример #20
0
 def testGetNonexistentFileNumber(self):
     """
     The internal _getFileNumber method must return None when given a file
     name that has not been added to the index.
     """
     index = SqliteIndex(':memory:')
     fileNumber = index._getFileNumber('filename.fasta')
     self.assertEqual(None, fileNumber)
     index.close()
Пример #21
0
 def testAddFilename(self):
     """
     The internal _addFilename method must return 1 for the first file name
     added to a new index and 2 for the second.
     """
     index = SqliteIndex(':memory:')
     expectedNumbers = ((1, 'filename1.fasta'), (2, 'filename2.fasta'))
     for number, name in expectedNumbers:
         self.assertEqual(number, index._addFilename(name))
     index.close()
Пример #22
0
 def testGetFileNumber(self):
     """
     Looking up a file name with the internal _getFileNumber method must
     give the same number (1) that _addFilename returned when adding it.
     """
     index = SqliteIndex(':memory:')
     numberWhenAdded = index._addFilename('filename.fasta')
     self.assertEqual(1, numberWhenAdded)
     numberWhenFetched = index._getFileNumber('filename.fasta')
     self.assertEqual(1, numberWhenFetched)
     index.close()
Пример #23
0
 def testGetNonexistentFileNumber(self):
     """
     Asking the internal _getFileNumber method about a file name that was
     never added must give None.
     """
     unknownName = 'filename.fasta'
     index = SqliteIndex(':memory:')
     result = index._getFileNumber(unknownName)
     self.assertEqual(None, result)
     index.close()
Пример #24
0
 def testAddFilename(self):
     """
     Successive calls to the internal _addFilename method on a new index
     must return 1 and then 2.
     """
     index = SqliteIndex(':memory:')
     firstNumber = index._addFilename('filename1.fasta')
     self.assertEqual(1, firstNumber)
     secondNumber = index._addFilename('filename2.fasta')
     self.assertEqual(2, secondNumber)
     index.close()
Пример #25
0
    def testDictLookupGzipData(self):
        """
        Dictionary-style lookup (__getitem__) must return the expected reads
        from BGZF-compressed input in which sequences are split over several
        lines, lines end in either LF or CRLF, and some sequences start more
        than 64K bytes into the file.
        """
        class FakeBgzfOpen(object):
            """
            Replacement for bgzf.open that serves a freshly compressed
            in-memory FASTA file and tolerates at most five calls.
            """
            def __init__(self, testCase):
                self.testCase = testCase
                self.calls = 0

            def _compressedReader(self):
                # id1 is 70000 bases long, so id2 and id3 lie beyond the
                # first 64K bytes of the uncompressed data.
                fasta = b''.join([
                    b'>id0\nAC\n',
                    b'>id1\n', b'A' * 70000, b'\n',
                    b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n',
                    b'>id3\nAACCTG\n',
                ])
                compressed = BytesIO()
                bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
                bgzfWriter.write(fasta)
                bgzfWriter.flush()
                source = BytesIO(compressed.getvalue())
                # Presumably BgzfReader inspects the mode of the file object
                # it is given - TODO confirm.
                source.mode = 'rb'
                return bgzf.BgzfReader(fileobj=source)

            def sideEffect(self, filename, *args, **kwargs):
                if self.calls > 4:
                    return self.testCase.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))
                self.testCase.assertEqual('filename.fasta.gz', filename)
                self.calls += 1
                return self._compressedReader()

        fakeOpen = FakeBgzfOpen(self)
        with patch.object(bgzf, 'open') as mockOpen:
            mockOpen.side_effect = fakeOpen.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta.gz')
            expectedReads = (
                ('id0', 'AC'),
                ('id1', 'A' * 70000),
                ('id2', 'ACTGCCCCGGG'),
                ('id3', 'AACCTG'),
            )
            for readId, sequence in expectedReads:
                self.assertEqual(DNARead(readId, sequence), index[readId])
            index.close()
Пример #26
0
    def testDictLookupGzipData(self):
        """
        Looking reads up by id (via __getitem__) must give back the expected
        reads when the input was compressed with bgzip, sequences span
        several lines with LF or CRLF endings, and some sequences begin more
        than 64K bytes into the input.
        """
        class BgzfOpenStub(object):
            """
            Stand-in for bgzf.open that answers up to five calls with a
            reader over freshly compressed in-memory FASTA data.
            """
            def __init__(self, case):
                self.case = case
                self.seen = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.seen < 5:
                    self.case.assertEqual('filename.fasta.gz', filename)
                    self.seen += 1
                    # The 70000-base id1 record pushes id2 and id3 past the
                    # first 64K bytes of the uncompressed data.
                    longRecord = b'>id1\n' + (b'A' * 70000) + b'\n'
                    crlfRecord = b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n'
                    plain = (b'>id0\nAC\n' + longRecord + crlfRecord +
                             b'>id3\nAACCTG\n')
                    sink = BytesIO()
                    out = bgzf.BgzfWriter(fileobj=sink)
                    out.write(plain)
                    out.flush()
                    # Re-wrap the compressed bytes for reading. The mode
                    # attribute is presumably consulted by BgzfReader - TODO
                    # confirm.
                    packed = BytesIO(sink.getvalue())
                    packed.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=packed)
                self.case.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        stub = BgzfOpenStub(self)
        with patch.object(bgzf, 'open') as patchedOpen:
            patchedOpen.side_effect = stub.sideEffect
            index = SqliteIndex(':memory:')
            index.addFile('filename.fasta.gz')
            found = index['id0']
            self.assertEqual(DNARead('id0', 'AC'), found)
            found = index['id1']
            self.assertEqual(DNARead('id1', 'A' * 70000), found)
            found = index['id2']
            self.assertEqual(DNARead('id2', 'ACTGCCCCGGG'), found)
            found = index['id3']
            self.assertEqual(DNARead('id3', 'AACCTG'), found)
            index.close()