Пример #1
0
    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str or bytes object, it gives the name of the file to
        be opened. Otherwise, it should be a file object, which will be used to
        read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing, or 'a' for
        appending. These can equivalently be given as 'rb', 'wb', and 'ab'.
        An empty mode string is treated as 'r'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError if compresslevel is outside 1..9 or if mode is not
        one of the accepted values, and TypeError if filename is neither a
        str/bytes object nor an object with a read() or write() attribute.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        # Start out in the "closed" state; the real mode is only recorded
        # once a file object has actually been obtained below.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if buffering is not None:
            # stacklevel=2 makes the warning point at the code that passed
            # 'buffering', not at this constructor.
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning, stacklevel=2)

        # compresslevel is validated for every mode, including reading.
        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = None
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes)):
            # We opened the file ourselves, so we are responsible for
            # closing it (tracked via _closefp).
            self._fp = builtins.open(filename, mode)
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: use it as-is and leave _closefp
            # False.
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str or bytes object, or a file")
Пример #2
0
    def test_EOF_error(self):
        """After self.DATA is consumed, further decompress() calls raise EOFError."""
        from _bz2 import BZ2Decompressor

        decompressor = BZ2Decompressor()
        decompressor.decompress(self.DATA)
        # Both non-empty and empty follow-up input must be rejected.
        for extra_input in (b"foo", b""):
            raises(EOFError, decompressor.decompress, extra_input)
Пример #3
0
    def test_decompress(self):
        """decompress() needs an argument and turns self.DATA into self.TEXT."""
        from _bz2 import BZ2Decompressor

        decompressor = BZ2Decompressor()
        # Calling without any data is a TypeError.
        raises(TypeError, decompressor.decompress)
        output = decompressor.decompress(self.DATA)
        assert output == self.TEXT
Пример #4
0
 def _rewind(self):
     """Seek the underlying file to its start and reset the read state."""
     self._fp.seek(0, 0)
     # Back to plain read mode at logical position zero.
     self._mode, self._pos = _MODE_READ, 0
     # Start over with a new decompressor and an empty buffer.
     self._decompressor = BZ2Decompressor()
     self._buffer, self._buffer_offset = b"", 0
Пример #5
0
    def _fill_buffer(self):
        """Make sure unread decompressed data is available in self._buffer.

        Returns True when the buffer has data past self._buffer_offset.
        Returns False when already in EOF mode, when the input ends right
        after an end-of-stream marker, or when trailing bytes do not form a
        valid stream; in the latter two cases the object switches to EOF
        mode and records its size. Raises EOFError if the input ends before
        an end-of-stream marker.
        """
        if self._mode == _MODE_READ_EOF:
            return False
        # A decompress() call is not guaranteed to yield output, so keep
        # feeding raw blocks for as long as the buffer is fully consumed.
        while self._buffer_offset == len(self._buffer):
            # Prefer bytes left over from the previous stream; only hit the
            # file when there are none.
            pending = self._decompressor.unused_data
            if not pending:
                pending = self._fp.read(_BUFFER_SIZE)

            if not pending:
                if not self._decompressor.eof:
                    # More compressed data was expected but the file ended.
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")
                # End-of-stream marker coincides with end of file.
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False

            if not self._decompressor.eof:
                self._buffer = self._decompressor.decompress(pending)
            else:
                # The previous stream is finished; try to start another one.
                self._decompressor = BZ2Decompressor()
                try:
                    self._buffer = self._decompressor.decompress(pending)
                except OSError:
                    # Trailing bytes are not a bzip2 stream: stop here.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
            self._buffer_offset = 0
        return True
Пример #6
0
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.

    data may hold several concatenated bzip2 streams; their decompressed
    contents are joined in order. Empty input yields b''. Bytes that follow
    at least one complete stream and are not themselves a valid stream are
    ignored. Raises OSError if the very first stream is invalid, and
    ValueError if a stream ends before its end-of-stream marker.
    """
    results = []
    # Loop on the remaining input so that running out of data ends the loop
    # instead of spinning forever.
    while data:
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                # Leftover data is not a valid bzip2 stream; ignore it.
                break
            # Error on the first stream: nothing usable was decoded.
            raise

        results.append(res)
        if not decomp.eof:
            raise ValueError(
                'Compressed data ended before the end-of-stream marker was reached'
            )
        # Anything after the end-of-stream marker may start another stream.
        data = decomp.unused_data

    # The chunks are bytes, so they must be joined with a bytes separator.
    return b''.join(results)
Пример #7
0
    def _fill_buffer(self):
        """Ensure self._buffer holds decompressed data.

        Returns True once the buffer is non-empty. Returns False, after
        switching to EOF mode and recording the size, when the input is
        exhausted right after an end-of-stream marker. Raises EOFError if
        the input ends before an end-of-stream marker.
        """
        # A decompress() call is not guaranteed to yield output, so keep
        # feeding raw blocks until some data shows up or the input runs out.
        while not self._buffer:
            decomp = self._decompressor
            # Prefer bytes left over after the previous stream's end marker.
            chunk = decomp.unused_data or self._fp.read(_BUFFER_SIZE)

            if not chunk:
                if not decomp.eof:
                    raise EOFError("Compressed file ended before the "
                                   "end-of-stream marker was reached")
                self._mode = _MODE_READ_EOF
                self._size = self._pos
                return False

            if decomp.eof:
                # Previous stream is complete; the chunk starts a new one.
                decomp = self._decompressor = BZ2Decompressor()

            self._buffer = decomp.decompress(chunk)
        return True
Пример #8
0
    def test_decompressor_pickle_error(self):
        """Pickling a BZ2Decompressor raises TypeError with a fixed message."""
        from _bz2 import BZ2Decompressor
        import pickle

        excinfo = raises(TypeError, pickle.dumps, BZ2Decompressor())
        message = excinfo.value.args[0]
        assert message == "cannot serialize '_bz2.BZ2Decompressor' object"
Пример #9
0
    def test_decompress_unused_data(self):
        """Bytes appended after self.DATA are exposed through unused_data."""
        from _bz2 import BZ2Decompressor

        decompressor = BZ2Decompressor()
        trailer = b"this is unused data"
        output = decompressor.decompress(self.DATA + trailer)
        assert output == self.TEXT
        # The trailer must come back unchanged.
        assert decompressor.unused_data == trailer
Пример #10
0
    def test_decompress_chunks_10(self):
        """Feeding self.DATA in 10-byte slices still yields self.TEXT."""
        from _bz2 import BZ2Decompressor

        decompressor = BZ2Decompressor()
        chunk_size = 10
        output = b""
        # Walk the input in consecutive 10-byte windows; the last one may be
        # shorter.
        for start in range(0, len(self.DATA), chunk_size):
            piece = self.DATA[start:start + chunk_size]
            output += decompressor.decompress(piece)

        assert output == self.TEXT
Пример #11
0
    def test_decompress_max_length(self):
        """decompress() honours max_length and can be drained with empty input."""
        from _bz2 import BZ2Decompressor

        bz2d = BZ2Decompressor()
        decomp = []

        # The first call receives all of the input but caps the output at
        # 100 bytes.
        decomp.append(bz2d.decompress(self.DATA, max_length=100))
        assert len(decomp[-1]) == 100

        # Drain the remaining output without supplying any new input, at
        # most 200 bytes per call, until the end of the stream is reached.
        while not bz2d.eof:
            decomp.append(bz2d.decompress(b"", max_length=200))
            assert len(decomp[-1]) <= 200

        assert b''.join(decomp) == self.TEXT
Пример #12
0
def decompress(data):
    """Decompress a block of data.

    For incremental decompression, use a BZ2Decompressor object instead.

    data may hold several concatenated bzip2 streams; their decompressed
    contents are joined in order. Empty input yields b"". Bytes that follow
    at least one complete stream and are not themselves a valid stream are
    ignored. Raises OSError if the very first stream is invalid, and
    ValueError if a stream ends before its end-of-stream marker.
    """
    # Kept as an explicit length check so that non-sized arguments still
    # fail with TypeError as before.
    if len(data) == 0:
        return b""

    results = []
    while True:
        decomp = BZ2Decompressor()
        try:
            chunk = decomp.decompress(data)
        except OSError:
            if results:
                # Leftover data is not a valid bzip2 stream; keep what was
                # already decoded instead of discarding it.
                break
            # Error on the first stream: nothing usable was decoded.
            raise
        results.append(chunk)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # There may be unused data left over. If so, proceed to next stream.
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)
Пример #13
0
def decompress(data):
    """Decompress one or more concatenated bzip2 streams held in data.

    For incremental decompression, use a BZ2Decompressor object instead.

    Returns the joined decompressed bytes (b"" for empty input). Invalid
    bytes after at least one complete stream are ignored; an OSError from
    the first stream propagates. Raises ValueError if a stream ends before
    its end-of-stream marker.
    """
    pieces = []
    remaining = data
    while remaining:
        stream = BZ2Decompressor()
        try:
            chunk = stream.decompress(remaining)
        except OSError:
            if not pieces:
                # Error on the first iteration; bail out.
                raise
            # Leftover data is not a valid bzip2 stream; ignore it.
            break
        pieces.append(chunk)
        if not stream.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the end-of-stream marker may be another stream.
        remaining = stream.unused_data
    return b"".join(pieces)
Пример #14
0
    def _fill_buffer(self):
        """Ensure self._buffer holds decompressed data.

        Returns True once the buffer is non-empty. Returns False, after
        switching to EOF mode and recording the size, when the input is
        exhausted right after an end-of-stream marker. Raises EOFError if
        the input ends before an end-of-stream marker.
        """
        # Depending on the input data, a call to the decompressor may not
        # return any data. Keep reading blocks until it does, so that a True
        # result always means the buffer is non-empty.
        while not self._buffer:
            if self._decompressor.unused_data:
                # Bytes left over after the previous stream's end marker.
                rawblock = self._decompressor.unused_data
            else:
                rawblock = self._fp.read(_BUFFER_SIZE)

            if not rawblock:
                if self._decompressor.eof:
                    # End-of-stream marker and end of file coincide.
                    self._mode = _MODE_READ_EOF
                    self._size = self._pos
                    return False
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

            # Continue to next stream.
            if self._decompressor.eof:
                self._decompressor = BZ2Decompressor()

            self._buffer = self._decompressor.decompress(rawblock)
        return True
Пример #15
0
    def test_creation(self):
        """BZ2Decompressor takes no arguments; passing one is a TypeError."""
        from _bz2 import BZ2Decompressor

        # Rejected: the constructor is called with a positional argument.
        raises(TypeError, BZ2Decompressor, "foo")

        # Accepted: plain no-argument construction.
        BZ2Decompressor()
Пример #16
0
 def test_buffer(self):
     """decompress() accepts a memoryview over self.DATA."""
     from _bz2 import BZ2Decompressor
     decompressor = BZ2Decompressor()
     view = memoryview(self.DATA)
     output = decompressor.decompress(view)
     assert output == self.TEXT
Пример #17
0
    def test_attribute(self):
        """A new decompressor starts with empty unused_data."""
        from _bz2 import BZ2Decompressor

        decompressor = BZ2Decompressor()
        assert decompressor.unused_data == b""
Пример #18
0
 def test_subsequent_read(self):
     """First decompress of self.BUGGY_DATA gives b''; a second raises IOError."""
     from _bz2 import BZ2Decompressor
     decompressor = BZ2Decompressor()
     first_output = decompressor.decompress(self.BUGGY_DATA)
     assert first_output == b''
     # Feeding the same data again must fail.
     raises(IOError, decompressor.decompress, self.BUGGY_DATA)