Exemplo n.º 1
0
    def test_creation(self):
        """Check which constructor arguments BZ2Compressor accepts."""
        from bz2 import BZ2Compressor

        # A string level and the level 10 must both be refused.
        raises(TypeError, BZ2Compressor, "foo")
        raises(ValueError, BZ2Compressor, 10)

        # Levels 1 and 9 must construct without raising.
        for level in (1, 9):
            BZ2Compressor(level)
Exemplo n.º 2
0
    def __init__(self,
                 filename=None,
                 mode="r",
                 buffering=None,
                 compresslevel=9,
                 fileobj=None):
        """Open a bzip2-compressed file.

        Exactly one of filename and fileobj should be provided: filename
        names a file to open, fileobj is a file object to operate on.

        mode can be '', 'r' or 'rb' for reading (the default), 'w' or 'wb'
        for writing, or 'a' or 'ab' for appending.

        buffering is ignored. Its use is deprecated.

        compresslevel is passed to BZ2Compressor in the writing and
        appending modes: 1 produces the least compression, and 9 (default)
        produces the most compression.

        Raises ValueError if compresslevel is outside 1..9 (checked in every
        mode), if mode is not one of the values above, or if filename and
        fileobj are both given or both omitted.
        """
        import threading

        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = threading.RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = None
        elif mode in ("w", "wb", "a", "ab"):
            # Writing and appending differ only in the mode used to open
            # the underlying file.
            mode = "wb" if mode in ("w", "wb") else "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        have_name = filename is not None
        have_obj = fileobj is not None
        if have_name == have_obj:
            # Both given or both missing.
            raise ValueError("Must give exactly one of filename and fileobj")
        if have_name:
            self._fp = open(filename, mode)
            # We opened the file, so we are responsible for closing it.
            self._closefp = True
        else:
            self._fp = fileobj
        self._mode = mode_code
Exemplo n.º 3
0
def compress(file_path):
    """
    Write a bzip2-compressed copy of a file next to the original.

    The compressed copy is written to ``file_path + BZ2_FILE_EXTENSION``;
    the source file itself is not modified or removed by this function.
    If the file does not exist nothing is written.
    If the file already has a bz2 extension we don't compress it.

    Parameters
    ----------
    file_path a valid path (string) to a file to be compressed.

    Returns
    -------
    A tuple ``(success, text)``:
    ``(True, compressed_path)`` after compressing,
    ``(True, file_path)`` when the file already has a bz2 extension,
    ``(False, message)`` when the file does not exist.

    """
    if not Path(file_path).exists():
        return (False, "File does not exist: " + file_path)
    if has_bz2_extension(file_path):
        return (True, file_path)

    # Read fixed-size blocks: readline() on binary data returns whatever lies
    # between two newline bytes, which can be the whole file.
    chunk_size = 64 * 1024
    compressed_path = file_path + BZ2_FILE_EXTENSION
    compressor = BZ2Compressor()
    with open(file_path, 'rb') as uncompressed:
        with open(compressed_path, 'wb') as compressed:
            for chunk in iter(lambda: uncompressed.read(chunk_size), b''):
                compressed.write(compressor.compress(chunk))
            # flush() emits the data still buffered inside the compressor.
            compressed.write(compressor.flush())
    return (True, compressed_path)
 def __init__(self, pro):
     """Initialise the handler state around ``pro``.

     Runs the AbstractStreamHandler initialiser, then stores ``pro``,
     creates a fresh BZ2Compressor and resets the size/processed counters
     and the dying flag.
     """
     AbstractStreamHandler.__init__(self)
     # Counters and flag start from zero / False.
     self.__size = self.__processed = 0
     self.__dying = False
     self.__compressor = BZ2Compressor()
     self.__pro = pro
Exemplo n.º 5
0
 def testCompress(self):
     # "Test BZ2Compressor.compress()/flush()"
     bz2c = BZ2Compressor()
     self.assertRaises(TypeError, bz2c.compress)
     data = bz2c.compress(self.TEXT)
     data += bz2c.flush()
     self.assertEqual(self.decompress(data), self.TEXT)
Exemplo n.º 6
0
    def test_compressor_pickle_error(self):
        """Pickling a BZ2Compressor must fail with the expected TypeError."""
        import pickle
        from bz2 import BZ2Compressor

        compressor = BZ2Compressor()
        exc = raises(TypeError, pickle.dumps, compressor)
        expected = "cannot serialize '_bz2.BZ2Compressor' object"
        assert exc.value.args[0] == expected
Exemplo n.º 7
0
def create_patch_bsdiff(ffrom, fto, fpatch):
    """Write a BSDIFF40 patch computed from ``ffrom`` and ``fto`` to ``fpatch``.

    The chunks returned by ``bsdiff.create_patch`` are consumed five at a
    time: entries 0, 2 and 4 of each group go (via ``unpack_size_bytes`` and
    ``offtout``) into the control stream, entry 1 into the diff stream and
    entry 3 into the extra stream. Each stream is bzip2-compressed on its own.

    The patch written to ``fpatch`` is the magic ``b'BSDIFF40'``, then
    ``offtout`` of the compressed control length, of the compressed diff
    length and of the size of ``fto``, followed by the compressed control,
    diff and extra streams.

    The time spent in each of the two phases is logged at INFO level.
    """
    to_size = file_size(fto)
    from_data = file_read(ffrom)

    # Phase 1: suffix array and raw bsdiff chunks.
    started = time.time()
    suffix_array = bytearray(4 * (len(from_data) + 1))
    divsufsort(from_data, suffix_array)
    to_data = file_read(fto)
    work_buffer = bytearray(file_size(fto) + 1)
    chunks = bsdiff.create_patch(suffix_array, from_data, to_data, work_buffer)

    LOGGER.info('Bsdiff algorithm completed in %s.',
                format_timespan(time.time() - started))

    # Phase 2: compress the three streams independently.
    ctrl_out = BytesIO()
    diff_out = BytesIO()
    extra_out = BytesIO()

    started = time.time()

    ctrl_bz2 = BZ2Compressor()
    diff_bz2 = BZ2Compressor()
    extra_bz2 = BZ2Compressor()

    def emit_ctrl(raw_size):
        # Re-encode one size field and append it to the control stream.
        encoded = offtout(unpack_size_bytes(raw_size))
        ctrl_out.write(ctrl_bz2.compress(encoded))

    for base in range(0, len(chunks), 5):
        emit_ctrl(chunks[base + 0])
        diff_out.write(diff_bz2.compress(chunks[base + 1]))
        emit_ctrl(chunks[base + 2])
        extra_out.write(extra_bz2.compress(chunks[base + 3]))
        emit_ctrl(chunks[base + 4])

    # Drain whatever each compressor is still holding.
    ctrl_out.write(ctrl_bz2.flush())
    diff_out.write(diff_bz2.flush())
    extra_out.write(extra_bz2.flush())

    LOGGER.info('Compression completed in %s.',
                format_timespan(time.time() - started))

    # Write everything to the patch file: header first, then the streams.
    fpatch.write(b'BSDIFF40')
    fpatch.write(offtout(ctrl_out.tell()))
    fpatch.write(offtout(diff_out.tell()))
    fpatch.write(offtout(to_size))
    fpatch.write(ctrl_out.getvalue())
    fpatch.write(diff_out.getvalue())
    fpatch.write(extra_out.getvalue())
Exemplo n.º 8
0
def compress(data):
    """Return ``data`` compressed as one complete bzip2 stream.

    Parameters
    ----------
    data : bytes-like payload to compress.

    Returns
    -------
    The compressed bytes, or None if compression failed (the failure is
    logged through ``log.error``).
    """
    try:
        bc = BZ2Compressor()
        # compress() may already return part of the stream, so its output has
        # to be kept in front of what flush() returns.
        return bc.compress(data) + bc.flush()
    except Exception as ex:
        # NOTE(review): assumes ``log`` follows the logging-module convention
        # of %-style lazy arguments - confirm against the logger in use.
        log.error("unable to bzip data: %s", ex)
        return None
Exemplo n.º 9
0
    def test_compress(self):
        """Round-trip self.TEXT through one compress() call plus flush()."""
        from bz2 import BZ2Compressor

        bz2c = BZ2Compressor()
        # compress() without its data argument must raise TypeError.
        raises(TypeError, bz2c.compress)
        # Concatenate the two outputs directly: "%s" formatting would embed
        # the repr of a bytes object on Python 3 instead of the raw data,
        # while plain concatenation is identical for Python 2 strings.
        data = bz2c.compress(self.TEXT) + bz2c.flush()
        assert self.decompress(data) == self.TEXT
Exemplo n.º 10
0
    def test_compress_huge_data(self):
        """Round-trip 10000 repetitions of self.TEXT through BZ2Compressor.

        Skipped unless self.HUGE_OK is true.
        """
        if not self.HUGE_OK:
            skip("skipping test requiring lots of memory")
        from bz2 import BZ2Compressor

        HUGE_DATA = self.TEXT * 10000
        bz2c = BZ2Compressor()
        # compress() without its data argument must raise TypeError.
        raises(TypeError, bz2c.compress)
        # Concatenate the outputs directly: "%s" formatting would embed the
        # repr of a bytes object on Python 3 instead of the raw data, while
        # plain concatenation is identical for Python 2 strings.
        data = bz2c.compress(HUGE_DATA) + bz2c.flush()
        assert self.decompress(data) == HUGE_DATA
 def testCompressChunks10(self):
     bz2c = BZ2Compressor()
     n = 0
     data = b''
     while True:
         str = self.TEXT[n * 10:(n + 1) * 10]
         if not str:
             break
         data += bz2c.compress(str)
         n += 1
     data += bz2c.flush()
     self.assertEqual(self.decompress(data), self.TEXT)
Exemplo n.º 12
0
 def testCompressChunks10(self):
     # "Test BZ2Compressor.compress()/flush() with chunks of 10 bytes"
     bz2c = BZ2Compressor()
     n = 0
     data = ''
     while 1:
         str = self.TEXT[n*10:(n+1)*10]
         if not str:
             break
         data += bz2c.compress(str)
         n += 1
     data += bz2c.flush()
     self.assertEqual(self.decompress(data), self.TEXT)
Exemplo n.º 13
0
    def test_compress_chunks_10(self):
        from bz2 import BZ2Compressor

        bz2c = BZ2Compressor()
        n = 0
        data = ""
        while True:
            temp = self.TEXT[n * 10:(n + 1) * 10]
            if not temp:
                break
            data = "%s%s" % (data, bz2c.compress(temp))
            n += 1
        data = "%s%s" % (data, bz2c.flush())
        assert self.decompress(data) == self.TEXT
Exemplo n.º 14
0
 def testCompress4G(self, size):
     bz2c = BZ2Compressor()
     data = b'x' * size
     try:
         compressed = bz2c.compress(data)
         compressed += bz2c.flush()
     finally:
         data = None
     data = bz2.decompress(compressed)
     try:
         self.assertEqual(len(data), size)
         self.assertEqual(len(data.strip(b'x')), 0)
     finally:
         data = None
 def testCompress4G(self, size):
     # "Test BZ2Compressor.compress()/flush() with >4GiB input"
     bz2c = BZ2Compressor()
     data = b"x" * size
     try:
         compressed = bz2c.compress(data)
         compressed += bz2c.flush()
     finally:
         data = None  # Release memory
     data = bz2.decompress(compressed)
     try:
         self.assertEqual(len(data), size)
         self.assertEqual(len(data.strip(b"x")), 0)
     finally:
         data = None
Exemplo n.º 16
0
def create_patch_bsdiff(ffrom, fto, fpatch):
    """Write a BSDIFF40 patch computed from ``ffrom`` and ``fto`` to ``fpatch``.

    A suffix array of the source data is built with ``sais.sais`` and handed
    to ``bsdiff.create_patch``. The returned chunks are consumed five at a
    time: entries 0, 2 and 4 of each group go (via ``unpack_size_bytes`` and
    ``offtout``) into the control stream, entry 1 into the diff stream and
    entry 3 into the extra stream. Each stream is bzip2-compressed on its own.

    The patch written to ``fpatch`` is the magic ``b'BSDIFF40'``, then
    ``offtout`` of the compressed control length, of the compressed diff
    length and of the size of ``fto``, followed by the compressed control,
    diff and extra streams.
    """
    to_size = file_size(fto)
    from_data = file_read(ffrom)
    suffix_array = sais.sais(from_data)
    to_data = file_read(fto)
    chunks = bsdiff.create_patch(suffix_array, from_data, to_data)

    ctrl_out = BytesIO()
    diff_out = BytesIO()
    extra_out = BytesIO()

    ctrl_bz2 = BZ2Compressor()
    diff_bz2 = BZ2Compressor()
    extra_bz2 = BZ2Compressor()

    def emit_ctrl(raw_size):
        # Re-encode one size field and append it to the control stream.
        encoded = offtout(unpack_size_bytes(raw_size))
        ctrl_out.write(ctrl_bz2.compress(encoded))

    for base in range(0, len(chunks), 5):
        emit_ctrl(chunks[base + 0])
        diff_out.write(diff_bz2.compress(chunks[base + 1]))
        emit_ctrl(chunks[base + 2])
        extra_out.write(extra_bz2.compress(chunks[base + 3]))
        emit_ctrl(chunks[base + 4])

    # Drain whatever each compressor is still holding.
    ctrl_out.write(ctrl_bz2.flush())
    diff_out.write(diff_bz2.flush())
    extra_out.write(extra_bz2.flush())

    # Write everything to the patch file: header first, then the streams.
    fpatch.write(b'BSDIFF40')
    fpatch.write(offtout(ctrl_out.tell()))
    fpatch.write(offtout(diff_out.tell()))
    fpatch.write(offtout(to_size))
    fpatch.write(ctrl_out.getvalue())
    fpatch.write(diff_out.getvalue())
    fpatch.write(extra_out.getvalue())
Exemplo n.º 17
0
 def __init__(self):
     """Constructor to build the appropriate compressor type.

     ``compressor_type`` selects the behaviour: 'bzip2' uses a level-9
     BZ2Compressor object, 'gzip' uses zlib.compress at level 9, and any
     other value passes data through unchanged. ``_flush_passthru`` is
     False only for 'bzip2'.
     """
     # Start from the settings shared by every non-bzip2 case.
     self._comp = None
     self._flush_passthru = True

     if compressor_type == 'bzip2':
         from bz2 import BZ2Compressor
         self._comp = BZ2Compressor(9)
         self._comp_func = self._comp.compress
         self._flush_passthru = False
         return

     if compressor_type == 'gzip':
         from zlib import compress
         self._comp_func = lambda x: compress(x, 9)
         return

     # Unknown type: identity function.
     self._comp_func = lambda x: x
 def thr_func(sock):
     """Read a length-prefixed payload from ``sock`` and send it back bzip2-compressed.

     The first received packet is expected to start with the payload size as
     decimal digits followed by the separator ``x``; everything after the
     separator is payload. Payload is read until ``size`` bytes have been
     seen, compressed incrementally, and every piece of compressor output is
     sent back on the same socket. The socket is always closed on exit.

     If the peer closes the connection before the announced size arrives,
     the data received so far is flushed and sent instead of waiting forever.
     """
     chunk_size = 4096
     try:
         data = sock.recv(chunk_size)
         # Bytes literals work for Python 3 sockets and equal str on Python 2.
         separator = data.find(b'x')
         data_size = int(data[:separator])
         pending = data[separator + 1:]
         processed = 0
         compressor = BZ2Compressor()
         # "<" rather than "!=" so an overshoot cannot loop forever.
         while processed < data_size:
             if not pending:
                 pending = sock.recv(chunk_size)
                 if not pending:
                     # Empty read: the peer closed the connection early.
                     break
             processed += len(pending)
             data_c = compressor.compress(pending)
             # Output produced for the last chunk must be sent as well,
             # otherwise the compressed stream is missing a block.
             if data_c:
                 sock.sendall(data_c)
             pending = b''
         sock.sendall(compressor.flush())
     finally:
         sock.close()
Exemplo n.º 19
0
def create_compressor(compression):
    """Return a new compressor object for the named compression scheme.

    Recognised names are 'lzma', 'bz2', 'none', 'crle', 'heatshrink',
    'zstd' and 'lz4'. Any other value raises ``Error`` with the message
    produced by ``format_bad_compression_string``.
    """
    # Each factory is wrapped in a lambda so that only the selected
    # compressor class is looked up and instantiated.
    factories = (
        ('lzma', lambda: lzma.LZMACompressor(format=lzma.FORMAT_ALONE)),
        ('bz2', lambda: BZ2Compressor()),
        ('none', lambda: NoneCompressor()),
        ('crle', lambda: CrleCompressor()),
        ('heatshrink', lambda: HeatshrinkCompressor()),
        ('zstd', lambda: ZstdCompressor()),
        ('lz4', lambda: Lz4Compressor()),
    )

    for name, factory in factories:
        if compression == name:
            return factory()

    raise Error(format_bad_compression_string(compression))
Exemplo n.º 20
0
 def __init__(self, text):
     """Store ``text`` and create a fresh BZ2Compressor as ``comp``."""
     self.comp = BZ2Compressor()
     self.text = text
 def testPickle(self):
     for proto in range(pickle.HIGHEST_PROTOCOL + 1):
         with self.assertRaises(TypeError):
             pickle.dumps(BZ2Compressor(), proto)
 def testCompressEmptyString(self):
     bz2c = BZ2Compressor()
     data = bz2c.compress(b'')
     data += bz2c.flush()
     self.assertEqual(data, self.EMPTY_DATA)
Exemplo n.º 23
0
 def _reset_compressor(self):
     """Reset compression state: new level-2 compressor, cleared outputs."""
     self.aggregate = None
     self.step_count = 0
     self.compressor = BZ2Compressor(2)
     self.compressed_behavior = b''
Exemplo n.º 24
0
 def __init__(self, **args):
     """Create the wrapped BZ2Compressor as ``classobj``.

     Keyword arguments:
     ratio -- compression level passed to BZ2Compressor. When it is missing
              (or None) the compressor is built with its own default level
              instead of being handed None, which BZ2Compressor rejects
              with TypeError.
     """
     ratio = args.get('ratio')
     if ratio is None:
         self.classobj = BZ2Compressor()
     else:
         self.classobj = BZ2Compressor(ratio)
Exemplo n.º 25
0
 def test_buffer(self):
     """Compress self.TEXT passed as a buffer() object and round-trip it."""
     from bz2 import BZ2Compressor
     compressor = BZ2Compressor()
     # buffer() is the Python 2 builtin; compress() must accept it.
     head = compressor.compress(buffer(self.TEXT))
     tail = compressor.flush()
     data = "%s%s" % (head, tail)
     assert self.decompress(data) == self.TEXT
Exemplo n.º 26
0
 def testPickle(self):
     with self.assertRaises(TypeError):
         pickle.dumps(BZ2Compressor())
Exemplo n.º 27
0
    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes or unicode object, it gives the name
        of the file to be opened. Otherwise, it should be a file object
        (anything with a read or write attribute), which will be used to
        read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'. The 'x'
        modes are only accepted when _HAS_OPEN_X_MODE is true.

        buffering is ignored. Its use is deprecated, and passing a value
        other than None emits a DeprecationWarning.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for a compresslevel outside 1..9 or an
        unrecognised mode, and TypeError if filename is neither a string
        type nor a file-like object.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # readline(), readlines() and writelines() don't deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED
        self._pos = 0
        self._size = -1

        if buffering is not None:
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
            self._decompressor = BZ2Decompressor()
            self._buffer = b""
            self._buffer_offset = 0
        else:
            # Every remaining valid mode writes; they differ only in the
            # mode string used to open the underlying file.
            if mode in ("w", "wb"):
                mode = "wb"
            elif mode in ("x", "xb") and _HAS_OPEN_X_MODE:
                mode = "xb"
            elif mode in ("a", "ab"):
                mode = "ab"
            else:
                raise ValueError("Invalid mode: %r" % (mode, ))
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)

        if isinstance(filename, _STR_TYPES):
            self._fp = _builtin_open(filename, mode)
            # We opened the file, so we are responsible for closing it.
            self._closefp = True
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            self._fp = filename
        else:
            raise TypeError("filename must be a %s or %s object, or a file" %
                            (_STR_TYPES[0].__name__, _STR_TYPES[1].__name__))
        self._mode = mode_code
Exemplo n.º 28
0
 def testCompressEmptyString(self):
     # "Test BZ2Compressor.compress()/flush() of empty string"
     bz2c = BZ2Compressor()
     data = bz2c.compress('')
     data += bz2c.flush()
     self.assertEqual(data, self.EMPTY_DATA)
Exemplo n.º 29
0
def bz2compress(data):
    """Return ``data`` as one complete bzip2 stream at compression level 9."""
    compressor = BZ2Compressor(9)
    body = compressor.compress(data)
    # flush() returns whatever the compressor still holds and ends the stream.
    trailer = compressor.flush()
    return body + trailer
Exemplo n.º 30
0
 def testCompress(self):
     # Compress self.TEXT with compress()/flush() and check that
     # ext_decompress() recovers the original text.
     compressor = BZ2Compressor()
     # Calling compress() without data must raise TypeError.
     self.assertRaises(TypeError, compressor.compress)
     stream = compressor.compress(self.TEXT) + compressor.flush()
     self.assertEqual(ext_decompress(stream), self.TEXT)