def test_creation(self):
    """Check which constructor arguments BZ2Compressor rejects and accepts."""
    from bz2 import BZ2Compressor

    # A non-integer level is a type error; 10 is rejected as a value error.
    raises(TypeError, BZ2Compressor, "foo")
    raises(ValueError, BZ2Compressor, 10)

    # The levels 1 and 9 must both construct without raising.
    for level in (1, 9):
        BZ2Compressor(level)
def __init__(self, filename=None, mode="r", buffering=None,
             compresslevel=9, fileobj=None):
    """Open a bzip2-compressed file.

    Exactly one of *filename* and *fileobj* should be provided. When
    *filename* is given the named file is opened; otherwise the file
    object passed as *fileobj* is used directly.

    *mode* can be 'r' for reading (default), 'w' for writing or 'a' for
    appending. The spellings 'rb', 'wb' and 'ab' are accepted as well,
    and an empty string is treated like 'r'.

    *buffering* is ignored. Its use is deprecated.

    For the writing modes, *compresslevel* can be a number between 1
    and 9 specifying the level of compression: 1 produces the least
    compression, and 9 (default) produces the most compression.

    Raises ValueError if *compresslevel* is outside 1..9, if *mode* is
    not one of the accepted values, or unless exactly one of *filename*
    and *fileobj* is supplied.
    """
    import threading

    # The lock has to be re-entrant so that BufferedIOBase's readline(),
    # readlines() and writelines() don't deadlock.
    self._lock = threading.RLock()
    self._fp = None
    self._closefp = False
    self._mode = _MODE_CLOSED
    self._pos = 0
    self._size = -1

    if not (1 <= compresslevel <= 9):
        raise ValueError("compresslevel must be between 1 and 9")

    if mode in ("", "r", "rb"):
        mode = "rb"
        mode_code = _MODE_READ
        self._decompressor = BZ2Decompressor()
        self._buffer = None
    elif mode in ("w", "wb", "a", "ab"):
        # Normalise to the binary spelling: "w"/"wb" -> "wb", "a"/"ab" -> "ab".
        mode = mode[:1] + "b"
        mode_code = _MODE_WRITE
        self._compressor = BZ2Compressor(compresslevel)
    else:
        raise ValueError("Invalid mode: {!r}".format(mode))

    has_name = filename is not None
    has_fileobj = fileobj is not None
    if has_name == has_fileobj:
        # Either both were supplied or neither was.
        raise ValueError("Must give exactly one of filename and fileobj")

    if has_name:
        self._fp = open(filename, mode)
        # We opened the file ourselves, so remember that we own it.
        self._closefp = True
    else:
        self._fp = fileobj
    self._mode = mode_code
def compress(file_path):
    """
    Compress a file with bzip2, writing the result next to the original.

    The compressed copy is written to ``file_path + BZ2_FILE_EXTENSION``;
    this function does not remove the original file.
    If the file does not exist we return False.
    If the file already has a bz2 extension we don't compress it.

    Parameters
    ----------
    file_path
        a valid path to a file to be compressed.

    Returns
    -------
    tuple
        ``(True, compressed_path)`` after compressing,
        ``(True, file_path)`` when the file already has a bz2 extension,
        ``(False, message)`` when the file does not exist.
    """
    if not Path(file_path).exists():
        return (False, "File does not exist: " + file_path)
    if has_bz2_extension(file_path):
        return (True, file_path)

    compressed_path = file_path + BZ2_FILE_EXTENSION
    compressor = BZ2Compressor()
    # Read fixed-size chunks rather than "lines": the input is opened in
    # binary mode, and a file without newline bytes would otherwise be
    # pulled into memory in a single readline() call.
    chunk_size = 64 * 1024
    with open(file_path, 'rb') as uncompressed:
        with open(compressed_path, 'wb') as compressed:
            for chunk in iter(lambda: uncompressed.read(chunk_size), b''):
                compressed.write(compressor.compress(chunk))
            # flush() emits whatever the compressor is still buffering.
            compressed.write(compressor.flush())
    return (True, compressed_path)
def __init__(self, pro):
    """Initialise the base handler, keep *pro* and reset the stream state."""
    AbstractStreamHandler.__init__(self)

    # Collaborators: the object handed in by the caller and a fresh
    # bzip2 compressor with default settings.
    self.__pro = pro
    self.__compressor = BZ2Compressor()

    # Bookkeeping starts out zeroed, with the "dying" flag cleared.
    self.__size = 0
    self.__processed = 0
    self.__dying = False
def testCompress(self):
    """Test BZ2Compressor.compress()/flush()."""
    bz2c = BZ2Compressor()

    # Calling compress() with no data argument is a TypeError.
    self.assertRaises(TypeError, bz2c.compress)

    # The full stream is the compress() output followed by flush().
    compressed = bz2c.compress(self.TEXT) + bz2c.flush()
    self.assertEqual(self.decompress(compressed), self.TEXT)
def test_compressor_pickle_error(self):
    """Pickling a BZ2Compressor must fail with a specific TypeError message."""
    import pickle
    from bz2 import BZ2Compressor

    expected = "cannot serialize '_bz2.BZ2Compressor' object"
    exc = raises(TypeError, pickle.dumps, BZ2Compressor())
    message = exc.value.args[0]
    assert message == expected
def create_patch_bsdiff(ffrom, fto, fpatch):
    """Build a patch from *ffrom* to *fto* and write it to *fpatch*.

    The chunks returned by bsdiff.create_patch() are consumed five at a
    time: items 0, 2 and 4 are size values that go to the control
    stream, item 1 goes to the diff stream and item 3 to the extra
    stream. Each stream is bzip2-compressed on its own. The output is
    the b'BSDIFF40' magic, the compressed control length, the compressed
    diff length and the size of *fto* (each encoded with offtout()),
    followed by the three compressed streams.
    """
    to_size = file_size(fto)
    from_data = file_read(ffrom)

    began = time.time()
    suffix_array = bytearray(4 * (len(from_data) + 1))
    divsufsort(from_data, suffix_array)
    to_data = file_read(fto)
    scratch = bytearray(file_size(fto) + 1)
    chunks = bsdiff.create_patch(suffix_array, from_data, to_data, scratch)
    LOGGER.info('Bsdiff algorithm completed in %s.',
                format_timespan(time.time() - began))

    fctrl = BytesIO()
    fdiff = BytesIO()
    fextra = BytesIO()

    began = time.time()
    ctrl_compressor = BZ2Compressor()
    diff_compressor = BZ2Compressor()
    extra_compressor = BZ2Compressor()

    def write_ctrl(raw_size):
        # Re-encode one size value and append it to the control stream.
        encoded = offtout(unpack_size_bytes(raw_size))
        fctrl.write(ctrl_compressor.compress(encoded))

    for base in range(0, len(chunks), 5):
        write_ctrl(chunks[base + 0])
        fdiff.write(diff_compressor.compress(chunks[base + 1]))
        write_ctrl(chunks[base + 2])
        fextra.write(extra_compressor.compress(chunks[base + 3]))
        write_ctrl(chunks[base + 4])

    # Drain whatever each compressor is still holding back.
    fctrl.write(ctrl_compressor.flush())
    fdiff.write(diff_compressor.flush())
    fextra.write(extra_compressor.flush())
    LOGGER.info('Compression completed in %s.',
                format_timespan(time.time() - began))

    # Write everything to the patch file: header first, then the streams.
    header = (b'BSDIFF40',
              offtout(fctrl.tell()),
              offtout(fdiff.tell()),
              offtout(to_size))
    for field in header:
        fpatch.write(field)
    for stream in (fctrl, fdiff, fextra):
        fpatch.write(stream.getvalue())
def compress(data):
    """Return *data* compressed as a complete bzip2 stream.

    The result is the output of BZ2Compressor.compress() followed by the
    output of flush(). compress() may already emit part of the stream,
    so both pieces are needed for a valid result.

    Returns None (after logging the error) if compression fails, e.g.
    because *data* is not a bytes-like object.
    """
    try:
        bc = BZ2Compressor()
        return bc.compress(data) + bc.flush()
    except Exception as ex:
        # Deliberately best-effort: report the failure and hand back None.
        # NOTE(review): assumes `log` follows the stdlib logging API, where
        # extra positional arguments need a matching placeholder -- confirm.
        log.error("unable to bzip data: %s", ex)
        return None
def test_compress(self):
    """Round-trip self.TEXT through BZ2Compressor.compress()/flush()."""
    from bz2 import BZ2Compressor

    bz2c = BZ2Compressor()
    # compress() without a data argument must be rejected.
    raises(TypeError, bz2c.compress)

    data = bz2c.compress(self.TEXT)
    # Concatenate instead of "%s%s" formatting: on Python 3, formatting
    # bytes with %s inserts their repr and corrupts the stream, while on
    # Python 2 plain concatenation gives the same str result.
    data += bz2c.flush()
    assert self.decompress(data) == self.TEXT
def test_compress_huge_data(self):
    """Round-trip 10000 repetitions of self.TEXT through BZ2Compressor.

    Skipped unless self.HUGE_OK is true, because of the memory needed.
    """
    if not self.HUGE_OK:
        skip("skipping test requiring lots of memory")
    from bz2 import BZ2Compressor

    HUGE_DATA = self.TEXT * 10000
    bz2c = BZ2Compressor()
    # compress() without a data argument must be rejected.
    raises(TypeError, bz2c.compress)

    data = bz2c.compress(HUGE_DATA)
    # Concatenate instead of "%s%s" formatting: on Python 3, formatting
    # bytes with %s inserts their repr and corrupts the stream, while on
    # Python 2 plain concatenation gives the same str result.
    data += bz2c.flush()
    assert self.decompress(data) == HUGE_DATA
def testCompressChunks10(self):
    """Feed self.TEXT to a BZ2Compressor ten bytes at a time and round-trip it."""
    bz2c = BZ2Compressor()
    data = b''
    # Walk the text in consecutive 10-byte slices; the last may be shorter.
    for start in range(0, len(self.TEXT), 10):
        data += bz2c.compress(self.TEXT[start:start + 10])
    data += bz2c.flush()
    self.assertEqual(self.decompress(data), self.TEXT)
def testCompressChunks10(self):
    """Test BZ2Compressor.compress()/flush() with chunks of 10 bytes."""
    bz2c = BZ2Compressor()
    data = ''
    # Consecutive slices of length 10 cover the text; the last may be shorter.
    for offset in range(0, len(self.TEXT), 10):
        piece = self.TEXT[offset:offset + 10]
        data += bz2c.compress(piece)
    data += bz2c.flush()
    self.assertEqual(self.decompress(data), self.TEXT)
def test_compress_chunks_10(self):
    """Compress self.TEXT in 10-byte chunks and check that it round-trips."""
    from bz2 import BZ2Compressor

    bz2c = BZ2Compressor()
    # Collect the pieces and join them once. The former "%s%s" formatting
    # rebuilt the whole result on every iteration and, on Python 3, would
    # insert the repr of each bytes object instead of its contents.
    # b"".join gives the same str result on Python 2.
    pieces = [
        bz2c.compress(self.TEXT[start:start + 10])
        for start in range(0, len(self.TEXT), 10)
    ]
    pieces.append(bz2c.flush())
    data = b"".join(pieces)
    assert self.decompress(data) == self.TEXT
def testCompress4G(self, size):
    """Compress *size* bytes of b'x' and verify the decompressed result."""
    bz2c = BZ2Compressor()
    payload = b'x' * size
    try:
        packed = bz2c.compress(payload) + bz2c.flush()
    finally:
        # Drop the reference so the input buffer can be freed early.
        payload = None

    restored = bz2.decompress(packed)
    try:
        self.assertEqual(len(restored), size)
        # Stripping every b'x' must leave nothing behind.
        self.assertEqual(len(restored.strip(b'x')), 0)
    finally:
        restored = None
def testCompress4G(self, size):
    """Test BZ2Compressor.compress()/flush() with >4GiB input.

    The actual input length is whatever *size* the caller supplies.
    """
    bz2c = BZ2Compressor()
    original = b"x" * size
    try:
        compressed = bz2c.compress(original) + bz2c.flush()
    finally:
        original = None  # Release memory

    roundtrip = bz2.decompress(compressed)
    try:
        self.assertEqual(len(roundtrip), size)
        # Nothing but b"x" bytes may come back.
        self.assertEqual(len(roundtrip.strip(b"x")), 0)
    finally:
        roundtrip = None
def create_patch_bsdiff(ffrom, fto, fpatch):
    """Build a patch from *ffrom* to *fto* and write it to *fpatch*.

    A suffix array of the source data is computed with sais.sais() and
    handed to bsdiff.create_patch(). The returned chunks are consumed
    five at a time: items 0, 2 and 4 are size values for the control
    stream, item 1 belongs to the diff stream and item 3 to the extra
    stream. Each stream is bzip2-compressed separately. The output is
    the b'BSDIFF40' magic, the compressed control length, the compressed
    diff length and the size of *fto* (each encoded with offtout()),
    followed by the three compressed streams.
    """
    to_size = file_size(fto)
    from_data = file_read(ffrom)
    suffix_array = sais.sais(from_data)
    to_data = file_read(fto)
    chunks = bsdiff.create_patch(suffix_array, from_data, to_data)

    fctrl = BytesIO()
    fdiff = BytesIO()
    fextra = BytesIO()
    ctrl_compressor = BZ2Compressor()
    diff_compressor = BZ2Compressor()
    extra_compressor = BZ2Compressor()

    def write_ctrl(raw_size):
        # Re-encode one size value and append it to the control stream.
        encoded = offtout(unpack_size_bytes(raw_size))
        fctrl.write(ctrl_compressor.compress(encoded))

    for base in range(0, len(chunks), 5):
        write_ctrl(chunks[base + 0])
        fdiff.write(diff_compressor.compress(chunks[base + 1]))
        write_ctrl(chunks[base + 2])
        fextra.write(extra_compressor.compress(chunks[base + 3]))
        write_ctrl(chunks[base + 4])

    # Drain whatever each compressor is still holding back.
    fctrl.write(ctrl_compressor.flush())
    fdiff.write(diff_compressor.flush())
    fextra.write(extra_compressor.flush())

    # Write everything to the patch file: header first, then the streams.
    header = (b'BSDIFF40',
              offtout(fctrl.tell()),
              offtout(fdiff.tell()),
              offtout(to_size))
    for field in header:
        fpatch.write(field)
    for stream in (fctrl, fdiff, fextra):
        fpatch.write(stream.getvalue())
def __init__(self):
    """Set up the compression callable selected by ``compressor_type``.

    'bzip2' uses a level-9 BZ2Compressor, whose buffered output needs a
    real flush, so ``_flush_passthru`` is False. 'gzip' calls
    zlib.compress() at level 9 on each input. Any other value leaves
    the data untouched. The last two keep no compressor object and set
    ``_flush_passthru`` to True.
    """
    # Start from the settings shared by the stateless variants and let
    # the bzip2 branch override them.
    self._comp = None
    self._flush_passthru = True

    if compressor_type == 'bzip2':
        from bz2 import BZ2Compressor
        self._comp = BZ2Compressor(9)
        self._comp_func = self._comp.compress
        self._flush_passthru = False
    elif compressor_type == 'gzip':
        from zlib import compress
        self._comp_func = lambda x: compress(x, 9)
    else:
        self._comp_func = lambda x: x
def thr_func(sock):
    """Receive a length-prefixed payload on *sock* and send it back bzip2-compressed.

    The peer sends the payload size as decimal digits, then the
    separator byte ``x``, then the payload itself. Compressed output is
    sent back as soon as the compressor produces any, followed by the
    flushed remainder. The socket is always closed before returning.

    Raises ValueError if the connection ends before the size header is
    complete or the header is not a valid integer.
    """
    chunk_size = 4096
    try:
        # The header may arrive split over several packets, so keep
        # reading until the separator shows up.
        data = sock.recv(chunk_size)
        while b'x' not in data:
            more = sock.recv(chunk_size)
            if not more:
                raise ValueError(
                    "connection closed before the size header was complete")
            data += more
        header, _, data = data.partition(b'x')
        remaining = int(header)

        compressor = BZ2Compressor()
        while remaining > 0:
            if not data:
                # recv() returned nothing: the peer closed early. Stop
                # instead of spinning, and flush what we have.
                break
            # Never consume more than the announced payload size.
            data = data[:remaining]
            remaining -= len(data)
            data_c = compressor.compress(data)
            # Send every non-empty piece, including the one produced by
            # the final chunk; dropping it would corrupt the stream.
            if data_c:
                sock.sendall(data_c)
            if remaining > 0:
                data = sock.recv(chunk_size)
        sock.sendall(compressor.flush())
    finally:
        sock.close()
def create_compressor(compression):
    """Return a new compressor object for the named *compression* scheme.

    Recognised names are 'lzma', 'bz2', 'none', 'crle', 'heatshrink',
    'zstd' and 'lz4'. Any other value raises Error with the message
    built by format_bad_compression_string().
    """
    if compression == 'lzma':
        return lzma.LZMACompressor(format=lzma.FORMAT_ALONE)
    if compression == 'bz2':
        return BZ2Compressor()
    if compression == 'none':
        return NoneCompressor()
    if compression == 'crle':
        return CrleCompressor()
    if compression == 'heatshrink':
        return HeatshrinkCompressor()
    if compression == 'zstd':
        return ZstdCompressor()
    if compression == 'lz4':
        return Lz4Compressor()

    raise Error(format_bad_compression_string(compression))
def __init__(self, text):
    """Store *text* and create a new BZ2Compressor with default settings."""
    self.text, self.comp = text, BZ2Compressor()
def testPickle(self):
    """Pickling a BZ2Compressor must raise TypeError under every protocol."""
    protocols = range(pickle.HIGHEST_PROTOCOL + 1)
    for proto in protocols:
        with self.assertRaises(TypeError):
            compressor = BZ2Compressor()
            pickle.dumps(compressor, proto)
def testCompressEmptyString(self):
    """Compressing empty bytes must produce exactly self.EMPTY_DATA."""
    bz2c = BZ2Compressor()
    # The full stream is the compress() output followed by flush().
    stream = bz2c.compress(b'') + bz2c.flush()
    self.assertEqual(stream, self.EMPTY_DATA)
def _reset_compressor(self):
    """Replace the compressor with a fresh level-2 one and clear related state."""
    # New compressor at compression level 2.
    self.compressor = BZ2Compressor(2)

    # Everything accumulated so far is discarded.
    self.compressed_behavior = b''
    self.aggregate = None
    self.step_count = 0
def __init__(self, **args):
    """Create the wrapped BZ2Compressor.

    Keyword arguments:
        ratio: compression level passed to BZ2Compressor. When it is
            missing or None, 9 is used, which is BZ2Compressor's own
            default. Passing None straight through would raise
            TypeError.
    """
    ratio = args.get('ratio')
    if ratio is None:
        ratio = 9
    self.classobj = BZ2Compressor(ratio)
def test_buffer(self):
    """compress() must accept a buffer object wrapping self.TEXT."""
    from bz2 import BZ2Compressor

    bz2c = BZ2Compressor()
    head = bz2c.compress(buffer(self.TEXT))
    tail = bz2c.flush()
    data = "%s%s" % (head, tail)
    assert self.decompress(data) == self.TEXT
def testPickle(self):
    """Pickling a BZ2Compressor must raise TypeError."""
    with self.assertRaises(TypeError):
        compressor = BZ2Compressor()
        pickle.dumps(compressor)
def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
    """Open a bzip2-compressed file.

    *filename* is either a string naming the file to open (an instance
    of one of the types in _STR_TYPES) or an existing file object,
    recognised by having a ``read`` or ``write`` attribute, which is
    then used to read or write the compressed data.

    *mode* can be 'r' for reading (default), 'w' for (over)writing,
    'x' for creating exclusively, or 'a' for appending. These can
    equivalently be given as 'rb', 'wb', 'xb', and 'ab'. The 'x' modes
    are only accepted when _HAS_OPEN_X_MODE is true.

    *buffering* is ignored. Its use is deprecated, and passing anything
    other than None issues a DeprecationWarning.

    If mode is 'w', 'x' or 'a', *compresslevel* can be a number between
    1 and 9 specifying the level of compression: 1 produces the least
    compression, and 9 (default) produces the most compression.

    If mode is 'r', the input file may be the concatenation of multiple
    compressed streams.

    Raises ValueError for an out-of-range *compresslevel* or an
    unrecognised *mode*, and TypeError when *filename* is neither a
    string nor a file object.
    """
    # The lock has to be re-entrant so that BufferedIOBase's readline(),
    # readlines() and writelines() don't deadlock.
    self._lock = RLock()
    self._fp = None
    self._closefp = False
    self._mode = _MODE_CLOSED
    self._pos = 0
    self._size = -1

    if buffering is not None:
        warnings.warn("Use of 'buffering' argument is deprecated",
                      DeprecationWarning)

    if not (1 <= compresslevel <= 9):
        raise ValueError("compresslevel must be between 1 and 9")

    if mode in ("", "r", "rb"):
        mode = "rb"
        mode_code = _MODE_READ
        self._decompressor = BZ2Decompressor()
        self._buffer = b""
        self._buffer_offset = 0
    elif (mode in ("w", "wb", "a", "ab")
          or (mode in ("x", "xb") and _HAS_OPEN_X_MODE)):
        # Normalise to the binary spelling, e.g. "w"/"wb" -> "wb".
        mode = mode[:1] + "b"
        mode_code = _MODE_WRITE
        self._compressor = BZ2Compressor(compresslevel)
    else:
        raise ValueError("Invalid mode: %r" % (mode, ))

    if isinstance(filename, _STR_TYPES):
        self._fp = _builtin_open(filename, mode)
        # We opened the file ourselves, so remember that we own it.
        self._closefp = True
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        self._fp = filename
    else:
        raise TypeError("filename must be a %s or %s object, or a file" %
                        (_STR_TYPES[0].__name__, _STR_TYPES[1].__name__))
    self._mode = mode_code
def testCompressEmptyString(self):
    """Test BZ2Compressor.compress()/flush() of empty string."""
    bz2c = BZ2Compressor()
    # The full stream is the compress() output followed by flush().
    stream = bz2c.compress('') + bz2c.flush()
    self.assertEqual(stream, self.EMPTY_DATA)
def bz2compress(data):
    """Return *data* as one complete bzip2 stream at compression level 9."""
    packer = BZ2Compressor(9)
    body = packer.compress(data)
    # flush() yields whatever the compressor is still buffering.
    trailer = packer.flush()
    return body + trailer
def testCompress(self):
    """Compress self.TEXT and check that ext_decompress() restores it."""
    bz2c = BZ2Compressor()

    # Calling compress() with no data argument is a TypeError.
    self.assertRaises(TypeError, bz2c.compress)

    # The full stream is the compress() output followed by flush().
    compressed = bz2c.compress(self.TEXT) + bz2c.flush()
    self.assertEqual(ext_decompress(compressed), self.TEXT)