def __init__(self):
    if Zstd is None:
        raise UnsupportedCompressionMethodError
    self._ctc = Zstd.ZstdCompressor()  # type: ignore
def compress_stream_writer_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        b = bio()
        with zctx.stream_writer(b, size=len(chunk)) as compressor:
            compressor.write(chunk)
def compress(data, level):
    buffer = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=level)
    with cctx.write_to(buffer) as compressor:
        compressor.write(data)
    return buffer.getvalue()
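# Hedged companion sketch: write_to() is the legacy name of the streaming
# writer in python-zstandard; newer releases expose it as stream_writer().
# An equivalent of the function above for a modern zstandard, flushing the
# frame explicitly so the BytesIO stays readable, might look like this
# (compress_stream_writer is an illustrative name, not the original API):
def compress_stream_writer(data, level):
    buffer = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=level)
    compressor = cctx.stream_writer(buffer)
    compressor.write(data)
    compressor.flush(zstd.FLUSH_FRAME)  # end the frame without closing buffer
    return buffer.getvalue()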
c = zlib.compress(chunk, args.zlib_level)
compressed_discrete_zlib.append(c)
ratios.append(float(len(c)) / float(len(chunk)))

compressed_size = sum(map(len, compressed_discrete_zlib))
ratio = float(compressed_size) / float(orig_size) * 100.0
bad_count = sum(1 for r in ratios if r >= 1.00)
good_ratio = 100.0 - (float(bad_count) / float(len(chunks)) * 100.0)
print(
    "zlib discrete compressed size (l=%d): %d (%.2f%%); smaller: %.2f%%"
    % (args.zlib_level, compressed_size, ratio, good_ratio))

# In discrete mode, each input is compressed independently, possibly
# with a dictionary.
if args.discrete:
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    compressed_discrete = []
    ratios = []

    # Always use multiple threads here so we complete faster.
    if hasattr(zctx, "multi_compress_to_buffer"):
        for i, c in enumerate(
                zctx.multi_compress_to_buffer(chunks, threads=-1)):
            compressed_discrete.append(c.tobytes())
            ratios.append(float(len(c)) / float(len(chunks[i])))
    else:
        for chunk in chunks:
            compressed = zctx.compress(chunk)
            compressed_discrete.append(compressed)
            ratios.append(float(len(compressed)) / float(len(chunk)))

    compressed_size = sum(map(len, compressed_discrete))
def open_tar_zst(path):
    cctx = zstandard.ZstdCompressor()
    with open(path, "wb") as f:
        with cctx.stream_writer(f) as compressor:
            with tarfile.open(mode="w|", fileobj=compressor) as tar:
                yield tar
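# Hedged usage sketch: open_tar_zst() above is a generator, so it is presumably
# decorated with contextlib.contextmanager where it is defined (the decorator
# is not shown here). If it is not, it can be wrapped explicitly as below.
# The archive path and the added directory are illustrative assumptions.
import contextlib

open_tar_zst_cm = contextlib.contextmanager(open_tar_zst)

with open_tar_zst_cm("artifacts.tar.zst") as tar:
    tar.add("artifacts")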
def test_compressobj_empty(self):
    cctx = zstd.ZstdCompressor(level=1)
    cobj = cctx.compressobj()
    self.assertEqual(cobj.compress(b''), b'')
    self.assertEqual(cobj.flush(), b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
def test_empty_roundtrip(self):
    cctx = zstd.ZstdCompressor()
    empty = cctx.compress(b'')
    self.assertEqual(decompress_via_writer(empty), b'')
def compress_content_dict_compress(chunks, zparams):
    zstd.ZstdCompressor(compression_params=zparams).compress(chunks[0])
    for i, chunk in enumerate(chunks[1:]):
        d = zstd.ZstdCompressionDict(chunks[i])
        zstd.ZstdCompressor(dict_data=d,
                            compression_params=zparams).compress(chunk)
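# Hedged note: the benchmark above chains "content dictionaries" -- each chunk
# is compressed using the previous chunk as its dictionary. A minimal
# decompression counterpart might look like this, assuming `frames` holds the
# compressed frames in the same order and that the frames embed their content
# size (the default for one-shot compress()):
def decompress_content_dict(frames, chunks):
    out = [zstd.ZstdDecompressor().decompress(frames[0])]
    for i, frame in enumerate(frames[1:]):
        d = zstd.ZstdCompressionDict(chunks[i])
        out.append(zstd.ZstdDecompressor(dict_data=d).decompress(frame))
    return out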
def save(self, path):
    with open(path, "wb") as fh:
        cctx = zstandard.ZstdCompressor()
        with cctx.stream_writer(fh) as compressor:
            self.imls_log_daily = sparse.COO.from_numpy(self.imls_log_daily)
            compressor.write(pickle.dumps(self, protocol=4))
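# Hedged counterpart sketch: a load() matching save() above, assuming the
# object was pickled into a single zstd stream as shown (pickle and zstandard
# are the same modules save() relies on). Converting imls_log_daily back from
# sparse.COO, if needed, is left to the caller.
def load(path):
    dctx = zstandard.ZstdDecompressor()
    with open(path, "rb") as fh:
        with dctx.stream_reader(fh) as reader:
            data = bytearray()
            while True:
                chunk = reader.read(1048576)
                if not chunk:
                    break
                data.extend(chunk)
    return pickle.loads(bytes(data))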
def test_simple(self):
    data = zstd.ZstdCompressor(level=1).compress(b'foobar')
    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()
    self.assertEqual(dobj.decompress(data), b'foobar')
from enum import Enum, auto
import logging
import asyncio
import struct
import json

import zstandard as zstd

_compressor = zstd.ZstdCompressor()
_decompressor = zstd.ZstdDecompressor()
logger = logging.getLogger(__name__)


class MessageType(Enum):
    REQUEST_REGISTER = auto()
    REQUEST_PUBLISH = auto()
    REQUEST_FILE_LIST = auto()
    REQUEST_FILE_LOCATION = auto()
    REQUEST_CHUNK_REGISTER = auto()
    REPLY_REGISTER = auto()
    REPLY_FILE_LIST = auto()
    REPLY_PUBLISH = auto()
    REPLY_FILE_LOCATION = auto()
    PEER_REQUEST_CHUNK = auto()
    PEER_REPLY_CHUNK = auto()
    PEER_PING_PONG = auto()


def _message_log(message):
    log_message = {key: message[key] for key in message if key != 'data'}
    log_message['type'] = MessageType(message['type']).name
    return log_message
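# Hedged sketch (not part of the original module): given the struct/json/zstd
# imports and the shared _compressor/_decompressor above, message framing
# plausibly amounts to length-prefixed, zstd-compressed JSON. The helper names
# below are hypothetical illustrations, not the module's actual API.
def _encode_message(message):
    payload = _compressor.compress(json.dumps(message).encode("utf-8"))
    return struct.pack("!I", len(payload)) + payload


def _decode_message(raw):
    (length,) = struct.unpack("!I", raw[:4])
    return json.loads(_decompressor.decompress(raw[4:4 + length]))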
def start(self):
    '''
    Prepare for processing based on available input parameters.
    Assume source data is not available at this moment.
    Create output files and internal buffers
    '''
    # create part-file
    base_filename = self._init_params.image_filename.stem
    self._intermediate_file_name = os.path.join(
        self._init_params._output_directory,
        base_filename + '.rc' + str(self._input_params.reduction_level)
        + '_part' + '{0:03d}'.format(self._node_id))
    self._intermediate_file = open(self._intermediate_file_name, 'wb')

    # serialize ReCoDe header
    self._header.serialize_to(self._intermediate_file)

    # serialize source header
    # self._source.serialize_header(str(self._intermediate_file_name))

    # create validation file
    if self._init_params.validation_frame_gap > 0:
        self._validation_file_name = os.path.join(
            self._init_params._output_directory,
            base_filename + '_part' + '{0:03d}'.format(self._node_id)
            + '_validation_frames.bin')
        self._validation_file = open(self._validation_file_name, 'wb')

    # create buffer to hold reduced_compressed data; best to ensure the buffer
    # size is large enough to hold the expected amount of data to be processed
    # by this thread for a single chunk
    self._buffer_sz = 1000
    self._rct_buffer = bytearray(self._buffer_sz)
    self._rct_buffer_fill_position = -1
    self._available_buffer_space = self._buffer_sz

    # self._bytes_per_pixel = np.dtype(get_dtype_string(self._header._rc_header["source_dtype"])).itemsize
    self._bytes_per_pixel = self._src_dtype.itemsize
    self._n_pixels_in_frame = (self._header._rc_header['ny']
                               * self._header._rc_header['nx'])
    self._frame_sz = np.uint64(self._n_pixels_in_frame) * self._bytes_per_pixel
    self._frame_buffer = bytearray(self._buffer_sz)
    self._n_bytes_in_binary_image = math.ceil(self._n_pixels_in_frame / 8)

    if self._init_params.use_C:
        self._c_reader = c_recode.Reader()
        _max_sz = int(math.ceil(
            (self._n_pixels_in_frame
             * self._input_params.source_bit_depth * 1.0) / 8.0))
        self._pixvals = memoryview(
            bytearray(self._n_pixels_in_frame * self._bytes_per_pixel))
        self._packed_pixvals = memoryview(bytearray(_max_sz))

    self._chunk_offset = 0
    self._num_frames_in_part = 0

    # initialize validation counting parameters
    self._vc_roi['nx'] = min(self._header._rc_header['nx'], 128)
    self._vc_roi['ny'] = min(self._header._rc_header['ny'], 128)
    self._vc_roi['x_start'] = math.floor(
        (self._header._rc_header['nx'] - self._vc_roi['nx']) / 2.0)
    self._vc_roi['y_start'] = math.floor(
        (self._header._rc_header['ny'] - self._vc_roi['ny']) / 2.0)
    self._vc_n_pixels = self._vc_roi['nx'] * self._vc_roi['ny']

    if self._input_params.compression_scheme == 1:  # zstd
        self._compressor_context = zstd.ZstdCompressor(
            level=self._input_params.compression_level,
            write_content_size=False)
if __name__ == '__main__':
    conn = psycopg2.connect(
        host="", port="", user="", password="", dbname="feed_archiver"
    )
    cur = conn.cursor()
    cur.execute("SELECT COUNT(*) FROM %s" % TABLE)
    row_count = cur.fetchone()[0]

    cur.execute("DECLARE cur1 CURSOR FOR SELECT * FROM %s" % TABLE)
    rows = pg_fetch_cursor_all(cur, name="cur1", batch_size=5000)

    with open("out_mp.ndjson.zst", "wb") as f:
        cctx = zstd.ZstdCompressor(level=19, threads=THREADS)
        with cctx.stream_writer(f) as compressor:
            for row in tqdm(rows, total=row_count, unit="row"):
                _id, archived_on, data = row
                data["_archived_on"] = int(archived_on.timestamp())
                compressor.write(orjson.dumps(data))
                compressor.write(b"\n")

    conn.close()
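# Hedged read-back sketch: the export above can be consumed again by streaming
# the frame through a decompressor and decoding one JSON document per line.
# The file name mirrors the writer; io is assumed importable here, and zstd
# and orjson refer to the same modules used above.
import io

def read_ndjson_zst(path="out_mp.ndjson.zst"):
    dctx = zstd.ZstdDecompressor()
    with open(path, "rb") as f:
        with dctx.stream_reader(f) as reader:
            for line in io.TextIOWrapper(reader, encoding="utf-8"):
                yield orjson.loads(line)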
def compress_read_to_iter_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        for d in zctx.read_to_iter(chunk, size=len(chunk)):
            pass
def create_zst_file(db_path, content=b'{"Hello": "World"}'):
    with open(db_path, "wb") as output_f:
        cctx = zstandard.ZstdCompressor()
        with cctx.stream_writer(output_f) as compressor:
            compressor.write(content)
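# Hedged verification sketch: reading the file written above back into memory.
# The streaming reader is used so nothing needs to be assumed about whether
# the frame records its content size; read_zst_file is an illustrative name.
def read_zst_file(db_path):
    dctx = zstandard.ZstdDecompressor()
    with open(db_path, "rb") as input_f:
        with dctx.stream_reader(input_f) as reader:
            chunks = []
            while True:
                chunk = reader.read(16384)
                if not chunk:
                    break
                chunks.append(chunk)
    return b"".join(chunks)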
def compress_compressobj_size(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        cobj = zctx.compressobj(size=len(chunk))
        cobj.compress(chunk)
        cobj.flush()
import struct

import pyarrow
import scipy.sparse
import zstandard

# The magic initial bytes which tell us that a given binary chunk is ZStandard
# compressed data
ZSTD_MAGIC_NUMBER = struct.pack('<I', 0xFD2FB528)

compressor = zstandard.ZstdCompressor(level=16)
decompressor = zstandard.ZstdDecompressor()
context = pyarrow.SerializationContext()


def serialize_csc(matrix):
    """
    Decompose a matrix in Compressed Sparse Column format into more basic
    data types (tuples and numpy arrays) which PyArrow knows how to serialize
    """
    return ((matrix.data, matrix.indices, matrix.indptr), matrix.shape)


def deserialize_csc(args):
    """
    Reconstruct a Compressed Sparse Column matrix from its decomposed parts
    """
    return scipy.sparse.csc_matrix(*args)
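# Hedged helper sketch: the ZSTD_MAGIC_NUMBER constant above can be used to
# detect whether a binary chunk is a zstd frame before handing it to the
# shared decompressor. maybe_decompress is an illustrative name, not
# necessarily part of the original module; it assumes frames were produced by
# the one-shot compressor.compress() above (which embeds the content size).
def maybe_decompress(blob):
    if blob[:4] == ZSTD_MAGIC_NUMBER:
        return decompressor.decompress(blob)
    return blob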
def zstd_compress(path):
    cctx = zstandard.ZstdCompressor()
    with open(path, "rb") as input_f:
        with open(f"{path}.zst", "wb") as output_f:
            cctx.copy_stream(input_f, output_f)
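# Hedged counterpart sketch for zstd_compress() above: restore `path` from
# `path.zst`. The input/output naming mirrors the compressor and is otherwise
# an assumption.
def zstd_decompress(path):
    dctx = zstandard.ZstdDecompressor()
    with open(f"{path}.zst", "rb") as input_f:
        with open(path, "wb") as output_f:
            dctx.copy_stream(input_f, output_f)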
def encode_zstd(content: bytes) -> bytes:
    zstd_ctx = zstd.ZstdCompressor()
    return zstd_ctx.compress(content)
def polydata_list_to_json(polydata_list, manager=None):  # noqa: C901
    """Serialize a list of a Python object that represents vtk.js PolyData.

    The returned data is compatible with vtk.js PolyData with compressed
    data buffers.
    """
    if polydata_list is None:
        return None
    else:
        compressor = zstd.ZstdCompressor(level=3)
        json = []
        for polydata in polydata_list:
            json_polydata = dict()
            for top_key, top_value in polydata.items():
                if isinstance(top_value, dict):
                    nested_value_copy = dict()
                    for nested_key, nested_value in top_value.items():
                        if not nested_key == 'values':
                            nested_value_copy[nested_key] = nested_value
                    json_polydata[top_key] = nested_value_copy
                else:
                    json_polydata[top_key] = top_value

            if 'points' in json_polydata:
                point_values = polydata['points']['values']
                compressed = compressor.compress(point_values.data)
                compressedView = memoryview(compressed)
                json_polydata['points']['compressedValues'] = compressedView

            for cell_type in ['verts', 'lines', 'polys', 'strips']:
                if cell_type in json_polydata:
                    values = polydata[cell_type]['values']
                    compressed = compressor.compress(values.data)
                    compressedView = memoryview(compressed)
                    json_polydata[cell_type][
                        'compressedValues'] = compressedView

            for data_type in ['pointData', 'cellData']:
                if data_type in json_polydata:
                    data = polydata[data_type]
                    compressed_data = dict()
                    for nested_key, nested_value in data.items():
                        if not nested_key == 'arrays':
                            compressed_data[nested_key] = nested_value
                    compressed_arrays = []
                    for array in polydata[data_type]['arrays']:
                        compressed_array = dict()
                        for nested_key, nested_value in array['data'].items():
                            if not nested_key == 'values':
                                compressed_array[nested_key] = nested_value
                        values = array['data']['values']
                        compressed = compressor.compress(values.data)
                        compressedView = memoryview(compressed)
                        compressed_array['compressedValues'] = compressedView
                        compressed_arrays.append({'data': compressed_array})
                    compressed_data['arrays'] = compressed_arrays
                    json_polydata[data_type] = compressed_data

            json.append(json_polydata)
        return json
def zstd_compress(body):
    c = zstd.ZstdCompressor()
    return c.compress(body)
def test_level_bounds(self):
    with self.assertRaises(ValueError):
        zstd.ZstdCompressor(level=0)

    with self.assertRaises(ValueError):
        zstd.ZstdCompressor(level=23)
def compress_multi_compress_to_buffer_list(chunks, zparams, threads):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    zctx.multi_compress_to_buffer(chunks, threads=threads)
"needsdiagnosis", "regression", "stepstoreproduce", "spambug", "testlabelselect", "testgroupselect", ] DEFAULT_EXPIRATION_TTL = 7 * 24 * 3600 # A week redis = Redis.from_url(os.environ.get("REDIS_URL", "redis://localhost/0")) MODEL_CACHE: ReadthroughTTLCache[str, Model] = ReadthroughTTLCache( timedelta(hours=1), lambda m: Model.load(f"{m}model")) MODEL_CACHE.start_ttl_thread() cctx = zstandard.ZstdCompressor(level=10) def setkey(key: str, value: bytes, compress: bool = False) -> None: LOGGER.debug(f"Storing data at {key}: {value!r}") if compress: value = cctx.compress(value) redis.set(key, value) redis.expire(key, DEFAULT_EXPIRATION_TTL) def classify_bug(model_name: str, bug_ids: Sequence[int], bugzilla_token: str) -> str: from bugbug_http.app import JobInfo # This should be called in a process worker so it should be safe to set
def compress_stream_reader(chunks, zparams):
    zctx = zstd.ZstdCompressor(compression_params=zparams)
    for chunk in chunks:
        with zctx.stream_reader(chunk) as reader:
            while reader.read(16384):
                pass
def compress_one_use(chunks, zparams):
    for chunk in chunks:
        zctx = zstd.ZstdCompressor(compression_params=zparams)
        zctx.compress(chunk)
def solidCompress(filePath, compressionLevel=18, outputDir=None, threads=-1):
    ncaHeaderSize = 0x4000
    filePath = os.path.abspath(filePath)
    container = Fs.factory(filePath)
    container.open(filePath, 'rb')
    CHUNK_SZ = 0x1000000

    if outputDir is None:
        nszPath = filePath[0:-1] + 'z'
    else:
        nszPath = os.path.join(outputDir, os.path.basename(filePath[0:-1] + 'z'))

    nszPath = os.path.abspath(nszPath)
    nszFilename = os.path.basename(nszPath)

    # Getting title ID to check for NSZ file in the output directory
    # We should still keep this part of title ID comparison because not all
    # files have titleID in filename.
    titleId = ''
    for nspf in container:
        if isinstance(nspf, Fs.Ticket.Ticket):
            nspf.getRightsId()
            titleId = nspf.titleId()
            break  # No need to go for other objects

    Print.info('compressing (level %d) %s -> %s' % (compressionLevel, filePath, nszPath))

    newNsp = Fs.Pfs0.Pfs0Stream(nszPath)

    try:
        for nspf in container:
            if isinstance(nspf, Fs.Nca.Nca) and nspf.header.contentType == Fs.Type.Content.DATA:
                Print.info('skipping delta fragment')
                continue

            if isinstance(nspf, Fs.Nca.Nca) and (nspf.header.contentType == Fs.Type.Content.PROGRAM or nspf.header.contentType == Fs.Type.Content.PUBLICDATA):
                if SectionFs.isNcaPacked(nspf, ncaHeaderSize):
                    newFileName = nspf._path[0:-1] + 'z'
                    f = newNsp.add(newFileName, nspf.size)
                    start = f.tell()

                    nspf.seek(0)
                    f.write(nspf.read(ncaHeaderSize))

                    sections = []
                    for fs in SectionFs.sortedFs(nspf):
                        sections += fs.getEncryptionSections()

                    if len(sections) == 0:
                        raise Exception("NCA can't be decrypted. Outdated keys.txt?")

                    header = b'NCZSECTN'
                    header += len(sections).to_bytes(8, 'little')

                    i = 0
                    for fs in sections:
                        i += 1
                        header += fs.offset.to_bytes(8, 'little')
                        header += fs.size.to_bytes(8, 'little')
                        header += fs.cryptoType.to_bytes(8, 'little')
                        header += b'\x00' * 8
                        header += fs.cryptoKey
                        header += fs.cryptoCounter

                    f.write(header)

                    blockID = 0
                    chunkRelativeBlockID = 0
                    startChunkBlockID = 0
                    blocksHeaderFilePos = f.tell()
                    compressedblockSizeList = []

                    decompressedBytes = ncaHeaderSize

                    with tqdm(total=nspf.size, unit_scale=True, unit="B") as bar:
                        partitions = []
                        for section in sections:
                            # print('offset: %x\t\tsize: %x\t\ttype: %d\t\tiv%s' % (section.offset, section.size, section.cryptoType, str(hx(section.cryptoCounter))))
                            partitions.append(nspf.partition(
                                offset=section.offset,
                                size=section.size,
                                n=None,
                                cryptoType=section.cryptoType,
                                cryptoKey=section.cryptoKey,
                                cryptoCounter=bytearray(section.cryptoCounter),
                                autoOpen=True))

                        partNr = 0
                        bar.update(f.tell())

                        if threads > 1:
                            cctx = zstandard.ZstdCompressor(level=compressionLevel, threads=threads)
                        else:
                            cctx = zstandard.ZstdCompressor(level=compressionLevel)
                        compressor = cctx.stream_writer(f)

                        while True:
                            buffer = partitions[partNr].read(CHUNK_SZ)
                            while len(buffer) < CHUNK_SZ and partNr < len(partitions) - 1:
                                partitions[partNr].close()
                                partitions[partNr] = None
                                partNr += 1
                                buffer += partitions[partNr].read(CHUNK_SZ - len(buffer))
                            if len(buffer) == 0:
                                break
                            compressor.write(buffer)
                            decompressedBytes += len(buffer)
                            bar.update(len(buffer))

                        partitions[partNr].close()
                        partitions[partNr] = None

                    compressor.flush(zstandard.FLUSH_FRAME)
                    compressor.flush(zstandard.COMPRESSOBJ_FLUSH_FINISH)

                    written = f.tell() - start
                    print('compressed %d%% %d -> %d - %s' % (int(written * 100 / nspf.size), decompressedBytes, written, nspf._path))
                    newNsp.resize(newFileName, written)
                    continue
                else:
                    print('not packed!')

            f = newNsp.add(nspf._path, nspf.size)
            nspf.seek(0)
            while not nspf.eof():
                buffer = nspf.read(CHUNK_SZ)
                f.write(buffer)
    except KeyboardInterrupt:
        os.remove(nszPath)
        raise KeyboardInterrupt
    except BaseException as e:
        Print.error(traceback.format_exc())
        os.remove(nszPath)
    finally:
        newNsp.close()
        container.close()

    return nszPath