def process(self, data):
    """Decompress *data* with LZMA, tolerating trailing garbage.

    First tries streaming decompression in 4096-byte chunks. When the
    decompressor chokes, it restarts from the beginning one byte at a
    time so it can pinpoint where valid compressed data ends; if even
    that fails, the partial output is surfaced via RefineryPartialResult.
    """
    keywords = {}
    mode, filters = self._get_lz_mode_and_filters(False)
    if self.args.raw:
        keywords['filters'] = filters
    lz = lzma_.LZMADecompressor(mode, **keywords)
    with MemoryFile() as output:
        pos, size = 0, 4096
        with MemoryFile(data) as stream:
            while not stream.eof and not stream.closed:
                pos = stream.tell()
                try:
                    chunk = lz.decompress(stream.read(size))
                except lzma_.LZMAError as error:
                    self.log_debug(error.args)
                    if size > 1:
                        # Restart with a fresh decompressor and replay
                        # everything successfully decoded so far (pos bytes),
                        # then switch to single-byte reads to find the exact
                        # failure point.
                        lz = lzma_.LZMADecompressor(mode, **keywords)
                        stream.seek(0)
                        output.seek(0)
                        if pos > 0:
                            output.write(lz.decompress(stream.read(pos)))
                        self.log_debug(
                            'decompression error, reverting to one byte at a time'
                        )
                        size = 1
                    else:
                        # Already at one byte per step: give up and report how
                        # much input remained, keeping the partial output.
                        remaining = len(stream.getbuffer()) - pos
                        raise RefineryPartialResult(
                            F'compression failed with {remaining} bytes remaining',
                            output.getvalue())
                else:
                    output.write(chunk)
        return output.getvalue()
def decompress_lzma(data: bytes) -> bytes:
    """decompresses lzma-compressed data

    :param data: compressed data
    :type data: bytes
    :raises _lzma.LZMAError: Compressed data ended before the end-of-stream marker was reached
    :return: uncompressed data
    :rtype: bytes
    """
    # 5-byte header: one properties byte, then a little-endian uint32
    # dictionary size. The raw LZMA1 payload follows at offset 5.
    props, dict_size = struct.unpack("<BI", data[:5])
    # The properties byte encodes (pb * 5 + lp) * 9 + lc.
    remainder, lc = divmod(props, 9)
    pb, lp = divmod(remainder, 5)
    decompressor = lzma.LZMADecompressor(
        format=lzma.FORMAT_RAW,
        filters=[{
            "id": lzma.FILTER_LZMA1,
            "dict_size": dict_size,
            "lc": lc,
            "lp": lp,
            "pb": pb,
        }],
    )
    return decompressor.decompress(data[5:])
def decompress(self, buf):
    """Return the (possibly decompressed) block contents as a stream.

    Uncompressed blocks are returned as-is; LZMA and LZ4 blocks are
    inflated into a fresh BytesIO.
    """
    if not self.compressed:
        return buf
    ctype = self.compression_type
    if ctype == CompressionType.LZMA:
        # 5-byte header: properties byte + little-endian uint32 dict size.
        prop_byte, dict_size = struct.unpack("<BI", buf.read(5))
        remainder, lc = divmod(prop_byte, 9)
        pb, lp = divmod(remainder, 5)
        decompressor = lzma.LZMADecompressor(
            format=lzma.FORMAT_RAW,
            filters=[{
                "id": lzma.FILTER_LZMA1,
                "dict_size": dict_size,
                "lc": lc,
                "lp": lp,
                "pb": pb,
            }])
        return BytesIO(decompressor.decompress(buf.read()))
    if ctype in (CompressionType.LZ4, CompressionType.LZ4HC):
        inflated = lz4_decompress(buf.read(self.compressed_size),
                                  self.uncompressed_size)
        return BytesIO(inflated)
    raise NotImplementedError("Unimplemented compression method: %r" % (ctype))
def Fetch(self, statepath, update=True, logger=NoopLogger()):
    """Fetch this repository's primary repodata and save it to *statepath*."""
    if os.path.isfile(statepath) and not update:
        logger.Log('no update requested, skipping')
        return

    # repomd.xml tells us where the actual primary data file lives.
    repomd_url = self.url + 'repodata/repomd.xml'
    logger.Log('fetching metadata from ' + repomd_url)
    repomd_content = Fetch(repomd_url, check_status=True).text
    repomd_xml = xml.etree.ElementTree.fromstring(repomd_content)

    repodata_url = self.url + repomd_xml.find(
        '{http://linux.duke.edu/metadata/repo}data[@type="primary"]/{http://linux.duke.edu/metadata/repo}location'
    ).attrib['href']

    logger.Log('fetching ' + repodata_url)
    data = Fetch(repodata_url).content
    logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

    if repodata_url.endswith('gz'):
        logger.GetIndented().Log('decompressing with gzip')
        data = gzip.decompress(data)
    elif repodata_url.endswith('xz'):
        logger.GetIndented().Log('decompressing with xz')
        data = lzma.LZMADecompressor().decompress(data)

    logger.GetIndented().Log(
        'size after decompression is {} byte(s)'.format(len(data)))
    logger.GetIndented().Log('saving')
    with StateFile(statepath, 'wb') as statefile:
        statefile.write(data)
def _get_lzma_decompressor(self, coders: List[Dict[str, Any]], unpacksize: int):
    """Build a decompressor for a chain of native lzma coders.

    Returns an LZMA1Decompressor when the chain contains an LZMA1 filter,
    otherwise a raw-format lzma.LZMADecompressor.
    """
    filters = []  # type: List[Dict[str, Any]]
    uses_lzma1 = False
    for coder in coders:
        if coder['numinstreams'] != 1 or coder['numoutstreams'] != 1:
            raise UnsupportedCompressionMethodError(
                'Only a simple compression method is currently supported.')
        if not SupportedMethods.is_native_coder(coder):
            raise UnsupportedCompressionMethodError
        properties = coder.get('properties', None)
        filter_id = SupportedMethods.get_filter_id(coder)
        if filter_id == FILTER_LZMA:
            uses_lzma1 = True
        # Filters are applied in reverse coder order, so prepend.
        if properties is None:
            filters.insert(0, {'id': filter_id})
        else:
            filters.insert(
                0, lzma._decode_filter_properties(filter_id, properties))  # type: ignore
    if uses_lzma1:
        return LZMA1Decompressor(filters, unpacksize)
    return lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
def test_decompression_stream_props(self):
    # test decompression with properties in separate step:
    # feed the 5-byte header first, then the remainder of the stream.
    decompressor = lzma.LZMADecompressor()
    head, tail = self.compressed_stream_xz[:5], self.compressed_stream_xz[5:]
    result = decompressor.decompress(head)
    result += decompressor.decompress(tail)
    result += decompressor.flush()
    self.assertEqual(result, self.plain)
def _decompressor_stream(url, imgdir, decompress):
    """Yield a ``(write, fname)`` pair for saving *url*'s content.

    Data passed to ``write`` is decompressed on the fly (gz/bz2/xz)
    when *decompress* is requested; otherwise it is written verbatim
    and the file keeps its original suffix.
    """
    fd = None
    decompressor = None
    fname, suffix = _url_to_fname_suffix(url, imgdir)
    if decompress and suffix == 'gz':
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    elif decompress and suffix == 'bz2':
        decompressor = bz2.BZ2Decompressor()
    elif decompress and suffix == 'xz':
        decompressor = lzma.LZMADecompressor()
    else:
        # don't remove the file's real suffix
        fname = '%s.%s' % (fname, suffix)

    def write(buff):
        chunk = decompressor.decompress(buff) if decompressor else buff
        fd.write(chunk)

    try:
        fd = open(fname, 'wb')
        yield (write, fname)
    finally:
        if fd:
            fd.close()
def from_bundle(cls, bundle, buf):
    """Construct an asset from *bundle*, reading its header from *buf*."""
    ret = cls()
    offset = buf.tell()

    if bundle.compressed:
        header_size = bundle.asset_header_size
    else:
        ret.name = buf.read_string()
        header_size = buf.read_uint()
        size = buf.read_uint()  # advances the stream; value unused here

    # FIXME: this offset needs to be explored more
    ofs = buf.tell()
    if bundle.compressed:
        decompressor = lzma.LZMADecompressor()
        decompressed = decompressor.decompress(buf.read())
        data = BytesIO(decompressed[header_size:])
    else:
        if ret.is_resource:
            buf.seek(offset + header_size - 4 - len(ret.name))
        else:
            buf.seek(offset + header_size - 4)
        data = BytesIO(buf.read())
    ret.data = BinaryReader(data, endian=">")
    buf.seek(ofs)
    ret.bundle = bundle
    ret.environment = bundle.environment
    return ret
def unzip(data: bytes) -> bytes:
    """Return the decompressed payload of *data*.

    Handles two containers:

    * Valve's "VZ" LZMA container: magic ``VZ`` + version ``a`` + 4 bytes,
      then 5 bytes of LZMA1 filter properties (bytes 7..11), the raw
      LZMA1 stream, and a 10-byte footer (CRC32 + size + ``zv``).
    * A regular ZIP archive, whose first member is extracted.

    Input that is neither VZ nor a valid ZIP is returned unchanged.

    :raises RuntimeError: on a malformed VZ header/footer or CRC mismatch.
    """
    if data[:2] == b"VZ":
        if data[-2:] != b"zv":
            raise RuntimeError(f"VZ: Invalid footer: {data[-2:]!r}")
        if data[2:3] != b"a":
            raise RuntimeError(f"VZ: Invalid version: {data[2:3]!r}")

        filters = (lzma._decode_filter_properties(lzma.FILTER_LZMA1, data[7:12]), )  # type: ignore
        decompressor = lzma.LZMADecompressor(lzma.FORMAT_RAW, filters=filters)

        checksum, decompressed_size = struct.unpack("<II", data[-10:-2])
        # The compressed stream sits between the 12-byte header and the
        # 10-byte footer. The previous slice bound of -9 wrongly included
        # one footer byte (harmless only because max_length stopped the
        # decompressor early); -10 is the correct end of the payload.
        data = decompressor.decompress(data[12:-10], max_length=decompressed_size)
        if crc32(data) != checksum:
            raise RuntimeError(
                "VZ: CRC32 checksum doesn't match for decompressed data")
    else:
        try:
            with ZipFile(BytesIO(data)) as zf:
                data = zf.read(zf.filelist[0])
        except BadZipFile:
            pass
    return data
def decode_file(path): basename = os.path.altsep.join(os.path.splitext(path)[0].split(os.path.altsep)[1:]) decodedname = f'csv/decoded/{basename}.csv' with open(path, 'rb') as f: data = f.read() tempdata = bytearray() for i in range(0, 8): tempdata.append(data[i]) for i in range(0, 4): tempdata.append(0) for i in range(8, len(data)): tempdata.append(data[i]) try: with open(decodedname, 'wb') as f: decompressor = lzma.LZMADecompressor() unpack_data = decompressor.decompress(tempdata) f.write(unpack_data) except: print("invalid input:", path)
def unpack(self):
    """Rebuild partition images from an update payload.

    Appears to process an Android OTA payload described by an
    update_metadata protobuf (TODO confirm): for each partition, every
    full-image operation (REPLACE / REPLACE_BZ / REPLACE_XZ) is applied
    in order; other operation types are silently skipped. Only payload
    major version 2 is handled.

    Returns a list of FileResult objects, one per unpacked partition.
    """
    unpacked_files = []
    if self.data.major_version == 2:
        for partition in self.manifest_data.partitions:
            out_labels = []
            file_path = partition.partition_name
            outfile_rel = self.rel_unpack_dir / file_path
            outfile_full = self.scan_environment.unpack_path(outfile_rel)
            # Make sure the destination directory exists before writing.
            os.makedirs(outfile_full.parent, exist_ok=True)
            outfile = open(outfile_full, 'wb')
            for operation in partition.operations:
                # Each operation's blob sits at a fixed offset inside the payload.
                self.infile.seek(self.start_of_payload + operation.data_offset)
                data = self.infile.read(operation.data_length)
                if operation.type == update_metadata_pb2.InstallOperation.Type.REPLACE:
                    outfile.write(data)
                elif operation.type == update_metadata_pb2.InstallOperation.Type.REPLACE_BZ:
                    decompressor = bz2.BZ2Decompressor()
                    outfile.write(decompressor.decompress(data))
                elif operation.type == update_metadata_pb2.InstallOperation.Type.REPLACE_XZ:
                    decompressor = lzma.LZMADecompressor()
                    outfile.write(decompressor.decompress(data))
                    pass
                else:
                    # Diff-based operations (e.g. BSDIFF) are not supported.
                    pass
            outfile.close()
            fr = FileResult(self.fileresult, self.rel_unpack_dir / file_path,
                            set(out_labels))
            unpacked_files.append(fr)
    return unpacked_files
def parse_bi5(bi5, last_date, point=5):
    """Parse a Dukascopy-style .bi5 blob into an OHLCV DataFrame.

    Each 24-byte record holds six big-endian uint32 values: a seconds
    offset from *last_date*, then open/high/low/close (scaled down by
    ``10**point``) and the volume. The frame is indexed by timestamp.
    """
    content = lzma.LZMADecompressor().decompress(bi5)
    scale = 10 ** point
    record = struct.Struct('>6L')

    index, opens, highs, lows, closes, volumes = [], [], [], [], [], []
    for start in range(0, len(content), 24):
        secs, o_raw, h_raw, l_raw, c_raw, vol = record.unpack(
            content[start:start + 24])
        index.append(last_date + timedelta(seconds=secs))
        opens.append(o_raw / scale)
        highs.append(h_raw / scale)
        lows.append(l_raw / scale)
        closes.append(c_raw / scale)
        volumes.append(vol)

    return pd.DataFrame(data={
        'open': opens,
        'high': highs,
        'low': lows,
        'close': closes,
        'volume': volumes
    }, index=index)
def _decompress(self, chunk_size=32768): if self.compression == "lzma": # create a bytes stream to store the uncompressed cluster data self.buffer = io.BytesIO() decompressor = lzma.LZMADecompressor() # prepare the decompressor # move the file pointer to the start of the blobs as long as we # don't reach the end of the stream. self.file.seek(self.offset + 1) while not decompressor.eof: chunk = self.file.read(chunk_size) # read in a chunk data = decompressor.decompress(chunk) # decompress the chunk self.buffer.write(data) # and store it in the buffer area elif self.compression == "zstd": # create a bytes stream to store the uncompressed cluster data self.buffer = io.BytesIO() decompressor = zstandard.ZstdDecompressor().decompressobj( ) # prepare the decompressor # move the file pointer to the start of the blobs as long as we # don't reach the end of the stream. self.file.seek(self.offset + 1) while True: chunk = self.file.read(chunk_size) # read in a chunk try: data = decompressor.decompress( chunk) # decompress the chunk self.buffer.write(data) # and store it in the buffer area except zstandard.ZstdError as e: break
def update(self, desc, off, size, workdir):
    """Carve an LZMA stream out of self.binfile at *off* and extract it.

    The data is decompressed 16 bytes at a time, stopping at the first
    decoder error (so trailing non-LZMA data is ignored); the result is
    then handed to a recursive Extractor run in a temporary directory.
    """
    reader = binwalk.core.common.BlockFile(self.binfile)
    reader.seek(off)
    data = reader.read()

    temp_dir = make_tempdir("_tmpx")
    decomp = lzma.LZMADecompressor()
    stride = 0x10
    with open(os.path.join(temp_dir, "tmp"), 'wb') as out:
        for start in range(0, len(data), stride):
            try:
                unpacked = decomp.decompress(
                    binwalk.core.compat.str2bytes(data[start:start + stride]))
            except (lzma.LZMAError, EOFError):
                # End of the valid LZMA stream reached.
                break
            out.write(unpacked)

    self.index += 1
    Extractor(os.path.join(temp_dir, "tmp"),
              toplevel=temp_dir,
              recursion_level=self.level + 1).extract(workdir,
                                                      extra_file_dir=False)
    self.workspace_cleanup(workdir)
    if not DEBUG:
        shutil.rmtree(temp_dir)
def do_fetch(self, statefile, logger):
    """Fetch the repository's primary repodata and write it to *statefile*."""
    # repomd.xml is the index that points at the primary data file.
    repomd_url = self.url + 'repodata/repomd.xml'
    logger.Log('fetching metadata from ' + repomd_url)
    repomd_xml = xml.etree.ElementTree.fromstring(
        fetch(repomd_url, check_status=True).text)

    repodata_url = self.url + repomd_xml.find(
        '{http://linux.duke.edu/metadata/repo}data[@type="primary"]/{http://linux.duke.edu/metadata/repo}location'
    ).attrib['href']

    logger.Log('fetching ' + repodata_url)
    data = fetch(repodata_url).content
    logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

    if repodata_url.endswith('gz'):
        logger.GetIndented().Log('decompressing with gzip')
        data = gzip.decompress(data)
    elif repodata_url.endswith('xz'):
        logger.GetIndented().Log('decompressing with xz')
        data = lzma.LZMADecompressor().decompress(data)

    logger.GetIndented().Log(
        'size after decompression is {} byte(s)'.format(len(data)))
    logger.GetIndented().Log('saving')
    statefile.write(data)
def _create_decompressor(self, alg): if alg == "snappy": return snappy.StreamDecompressor() elif alg == "lzma": return lzma.LZMADecompressor() raise InvalidConfigurationError( "invalid compression algorithm: {!r}".format(alg))
def test_decompression_stream_two(self):
    # test decompression in two steps: split the stream at an arbitrary
    # point and feed both halves to the same decompressor.
    decompressor = lzma.LZMADecompressor()
    first, second = self.compressed_stream_xz[:10], self.compressed_stream_xz[10:]
    result = decompressor.decompress(first)
    result += decompressor.decompress(second)
    result += decompressor.flush()
    self.assertEqual(result, self.plain)
def _decompressor_stream(self):  # pylint: disable=too-many-branches
    """Yield a ``(write, fname)`` pair for downloading with optional
    on-the-fly decompression.

    The compression setting is read from the job parameters; ramdisks
    are never decompressed here because they may be used compressed.
    The yielded ``write`` callable inflates gz/bz2/xz data as it is
    written to the destination file.

    :raises JobError: if the target is a directory or decompression fails.
    :raises InfrastructureError: if the target file cannot be opened.
    """
    dwnld_file = None
    compression = False
    if 'images' in self.parameters and self.key in self.parameters[
            'images']:
        compression = self.parameters['images'][self.key].get(
            'compression', False)
    else:
        if self.key == 'ramdisk':
            self.logger.debug(
                "Not decompressing ramdisk as can be used compressed.")
        else:
            compression = self.parameters[self.key].get(
                'compression', False)

    fname, _ = self._url_to_fname_suffix(self.path, compression)
    if os.path.isdir(fname):
        raise JobError("Download '%s' is a directory, not a file" % fname)
    if os.path.exists(fname):
        os.remove(fname)

    decompressor = None
    if compression:
        if compression == 'gz':
            decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        elif compression == 'bz2':
            decompressor = bz2.BZ2Decompressor()
        elif compression == 'xz':
            decompressor = lzma.LZMADecompressor()
        self.logger.debug("Using %s decompression" % compression)
    else:
        self.logger.debug("No compression specified.")

    def write(buff):
        if decompressor:
            try:
                buff = decompressor.decompress(buff)
            except EOFError as eof_exc:
                # EOFError can be raised when decompressing a bz2 archive
                # generated by pbzip2. If there is something in unused_data
                # try to continue decompression.
                if compression == 'bz2' and decompressor.unused_data:
                    buff = decompressor.unused_data
                else:
                    error_message = str(eof_exc)
                    self.logger.exception(error_message)
                    raise JobError(error_message)
            # BUG FIX: the Python 3 stdlib lzma module has no 'error'
            # attribute (that name exists only in the py2 pyliblzma
            # backport), so referencing lzma.error here raised
            # AttributeError whenever any exception reached this handler.
            # lzma.LZMAError is the correct exception type.
            except (IOError, lzma.LZMAError, zlib.error) as exc:
                error_message = str(exc)
                self.logger.exception(error_message)
                raise JobError(error_message)
        dwnld_file.write(buff)

    try:
        with open(fname, 'wb') as dwnld_file:
            yield (write, fname)
    except (IOError, OSError) as exc:
        msg = "Unable to open %s: %s" % (fname, exc.strerror)
        self.logger.error(msg)
        raise InfrastructureError(msg)
def open_packages_url(url):
    """Open a Packages file pointed to by a URL.

    Tries the compressed variants first (url + '.xz', '.bz2', '.gz') and
    finally the bare URL; the first one that opens wins. Returns a tuple
    ``(final_url, stream)`` where the stream yields decompressed data.

    :raises urllib2.HTTPError: if every candidate URL failed to open.
    """
    socket = None
    for ext in ['.xz', '.bz2', '.gz', '']:
        try:
            socket = urllib2.urlopen(url + ext)
        except urllib2.HTTPError as httperror:
            pass
        else:
            break
    if socket is None:
        # Every variant failed: re-raise the last HTTP error.
        raise httperror
    url = socket.geturl()
    if ext == '.bz2':
        decompressor = bz2.BZ2Decompressor()
        decompressed = DecompressedStream(socket, decompressor)
    elif ext == '.gz':
        decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
        decompressed = DecompressedStream(socket, decompressor)
    elif ext == '.xz':
        decompressor = lzma.LZMADecompressor()
        decompressed = DecompressedStream(socket, decompressor)
    elif ext == '':
        decompressed = socket
    else:
        # BUG FIX: the original did ``raise ext``, which raises a plain
        # string and is itself a TypeError. The branch is unreachable for
        # the extension list above, but fail loudly if it ever runs.
        raise ValueError('unexpected extension: %r' % (ext,))
    return (url, decompressed)
def lzma_decompress(buf):
    """Best-effort LZMA decompression.

    Returns b'' instead of raising on malformed input; decoder memory
    use is capped at 256 MiB.
    """
    decompressor = lzma.LZMADecompressor(memlimit=0x10000000)
    try:
        return decompressor.decompress(buf)
    except lzma.LZMAError:
        return b''
def benchmark_lzma_decompressor():
    """Benchmark lzma.LZMADecompressor throughput and memory usage.

    Reads the file named by sys.argv[1] in 32 MiB chunks, round-trips
    each chunk through lzma.compress and a fresh LZMADecompressor, and
    prints elapsed time plus max RSS roughly every 5 seconds and again
    at the end.
    """
    print("== Benchmark LZMADecompressor ==")
    size = 0
    t0 = time.time()
    with open(sys.argv[1], 'rb') as file:
        t1 = time.time()
        while True:
            data = file.read(32*1024*1024)
            compressed = lzma.compress(data)
            # A fresh decompressor per chunk also measures setup cost.
            decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_XZ)
            data = decompressor.decompress(compressed)
            readSize = len(data)
            if readSize == 0:
                break
            size += readSize
            # Progress report roughly every five seconds.
            if time.time() - t1 > 5:
                t1 = time.time()
                print(f"{t1 - t0:.2f}s {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1024} MiB RSS")
        t1 = time.time()
    print(f"After closing file: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1024} MiB RSS")
    gc.collect()
    print(f"After garbage collection: {resource.getrusage(resource.RUSAGE_SELF).ru_maxrss // 1024} MiB RSS")
    print(f"Reading {size} B took: {t1-t0}s")
def process(self, data):
    """Decompress *data* in a single pass using the unit's LZMA settings."""
    options = {}
    mode, filters = self._get_lz_mode_and_filters(False)
    if self.args.raw:
        options['filters'] = filters
    decompressor = lzma_.LZMADecompressor(mode, **options)
    return decompressor.decompress(data)
def test_simple_compress_and_decompress():
    """Round-trip three writes through SevenZipCompressor and verify both
    the raw lzma decoder and SevenZipDecompressor reproduce the input."""
    filters = [
        {
            "id": lzma.FILTER_LZMA2,
            "preset": 7 | lzma.PRESET_DEFAULT
        },
    ]
    lzc = py7zr.compressor.SevenZipCompressor(filters=filters)
    outbuf = io.BytesIO()
    _, _, _ = lzc.compress(io.BytesIO(b"Some data\n"), outbuf)
    _, _, _ = lzc.compress(io.BytesIO(b"Another piece of data\n"), outbuf)
    _, _, _ = lzc.compress(io.BytesIO(b"Even more data\n"), outbuf)
    _ = lzc.flush(outbuf)
    result = outbuf.getvalue()
    size = len(result)
    # filters = lzc.filters
    decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW,
                                         filters=filters)
    out5 = decompressor.decompress(result)
    assert out5 == b"Some data\nAnother piece of data\nEven more data\n"
    # BUG FIX: this assignment was commented out, leaving 'coders'
    # undefined and making the SevenZipDecompressor call below fail
    # with NameError.
    coders = lzc.coders
    crc = py7zr.helpers.calculate_crc32(result)
    decompressor = py7zr.compressor.SevenZipDecompressor(
        coders, size, [len(out5)], crc)
    outbuf.seek(0, 0)
    out6 = decompressor.decompress(outbuf)
    assert out6 == b"Some data\nAnother piece of data\nEven more data\n"
def __init__(self, coders: List[Dict[str, Any]], size: int, crc: Optional[int]) -> None:
    """Set up a decompressor for a 7z coder chain.

    :param coders: coder descriptors from the archive header; each must
        have exactly one input and one output stream
    :param size: compressed (input) size in bytes
    :param crc: expected CRC of the decompressed data, or None
    :raises UnsupportedCompressionMethodError: for multi-stream coders or
        methods that are neither native lzma filters nor bzip2
    """
    self.input_size = size
    self.consumed = 0  # type: int
    self.crc = crc
    self.digest = None  # type: Optional[int]
    filters = []  # type: List[Dict[str, Any]]
    try:
        # First, try to map every coder onto a native lzma filter chain.
        for coder in coders:
            if coder['numinstreams'] != 1 or coder['numoutstreams'] != 1:
                raise UnsupportedCompressionMethodError('Only a simple compression method is currently supported.')
            filter = self.lzma_methods_map.get(coder['method'], None)
            if filter is not None:
                properties = coder.get('properties', None)
                if properties is not None:
                    # Filters apply in reverse coder order, hence the prepend.
                    filters[:0] = [lzma._decode_filter_properties(filter, properties)]  # type: ignore
                else:
                    filters[:0] = [{'id': filter}]
            else:
                raise UnsupportedCompressionMethodError
    except UnsupportedCompressionMethodError as e:
        # Fallback: a single coder using an alternative method (bzip2).
        filter = self.alt_methods_map.get(coders[0]['method'], None)
        if len(coders) == 1 and filter is not None:
            if filter == self.FILTER_BZIP2:
                self.decompressor = bz2.BZ2Decompressor()  # type: Union[bz2.BZ2Decompressor, lzma.LZMADecompressor]
            else:
                raise e
            self.can_partial_decompress = False
        else:
            raise e
    else:
        self.decompressor = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=filters)
        self.can_partial_decompress = True
    self.filters = filters
def _decompressor_stream(self):
    """Yield a ``(write, fname)`` pair that stores data to *fname*,
    transparently decompressing gz/bz2/xz input."""
    fd = None
    decompressor = None
    decompress = True  # FIXME: get from job.parameters
    fname, suffix = self._url_to_fname_suffix()  # FIXME: use the context tmpdir

    factories = {
        'gz': lambda: zlib.decompressobj(16 + zlib.MAX_WBITS),
        'bz2': bz2.BZ2Decompressor,
        'xz': lzma.LZMADecompressor,
    }
    if decompress and suffix in factories:
        decompressor = factories[suffix]()
    else:
        # don't remove the file's real suffix
        fname = '%s.%s' % (fname, suffix)

    def write(buff):
        fd.write(decompressor.decompress(buff) if decompressor else buff)

    try:
        fd = open(fname, 'wb')
        yield (write, fname)
    finally:
        if fd:
            fd.close()
def Fetch(self, statepath, update=True, logger=NoopLogger()):
    """Download self.url, decompress per self.compression, and atomically
    replace *statepath* with the result."""
    tmppath = statepath + '.tmp'

    if os.path.isfile(statepath) and not update:
        logger.Log('no update requested, skipping')
        return

    with open(tmppath, 'wb') as statefile:
        logger.Log('fetching ' + self.url)
        data = Get(self.url).content
        logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

        if self.compression == 'gz':
            logger.GetIndented().Log('decompressing with gzip')
            data = gzip.decompress(data)
        elif self.compression == 'bz2':
            logger.GetIndented().Log('decompressing with bz2')
            data = bz2.decompress(data)
        elif self.compression == 'xz':
            logger.GetIndented().Log('decompressing with xz')
            data = lzma.LZMADecompressor().decompress(data)

        if self.compression:
            logger.GetIndented().Log(
                'size after decompression is {} byte(s)'.format(len(data)))

        logger.GetIndented().Log('saving')
        statefile.write(data)

    # Atomic publish: only replace the state file once fully written.
    os.replace(tmppath, statepath)
def decompress_lump(reader: ByteIO) -> ByteIO:
    """Decompress an LZMA-compressed lump.

    Layout: 'LZMA' fourcc, uint32 decompressed size, uint32 compressed
    size, 5 bytes of LZMA1 filter properties, then the compressed data,
    which may consist of several back-to-back raw streams.

    :raises lzma.LZMAError: if the very first stream is not valid LZMA data
    """
    magic = reader.read_fourcc()
    assert magic == 'LZMA', f'Invalid LZMA compressed header: {magic}'
    decompressed_size = reader.read_uint32()
    compressed_size = reader.read_uint32()
    filter_properties = lzma._decode_filter_properties(
        lzma.FILTER_LZMA1, reader.read(5))
    compressed_buffer = reader.read(compressed_size)
    decompressed_buffer = bytearray()
    # Decode one raw stream per iteration; any leftover input after a
    # stream ends is fed to a fresh decompressor.
    while True:
        decompressor = lzma.LZMADecompressor(lzma.FORMAT_RAW,
                                             filters=(filter_properties, ))
        try:
            result = decompressor.decompress(compressed_buffer)
        except lzma.LZMAError:
            if not decompressed_buffer:
                raise  # Error on the first iteration; bail out.
            break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
        decompressed_buffer.extend(result)
        compressed_buffer = decompressor.unused_data
        if not compressed_buffer:
            break
        assert decompressor.eof, 'Compressed data ended before the end-of-stream marker was reached'
    # Trim any over-read, then verify the header's promised size.
    decompressed_buffer = decompressed_buffer[:decompressed_size]
    assert decompressed_size == len(
        decompressed_buffer
    ), 'Decompressed data does not match the expected size'
    return ByteIO(decompressed_buffer)
def do_fetch(self, statefile, logger):
    """Fetch self.url (optionally with POST data and extra headers),
    decompress according to self.compression, and write to *statefile*."""
    fetching_what = [self.url]
    if isinstance(self.post, dict):
        fetching_what.append('{} fields of form data'.format(len(self.post)))
    elif self.post:
        fetching_what.append('{} bytes of post data'.format(len(self.post)))
    if self.headers:
        fetching_what.append('{} extra headers'.format(len(self.headers)))
    logger.Log('fetching ' + ', with '.join(fetching_what))

    response = do_http(self.url,
                       data=self.post,
                       headers=self.headers,
                       timeout=self.fetch_timeout)
    data = response.content
    logger.GetIndented().Log('size is {} byte(s)'.format(len(data)))

    if self.compression == 'gz':
        logger.GetIndented().Log('decompressing with gzip')
        data = gzip.decompress(data)
    elif self.compression == 'bz2':
        logger.GetIndented().Log('decompressing with bz2')
        data = bz2.decompress(data)
    elif self.compression == 'xz':
        logger.GetIndented().Log('decompressing with xz')
        data = lzma.LZMADecompressor().decompress(data)

    if self.compression:
        logger.GetIndented().Log(
            'size after decompression is {} byte(s)'.format(len(data)))

    logger.GetIndented().Log('saving')
    statefile.write(data)
def from_bundle(cls, bundle, buf):
    """Create an asset backed by *bundle*, positioning its reader."""
    ret = cls()
    ret.bundle = bundle
    ret.environment = bundle.environment
    offset = buf.tell()
    ret._buf = BinaryReader(buf, endian=">")

    # UnityFS bundles: the reader simply starts at the current position.
    if bundle.is_unityfs:
        ret._buf_ofs = buf.tell()
        return ret

    if bundle.compressed:
        header_size = bundle.asset_header_size
    else:
        ret.name = buf.read_string()
        header_size = buf.read_uint()
        buf.read_uint()  # size

    # FIXME: this offset needs to be explored more
    ofs = buf.tell()
    if bundle.compressed:
        decompressed = lzma.LZMADecompressor().decompress(buf.read())
        ret._buf = BinaryReader(BytesIO(decompressed[header_size:]), endian=">")
        ret._buf_ofs = 0
        buf.seek(ofs)
    else:
        ret._buf_ofs = offset + header_size - 4
        if ret.is_resource:
            ret._buf_ofs -= len(ret.name)
    return ret
def unprocess(self, data):
    """Decompress *data*, which may contain multiple concatenated LZMA streams.

    Each stream is decoded with a fresh FORMAT_AUTO decompressor; the
    next stream is taken from the previous decompressor's unused_data.
    If the very first stream fails, self.error is set and the bare
    ``return`` yields None; a failure in a later stream keeps the
    partial result. A missing end-of-stream marker is recorded in
    self.error, but the data decoded so far is still returned.
    """
    super(DeenPluginLzma, self).unprocess(data)
    if not lzma:
        self.log.error('lzma module not found')
        return data
    results = []
    while True:
        decomp = lzma.LZMADecompressor(lzma.FORMAT_AUTO, None, None)
        try:
            res = decomp.decompress(data)
        except lzma.LZMAError as e:
            self.log.error(e)
            self.log.debug(e, exc_info=True)
            if results:
                # Trailing corrupt stream: keep what we have so far.
                break
            else:
                self.error = e
                return
        results.append(res)
        data = decomp.unused_data
        if not data:
            break
    if not decomp.eof:
        ex = lzma.LZMAError('Compressed data ended before the end-of-stream marker was reached')
        self.error = ex
        self.log.error(self.error)
        self.log.debug(self.error, exc_info=True)
    data = b''.join(results)
    return data