def _compress(self, data, action=m.LZMA_RUN):
    """Run *data* through the encoder stream and return the bytes produced.

    *action* is handed to ``lzma_code`` on every call.  The loop ends when
    all input has been consumed (``LZMA_RUN``) or when the call reports
    ``LZMA_STREAM_END`` (``LZMA_FINISH``).  Output is collected in a list of
    fixed-size chunks that are joined at the end.
    """
    # TODO use realloc like in LZMADecompressor
    BUFSIZ = 8192

    lzs = self.lzs

    # Convert once and size the input from the converted bytes, so that
    # avail_in always describes the buffer that was actually allocated.
    raw = to_bytes(data)
    # ``input_`` is not read again, but it must stay referenced until the
    # function returns: lzs.next_in only holds a raw pointer into it.
    lzs.next_in = input_ = ffi.new('uint8_t[]', raw)
    lzs.avail_in = input_len = len(raw)
    outs = [ffi.new('uint8_t[]', BUFSIZ)]
    lzs.next_out, = outs
    lzs.avail_out = BUFSIZ

    # Size of the most recently allocated output chunk.
    siz = BUFSIZ

    while True:
        ret = catch_lzma_error(
            m.lzma_code, lzs, action,
            ignore_buf_error=(input_len == 0 and lzs.avail_out > 0))
        if (action == m.LZMA_RUN and lzs.avail_in == 0) or \
                (action == m.LZMA_FINISH and ret == m.LZMA_STREAM_END):
            break
        elif lzs.avail_out == 0:
            # ran out of space in the output buffer
            siz = 512
            outs.append(ffi.new('uint8_t[]', siz))
            lzs.next_out = outs[-1]
            lzs.avail_out = siz

    # A new chunk is only appended once the previous one is full, so every
    # chunk but the last is copied whole; the last holds siz - avail_out
    # bytes.
    last_out = outs.pop()
    last_out_len = siz - lzs.avail_out
    last_out_piece = ffi.buffer(last_out[0:last_out_len], last_out_len)[:]

    return b''.join(ffi.buffer(nn)[:] for nn in outs) + last_out_piece
def __init__(self, format=FORMAT_AUTO, memlimit=None, filters=None,
             header=None, check=None, unpadded_size=None):
    """Validate the arguments and initialise a liblzma decoder for *format*.

    Raises ValueError when:
      * *memlimit* is given together with FORMAT_RAW;
      * *filters* is missing for FORMAT_RAW, or given for any other format;
      * *header*, *unpadded_size* and *check* are not all given for
        FORMAT_BLOCK, or any of them is given for another format;
      * the parsed format matches none of the handled container formats.
    """
    decoder_flags = m.LZMA_TELL_ANY_CHECK | m.LZMA_TELL_NO_CHECK

    # --- argument validation (performed on the format as passed in) ---
    if memlimit is None:
        memlimit = m.UINT64_MAX
    elif format == FORMAT_RAW:
        raise ValueError("Cannot specify memory limit with FORMAT_RAW")

    if format == FORMAT_RAW:
        if filters is None:
            raise ValueError("Must specify filters for FORMAT_RAW")
    elif filters is not None:
        raise ValueError("Cannot specify filters except with FORMAT_RAW")

    block_args = (header, unpadded_size, check)
    if format == FORMAT_BLOCK:
        if any(arg is None for arg in block_args):
            raise ValueError("Must specify header, unpadded_size and check "
                             "with FORMAT_BLOCK")
    elif any(arg is not None for arg in block_args):
        raise ValueError("Cannot specify header, unpadded_size or check "
                         "except with FORMAT_BLOCK")

    format = _parse_format(format)

    # --- instance state ---
    self.lock = threading.Lock()
    self.check = CHECK_UNKNOWN
    self.unused_data = b''
    self.eof = False
    self.lzs = _new_lzma_stream()
    self._bufsiz = max(8192, io.DEFAULT_BUFFER_SIZE)
    self.needs_input = True
    self._input_buffer = ffi.NULL
    self._input_buffer_size = 0

    # --- decoder selection ---
    if format == FORMAT_AUTO:
        catch_lzma_error(m.lzma_auto_decoder, self.lzs, memlimit,
                         decoder_flags)
    elif format == FORMAT_XZ:
        catch_lzma_error(m.lzma_stream_decoder, self.lzs, memlimit,
                         decoder_flags)
    elif format == FORMAT_ALONE:
        self.check = CHECK_NONE
        catch_lzma_error(m.lzma_alone_decoder, self.lzs, memlimit)
    elif format == FORMAT_RAW:
        self.check = CHECK_NONE
        filters = parse_filter_chain_spec(filters)
        catch_lzma_error(m.lzma_raw_decoder, self.lzs, filters)
    elif format == FORMAT_BLOCK:
        # The block struct and its filter array are stored on the instance
        # as well as used locally.
        block = ffi.new('lzma_block*')
        self.__block = block
        block.version = 0
        block.check = check
        block.header_size = len(header)
        self.__filters = ffi.new('lzma_filter[]', m.LZMA_FILTERS_MAX+1)
        block.filters = self.__filters
        header_b = ffi.new('char[]', to_bytes(header))
        catch_lzma_error(m.lzma_block_header_decode, block,
                         self.lzs.allocator, header_b)
        if unpadded_size is not None:
            catch_lzma_error(m.lzma_block_compressed_size, block,
                             unpadded_size)
        self.expected_size = block.compressed_size
        catch_lzma_error(m.lzma_block_decoder, self.lzs, block)
    else:
        raise ValueError("invalid container format: %s" % format)
def decode_index(s, stream_padding=0):
    """Decode the encoded index in *s* and wrap the result in an Index.

    The decoder is called with a memory limit of UINT64_MAX and a NULL
    allocator; *stream_padding* is passed through to Index unchanged.
    """
    allocator = ffi.NULL

    # Out-parameters for lzma_index_buffer_decode.
    index_out = ffi.new('lzma_index**')
    limit = ffi.new('uint64_t*', m.UINT64_MAX)
    position = ffi.new('size_t*', 0)

    encoded = ffi.new('char[]', to_bytes(s))
    catch_lzma_error(m.lzma_index_buffer_decode, index_out, limit,
                     allocator, encoded, position, len(s))
    return Index(index_out[0], allocator, stream_padding)
def _encode_filter_properties(filterspec):
    """_encode_filter_properties(filter) -> bytes

    Encode the options (properties) of the filter described by the dict
    *filterspec* and return them as bytes.  Only the options are encoded;
    the filter ID is not part of the result.
    """
    lzma_filter = parse_filter_spec(filterspec)

    # First ask how many bytes the encoded properties need ...
    size_out = ffi.new("uint32_t*")
    catch_lzma_error(m.lzma_properties_size, size_out, lzma_filter)

    # ... then encode into a buffer of exactly that size.
    props = ffi.new('uint8_t[]', size_out[0])
    catch_lzma_error(m.lzma_properties_encode, lzma_filter, props)
    return ffi.buffer(props)[:]
def find(self, offset):
    """Locate *offset* in this index.

    Returns an ``(IndexStreamData, IndexBlockData)`` pair built from the
    iterator position, or None when ``lzma_index_iter_locate`` reports a
    miss.
    """
    it = ffi.new('lzma_index_iter*')
    m.lzma_index_iter_init(it, self.i)
    missed = m.lzma_index_iter_locate(it, offset)
    if not missed:
        return (IndexStreamData(it.stream), IndexBlockData(it.block))
    # offset too high
    return None
def parse_filter_spec(spec):
    """Build an ``lzma_filter`` cdata object from the mapping *spec*.

    *spec* must be a mapping with an ``"id"`` entry; the remaining entries
    are passed as keyword arguments to the option parser for that filter
    family (LZMA1/LZMA2, delta or BCJ).

    Raises:
        TypeError: *spec* is not a mapping.
        ValueError: ``"id"`` is missing, the id is not a known filter, or
            the option parser rejected the keyword arguments.
    """
    # collections.Mapping was removed in Python 3.10; the ABC lives in
    # collections.abc.  Fall back to the old location for interpreters
    # that predate collections.abc.
    try:
        from collections.abc import Mapping
    except ImportError:
        from collections import Mapping

    if not isinstance(spec, Mapping):
        raise TypeError("Filter specifier must be a dict or dict-like object")

    ret = ffi.new('lzma_filter*')
    try:
        ret.id = spec['id']
    except KeyError:
        raise ValueError("Filter specifier must have an \"id\" entry")

    # A TypeError from the option parsers means the spec carried unexpected
    # or missing keyword arguments; report that as an invalid specifier.
    if ret.id in (m.LZMA_FILTER_LZMA1, m.LZMA_FILTER_LZMA2):
        try:
            options = parse_filter_spec_lzma(**spec)
        except TypeError:
            raise ValueError("Invalid filter specifier for LZMA filter")
    elif ret.id == m.LZMA_FILTER_DELTA:
        try:
            options = parse_filter_spec_delta(**spec)
        except TypeError:
            raise ValueError("Invalid filter specifier for delta filter")
    elif ret.id in BCJ_FILTERS:
        try:
            options = parse_filter_spec_bcj(**spec)
        except TypeError:
            raise ValueError("Invalid filter specifier for BCJ filter")
    else:
        raise ValueError("Invalid %d" % (ret.id, ))

    ret.options = options
    # Record the options object against the filter in the module-level
    # _owns registry (presumably so it outlives this call, since
    # ret.options only stores a raw pointer).
    _owns[ret] = options
    return ret
def __init__(self):
    """Create an ``lzma_allocator`` struct wired to this object's callbacks.

    The two cffi callbacks are stored in ``self.owns`` under the keys
    ``'a'`` and ``'b'`` (presumably to keep them referenced for as long as
    the allocator is in use).
    """
    self.owns = {}
    self.lzma_allocator = ffi.new('lzma_allocator*')

    alloc_cb = ffi.callback("void*(void*, size_t, size_t)", self.__alloc)
    self.owns['a'] = alloc_cb
    free_cb = ffi.callback("void(void*, void*)", self.__free)
    self.owns['b'] = free_cb

    allocator = self.lzma_allocator
    allocator.alloc = alloc_cb
    allocator.free = free_cb
    allocator.opaque = ffi.NULL
def decompress(self, data, max_length=-1):
    """
    decompress(data, max_length=-1) -> bytes

    Provide data to the decompressor object. Returns a chunk of
    decompressed data if possible, or b"" otherwise.

    Attempting to decompress data after the end of the stream is
    reached raises an EOFError. Any data found after the end of the
    stream is ignored, and saved in the unused_data attribute.

    Raises TypeError if max_length is not an int.  The whole operation
    runs while holding self.lock.  On return, self.needs_input tells
    whether the stream state left by this call calls for more input.
    """
    if not isinstance(max_length, int):
        raise TypeError(
            "max_length parameter object cannot be interpreted as an integer"
        )
    with self.lock:
        if self.eof:
            raise EOFError("Already at end of stream")
        lzs = self.lzs
        data = to_bytes(data)
        buf = ffi.new('uint8_t[]', data)
        buf_size = len(data)

        if lzs.next_in:
            # next_in is still set from an earlier call (it is reset to
            # NULL below once avail_in reaches 0); let the helper produce
            # the buffer to decompress from.
            buf, buf_size = self.pre_decompress_left_data(buf, buf_size)
            used__input_buffer = True
        else:
            # No pending input: point the stream straight at the new data.
            lzs.avail_in = buf_size
            lzs.next_in = ffi.cast("uint8_t*", buf)
            used__input_buffer = False

        # actual decompression
        result = self._decompress(buf, buf_size, max_length)

        if self.eof:
            self.needs_input = False
            if lzs.avail_in > 0:
                # Bytes left after the end of the stream are kept for the
                # caller in unused_data.
                self.unused_data = ffi.buffer(lzs.next_in, lzs.avail_in)[:]
            self.clear_input_buffer()
        elif lzs.avail_in == 0:
            # completed successfully!
            lzs.next_in = ffi.NULL
            if lzs.avail_out == 0:
                # (avail_in==0 && avail_out==0)
                # The stream's internal state may still hold a few bytes
                # that can be output; try to output them next time.
                self.needs_input = False
                assert max_length >= 0   # if < 0, lzs.avail_out always > 0
            else:
                # Input buffer exhausted, output buffer has space.
                self.needs_input = True
            self.clear_input_buffer()
        else:
            # Input remains unconsumed, so no new input is needed yet.
            self.needs_input = False
            if not used__input_buffer:
                self.post_decompress_avail_data()

        return result
def parse_filter_spec_lzma(id, preset=m.LZMA_PRESET_DEFAULT, **kwargs):
    """Build an ``lzma_options_lzma`` struct from an LZMA filter spec.

    The struct is first filled in from *preset*, then every remaining
    keyword overrides the field of the same name.  ``mf`` and ``mode`` are
    converted with ``int()`` before being stored.

    Raises:
        LZMAError: ``lzma_lzma_preset`` rejected *preset*.
        ValueError: a keyword is not one of the supported option names.
    """
    plain_fields = ('dict_size', 'lc', 'lp', 'pb', 'nice_len', 'depth')
    int_fields = ('mf', 'mode')

    options = ffi.new('lzma_options_lzma*')
    if m.lzma_lzma_preset(options, preset):
        raise LZMAError("Invalid compression preset: %s" % preset)

    for name, value in kwargs.items():
        if name in int_fields:
            value = int(value)
        elif name not in plain_fields:
            raise ValueError("Invalid filter specifier for LZMA filter")
        setattr(options, name, value)
    return options
def _decode_filter_properties(filter_id, encoded_props):
    """_decode_filter_properties(filter_id, encoded_props) -> dict

    Decode the bytes object *encoded_props* as the options (properties) of
    the filter with ID *filter_id* and return a dict describing that filter.
    """
    decoded = ffi.new('lzma_filter*')
    decoded.id = filter_id
    catch_lzma_error(m.lzma_properties_decode, decoded, ffi.NULL,
                     encoded_props, len(encoded_props))
    try:
        spec = build_filter_spec(decoded)
    finally:
        # TODO do we need this, the only use of m.free?
        m.free(decoded.options)
    return spec
def parse_filter_chain_spec(filterspecs):
    """Convert a sequence of filter specs into an ``lzma_filter[]`` array.

    The array always has ``LZMA_FILTERS_MAX + 1`` entries; every slot past
    the end of *filterspecs* gets the id ``LZMA_VLI_UNKNOWN``.

    Raises:
        ValueError: more than ``LZMA_FILTERS_MAX`` specs were given.
        TypeError: indexing *filterspecs* by position raised KeyError
            (e.g. a mapping was passed instead of a sequence).
    """
    if len(filterspecs) > m.LZMA_FILTERS_MAX:
        raise ValueError(
            "Too many filters - liblzma supports a maximum of %s" %
            m.LZMA_FILTERS_MAX)

    filters = ffi.new('lzma_filter[]', m.LZMA_FILTERS_MAX + 1)
    # The parsed per-filter objects are recorded against the array in the
    # module-level _owns registry (presumably to keep their option structs
    # alive, since the array entries only copy raw pointers).
    _owns[filters] = children = []

    for i in range(m.LZMA_FILTERS_MAX + 1):
        try:
            filterspec = filterspecs[i]
        except KeyError:
            raise TypeError(
                "Filter chain must be a sequence of filter specifiers")
        except IndexError:
            filters[i].id = m.LZMA_VLI_UNKNOWN
        else:
            # Use the spec fetched above rather than indexing a second time.
            parsed = parse_filter_spec(filterspec)
            children.append(parsed)
            filters[i].id = parsed.id
            filters[i].options = parsed.options
    return filters
def __init__(self, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Validate the arguments and initialise a liblzma encoder for *format*.

    *check* of -1 means "use the default"; for FORMAT_XZ that default is
    ``LZMA_CHECK_CRC64``, whether the encoder is configured from *preset*
    or from a custom *filters* chain.

    Raises:
        ValueError: an integrity check other than -1/``LZMA_CHECK_NONE`` is
            requested for a non-XZ format; both *preset* and *filters* are
            given; FORMAT_RAW is used without *filters*; or the parsed
            format matches none of the handled container formats.
        LZMAError: ``lzma_lzma_preset`` rejected *preset* (FORMAT_ALONE).
        NotImplementedError: FORMAT_ALONE with a custom filter chain.
    """
    if format != FORMAT_XZ and check not in (-1, m.LZMA_CHECK_NONE):
        raise ValueError("Integrity checks are only supported by FORMAT_XZ")
    if preset is not None and filters is not None:
        raise ValueError("Cannot specify both preset and filter chain")
    if preset is None:
        preset = m.LZMA_PRESET_DEFAULT
    format = _parse_format(format)
    self.lock = threading.Lock()
    self.flushed = 0
    self.lzs = _new_lzma_stream()
    __pypy__.add_memory_pressure(COMPRESSION_STREAM_SIZE)
    if format == FORMAT_XZ:
        # Resolve the default check before choosing the encoder, so that a
        # custom filter chain does not hand the -1 placeholder to
        # lzma_stream_encoder.
        if check == -1:
            check = m.LZMA_CHECK_CRC64
        if filters is None:
            catch_lzma_error(m.lzma_easy_encoder, self.lzs,
                             preset, check)
        else:
            filters = parse_filter_chain_spec(filters)
            catch_lzma_error(m.lzma_stream_encoder, self.lzs,
                             filters, check)
    elif format == FORMAT_ALONE:
        if filters is None:
            options = ffi.new('lzma_options_lzma*')
            if m.lzma_lzma_preset(options, preset):
                raise LZMAError("Invalid compression preset: %s" % preset)
            catch_lzma_error(m.lzma_alone_encoder, self.lzs, options)
        else:
            raise NotImplementedError
    elif format == FORMAT_RAW:
        if filters is None:
            raise ValueError("Must specify filters for FORMAT_RAW")
        filters = parse_filter_chain_spec(filters)
        catch_lzma_error(m.lzma_raw_encoder, self.lzs, filters)
    else:
        raise ValueError("invalid container format: %s" % format)
def __alloc(self, _opaque, _nmemb, size):
    """Allocation callback: return a new ``char[size]`` buffer.

    The buffer is stored in ``self.owns`` under the key returned by
    ``self._addr``.  *_opaque* and *_nmemb* are not used.
    """
    block = ffi.new('char[]', size)
    key = self._addr(block)
    self.owns[key] = block
    return block
def copy(self):
    """Return a new StreamFlags wrapping a freshly allocated
    ``lzma_stream_flags`` struct initialised from ``self.i``."""
    return StreamFlags(ffi.new('lzma_stream_flags*', self.i))
def _new_lzma_stream():
    """Allocate and initialise an ``lzma_stream``.

    The returned object is wrapped with ``ffi.gc`` so that ``lzma_end`` is
    registered as its destructor.
    """
    stream = ffi.new('lzma_stream*')
    m._pylzma_stream_init(stream)
    managed = ffi.gc(stream, m.lzma_end)
    return managed
def iterator(self, type=m.LZMA_INDEX_ITER_BLOCK):
    """Yield ``(IndexStreamData, IndexBlockData)`` pairs for this index.

    *type* is passed to ``lzma_index_iter_next`` on every step; iteration
    stops the first time that call returns a true value.
    """
    it = ffi.new('lzma_index_iter*')
    m.lzma_index_iter_init(it, self.i)
    while True:
        if m.lzma_index_iter_next(it, type):
            return
        yield (IndexStreamData(it.stream), IndexBlockData(it.block))
def _decode_stream_header_or_footer(decode_f, in_bytes):
    """Decode *in_bytes* with *decode_f* and return the resulting StreamFlags.

    *decode_f* is called through ``catch_lzma_error`` with a new
    ``lzma_stream_flags`` struct and a char buffer holding the input bytes.
    """
    raw = ffi.new('char[]', to_bytes(in_bytes))
    flags = ffi.new('lzma_stream_flags*')
    catch_lzma_error(decode_f, flags, raw)
    return StreamFlags(flags)
def parse_filter_spec_bcj(id, start_offset=0):
    """Return a new ``lzma_options_bcj`` struct with *start_offset* set.

    *id* is accepted so the whole filter spec can be passed as keyword
    arguments; it is not used here.
    """
    options = ffi.new('lzma_options_bcj*')
    options.start_offset = start_offset
    return options
def parse_filter_spec_delta(id, dist=1):
    """Return a new ``lzma_options_delta`` struct for a byte-wise delta.

    The type is always ``LZMA_DELTA_TYPE_BYTE`` and the distance is *dist*.
    *id* is accepted so the whole filter spec can be passed as keyword
    arguments; it is not used here.
    """
    options = ffi.new('lzma_options_delta*')
    options.type = m.LZMA_DELTA_TYPE_BYTE
    options.dist = dist
    return options