def __init__(self, urlstate_data=None, encryption_key=None):
    """
    Create a state object, optionally restoring it from a urlstate string.

    If urlstate_data is empty (``None`` or ``""``) a new empty state is
    created. Otherwise the state dictionary is rebuilt from the string by
    reversing the encoding steps: url-safe base64 decode, LZMA decompress,
    AES decrypt with encryption_key, LZMA decompress again, UTF-8 decode
    and finally JSON parse.

    :type urlstate_data: str
    :type encryption_key: str
    :rtype: State

    :param encryption_key: The key to be used for encryption.
    :param urlstate_data: A string created by the method urlstate in this
        class.
    :return: An instance of this class.
    """
    self._state_dict = {}
    self._delete = False

    # An empty string carries no state either; previously only None was
    # treated as "empty" and "" was pushed through the decrypt pipeline.
    if not urlstate_data:
        return

    urlstate_data = urlstate_data.encode("utf-8")
    urlstate_data = base64.urlsafe_b64decode(urlstate_data)
    # A decompressor object cannot be reused once its stream has ended, so
    # each of the two LZMA layers gets a fresh instance.
    urlstate_data = LZMADecompressor().decompress(urlstate_data)
    urlstate_data = AESCipher(encryption_key).decrypt(urlstate_data)
    urlstate_data = LZMADecompressor().decompress(urlstate_data)
    urlstate_data = urlstate_data.decode("UTF-8")
    self._state_dict = json.loads(urlstate_data)
def __init__(self, urlstate_data=None, encryption_key=None):
    """
    Build a state object, restoring its content from a urlstate string
    when one is supplied.

    With no (or empty) urlstate_data the parent class is initialised with
    an empty dict. Otherwise the string is url-safe base64 decoded, LZMA
    decompressed, AES decrypted, LZMA decompressed a second time, decoded
    as UTF-8 and parsed as JSON; the result is handed to the parent class.

    :type urlstate_data: str
    :type encryption_key: str
    :rtype: State

    :param encryption_key: The key to be used for encryption.
    :param urlstate_data: A string created by the method urlstate in this
        class.
    :return: An instance of this class.
    :raises ValueError: if urlstate_data is given without an
        encryption_key.
    """
    self.delete = False

    has_payload = bool(urlstate_data)
    if has_payload and not encryption_key:
        raise ValueError(
            "If an 'urlstate_data' is supplied 'encrypt_key' must be specified."
        )

    restored = urlstate_data
    if has_payload:
        outer_compressed = base64.urlsafe_b64decode(urlstate_data.encode("utf-8"))
        ciphertext = LZMADecompressor().decompress(outer_compressed)
        inner_compressed = _AESCipher(encryption_key).decrypt(ciphertext)
        serialized = LZMADecompressor().decompress(inner_compressed)
        restored = json.loads(serialized.decode("UTF-8"))

    super().__init__(restored or {})
def try_decompress_at(input_file: bytes, offset: int) -> bytes:
    """
    Try to decompress a GZIP, XZ/LZMA, BZ2 or LZ4 stream starting at offset.

    The compression format is chosen by checking the signature found at
    ``offset`` with ``Signature.check``. Decompression is best-effort: any
    exception raised while decoding is swallowed and ``None`` is returned.

    :param input_file: The raw bytes that may contain a compressed stream.
    :param offset: Position in input_file where the stream is expected.
    :return: The decompressed bytes, or ``None`` when no known signature
        matched, decoding failed, or the ``lz4`` package is missing.
    """
    decoded = None
    try:
        if Signature.check(input_file, offset, Signature.Compressed_GZIP):
            # Per the original author's note, SingleGzipReader is a modified
            # reader that stops after the GZip footer.
            decoded = SingleGzipReader(BytesIO(input_file[offset:])).read(-1)
        elif (Signature.check(input_file, offset, Signature.Compressed_XZ)
              or Signature.check(input_file, offset,
                                 Signature.Compressed_LZMA)):
            try:
                # Trailing bytes after the stream are not an error; the
                # decompressor keeps them aside in an attribute.
                decoded = LZMADecompressor().decompress(input_file[offset:])
            except Exception:
                # pylzma format compatibility: retry with eight 0xff bytes
                # inserted after the first five header bytes.
                decoded = LZMADecompressor().decompress(
                    input_file[offset:offset + 5] + b'\xff' * 8 +
                    input_file[offset + 5:])
        elif Signature.check(input_file, offset, Signature.Compressed_BZ2):
            # Trailing bytes after the stream are kept aside, not an error.
            decoded = BZ2Decompressor().decompress(input_file[offset:])
        elif Signature.check(input_file, offset, Signature.Compressed_LZ4):
            try:
                LZ4Decompressor = importlib.import_module('lz4.frame')
            except ModuleNotFoundError:
                logging.error('ERROR: This kernel requires LZ4 decompression.')
                logging.error(' But "lz4" python package was not found.')
                logging.error(
                    ' Example installation command: "sudo pip3 install lz4"'
                )
                # logging.error() needs a message argument; calling it with
                # none raised TypeError. Log an explicit blank line instead.
                logging.error('')
                return None
            context = LZ4Decompressor.create_decompression_context()
            decoded, bytes_read, end_of_frame = LZ4Decompressor.decompress_chunk(
                context, input_file[offset:])
    except Exception:
        # Deliberate best-effort: a failed attempt simply yields None.
        pass

    if decoded and len(decoded) > 0x1000:
        logging.info((
            '[+] Kernel successfully decompressed in-memory (the offsets that '
            + 'follow will be given relative to the decompressed binary)'))
    return decoded
def __init__(self, urlstate_data=None, encryption_key=None):
    """
    Build a state object, restoring it from a urlstate string if possible.

    With no urlstate_data an empty state is created. Otherwise the string
    is url-safe base64 decoded, LZMA decompressed, AES decrypted, LZMA
    decompressed again and parsed as JSON. If any of these steps fails, a
    warning is logged and an empty state is used instead. In every case the
    resulting state carries a session id: the restored one when present,
    otherwise a freshly generated UUID URN.

    :type urlstate_data: str
    :type encryption_key: str
    :rtype: State

    :param encryption_key: The key to be used for encryption.
    :param urlstate_data: A string created by the method urlstate in this
        class.
    :return: An instance of this class.
    :raises ValueError: if urlstate_data is given without an
        encryption_key.
    """
    self.delete = False

    if urlstate_data is None:
        urlstate_data = {}

    if urlstate_data and not encryption_key:
        raise ValueError("If an 'urlstate_data' is supplied 'encrypt_key' must be specified.")

    if urlstate_data:
        try:
            encoded = urlstate_data.encode("utf-8")
            outer_compressed = base64.urlsafe_b64decode(encoded)
            ciphertext = LZMADecompressor().decompress(outer_compressed)
            inner_compressed = _AESCipher(encryption_key).decrypt(ciphertext)
            serialized = LZMADecompressor().decompress(inner_compressed)
            restored = json.loads(serialized)
        except Exception as e:
            # Any failure along the pipeline falls back to an empty state.
            logger.warning(
                {
                    "message": "Failed to load state data. Reinitializing empty state.",
                    "reason": str(e),
                    "urlstate_data": urlstate_data,
                }
            )
            urlstate_data = {}
        else:
            urlstate_data = restored

    if urlstate_data and _SESSION_ID_KEY in urlstate_data:
        session_id = urlstate_data[_SESSION_ID_KEY]
    else:
        session_id = uuid4().urn
    urlstate_data[_SESSION_ID_KEY] = session_id

    super().__init__(urlstate_data)
def __init__(self, input_queue, output_path, time_queue,
             temp_overlay_file=None):
    """
    Store the worker's configuration and register ``decompress_blobs`` as
    the process target.

    :param input_queue: stored as ``self.input_queue``.
    :param output_path: stored as ``self.output_path``.
    :param time_queue: stored as ``self.time_queue``.
    :param temp_overlay_file: optional, stored as
        ``self.temp_overlay_file``.
    """
    # One decompressor instance is created per worker object.
    self.decompressor = LZMADecompressor()

    self.input_queue = input_queue
    self.output_path = output_path
    self.time_queue = time_queue
    self.temp_overlay_file = temp_overlay_file

    Process.__init__(self, target=self.decompress_blobs)
def download(url, fi):
    """
    Fetch ``url``, LZMA-decompress the body on the fly and store it at
    ``fi``.

    The decompressed data is first written to ``fi + ".tmp"`` and the
    temporary file is renamed to ``fi`` afterwards. Chunks that arrive
    after the decompressor reached the end of its stream are ignored.

    :param url: address handed to ``get``.
    :param fi: destination path of the decompressed file.
    """
    decoder = LZMADecompressor()
    with get(url) as response:
        partial_path = fi + ".tmp"
        with open(partial_path, 'wb') as sink:
            for piece in response.iter_content(128):
                if decoder.eof:
                    # Stream already finished; skip whatever follows.
                    continue
                sink.write(decoder.decompress(piece))
    rename(fi + ".tmp", fi)
def decompress(in_path, out_path):
    """
    Decompress a legacy ``.lzma`` (FORMAT_ALONE) stream from one file into
    another.

    The first four bytes of the input file are skipped before decoding;
    the remainder is fed to the decompressor in 1024-byte chunks.

    :param in_path: path of the compressed input file.
    :param out_path: path the decompressed data is written to.
    """
    with open(in_path, 'rb') as source, open(out_path, 'wb') as target:
        decoder = LZMADecompressor(FORMAT_ALONE)
        source.seek(4, SEEK_SET)
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: source.read(1024), b''):
            target.write(decoder.decompress(block))
async def download(self, index):
    """
    Asynchronously yield the LZMA-decompressed content of one block file.

    ``self.update()`` is awaited first. Each chunk produced by
    ``self._session.download`` is decompressed in the event loop's default
    executor and yielded in order.

    :param index: key into ``self.block_files``.
    :raises FileNotFoundError: if there is no block file for index, or if
        the block file is partial or not complete.
    """
    await self.update()

    if index not in self.block_files:
        raise FileNotFoundError
    entry = self.block_files[index]

    usable = not entry.partial and entry.complete
    if not usable:
        raise FileNotFoundError

    event_loop = asyncio.get_running_loop()
    decoder = LZMADecompressor()
    async for piece in self._session.download(entry.siapath):
        plain = await event_loop.run_in_executor(
            None, decoder.decompress, piece)
        yield plain
def data(self):
    """
    Return the payload buffer and the offset at which the payload starts.

    When ``self.compression`` is 4 the payload is an XZ stream beginning
    one byte after ``self.offset`` in ``self.buf``. It is decompressed on
    first use, cached in ``self._data`` and returned with offset 0.
    For any other compression value the raw buffer is returned together
    with ``self.offset + 1``.

    :return: a ``(buffer, offset)`` tuple.
    :raises EOFError: if the buffer ends before the XZ stream is complete.
    """
    if self.compression != 4:
        return self.buf, self.offset + 1

    if self._data is None:
        decompressor = LZMADecompressor(format=FORMAT_XZ)
        position = self.offset + 1
        pieces = []
        while decompressor.needs_input:
            chunk = self.buf[position:position + 1024]
            if not chunk:
                # The buffer is exhausted but the stream has not ended.
                # Without this check the loop would spin forever feeding
                # empty input to the decompressor.
                raise EOFError(
                    "Compressed data ended before the end-of-stream "
                    "marker was reached")
            pieces.append(decompressor.decompress(chunk))
            position += 1024
        # Join once instead of repeated bytes concatenation, and only cache
        # the result after the whole stream decoded successfully.
        self._data = b"".join(pieces)
    return self._data, 0
def try_decompress_at(input_file: bytes, offset: int) -> bytes:
    """
    Try to decompress a GZIP, XZ/LZMA or BZ2 stream starting at offset.

    The format is picked from the magic bytes found at ``offset``. The
    attempt is best-effort: any exception while decoding is swallowed and
    ``None`` is returned. A notice is printed when more than 0x1000 bytes
    were recovered.

    :param input_file: The raw bytes that may contain a compressed stream.
    :param offset: Position in input_file where the stream is expected.
    :return: The decompressed bytes, or ``None`` if nothing was decoded.
    """

    def head(length):
        # The first `length` bytes located at `offset`.
        return input_file[offset:offset + length]

    payload = None
    try:
        if head(3) == b'\x1f\x8b\x08':  # GZIP signature
            # Per the original author's note, SingleGzipReader is a modified
            # reader that stops after the GZip footer.
            stream = BytesIO(input_file[offset:])
            payload = SingleGzipReader(stream).read(-1)
        elif head(6) == b'\xfd7zXZ\x00' or head(3) == b']\x00\x00':
            # XZ / LZMA signature
            try:
                # Trailing bytes after the stream are kept aside in an
                # attribute of the decompressor rather than raising.
                payload = LZMADecompressor().decompress(input_file[offset:])
            except Exception:
                # pylzma format compatibility: retry with eight 0xff bytes
                # inserted after the first five header bytes.
                patched = (input_file[offset:offset + 5] + b'\xff' * 8 +
                           input_file[offset + 5:])
                payload = LZMADecompressor().decompress(patched)
        elif head(3) == b'BZh':  # BZ2 signature
            # Trailing bytes after the stream are kept aside, not an error.
            payload = BZ2Decompressor().decompress(input_file[offset:])
    except Exception:
        pass

    if payload and len(payload) > 0x1000:
        print(
            '[+] Kernel successfully decompressed in-memory (the offsets that '
            'follow will be given relative to the decompressed binary)')
    return payload
def decomp_overlay(meta, output_path):
    """
    Decompress every overlay blob listed in a metadata file into one file.

    The msgpack-encoded metadata file at ``meta`` lists overlay file names
    (relative to the metadata file's directory). Each listed file is read,
    LZMA-decompressed and appended, in order, to ``output_path``.

    :param meta: path of the msgpack metadata file.
    :param output_path: path of the file receiving the decompressed data.
    :return: the unpacked metadata dictionary.
    """
    # Binary mode plus context managers: the files hold binary data and the
    # handles were previously never closed (or leaked on error).
    with open(meta, "rb") as meta_file:
        meta_dict = msgpack.unpackb(meta_file.read())

    decomp_start_time = time()
    meta_dir = os.path.dirname(meta)
    comp_overlay_files = [
        os.path.join(meta_dir, item[Const.META_OVERLAY_FILE_NAME])
        for item in meta_dict[Const.META_OVERLAY_FILES]
    ]

    with open(output_path, "w+b") as overlay_file:
        for comp_file in comp_overlay_files:
            # A fresh decompressor per blob: each file is its own stream.
            decompressor = LZMADecompressor()
            with open(comp_file, "rb") as comp_fd:
                comp_data = comp_fd.read()
            decomp_data = decompressor.decompress(comp_data)
            # NOTE(review): flush() is not part of the standard library
            # LZMADecompressor; presumably this is a third-party LZMA
            # binding - confirm against the file's imports.
            decomp_data += decompressor.flush()
            overlay_file.write(decomp_data)
        LOG.debug("Overlay decomp time for %d files: %f at %s\n" %
                  (len(comp_overlay_files), (time() - decomp_start_time),
                   output_path))
    return meta_dict
def decompress_lzma(data):
    """
    Decompress one or more concatenated LZMA/XZ streams.

    Unlike ``lzma.decompress`` this variant tolerates a stream that lacks
    its end-of-stream marker: whatever could be decoded is returned.
    Leftover bytes after a complete stream that do not form a valid
    stream are ignored.

    :param data: bytes-like object holding the compressed data.
    :return: the concatenated decompressed bytes.
    :raises LZMAError: if the very first stream cannot be decoded.
    """
    results = []
    while True:
        decomp = LZMADecompressor(FORMAT_AUTO, None, None)
        try:
            res = decomp.decompress(data)
        except LZMAError:
            if results:
                break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
            raise  # Error on the first iteration; bail out.
        results.append(res)
        # unused_data stays empty until the end of the stream is reached, so
        # an empty value covers both "all done" and "stream was truncated".
        # The former `if not decomp.eof: raise ...` after this check could
        # therefore never execute and has been removed.
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)
def decompress_lzma(data):
    """
    Decode every LZMA/XZ stream found back-to-back in ``data``.

    A stream missing its end-of-stream marker is accepted and the bytes
    decoded so far are returned. Trailing bytes that are not a valid
    stream are dropped once at least one stream has been decoded.

    :param data: bytes-like object holding the compressed data.
    :return: the concatenated decompressed bytes.
    :raises LZMAError: if decoding fails before anything was decoded.
    """
    chunks = []
    remaining = data
    while True:
        decoder = LZMADecompressor(FORMAT_AUTO, None, None)
        try:
            chunks.append(decoder.decompress(remaining))
        except LZMAError:
            if not chunks:
                raise  # Nothing decoded yet: the input itself is invalid.
            break  # Trailing garbage after a valid stream; ignore it.
        # Bytes are only left over once a stream has ended. The old eof
        # check placed after this test was unreachable and is gone, as is
        # the stray no-op `len(data)` statement.
        remaining = decoder.unused_data
        if not remaining:
            break
    return b"".join(chunks)
def __init__(self, fpatch, compression):
    """
    Select the decompressor matching the patch's compression name.

    :param fpatch: the patch file object; stored as ``self._fpatch`` and,
        for some algorithms, passed to ``patch_data_length``.
    :param compression: one of 'lzma', 'bz2', 'crle', 'none',
        'heatshrink', 'zstd' or 'lz4'.
    :raises Error: if compression is not one of the names above.
    """
    # Factories are lambdas so that only the selected decompressor is
    # constructed and patch_data_length() is only called when needed.
    factories = (
        ('lzma', lambda: LZMADecompressor()),
        ('bz2', lambda: BZ2Decompressor()),
        ('crle', lambda: CrleDecompressor(patch_data_length(fpatch))),
        ('none', lambda: NoneDecompressor(patch_data_length(fpatch))),
        ('heatshrink',
         lambda: HeatshrinkDecompressor(patch_data_length(fpatch))),
        ('zstd', lambda: ZstdDecompressor(patch_data_length(fpatch))),
        ('lz4', lambda: Lz4Decompressor()),
    )

    for name, build in factories:
        if compression == name:
            self._decompressor = build()
            break
    else:
        raise Error(format_bad_compression_string(compression))

    self._fpatch = fpatch
def decompress_raw(self) -> bytes:
    """
    Return the content of ``self.path``, decompressed if it is compressed.

    The compression format is detected from the magic bytes at the start
    of the file. The matching Python module is imported on demand. Files
    with no known magic are returned unchanged.

    :return: the (decompressed) file content.
    :raises MissingDecompressorError: if the module needed for the
        detected format is not installed.
    """
    magic_dict = {
        b'\x1f\x8b\x08': 'gzip',
        b'\x42\x5a\x68': 'bz2',
        b'\xfd\x37\x7a\x58\x5a\x00': 'lzma',
        b'\x5d\x00\x00': 'lzma',
        b'\x04\x22\x4d\x18': 'lz4.frame',
        b'\x28\xb5\x2f\xfd': 'zstandard',
        b'\x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a': 'lzo',
    }
    maxlen = max(len(x) for x in magic_dict)

    # The context manager closes the file on every exit path; the handle
    # was previously left open.
    with open(self.path, 'rb') as f:
        header = f.read(maxlen)
        f.seek(0)

        for magic, comp in magic_dict.items():
            if not header.startswith(magic):
                continue

            try:
                mod = importlib.import_module(comp)
            except ModuleNotFoundError as err:
                raise MissingDecompressorError(
                    f'The Python module {comp!r} that is required '
                    f'to decompress kernel file {self.path} '
                    f'is not installed.') from err

            if comp == 'zstandard':
                # Technically a redundant import, this is just
                # to make your IDE happy :)
                import zstandard

                reader = zstandard.ZstdDecompressor().stream_reader(f)
                chunks = []
                while True:
                    chunk = reader.read(1024 * 1024)
                    if not chunk:
                        break
                    chunks.append(chunk)
                # Join once rather than growing a bytes object per chunk.
                return b''.join(chunks)

            if comp == 'lzma':
                # The module-level lzma.decompress() raises when the stream
                # has no end-of-stream marker; a bare LZMADecompressor
                # returns what it could decode instead.
                return LZMADecompressor().decompress(f.read())

            return getattr(mod, 'decompress')(f.read())

        # No known magic: hand back the raw file content.
        return f.read()
def __lzma_decompress(self, compressed: bytes) -> bytes:
    """
    Decompress LZMA data stored in the legacy "alone" container.

    The 13-byte header (properties byte, dictionary size, uncompressed
    size) is unpacked only for logging and sanity warnings; decoding
    itself is done on the input with its last six bytes removed.

    :param compressed: the raw LZMA bytes including header.
    :return: the decompressed bytes.
    """
    # Log the printf-friendly hex representation of the bytes to decompress
    digits = compressed.hex()
    escaped = "\\x" + "\\x".join(
        digits[pos:pos + 2] for pos in range(0, len(digits), 2))
    log.debug("Attempting LZMA decompression of bytes: {}".format(escaped))

    header = struct.unpack("<BIQ", compressed[:13])
    properties, dictionary_size, uncompressed_size = header

    if properties > (4 * 5 + 4) * 9 + 8:
        log.warning("There seems to be an issue in the LZMA header")

    # The properties byte packs three values:
    # (position_bits * 5 + literal_position_bits) * 9 + literal_context_bits
    position_bits, remainder = divmod(properties, 9 * 5)
    literal_position_bits, literal_context_bits = divmod(remainder, 9)

    log.debug("LZMA dictionary_size={}".format(dictionary_size))
    log.debug("LZMA uncompressed_size={}".format(uncompressed_size))
    log.debug("LZMA literal_context_bits={}".format(literal_context_bits))
    log.debug("LZMA literal_position_bits={}".format(literal_position_bits))
    log.debug("LZMA position_bits={}".format(position_bits))

    if literal_context_bits + literal_position_bits > 4:
        log.warning(
            "literal_context_bits + litereal_position_bits > 4 which may indicate LZMA header issues"
        )

    # lzma-js as used by PXT has a bug where the EOF marker is written
    # incorrectly, so it is cut off before decoding.
    # See: https://github.com/LZMA-JS/LZMA-JS/issues/44
    # See: https://github.com/LZMA-JS/LZMA-JS/issues/54
    #
    # This is sort of shady and may cause artefacts further down the road -
    # it does not smartly remove the end marker, but works for some tested
    # project files (also see workaround in __extract_sources).
    # It may have to be changed (6-7 seems to work well).
    without_end_marker = compressed[:-6]
    decoder = LZMADecompressor(lzma.FORMAT_ALONE, None, None)
    return decoder.decompress(without_end_marker)
def get_debug_symbols(filename):
    """
    Extract the symbols stored in an ELF file's ``.gnu_debugdata`` section.

    The section content is XZ-decompressed into a temporary file named
    ``filename + '-symbols.elf'``, which is parsed as an ELF file and then
    removed again.

    :param filename: path of the ELF file to inspect.
    :return: a list of ``(st_value, name)`` tuples for every named symbol,
        or ``None`` if the file has no ``.gnu_debugdata`` section.
    """
    print('Processing file:', filename)
    section_name = '.gnu_debugdata'

    with open(filename, 'rb') as f:
        elffile = ELFFile(f)
        debugdata = elffile.get_section_by_name(section_name)
        if not isinstance(debugdata, Section):
            print(' The file has no %s section' % section_name)
            return
        print(' Found %s section' % section_name)
        print(' Extracting...')
        # Decompress before creating the temporary file so that a corrupt
        # section does not leave an empty file behind.
        decompressor = LZMADecompressor(FORMAT_XZ)
        data = decompressor.decompress(debugdata.data())

    debugdata_filename = filename + '-symbols.elf'
    with open(debugdata_filename, 'wb') as extracted:
        extracted.write(data)

    symbols = []
    try:
        with open(debugdata_filename, 'rb') as f:
            elffile = ELFFile(f)
            symbol_tables = [
                s for s in elffile.iter_sections()
                if isinstance(s, SymbolTableSection)
            ]
            if not symbol_tables and elffile.num_sections() == 0:
                print(' INFO: No debug symbols.')
            for section in symbol_tables:
                symbols += [(symbol['st_value'], symbol.name)
                            for symbol in section.iter_symbols()
                            if len(symbol.name) != 0]
    finally:
        # Always clean up the temporary file, even when parsing fails;
        # it used to be left on disk in that case.
        remove(debugdata_filename)
    return symbols
def _decompress_lzma(self, data: bytes) -> bytes:
    '''
    Decompress LZMA data, tolerating a missing end-of-stream marker.

    Concatenated streams are decoded one after another. A stream that
    ends early yields whatever could be decoded. Leftover bytes that are
    not a valid stream are ignored once at least one stream was decoded.

    NOTE: https://stackoverflow.com/a/37400585/2895581

    :params data: Bytes representation of response data.
    :returns outs: Bytes representation of decompressed LZMA data.
    :raises LZMAError: If the first stream cannot be decoded at all.
    '''
    results: list = []
    while True:
        decomp = LZMADecompressor(FORMAT_AUTO, None, None)
        try:
            res = decomp.decompress(data)
        except LZMAError:
            # If there is leftover data, then it is not valid LZMA format
            # and we should ignore it.
            if results:
                break
            # If we encounter an error on the first iteration, bail out.
            raise
        results.append(res)
        # unused_data is only non-empty after the end of a stream, so the
        # eof check that used to follow this test could never fire; it has
        # been removed together with its placeholder-less f-string.
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)
def bdecompress(cb):
    """
    LZMA-decompress a byte string, passing ``None`` through unchanged.

    :param cb: compressed bytes, or ``None``.
    :return: the decompressed bytes, or ``None`` if cb is ``None``.
    """
    return None if cb is None else LZMADecompressor().decompress(cb)
def _report_missing_decompressor(algorithm: str, package: str,
                                 install_target: str) -> None:
    """Log that the python package needed for ``algorithm`` is missing."""
    logging.error('ERROR: This kernel requires %s decompression.', algorithm)
    logging.error(' But "%s" python package was not found.', package)
    logging.error(' Example installation command: "sudo pip3 install %s"',
                  install_target)
    # logging.error() needs a message argument; calling it with none raised
    # TypeError. Log an explicit blank line instead.
    logging.error('')


def try_decompress_at(input_file: bytes, offset: int) -> bytes:
    """
    Try to unpack or decompress a kernel image starting at offset.

    The container or compression format is chosen by checking the
    signature found at ``offset`` with ``Signature.check``. Handled here:
    Qualcomm appended-DTB wrapper, Android boot image, GZIP, XZ/LZMA, BZ2,
    LZ4 (frame and legacy), ZSTD and LZO. The attempt is best-effort: any
    exception while decoding is swallowed and ``None`` is returned.

    :param input_file: The raw bytes that may contain a wrapped kernel.
    :param offset: Position in input_file where the signature is expected.
    :return: The unpacked bytes, or ``None`` when no known signature
        matched, decoding failed, or a required package is missing.
    """
    decoded = None
    try:
        if Signature.check(input_file, offset,
                           Signature.DTB_Appended_Qualcomm):
            # Merely unpack a Qualcomm kernel file containing a magic and
            # DTB offset at the start (so that offsets aren't wrong)
            dtb_offset_raw = input_file[offset + 16:offset + 20]
            dtb_offset_le = int.from_bytes(dtb_offset_raw, 'little')
            dtb_offset_be = int.from_bytes(dtb_offset_raw, 'big')
            decoded = input_file[offset + 20:offset + 20 +
                                 min(dtb_offset_le, dtb_offset_be)]

        elif Signature.check(input_file, offset, Signature.Android_Bootimg):
            # Unpack an uncompressed Android Bootimg file, version 0, 1, 2
            # or 3. See, for reference:
            # - https://github.com/osm0sis/mkbootimg/blob/master/unpackbootimg.c
            # - https://github.com/osm0sis/mkbootimg/blob/master/bootimg.h
            #
            # Explicit checks instead of assert so that validation still
            # happens under "python -O"; the ValueError is swallowed by the
            # outer handler exactly like the AssertionError was.
            if not len(input_file) > 4096:
                raise ValueError('boot image too small')

            header_version_raw = input_file[offset + 10 * 4:offset + 11 * 4]
            endianness = 'little'

            if header_version_raw in (b'\0\0\0\3', b'\3\0\0\0'):
                page_size = 4096
                if header_version_raw == b'\0\0\0\3':
                    endianness = 'big'
            else:
                page_size_raw = input_file[offset + 9 * 4:offset + 10 * 4]
                page_size_le = int.from_bytes(page_size_raw, 'little')
                page_size_be = int.from_bytes(page_size_raw, 'big')
                # The smaller interpretation decides the byte order.
                if page_size_le < page_size_be:
                    page_size = page_size_le
                else:
                    endianness = 'big'
                    page_size = page_size_be

            kernel_size = int.from_bytes(
                input_file[offset + 2 * 4:offset + 3 * 4], endianness)

            if not len(input_file) > kernel_size > 0x1000:
                raise ValueError('implausible kernel size')
            if not len(input_file) > page_size > 0x200:
                raise ValueError('implausible page size')

            decoded = input_file[offset + page_size:offset + page_size +
                                 kernel_size]

            # Also try to re-unpack the output image in the case where the
            # nested kernel would start with a "UNCOMPRESSED_IMG" Qualcomm
            # magic, for example
            decoded = try_decompress_at(decoded, 0) or decoded

        elif Signature.check(input_file, offset, Signature.Compressed_GZIP):
            # GZIP - per the original author's note, SingleGzipReader is a
            # modified reader that stops after the GZip footer.
            decoded = SingleGzipReader(BytesIO(input_file[offset:])).read(-1)

        elif (Signature.check(input_file, offset, Signature.Compressed_XZ)
              or Signature.check(input_file, offset,
                                 Signature.Compressed_LZMA)):
            try:
                # LZMA - trailing bytes after the stream are kept aside in
                # an attribute of the decompressor rather than raising.
                decoded = LZMADecompressor().decompress(input_file[offset:])
            except Exception:
                # pylzma format compatibility: retry with eight 0xff bytes
                # inserted after the first five header bytes.
                decoded = LZMADecompressor().decompress(
                    input_file[offset:offset + 5] + b'\xff' * 8 +
                    input_file[offset + 5:])

        elif Signature.check(input_file, offset, Signature.Compressed_BZ2):
            # BZ2 - trailing bytes after the stream are kept aside.
            decoded = BZ2Decompressor().decompress(input_file[offset:])

        elif Signature.check(input_file, offset, Signature.Compressed_LZ4):
            # LZ4 support
            try:
                LZ4Decompressor = importlib.import_module('lz4.frame')
            except ModuleNotFoundError:
                _report_missing_decompressor('LZ4', 'lz4', 'lz4')
                return None
            context = LZ4Decompressor.create_decompression_context()
            decoded, bytes_read, end_of_frame = LZ4Decompressor.decompress_chunk(
                context, input_file[offset:])

        elif Signature.check(input_file, offset,
                             Signature.Compressed_LZ4_Legacy):
            # LZ4 support (legacy format)
            try:
                from utils.lz4_legacy import decompress_lz4_buffer
            except ImportError:
                try:
                    from vmlinux_to_elf.utils.lz4_legacy import decompress_lz4_buffer
                except ModuleNotFoundError:
                    _report_missing_decompressor('LZ4', 'lz4', 'lz4')
                    return None
            decoded = decompress_lz4_buffer(BytesIO(input_file[offset:]))

        elif Signature.check(input_file, offset, Signature.Compressed_ZSTD):
            try:
                import zstandard as zstd
            except ModuleNotFoundError:
                _report_missing_decompressor('ZSTD', 'zstandard', 'zstandard')
                return None
            buf = BytesIO()
            context = zstd.ZstdDecompressor()
            for chunk in context.read_to_iter(BytesIO(input_file[offset:])):
                buf.write(chunk)
            buf.seek(0)
            decoded = buf.read()

        elif Signature.check(input_file, offset, Signature.Compressed_LZO):
            try:
                import lzo
            except ModuleNotFoundError:
                _report_missing_decompressor(
                    'LZO', 'python-lzo',
                    'git+https://github.com/clubby789/python-lzo@b4e39df')
                return None
            buf = BytesIO(input_file[offset:])
            decoded = lzo.LzoFile(fileobj=buf, mode='rb').read()
    except Exception:
        # Deliberate best-effort: a failed attempt simply yields None.
        pass

    if decoded and len(decoded) > 0x1000:
        logging.info((
            '[+] Kernel successfully decompressed in-memory (the offsets that '
            + 'follow will be given relative to the decompressed binary)'))
    return decoded
def reset(self) -> None:
    """
    Rewind the reader to the start of its stream.

    Seeks ``self.fileobj`` back to the beginning, resets ``self.pos`` to
    zero and replaces ``self.decompressor`` with a fresh XZ decompressor.
    """
    stream = self.fileobj
    stream.seek(0, SEEK_SET)
    self.pos = 0
    # A decompressor cannot be rewound, so start over with a new one.
    fresh_decoder = LZMADecompressor(format=FORMAT_XZ)
    self.decompressor = fresh_decoder
return delta_list if __name__ == "__main__": import random import string if sys.argv[1] == "comp": base = ''.join( random.choice(string.ascii_uppercase + string.digits) for x in range(2096)) compressor = LZMACompressor(LZMA_OPTION) comp = compressor.compress(base) comp += compressor.flush() decompressor = LZMADecompressor() decomp = decompressor.decompress(comp) decomp += decompressor.flush() if base != decomp: print "result is wrong" print "%d == %d" % (len(base), len(decomp)) sys.exit(1) print "success" elif sys.argv[1] == "xdelta": base = ''.join( random.choice(string.ascii_uppercase + string.digits) for x in range(4096)) modi = "~" * 4096 patch = diff_data(base, modi, len(base))