def __init__(self, reader: StructReader):
    """
    Parse a ZIP end-of-central-directory record from the given reader.

    The reader must be positioned at the 4-byte record signature. A `ValueError` is raised when
    the signature does not equal `self.SIGNATURE`. The trailing archive comment is optional: if
    the input ends before the comment length or the comment body can be read, `comment` is set
    to `None` rather than failing.
    """
    if reader.read(4) != self.SIGNATURE:
        raise ValueError
    self.disk_number = reader.u16()
    self.start_disk_number = reader.u16()
    self.entries_on_disk = reader.u16()
    self.entries_in_directory = reader.u16()
    self.directory_size = reader.u32()
    self.directory_offset = reader.u32()
    try:
        # The ZIP format stores the comment length as a 2-byte field; reading 4 bytes here
        # would swallow the first two bytes of the comment (or of whatever data follows).
        comment_length = reader.u16()
        comment = reader.read(comment_length) if comment_length else None
    except EOFError:
        comment = None
    # An empty comment is normalized to None.
    self.comment = comment or None
def _read_strings(self, reader: StructReader, size: int, offset: int) -> Generator[str, None, None]:
    """
    Generate the non-empty strings of a string table.

    The table at `offset` consists of `size` 32-bit offsets. Each of them points to a string
    record: a ULEB128-encoded length followed by a null-terminated string which is decoded via
    `JvClassFile.decode_utf8m`. Records with a length of zero are skipped. A `RuntimeError` is
    raised when the decoded string does not have the announced length. The reader position is
    restored once the generator is exhausted.
    """
    def uleb128():
        # Up to five groups of 7 payload bits, each followed by one continuation bit.
        result = 0
        shift = 0
        pending = True
        while pending and shift < 35:
            chunk = reader.read_integer(7)
            pending = reader.read_bit()
            result |= chunk << shift
            shift += 7
        # The fifth group must be the last one.
        assert not pending
        return result

    with StreamDetour(reader, offset):
        table = [reader.u32() for _ in range(size)]
        for position in table:
            reader.seek(position)
            size = uleb128()
            if size:
                encoded = reader.read_c_string()
                string = JvClassFile.decode_utf8m(encoded)
                if len(string) != size:
                    raise RuntimeError(
                        F'Read string of length {len(string)}, expected length {size}.'
                    )
                yield string
def __init__(self, reader: StructReader):
    """
    Parse a code attribute: stack and local limits, the disassembled byte code, the exception
    table, and the nested attributes. All values are read in big endian byte order.
    """
    reader.bigendian = True
    self.max_stack = reader.u16()
    self.max_locals = reader.u16()
    self.disassembly: List[JvOpCode] = []
    # The byte code is a blob prefixed by its 32-bit length; it is disassembled from a
    # separate reader so that decoding stops exactly at the end of the blob.
    code_size = reader.u32()
    bytecode = reader.read(code_size)
    with StructReader(bytecode) as code:
        code.bigendian = True
        while True:
            if code.eof:
                break
            opcode = JvOpCode(code, pool=self.pool)
            self.disassembly.append(opcode)
    # Both of the following tables are prefixed by a 16-bit entry count.
    exception_count = reader.u16()
    self.exceptions = [JvException(reader) for _ in range(exception_count)]
    attribute_count = reader.u16()
    self.attributes = [JvAttribute(reader) for _ in range(attribute_count)]
def __init__(self, reader: StructReader, calculate_checks=False):
    """
    Parse a DEX file header and the string table it references.

    When `calculate_checks` is set, the Adler-32 checksum and the SHA-1 digest are computed
    over the data following the respective header field and stored next to the values read
    from the header; otherwise the calculated values are `None`. A `ValueError` is raised for
    an invalid file signature or an invalid endian tag.
    """
    def remainder():
        # Everything from the current position to the end, without moving the cursor.
        with StreamDetour(reader):
            return reader.read()

    magic = reader.read(4)
    if magic != b'dex\n':
        raise ValueError('Invalid Signature')

    # Peek at the endian tag first: if it reads byte-swapped, switch the reader to big endian.
    with StreamDetour(reader, 0x28):
        endian_probe = reader.u32()
    if endian_probe == 0x78563412:
        reader.bigendian = True

    self.version = reader.read(4).rstrip(b'\0')
    self.checksum = reader.u32()
    self.calculated_checksum = zlib.adler32(remainder()) if calculate_checks else None
    self.signature = reader.read(20)
    self.calculated_signature = hashlib.sha1(remainder()).digest() if calculate_checks else None
    self.size_of_file = reader.u32()
    self.size_of_header = reader.u32()

    endian_tag = reader.u32()
    if endian_tag != 0x12345678:
        raise ValueError('Invalid Endian Tag')

    self.link_size = reader.u32()
    self.link_offset = reader.u32()
    self.map_offset = reader.u32()

    # The string table is described by its entry count, followed by its offset.
    string_count = reader.u32()
    string_offset = reader.u32()
    self.strings: List[str] = list(self._read_strings(reader, string_count, string_offset))
def test_bitreader_structured(self):
    """
    Pack one value of each supported width and read all of them back through a StructReader,
    after checking that a byte read on a nibble boundary is rejected as unaligned.
    """
    tiny = 0b1100101
    short = -0x1337
    word = 0xDEFACED
    unsigned = 0xC0CAC01A
    quad = -0o1337
    single = 2076.171875
    double = math.pi
    packed = struct.pack('<bhiLqfd', tiny, short, word, unsigned, quad, single, double)
    reader = StructReader(packed)

    # Reading half a byte leaves the reader unaligned; reading whole bytes must then fail.
    self.assertEqual(reader.read_nibble(), 0b101)
    with self.assertRaises(reader.Unaligned):
        reader.read_exactly(2)

    reader.seek(0)
    self.assertEqual(reader.read_byte(), 0b1100101)
    self.assertEqual(reader.i16(), -0x1337)
    self.assertEqual(reader.i32(), 0xDEFACED)
    self.assertEqual(reader.u32(), 0xC0CAC01A)
    self.assertEqual(reader.i64(), -0o1337)
    self.assertAlmostEqual(reader.read_struct('f', True), 2076.171875)
    self.assertAlmostEqual(reader.read_struct('d', True), math.pi)
    self.assertTrue(reader.eof)
def _decompress_xpress(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None) -> None:
    """
    Decompress plain (LZ77) XPRESS data from `reader` into `writer`.

    The output is produced as a side effect on `writer`; nothing is returned. If `target` is
    given, it is the number of bytes to produce in this call: decompression stops as soon as
    the writer has advanced by at least that many bytes. Otherwise, decompression continues
    until the input is exhausted. A `RuntimeError` is raised when an extended match length is
    smaller than the minimum that encoding can represent.
    """
    if target is not None:
        # Convert the relative byte count into an absolute writer position.
        target += writer.tell()
    flags = BitBufferedReader(reader)
    # Extended lengths are stored as nibbles, two per byte. After one nibble of such a byte
    # was used, the other one is kept here for the next match that requires it.
    nibble_cache = None
    while not reader.eof:
        if target is not None and writer.tell() >= target:
            return
        if not flags.next():
            # A cleared flag bit announces a literal byte.
            writer.write(reader.read(1))
            continue
        # A match is a 16-bit value: the low 3 bits are the length, the rest is the offset.
        offset, length = divmod(reader.u16(), 8)
        offset += 1
        if length == 7:
            # The 3-bit length is saturated; continue with a nibble.
            length = nibble_cache
            if length is None:
                length_pair = reader.u8()
                nibble_cache = length_pair >> 4
                length = length_pair & 0xF
            else:
                nibble_cache = None
            if length == 15:
                # The nibble is saturated as well; continue with a full byte.
                length = reader.u8()
                if length == 0xFF:
                    # The byte is saturated: the length is given as a 16-bit value, or as
                    # a 32-bit value if the 16-bit value is zero. This value includes the
                    # 15 + 7 that are added back below.
                    length = reader.u16() or reader.u32()
                    length -= 22
                    if length < 0:
                        raise RuntimeError(
                            F'Invalid match length of {length} for long delta sequence'
                        )
                length += 15
            length += 7
        # The minimum match length is 3.
        length += 3
        writer.replay(offset, length)
def __init__(self, reader: StructReader):
    """
    Parse a ZIP central directory file header from the given reader.

    The reader must be positioned at the 4-byte header signature; a `ValueError` is raised
    when it does not equal `self.SIGNATURE`. The variable-length fields `filename`, `extra`,
    and `comment` are set to `None` when they are empty.
    """
    def optional(length):
        # Read a variable-length field; absent and empty fields both become None.
        if not length:
            return None
        return reader.read(length) or None

    signature = reader.read(4)
    if signature != self.SIGNATURE:
        raise ValueError

    self.version_made_by = reader.u16()
    self.version_to_extract = reader.u16()
    self.flags = reader.u16()
    self.compression = reader.u16()
    timestamp = reader.u32()
    self.date = datefix.dostime(timestamp)
    self.crc32 = reader.u32()
    self.compressed_size = reader.u32()
    self.decompressed_size = reader.u32()

    # The three lengths are stored up front; the fields themselves follow the fixed header.
    filename_size, extra_size, comment_size = reader.u16(), reader.u16(), reader.u16()

    self.disk_nr_start = reader.u16()
    self.internal_attributes = reader.u16()
    self.external_attributes = reader.u32()
    self.header_offset = reader.u32()

    self.filename = optional(filename_size)
    self.extra = optional(extra_size)
    self.comment = optional(comment_size)
def __init__(self, reader: StructReader, offset: int, unmarshal: Unmarshal = Unmarshal.No):
    """
    Parse the archive whose header is located at the given offset of the reader.

    The constructor reads the header, builds the table of contents in `self.toc`, wraps all
    entries of type PYZ, and then populates `self.files` with the extracted entries. Unless
    `unmarshal` is `Unmarshal.No`, the contents of the PYZ archives are unpacked as well and
    added to `self.files` under the name of their archive.

    Raises a `ValueError` if the signature at `offset` is invalid or if an unpacked PYZ entry
    collides with an existing file name, and a `KeyError` if the table of contents contains
    the same name twice.
    """
    reader.bigendian = True
    reader.seekset(offset)
    self.reader = reader
    signature = reader.read_bytes(8)
    if signature != self.MagicSignature:
        raise ValueError(
            F'offset 0x{offset:X} has invalid signature {signature.hex().upper()}; '
            F'should be {self.MagicSignature.hex().upper()}')
    self.size = reader.i32()
    toc_offset = reader.i32()
    toc_length = reader.i32()
    # The version is stored as one integer; its decimal digits are joined with dots.
    # NOTE(review): a three-digit value such as 310 becomes '3.1.0' here - confirm that the
    # consumers of py_version expect this.
    self.py_version = '.'.join(str(reader.u32()))
    self.py_libname = self._read_libname(reader)
    # Presumably the start of the archive, assuming that the header which was just read is
    # the last part of it - TODO confirm.
    self.offset = reader.tell() - self.size
    self.toc: Dict[str, PiTOCEntry] = {}
    toc_end = self.offset + toc_offset + toc_length
    reader.seekset(self.offset + toc_offset)
    # Read TOC entries until the end of the table; any read error ends the table early with
    # a warning and keeps the entries that were parsed so far.
    while reader.tell() < toc_end:
        try:
            entry = PiTOCEntry(reader)
        except EOF:
            xtpyi.logger.warning('end of file while reading TOC')
            break
        except Exception as error:
            xtpyi.logger.warning(
                F'unexpected error while reading TOC: {error!s}')
            break
        if entry.name in self.toc:
            raise KeyError(F'duplicate name {entry.name}')
        self.toc[entry.name] = entry
    self.files: Dict[str, PiMeta] = {}
    no_pyz_found = True
    pyz_entries: Dict[str, PYZ] = {}
    # First pass: wrap every PYZ entry. The loop iterates over a snapshot because entries
    # may be re-keyed in self.toc below.
    for entry in list(self.toc.values()):
        if entry.type is not PiType.PYZ:
            continue
        no_pyz_found = False
        name, xt = os.path.splitext(entry.name)
        name_pyz = F'{name}.pyz'
        if name == entry.name:
            # The entry name has no extension: register it with a .pyz extension instead.
            del self.toc[name]
            self.toc[name_pyz] = entry
            entry.name = name_pyz
        reader.seekset(self.offset + entry.offset)
        if entry.is_compressed:
            data = self.extract(entry.name).unpack()
        else:
            # The reader itself, now positioned at the entry, is handed to PYZ.
            data = reader
        pyz_entries[name] = PYZ(data, self.py_version)
    magics = {pyz.magic for pyz in pyz_entries.values()}
    if not magics:
        # The warning is only issued when there were PYZ entries to recover a magic from.
        if not no_pyz_found:
            xtpyi.logger.warning(
                'no magic signature could be recovered from embedded pyzip archives; this is '
                'unsual and means that there is no way to guess the missing magic for source '
                'file entries and it will likely not be possible to decompile them.'
            )
        return
    elif len(magics) > 1:
        xtpyi.logger.warning(
            'more than one magic signature was recovered; this is unusual.'
        )
    magics = list(magics)
    keys: Set[bytes] = set()
    # Second pass: extract every entry. SOURCE and MODULE entries are stored as .pyc files
    # and get an additional decompiled .py item; all other entries are stored as they are.
    for entry in self.toc.values():
        extracted = self.extract(entry.name)
        if entry.type not in (PiType.SOURCE, PiType.MODULE):
            self.files[entry.name] = extracted
            continue
        data = extracted.unpack()
        name, _ = os.path.splitext(extracted.name)
        # Presumably self.extract has already registered the item in self.files under its
        # original name - verify against the implementation of extract.
        del self.files[extracted.name]
        extracted.name = F'{name}.pyc'
        self.files[extracted.name] = extracted
        # With exactly one known magic, it is prepended to data that does not start with it.
        if len(magics) == 1 and data[:4] != magics[0]:
            extracted.data = magics[0] + data
        decompiled = make_decompiled_item(name, data, *magics)
        if entry.type is PiType.SOURCE:
            decompiled.type = PiType.USERCODE
        self.files[F'{name}.py'] = decompiled
        if name.endswith('crypto_key'):
            # Every carved string of exactly 16 bytes is collected as a key candidate.
            for key in decompiled.unpack() | carve('string', decode=True):
                if len(key) != 0x10:
                    continue
                xtpyi.logger.info(F'found key: {key.decode(xtpyi.codec)}')
                keys.add(key)
    if unmarshal is Unmarshal.No:
        return
    # If several key candidates were found, an arbitrary one of them is used.
    if not keys:
        key = None
    else:
        key = next(iter(keys))
    # Unpack the PYZ archives and register their entries as <archive name>/<entry name>.
    for name, pyz in pyz_entries.items():
        pyz.unpack(unmarshal is Unmarshal.YesAndDecompile, key)
        for unpacked in pyz.entries:
            unpacked.name = path = F'{name}/{unpacked.name}'
            if path in self.files:
                raise ValueError(F'duplicate file name: {path}')
            self.files[path] = unpacked
def _decompress_xpress_huffman(self, reader: StructReader, writer: MemoryFile, target: Optional[int] = None, max_chunk_size: int = 0x10000) -> None:
    """
    Decompress Huffman-coded XPRESS data from `reader` into `writer`.

    The input is processed in chunks. Each chunk starts with a Huffman table and is expected
    to produce `max_chunk_size` bytes of output before the next table is read. If `target` is
    given, it is the number of bytes to produce in this call, and the method returns as soon
    as the writer has advanced by exactly that many bytes. Raises an `IndexError` when there
    is not enough input left to read a Huffman table.
    """
    # The writer position at which the current chunk ends; advanced for every chunk.
    limit = writer.tell()
    if target is not None:
        # Convert the relative byte count into an absolute writer position.
        target += limit
    while not reader.eof:
        # The table holds one 4-bit code length per symbol, i.e. two symbols per byte.
        if reader.remaining_bytes < XPRESS_NUM_SYMBOLS // 2:
            raise IndexError(
                F'There are only {reader.remaining_bytes} bytes reamining in the input buffer,'
                F' but at least {XPRESS_NUM_SYMBOLS//2} are required to read a Huffman table.'
            )
        table = bytearray(
            reader.read_integer(4) for _ in range(XPRESS_NUM_SYMBOLS))
        table = make_huffman_decode_table(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
        limit = limit + max_chunk_size
        flags = BitBufferedReader(reader, 16)
        while True:
            position = writer.tell()
            if position == target:
                if reader.remaining_bytes:
                    self.log_info(
                        F'chunk decompressed with {reader.remaining_bytes} bytes remaining in input buffer'
                    )
                return
            if position >= limit:
                # The chunk is complete. If a match overshot the limit, the limit is moved
                # so that the next chunk is measured from the actual position.
                if position > limit:
                    limit = position
                    self.log_info(
                        F'decompression of one chunk generated more than the limit of {max_chunk_size} bytes'
                    )
                # NOTE(review): collect() presumably synchronizes the bit buffer with the
                # underlying reader - confirm against BitBufferedReader.
                flags.collect()
                break
            try:
                sym = flags.huffman_symbol(table, XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN)
            except EOFError:
                self.log_debug('end of file while reading huffman symbol')
                break
            if sym < XPRESS_NUM_CHARS:
                # Symbols below XPRESS_NUM_CHARS are literal bytes.
                writer.write_byte(sym)
                continue
            # Match symbol: the low nibble is the length, the next nibble is the number of
            # extra offset bits.
            length = sym & 0xF
            offsetlog = (sym >> 4) & 0xF
            flags.collect()
            if reader.eof:
                break
            # The offset has an implicit leading one bit, followed by offsetlog extra bits.
            offset = (1 << offsetlog) | flags.read(offsetlog)
            if length == 0xF:
                # The length nibble is saturated: an extension byte follows. If that byte
                # is saturated too, the length is given as a 16-bit value, or as a 32-bit
                # value if the 16-bit value is zero.
                nudge = reader.read_byte()
                if nudge < 0xFF:
                    length += nudge
                else:
                    length = reader.u16() or reader.u32()
            length += XPRESS_MIN_MATCH_LEN
            writer.replay(offset, length)
def __init__(self, reader: StructReader):
    """
    Parse an attribute record: a 16-bit name value, followed by a data blob that is prefixed
    by its 32-bit length.
    """
    # NOTE(review): the name is stored as the raw 16-bit value; presumably an index that is
    # resolved elsewhere - confirm.
    name = reader.u16()
    size = reader.u32()
    self.name = name
    self.data = reader.read(size)