def _read_libname(self, reader: StructReader) -> Optional[str]:
    """
    Try to read a library name from a 64-byte field at the reader's current position.

    The field is accepted only if all of the following hold:

    - the bytes before the first null byte decode as UTF-8,
    - a null terminator is present,
    - the bytes after the terminator are all zero and there are at least 10 of them,
    - the decoded name consists solely of whitespace and printable ASCII characters.

    When the field is rejected, or when the reader raises `EOF` while reading it, the reader is
    moved back to the position it had on entry and `None` is returned. On success, the decoded
    name is returned and the reader is not rewound.
    """
    start = reader.tell()

    def parse() -> Optional[str]:
        # Returns the validated name, or None if any check fails; rewinding is left to the caller.
        try:
            raw, terminator, padding = reader.read_bytes(64).partition(B'\0')
        except EOF:
            return None
        try:
            text = raw.decode('utf8')
        except Exception:
            return None
        if not terminator:
            return None
        if any(padding) or len(padding) < 10:
            return None
        if not re.fullmatch(R'[\s!-~]+', text):
            return None
        return text

    result = parse()
    if result is None:
        # A successful parse always yields a non-empty string, so None reliably means failure.
        reader.seekset(start)
    return result
def __init__(self, reader: StructReader, offset: int, unmarshal: Unmarshal = Unmarshal.No):
    """
    Parse an archive whose trailing header starts at the given offset of the reader.

    The reader is switched to big endian and positioned at `offset`. After the 8-byte signature
    has been checked against `MagicSignature`, the header fields are read in this order: archive
    size, TOC offset, TOC length, a version number, and an optional library name. The base offset
    of the archive is computed as the reader position after the header minus the archive size.

    The constructor then:

    1. reads TOC entries into `self.toc` until the end of the TOC, stopping early with a warning
       on `EOF` or any other error,
    2. wraps every entry of type `PiType.PYZ` in a `PYZ` object, renaming extensionless entries
       to carry a `.pyz` suffix,
    3. returns early if no magic value was collected from these `PYZ` objects,
    4. extracts every TOC entry into `self.files`; entries of type `SOURCE` and `MODULE` are
       stored as `.pyc` and additionally as a decompiled `.py` item,
    5. collects 16-byte strings carved from decompiled entries whose name ends in `crypto_key`,
    6. unless `unmarshal` is `Unmarshal.No`, unpacks every `PYZ` and registers its entries in
       `self.files` under `<archive name>/<entry name>`.

    Raises `ValueError` for a wrong signature or a duplicate file path, and `KeyError` for a
    duplicate TOC entry name.
    """
    reader.bigendian = True
    reader.seekset(offset)
    self.reader = reader

    signature = reader.read_bytes(8)
    if signature != self.MagicSignature:
        raise ValueError(
            F'offset 0x{offset:X} has invalid signature {signature.hex().upper()}; '
            F'should be {self.MagicSignature.hex().upper()}')

    # The order of these reads mirrors the layout of the header and must not be changed.
    self.size = reader.i32()
    toc_start = reader.i32()
    toc_size = reader.i32()
    # The decimal digits of the version number are joined with dots.
    self.py_version = '.'.join(str(reader.u32()))
    self.py_libname = self._read_libname(reader)
    self.offset = reader.tell() - self.size

    self.toc: Dict[str, PiTOCEntry] = {}
    toc_base = self.offset + toc_start
    toc_limit = toc_base + toc_size
    reader.seekset(toc_base)

    while reader.tell() < toc_limit:
        try:
            entry = PiTOCEntry(reader)
        except EOF:
            xtpyi.logger.warning('end of file while reading TOC')
            break
        except Exception as error:
            xtpyi.logger.warning(F'unexpected error while reading TOC: {error!s}')
            break
        else:
            if entry.name in self.toc:
                raise KeyError(F'duplicate name {entry.name}')
            self.toc[entry.name] = entry

    self.files: Dict[str, PiMeta] = {}
    archives: Dict[str, PYZ] = {}
    pyz_seen = False

    # Iterate over a snapshot because extensionless PYZ entries are re-keyed in self.toc below.
    for entry in list(self.toc.values()):
        if entry.type is not PiType.PYZ:
            continue
        pyz_seen = True
        name, _ = os.path.splitext(entry.name)
        if name == entry.name:
            # The entry has no extension: register it under a name ending in ".pyz" instead.
            renamed = F'{name}.pyz'
            del self.toc[name]
            self.toc[renamed] = entry
            entry.name = renamed
        reader.seekset(self.offset + entry.offset)
        # Uncompressed archives are parsed straight from the reader at its current position.
        data = self.extract(entry.name).unpack() if entry.is_compressed else reader
        archives[name] = PYZ(data, self.py_version)

    magic_set = {pyz.magic for pyz in archives.values()}

    if not magic_set:
        # NOTE(review): each PYZ entry seen above contributes one value to the set, so it can only
        # be empty when no PYZ entry was seen; this warning therefore looks unreachable. Confirm
        # the intended condition.
        if pyz_seen:
            xtpyi.logger.warning(
                'no magic signature could be recovered from embedded pyzip archives; this is '
                'unsual and means that there is no way to guess the missing magic for source '
                'file entries and it will likely not be possible to decompile them.')
        return
    if len(magic_set) > 1:
        xtpyi.logger.warning('more than one magic signature was recovered; this is unusual.')

    magics = list(magic_set)
    keys: Set[bytes] = set()

    for entry in self.toc.values():
        item = self.extract(entry.name)
        if entry.type not in (PiType.SOURCE, PiType.MODULE):
            self.files[entry.name] = item
            continue
        data = item.unpack()
        name, _ = os.path.splitext(item.name)
        # Presumably self.extract registered the item in self.files under its old name (it is
        # defined outside this method); it is re-registered here with a ".pyc" extension.
        del self.files[item.name]
        item.name = F'{name}.pyc'
        self.files[item.name] = item
        if len(magics) == 1 and data[:4] != magics[0]:
            # Prepend the single known magic when the data does not already start with it.
            item.data = magics[0] + data
        decompiled = make_decompiled_item(name, data, *magics)
        if entry.type is PiType.SOURCE:
            decompiled.type = PiType.USERCODE
        self.files[F'{name}.py'] = decompiled
        if name.endswith('crypto_key'):
            for key in decompiled.unpack() | carve('string', decode=True):
                if len(key) == 0x10:
                    xtpyi.logger.info(F'found key: {key.decode(xtpyi.codec)}')
                    keys.add(key)

    if unmarshal is Unmarshal.No:
        return

    # Use an arbitrary one of the recovered keys, or None if no key was found.
    key = next(iter(keys), None)
    decompile = unmarshal is Unmarshal.YesAndDecompile

    for name, pyz in archives.items():
        pyz.unpack(decompile, key)
        for unpacked in pyz.entries:
            path = F'{name}/{unpacked.name}'
            unpacked.name = path
            if path in self.files:
                raise ValueError(F'duplicate file name: {path}')
            self.files[path] = unpacked
def process(self, data: bytearray):
    """
    Parse records from the input according to the format in `self.args.spec` and yield one
    labelled chunk per output template and record.

    The specification is split with `string.Formatter.parse`. For every part:

    - the literal text before a field is read as a struct format and its values are appended to
      the positional arguments,
    - the conversion of a field, if present, is evaluated and passed to `reader.byte_align`,
    - the format specification of a field is formatted with the current variables and then
      evaluated as an expression where possible: an empty specification reads all remaining
      data, an integer reads that many bytes, and anything else is read as a struct format.

    An optional leading byte order character of the specification (one of `<!=@>`) is applied to
    every struct format that does not define its own; the default is `=`.

    Fields with a decimal name overwrite the positional argument with that index, other named
    fields are stored as meta variables. A field named `_SHARP` is rejected with a `ValueError`.

    Parsing repeats while `self.args.multi` is set, the reader has data left, and fewer than
    `self.args.count` records were produced. It stops early when the `until` expression evaluates
    to a false value, or when the reader raises `EOF`, in which case the rest of the buffer is
    discarded.
    """
    formatter = string.Formatter()
    until = self.args.until
    until = until and PythonExpression(until, all_variables_allowed=True)
    reader = StructReader(memoryview(data))
    mainspec = self.args.spec
    byteorder = mainspec[:1]
    if byteorder in '<!=@>':
        mainspec = mainspec[1:]
    else:
        byteorder = '='

    def fixorder(spec):
        # Prefix the global byte order unless the format specifies its own.
        if spec[0] not in '<!=@>':
            spec = byteorder + spec
        return spec

    it = itertools.count() if self.args.multi else (0, )

    for index in it:
        if reader.eof:
            break
        if index >= self.args.count:
            break
        meta = metavars(data, ghost=True)
        meta['index'] = index
        args = []
        # The most recently read raw byte string field, if any.
        last = None
        # Start of this record; used to re-read the whole record and to report leftovers.
        checkpoint = reader.tell()
        try:
            for prefix, name, spec, conversion in formatter.parse(mainspec):
                if prefix:
                    args.extend(reader.read_struct(fixorder(prefix)))
                if name is None:
                    continue
                if conversion:
                    reader.byte_align(PythonExpression.evaluate(conversion, meta))
                if spec:
                    spec = meta.format_str(spec, self.codec, args)
                if spec != '':
                    try:
                        spec = PythonExpression.evaluate(spec, meta)
                    except ParserError:
                        # Not an expression; the specification is used as a struct format.
                        pass
                if spec == '':
                    last = value = reader.read()
                elif isinstance(spec, int):
                    last = value = reader.read_bytes(spec)
                else:
                    value = reader.read_struct(fixorder(spec))
                    if not value:
                        self.log_warn(F'field {name} was empty, ignoring.')
                        continue
                    if len(value) > 1:
                        self.log_info(
                            F'parsing field {name} produced {len(value)} items reading a tuple')
                    else:
                        value = value[0]

                args.append(value)

                if name == _SHARP:
                    raise ValueError('Extracting a field with name # is forbidden.')
                elif name.isdecimal():
                    # A dedicated name is used for the positional slot: reusing `index` here
                    # would clobber the record index that is passed to set_next_batch below.
                    slot = int(name)
                    limit = len(args) - 1
                    if slot > limit:
                        self.log_warn(
                            F'cannot assign index field {name}, the highest index is {limit}')
                    else:
                        args[slot] = value
                    continue
                elif name:
                    meta[name] = value

            if until and not until(meta):
                self.log_info(F'the expression ({until}) evaluated to zero; aborting.')
                break

            # Re-read everything consumed for this record. Presumably StreamDetour temporarily
            # moves the reader to the checkpoint and exposes the previous position as `cursor`;
            # it is defined outside this method.
            with StreamDetour(reader, checkpoint) as detour:
                full = reader.read(detour.cursor - checkpoint)
            if last is None:
                last = full

            outputs = []

            for template in self.args.outputs:
                used = set()
                outputs.append(meta.format(
                    template, self.codec, [full, *args], {_SHARP: last}, True, used=used))
                # Variables consumed by a template are not attached to the emitted chunks.
                for key in used:
                    meta.pop(key, None)

            for output in outputs:
                chunk = self.labelled(output, **meta)
                chunk.set_next_batch(index)
                yield chunk

        except EOF:
            leftover = repr(SizeInt(len(reader) - checkpoint)).strip()
            self.log_info(F'discarding {leftover} left in buffer')
            break