示例#1
0
 def _extract_ole(self, data: bytearray) -> str:
     """
     Extract the text of a document stored as an OLE compound file.

     The `WordDocument` stream is read in full. Bit 1 of its byte at offset 11 selects between
     the streams `0Table` and `1Table`, and two 32-bit integers located at offset 0x1A2 are used
     as the offset and the length of a region inside that table stream. This region is handed
     to `_load_piece_table` and the result is passed on to `_get_text` together with the
     contents of the `WordDocument` stream.
     """
     with self._olefile.OleFileIO(MemoryFile(data)) as ole:
         word_document = ole.openstream('WordDocument').read()
         table_index = (word_document[11] >> 1) & 1
         with StructReader(word_document) as header:
             header.seek(0x1A2)
             table_offset = header.u32()
             table_length = header.u32()
         table_stream = ole.openstream(F'{table_index}Table').read()
         with StructReader(table_stream) as tables:
             tables.seek(table_offset)
             region = tables.read(table_length)
         return self._get_text(word_document, self._load_piece_table(region))
示例#2
0
 def process(self, data):
     """
     Decompress a stream that consists of flag bytes followed by up to eight items each.

     The bits of every flag byte are inspected from the least significant to the most
     significant one. A cleared bit is followed by one literal byte. A set bit is followed by a
     back-reference made of a 6-bit length (to which `_MATCH_MIN` is added) and a 10-bit
     distance, both read in big endian mode. The distance is counted backwards from the end of
     the output. A `ValueError` is raised when a back-reference occurs before any output
     exists, and a `RuntimeError` when the distance is zero or exceeds the output size.
     """
     output = bytearray()
     reader = StructReader(data)
     while not reader.eof:
         flags = reader.read_byte()
         for bit in range(8):
             if reader.eof:
                 break
             if flags & (1 << bit):
                 if not output:
                     raise ValueError('copy requested against empty buffer')
                 with reader.be:
                     remaining = reader.read_integer(6) + _MATCH_MIN
                     distance = reader.read_integer(10)
                 if distance == 0 or distance > len(output):
                     raise RuntimeError(F'invalid match offset at position {reader.tell()}')
                 cursor = len(output) - distance
                 # A match may be longer than its distance; it is then copied piece by piece
                 # so that freshly written bytes become available as a source.
                 while remaining > 0:
                     piece = output[cursor:cursor + remaining]
                     output.extend(piece)
                     cursor += len(piece)
                     remaining -= len(piece)
             else:
                 output.append(reader.read_byte())
     return output
示例#3
0
 def process(self, data: bytearray):
     """
     Decompress the input, honoring an optional header that starts with the magic bytes `JC`.

     When the magic is present, it is followed by a 32-bit size and a 32-bit checksum. When it
     is missing, a warning is logged and the whole input is treated as compressed data. The
     size is discarded if `self.args.ignore_header` is set. Output that is longer than the
     header size is truncated, shorter output and checksum mismatches only produce warnings.
     Returns the decompressed data.
     """
     with MemoryFile() as output, StructReader(data) as reader:
         if reader.read(2) != B'JC':
             self.log_warn(
                 'data does not begin with magic sequence, assuming that header is missing'
             )
             reader.seek(0)
             size = checksum = None
         else:
             size = reader.u32()
             checksum = reader.u32()
         if self.args.ignore_header:
             size = None
         self._decompress(output, reader, size)
         if size is not None:
             if len(output) > size:
                 self.log_info(F'truncating to size {size}')
                 output.truncate(size)
             elif len(output) < size:
                 # Report the amount of decompressed output here, not the size of the input.
                 self.log_warn(
                     F'header size was {size}, but only {len(output)} bytes were decompressed'
                 )
         data = output.getvalue()
         # A missing header and a checksum of zero both skip the verification.
         if checksum:
             c = self._checksum(data)
             if c != checksum:
                 self.log_warn(
                     F'header checksum was {checksum:08X}, computed value is {c:08X}'
                 )
         return data
示例#4
0
 def __init__(
     self,
     buffer: Union[bytearray, StructReader],
     bits_per_read: int = 32,
 ):
     """
     Set up the reader with an empty bit buffer.

     A `StructReader` is used as it is; any other buffer is wrapped in a `memoryview` and then
     in a new `StructReader` that is created with `bigendian=False`. The value of
     `bits_per_read` is only stored by the constructor.
     """
     if isinstance(buffer, StructReader):
         reader = buffer
     else:
         reader = StructReader(memoryview(buffer), bigendian=False)
     self._reader: StructReader[memoryview] = reader
     self._bits_per_read = bits_per_read
     self._bit_buffer_size: int = 0
     self._bit_buffer_data: int = 0
示例#5
0
    def unpack(self, data):
        """
        Yield one packed item for each `CPIOEntry` that can be parsed from the input.

        Parsing ends quietly when constructing an entry raises `EOF`, and it also ends at the
        first entry whose name is `TRAILER!!!`; that entry is not yielded.
        """
        reader = StructReader(memoryview(data))
        while True:
            try:
                entry = CPIOEntry(reader)
            except EOF:
                return
            if entry.name == 'TRAILER!!!':
                return
            yield self._pack(entry.name, entry.mtime, entry.data)
示例#6
0
文件: java.py 项目: binref/refinery
 def __init__(self, reader: StructReader):
     """
     Parse the structure from the given reader, which is switched to big endian mode.

     Two 16-bit values are read into `max_stack` and `max_locals`. A 32-bit length follows and
     that many bytes are disassembled into `JvOpCode` objects until they are exhausted. Then
     come a 16-bit count of `JvException` records and a 16-bit count of `JvAttribute` records.
     """
     reader.bigendian = True
     self.max_stack = reader.u16()
     self.max_locals = reader.u16()
     self.disassembly: List[JvOpCode] = []
     code_size = reader.u32()
     code_data = reader.read(code_size)
     with StructReader(code_data) as code:
         code.bigendian = True
         while not code.eof:
             opcode = JvOpCode(code, pool=self.pool)
             self.disassembly.append(opcode)
     exception_count = reader.u16()
     self.exceptions = [JvException(reader) for _ in range(exception_count)]
     attribute_count = reader.u16()
     self.attributes = [JvAttribute(reader) for _ in range(attribute_count)]
示例#7
0
 def _load_piece_table(self, table: bytes) -> bytes:
     """
     Locate the piece table inside the given table data and return its raw contents.

     The data is walked as a sequence of entries that each begin with a one-byte type:

     - Type 1 entries are skipped. In the MS-DOC format, such an entry (a `Prc`) stores the size
       of its payload as a 16-bit integer (`cbGrpprl`), which is why two bytes are read for it.
     - A type 2 entry stores a 32-bit length followed by the piece table, which is returned.

     Any other type raises a `NotImplementedError`. A `ValueError` is raised when the data ends
     before a type 2 entry was found; previously `None` was returned in this case, which
     contradicted the return annotation.
     """
     with StructReader(table) as reader:
         while not reader.eof:
             entry_type = reader.read_byte()
             if entry_type == 1:
                 # The size field is two bytes wide; reading a single byte would leave the
                 # cursor in the middle of the entry for any payload.
                 reader.seekrel(reader.u16())
                 continue
             if entry_type == 2:
                 length = reader.u32()
                 return reader.read(length)
             raise NotImplementedError(
                 F'Unsupported table entry type value 0x{entry_type:X}.')
     raise ValueError('The table data does not contain a piece table entry.')
示例#8
0
 def process(self, data):
     """
     Decompress an SZDD archive.

     The input has to start with the 8-byte SZDD signature unless `self.args.lenient` is set,
     in which case a missing signature is only logged. The next byte must be 0x41, one more
     byte is skipped, and a 32-bit output length follows. The payload is decoded with a window
     of 0x1000 bytes that is initially filled with spaces and written to starting at position
     0xFF0. Each control byte describes up to eight items, least significant bit first: a set
     bit is followed by a literal byte, a cleared bit by a two-byte reference into the window
     that encodes a 12-bit position and a length between 3 and 18.
     """
     with StructReader(data) as archive:
         signature = archive.read(8)
         if signature != b'SZDD\x88\xF0\x27\x33':
             if not self.args.lenient:
                 raise ValueError('signature missing')
             self.log_fail(
                 'the header signature is invalid, this is likely not an SZDD archive'
             )
         mode = archive.read_byte()
         if mode != 0x41:
             raise ValueError('Unsupported compression mode')
         # skip the byte that holds the missing file extension letter
         archive.seekrel(1)
         output = bytearray(archive.u32())
         window = bytearray(B'\x20' * 0x1000)
         cursor = 0x1000 - 0x10
         written = 0
         while not archive.eof:
             control = archive.read_byte()
             for bit in range(8):
                 if archive.eof:
                     break
                 if (control >> bit) & 1:
                     literal = archive.read_byte()
                     output[written] = literal
                     window[cursor] = literal
                     written += 1
                     cursor = (cursor + 1) & 0xFFF
                 else:
                     lo = archive.read_byte()
                     hi = archive.read_byte()
                     # the upper nibble of the second byte extends the position to 12 bits
                     source = (lo | ((hi & 0xF0) << 4)) & 0xFFF
                     for _ in range((hi & 0x0F) + 3):
                         value = window[source]
                         window[cursor] = value
                         output[written] = value
                         written += 1
                         cursor = (cursor + 1) & 0xFFF
                         source = (source + 1) & 0xFFF
         return output
示例#9
0
 def test_bitreader_le(self):
     # The underscore-separated groups of this literal are, from right to left, the values
     # that the reads below are expected to return in order.
     data = 0b10010100111010100100001111101_11_00000000_0101010101010010010111100000101001010101100000001110010111110100_111_000_100
     size, remainder = divmod(data.bit_length(), 8)
     # The literal has to occupy a whole number of bytes.
     self.assertEqual(remainder, 0)
     data = memoryview(data.to_bytes(size, 'little'))
     sr = StructReader(data)
     # Three 3-bit integers, taken from the low end of the number.
     self.assertEqual(sr.read_integer(3), 0b100)
     self.assertEqual(sr.read_integer(3), 0b000)
     self.assertEqual(sr.read_integer(3), 0b111)
     # After 9 bits have been consumed, a 64-bit read is expected to return the next group.
     self.assertEqual(
         sr.u64(),
         0b0101010101010010010111100000101001010101100000001110010111110100)
     # The group of eight zero bits is read as flags, none of which may be set.
     self.assertFalse(any(sr.read_flags(8, reverse=True)))
     self.assertEqual(sr.read_bit(), 1)
     # An empty struct format must be rejected; the remaining reads still have to line up
     # with the literal afterwards.
     self.assertRaises(ValueError, lambda: sr.read_struct(''))
     self.assertEqual(sr.read_bit(), 1)
     self.assertEqual(sr.read_integer(29), 0b10010100111010100100001111101)
     # All bits of the input have been consumed at this point.
     self.assertTrue(sr.eof)
示例#10
0
 def test_bitreader_be(self):
     # The underscore-separated groups of this literal are, from left to right, the values
     # that the reads below are expected to return in order.
     data = 0b01010_10011101_0100100001_1111_0111101010000101010101010010010111100000101001010101100000001110010111110100111000_101
     size, remainder = divmod(data.bit_length(), 8)
     # The literal starts with a zero bit which bit_length() does not count; this explains
     # the remainder of 7 and the extra byte in the conversion below.
     self.assertEqual(remainder, 7)
     data = memoryview(data.to_bytes(size + 1, 'big'))
     sr = StructReader(data)
     with sr.be:
         # The first group is consumed one bit at a time.
         self.assertEqual(sr.read_bit(), 0)
         self.assertEqual(sr.read_bit(), 1)
         self.assertEqual(sr.read_bit(), 0)
         self.assertEqual(sr.read_bit(), 1)
         self.assertEqual(sr.read_bit(), 0)
         # A byte is read although 5 bits have been consumed so far.
         self.assertEqual(sr.read_byte(), 0b10011101)
         self.assertEqual(sr.read_integer(10), 0b100100001)
         # The group of four set bits is read as flags, all of which must be set.
         self.assertTrue(all(sr.read_flags(4)))
         self.assertEqual(
             sr.read_integer(82),
             0b0111101010000101010101010010010111100000101001010101100000001110010111110100111000
         )
         # Only the last group of the literal is left, which is not enough for a 16-bit read.
         self.assertRaises(EOF, sr.u16)
示例#11
0
文件: lzip.py 项目: binref/refinery
    def process(self, data: bytearray):
        """
        Decompress all LZIP members contained in the input and return the combined output.

        Every member starts with a 6-byte header (magic `LZIP`, version, encoded dictionary
        size), is decoded by a `MemberDecoder`, and ends with a trailer holding a CRC32, the
        data size, and the member size. An invalid magic or version in the first member only
        produces a warning. Starting with the second member, an invalid magic or a premature
        end of the input is treated as trailing data, which is reported and ignored. Trailer
        mismatches are logged as warnings. A `ValueError` is raised for a dictionary size
        outside of the valid range and when the decoder reports a data error.
        """
        view = memoryview(data)
        with MemoryFile() as output, StructReader(view) as reader:
            for k in count(1):
                if reader.eof:
                    break
                # remembered for the log message in case this member turns out to be garbage
                trailing_size = len(data) - reader.tell()
                try:
                    ID, VN, DS = reader.read_struct('4sBB')
                    if ID != B'LZIP':
                        if k > 1:
                            raise EOF
                        else:
                            self.log_warn(F'ignoring invalid LZIP signature: {ID.hex()}')
                    if VN != 1:
                        self.log_warn(F'ignoring invalid LZIP version: {VN}')
                    # The low 5 bits hold the base-2 logarithm of the base size, the high 3
                    # bits the number of sixteenths of the base size that are subtracted.
                    dict_size = 1 << (DS & 0x1F)
                    dict_size -= (dict_size // 16) * ((DS >> 5) & 7)
                    if dict_size not in range(_MIN_DICT_SIZE, _MAX_DICT_SIZE + 1):
                        raise ValueError(
                            F'The dictionary size {dict_size} is out of the valid range '
                            F'[{_MIN_DICT_SIZE}, {_MAX_DICT_SIZE}]; unable to proceed.'
                        )
                    decoder = MemberDecoder(dict_size, reader, output)
                    if not decoder():
                        raise ValueError(F'Data error in stream {k}.')
                    crc32, data_size, member_size = reader.read_struct('<LQQ')
                    if crc32 != decoder.crc32:
                        # Format the same attribute that was compared; the message used to
                        # refer to `decoder.crc` instead.
                        self.log_warn(F'checksum in stream {k} was {decoder.crc32:08X}, should have been {crc32:08X}.')
                    if member_size - 20 != decoder.member_position:
                        self.log_warn(F'member size in stream {k} was {decoder.member_position}, should have been {member_size}.')
                    if data_size != decoder.data_position:
                        self.log_warn(F'data size in stream {k} was {decoder.data_position}, should have been {data_size}.')
                except EOF:
                    if k <= 1:
                        raise
                    self.log_info(F'silently ignoring {trailing_size} bytes of trailing data')
                    break

            return output.getvalue()
示例#12
0
 def test_bitreader_structured(self):
     # One value for each code of the little endian struct format that is used below.
     expected_byte = 0b1100101
     expected_i16 = -0x1337
     expected_i32 = 0xDEFACED
     expected_u32 = 0xC0CAC01A
     expected_i64 = -0o1337
     expected_f32 = 2076.171875
     expected_f64 = math.pi
     packed = struct.pack(
         '<bhiLqfd',
         expected_byte,
         expected_i16,
         expected_i32,
         expected_u32,
         expected_i64,
         expected_f32,
         expected_f64,
     )
     reader = StructReader(packed)
     # Reading a nibble leaves the cursor inside a byte, where an exact read must fail.
     self.assertEqual(reader.read_nibble(), 0b101)
     with self.assertRaises(reader.Unaligned):
         reader.read_exactly(2)
     # Start over and read every value back with the matching accessor.
     reader.seek(0)
     self.assertEqual(reader.read_byte(), expected_byte)
     self.assertEqual(reader.i16(), expected_i16)
     self.assertEqual(reader.i32(), expected_i32)
     self.assertEqual(reader.u32(), expected_u32)
     self.assertEqual(reader.i64(), expected_i64)
     self.assertAlmostEqual(reader.read_struct('f', True), expected_f32)
     self.assertAlmostEqual(reader.read_struct('d', True), expected_f64)
     self.assertTrue(reader.eof)
示例#13
0
 def _get_text(self, doc: bytes, piece_table: bytes) -> str:
     """
     Assemble the document text from the pieces that the piece table describes.

     The piece table consists of n+1 character positions of 4 bytes each, followed by n piece
     descriptors of 8 bytes each. Only the 32-bit value in the middle of each descriptor is
     used: bit 30 marks the piece as 8-bit text, the low 30 bits are the offset of the piece
     in `doc`. For 8-bit text, the offset has to be halved and every character occupies one
     byte that is decoded as cp1252; otherwise, every character occupies two bytes that are
     decoded as little endian UTF-16. Carriage returns are converted to line feeds.
     """
     piece_count: int = 1 + (len(piece_table) - 4) // 12
     with StringIO() as text:
         with StructReader(piece_table) as reader:
             character_positions = [
                 reader.u32() for _ in range(piece_count)
             ]
             for cp_start, cp_end in zip(character_positions, character_positions[1:]):
                 fc_value = reader.read_struct('xxLxx', unwrap=True)
                 is_ansi = bool((fc_value >> 30) & 1)
                 # Bit 31 is reserved in MS-DOC and has to be ignored, so it is masked out
                 # together with the compression flag.
                 fc = fc_value & 0x3FFFFFFF
                 cb = cp_end - cp_start
                 if is_ansi:
                     encoding = 'cp1252'
                     fc = fc // 2
                 else:
                     # The generic UTF-16 codec would interpret a leading byte order mark
                     # and otherwise use the native byte order of the platform; the text
                     # is always stored in little endian byte order.
                     encoding = 'utf-16le'
                     cb *= 2
                 raw = doc[fc:fc + cb]
                 text.write(raw.decode(encoding).replace('\r', '\n'))
         return text.getvalue()
示例#14
0
    def decompress_stream(self, data: ByteString, LZOv1: bool = False) -> bytearray:
        """
        An implementation of LZO decompression. We use the article
        "[LZO stream format as understood by Linux's LZO decompressor](https://www.kernel.org/doc/html/latest/staging/lzo.html)"
        as a reference since no proper specification is available.

        The parameter `data` is the compressed stream. When `LZOv1` is set, the zero run
        encoding that the reference article describes for bitstream version 1 is recognized.
        The output buffer is returned as soon as the end of stream instruction is found. An
        `LZOError` is raised for malformed input that the decoder detects.
        """
        def integer() -> int:
            # Variable length integer: every zero byte counts as 255, the first nonzero byte
            # is added and terminates the encoding.
            length = 0
            while True:
                byte = src.read_byte()
                if byte:
                    return length + byte
                length += 0xFF
                if length > 0x100000:
                    raise LZOError('Too many zeros in integer encoding.')

        def literal(count):
            dst.write(src.read_bytes(count))

        def copy(distance: int, length: int):
            if distance > len(dst):
                raise LZOError(F'Distance {distance} > bufsize {len(dst)}')
            # NOTE(review): the code below relies on slices of this buffer being independent
            # and mutable copies; confirm against the implementation of MemoryFile.
            buffer = dst.getbuffer()
            if distance > length:
                start = len(buffer) - distance
                end = start + length
                dst.write(buffer[start:end])
            else:
                # The match overlaps the data it produces: repeat the last `distance` bytes
                # until the requested length is reached, then trim any excess.
                block = buffer[-distance:]
                while len(block) < length:
                    block += block[:length - len(block)]
                if len(block) > length:
                    block[length:] = ()
                dst.write(block)

        src = StructReader(memoryview(data))
        dst = MemoryFile()

        # The state is the number of literals that the previous instruction copied; every
        # value of 4 or more stands for "4 or more literals".
        state = 0
        first = src.read_byte()

        if first == 0x10:
            raise LZOError('Invalid first stream byte 0x10.')
        elif first <= 0x12:
            src.seekrel(-1)
        elif first <= 0x15:
            state = first - 0x11
            literal(state)
        else:
            state = 4
            literal(first - 0x11)

        while True:
            instruction = src.read_byte()
            if instruction < 0x10:
                if state == 0:
                    # A long literal run; it is copied by the literal() call at the end of
                    # the loop body, which also leaves the state at 4 or more.
                    length = instruction or integer() + 15
                    state = length + 3
                    if state < 4:
                        raise LZOError('Literal encoding is too short.')
                else:
                    # The interpretation depends on the literal count of the PREVIOUS
                    # instruction, so it must be saved before the state is overwritten.
                    previous = state
                    state = instruction & 0b0011
                    D = (instruction & 0b1100) >> 2
                    H = src.read_byte()
                    distance = (H << 2) + D + 1
                    if previous >= 4:
                        distance += 0x800
                        length = 3
                    else:
                        length = 2
                    copy(distance, length)
            elif instruction < 0x20:
                L = instruction & 0b0111
                H = instruction & 0b1000
                zero_run = False
                if LZOv1 and H:
                    # In bitstream version 1, an instruction with H set whose 16-bit word has
                    # all distance bits set encodes a run of zeros. This must be tested
                    # before any length extension bytes are consumed, because the
                    # instruction is then followed by exactly three more bytes.
                    checkpoint = src.tell()
                    argument = src.u16()
                    if argument & 0xFFFC == 0xFFFC:
                        zero_run = True
                    else:
                        src.seek(checkpoint)
                if zero_run:
                    X = src.read_byte()
                    count = ((X << 3) | L) + 4
                    state = argument & 3
                    self.log_debug(F'Writing run of {count} zero bytes according to LZOv1.')
                    dst.write(B'\0' * count)
                else:
                    length = L or integer() + 7
                    argument = src.u16()
                    state = argument & 3
                    distance = (H << 11) + (argument >> 2)
                    if not distance:
                        # a distance of exactly 0x4000 marks the end of the stream
                        return dst.getbuffer()
                    distance += 0x4000
                    length += 2
                    # The banned combination is defined in terms of the final distance and
                    # length, i.e. after the constant offsets have been added.
                    if LZOv1 and distance & 0x803F == 0x803F and length in range(261, 265):
                        raise LZOError('Compressed data contains sequence that is banned in LZOv1.')
                    copy(distance, length)
            elif instruction < 0x40:
                L = instruction & 0b11111
                length = L or integer() + 31
                argument = src.u16()
                state = argument & 3
                distance = (argument >> 2) + 1
                copy(distance, length + 2)
            else:
                if instruction < 0x80:
                    length = 3 + ((instruction >> 5) & 1)
                else:
                    length = 5 + ((instruction >> 5) & 3)
                H = src.read_byte()
                D = (instruction & 0b11100) >> 2
                state = instruction & 3
                distance = (H << 3) + D + 1
                copy(distance, length)
            if state:
                literal(state)
示例#15
0
文件: blz.py 项目: binref/refinery
 def _begin(self, data):
     """
     Prepare a new run: `_src` becomes a `StructReader` over a memory view of the given data
     and `_dst` becomes a `MemoryFile` that is backed by a new, empty `bytearray`. The object
     itself is returned.
     """
     view = memoryview(data)
     self._src = StructReader(view)
     sink = bytearray()
     self._dst = MemoryFile(sink)
     return self
示例#16
0
    def process(self, data: bytearray):
        """
        Parse structured records from the input according to `self.args.spec` and yield one
        labelled chunk per output template for every record.

        The specification is processed with `string.Formatter.parse`: literal text between
        fields is a struct format whose values become positional arguments. The format spec of
        a field is either empty (read all remaining data), evaluates to an integer (read this
        many bytes), or is a struct format itself. The conversion of a field, if present, is
        evaluated and used as a byte alignment. A field whose name is a decimal number
        overwrites the positional argument with this index, any other nonempty name is stored
        as a meta variable.

        Only one record is parsed unless `self.args.multi` is set, and at most
        `self.args.count` records are parsed. Parsing also stops when the optional expression
        `self.args.until` evaluates to a false value, and when the input ends in the middle of
        a record; the remaining data is then discarded.
        """
        formatter = string.Formatter()
        until = self.args.until
        until = until and PythonExpression(until, all_variables_allowed=True)
        reader = StructReader(memoryview(data))
        mainspec = self.args.spec
        # A leading byte order character applies to every format that has none of its own.
        byteorder = mainspec[:1]
        if byteorder in '<!=@>':
            mainspec = mainspec[1:]
        else:
            byteorder = '='

        def fixorder(spec):
            if spec[0] not in '<!=@>':
                spec = byteorder + spec
            return spec

        it = itertools.count() if self.args.multi else (0, )
        for index in it:

            if reader.eof:
                break
            if index >= self.args.count:
                break

            meta = metavars(data, ghost=True)
            meta['index'] = index
            args = []
            last = None
            checkpoint = reader.tell()

            try:
                for prefix, name, spec, conversion in formatter.parse(
                        mainspec):
                    if prefix:
                        args.extend(reader.read_struct(fixorder(prefix)))
                    if name is None:
                        continue
                    if conversion:
                        reader.byte_align(
                            PythonExpression.evaluate(conversion, meta))
                    if spec:
                        spec = meta.format_str(spec, self.codec, args)
                    if spec != '':
                        try:
                            spec = PythonExpression.evaluate(spec, meta)
                        except ParserError:
                            pass
                    if spec == '':
                        last = value = reader.read()
                    elif isinstance(spec, int):
                        last = value = reader.read_bytes(spec)
                    else:
                        value = reader.read_struct(fixorder(spec))
                        if not value:
                            self.log_warn(F'field {name} was empty, ignoring.')
                            continue
                        if len(value) > 1:
                            self.log_info(
                                F'parsing field {name} produced {len(value)} items reading a tuple'
                            )
                        else:
                            value = value[0]

                    args.append(value)

                    if name == _SHARP:
                        raise ValueError(
                            'Extracting a field with name # is forbidden.')
                    elif name.isdecimal():
                        # This must not reuse the name `index`: doing so would overwrite the
                        # record counter that is passed to set_next_batch below.
                        slot = int(name)
                        limit = len(args) - 1
                        if slot > limit:
                            self.log_warn(
                                F'cannot assign index field {name}, the highest index is {limit}'
                            )
                        else:
                            args[slot] = value
                        continue
                    elif name:
                        meta[name] = value

                if until and not until(meta):
                    self.log_info(
                        F'the expression ({until}) evaluated to zero; aborting.'
                    )
                    break

                # Go back to the start of the record to obtain all the data it covers.
                with StreamDetour(reader, checkpoint) as detour:
                    full = reader.read(detour.cursor - checkpoint)
                if last is None:
                    last = full

                outputs = []

                for template in self.args.outputs:
                    used = set()
                    outputs.append(
                        meta.format(template,
                                    self.codec, [full, *args], {_SHARP: last},
                                    True,
                                    used=used))
                    # variables that were consumed by a template are not attached as labels
                    for key in used:
                        meta.pop(key, None)

                for output in outputs:
                    chunk = self.labelled(output, **meta)
                    chunk.set_next_batch(index)
                    yield chunk

            except EOF:
                leftover = repr(SizeInt(len(reader) - checkpoint)).strip()
                self.log_info(F'discarding {leftover} left in buffer')
                break
示例#17
0
    def process(self, data):
        """
        Decompress data that consists of a 24-byte header followed by size-prefixed chunks.

        The header holds a 4-byte signature, a 16-bit header size, a check byte (the low byte
        of a CRC32 over the first 6 and the last 17 header bytes), an algorithm code, the final
        output size, the chunk size, and two values that are expected to be zero. If the
        signature is missing and `self.args.mode` is set, the whole input is passed to the
        handler for this mode as a raw stream. Otherwise, a mode given by the caller takes
        precedence over the algorithm code in the header.

        Every chunk is prefixed with its 32-bit size. A chunk that completes the output
        exactly when appended verbatim is treated as uncompressed; all other chunks are
        decompressed and have to produce the chunk size from the header, or whatever is left
        of the final size if that is less.

        Raises `ValueError` for an unknown algorithm code when no mode was given and for an
        excessive chunk size, `IndexError` for a truncated chunk, and `RuntimeError` when a
        chunk decompresses to an unexpected size.
        """
        mode: MODE = self.args.mode
        with StructReader(memoryview(data)) as reader, MemoryFile() as writer:
            reader: StructReader[memoryview]
            check = zlib.crc32(reader.peek(6))
            magic = reader.read(4)
            if magic != self._SIGNATURE:
                if mode is None:
                    self.log_warn(
                        F'data starts with {magic.hex().upper()} rather than the expected sequence '
                        F'{self._SIGNATURE.hex().upper()}; this could be a raw stream.'
                    )
                else:
                    reader.seek(0)
                    handler = self._get_handler(mode)
                    handler(reader, writer, None)
                    return writer.getbuffer()

            header_size = reader.u16()
            if header_size != 24:
                self.log_warn(
                    F'the header size {header_size} was not equal to 24')

            crc32byte = reader.u8()
            # The check byte itself is excluded from the checksum.
            check = zlib.crc32(reader.peek(0x11), check) & 0xFF
            if check != crc32byte:
                self.log_warn(
                    F'the CRC32 check byte was {crc32byte}, computed value was {check}'
                )

            _mode_code = reader.u8()

            try:
                _mode = MODE(_mode_code)
            except ValueError:
                msg = F'header contains unknown compression type code {_mode_code}'
                if mode is None:
                    raise ValueError(msg)
                else:
                    self.log_warn(msg)
            else:
                # A disagreement between the header and the requested mode is only logged.
                if mode is not None and mode != _mode:
                    logger = self.log_warn
                else:
                    logger = self.log_info
                    mode = _mode
                logger(F'header specifies algorithm {_mode.name}')

            self.log_info(F'using algorithm {mode.name}')
            decompress = self._get_handler(mode)

            final_size = reader.u32()
            _unknown_1 = reader.u32()
            chunk_size = reader.u32()
            _unknown_2 = reader.u32()

            if _unknown_1 != 0:
                self.log_warn(
                    F'unknown value 1 was unexpectedly nonzero: 0x{_unknown_1:08X}'
                )
            if _unknown_2 != 0:
                self.log_warn(
                    F'unknown value 2 was unexpectedly nonzero: 0x{_unknown_2:08X}'
                )

            self.log_debug(F'final size: 0x{final_size:08X}')
            self.log_debug(F'chunk size: 0x{chunk_size:08X}')

            if chunk_size > COMPRESS_MAX_CHUNK:
                raise ValueError(
                    'the header chunk size is greater than the maximum value')

            while len(writer) < final_size:
                src_size = reader.u32()
                src_data = reader.read(src_size)
                if len(src_data) != src_size:
                    raise IndexError(
                        F'Attempted to read {src_size} bytes, but got only {len(src_data)}.'
                    )
                if src_size + len(writer) == final_size:
                    self.log_debug(
                        F'final chunk is uncompressed, appending {src_size} raw bytes to output'
                    )
                    writer.write(src_data)
                    break
                self.log_debug(F'reading chunk of size {src_size}')
                start = writer.tell()
                chunk = StructReader(src_data)
                # The last chunk may be shorter than the chunk size from the header.
                target = min(chunk_size, final_size - len(writer))
                decompress(chunk, writer, target)
                writer.flush()
                written = writer.tell() - start
                if written != target:
                    # Report the size that was actually expected for this chunk.
                    raise RuntimeError(
                        F'decompressed output had unexpected size {written} instead of {target}'
                    )

            if not reader.eof:
                self.log_info(
                    F'decompression complete with {reader.remaining_bytes} bytes remaining in input'
                )
            return writer.getbuffer()