示例#1
0
    def parse_cffolder(self, idx: int, offset: int) -> None:
        """Read one CFFOLDER record and load every CFDATA block it lists.

        ``offset`` is where the CFFOLDER record starts inside ``self._buf``.
        A fresh bytearray is appended to ``self._folder_data`` and ``idx`` is
        handed to ``parse_cfdata`` for each block of the folder.

        Raises CorruptionError when the record is truncated or lists no data
        blocks, and NotSupportedError for any compression other than
        COMPRESSION_TYPE_NONE / COMPRESSION_TYPE_MSZIP.
        """
        # u32 offset to first CFDATA, u16 number of CFDATA blocks,
        # u16 compression type
        layout = "<IHH"
        try:
            data_offset, nr_blocks, comp_raw = struct.unpack_from(
                layout, self._buf, offset)
        except struct.error as e:
            raise CorruptionError from e
        # only the bits selected by the mask identify the algorithm
        comp_type = comp_raw & COMPRESSION_MASK_TYPE

        # a folder without data is invalid
        if nr_blocks == 0:
            raise CorruptionError("No CFDATA blocks")

        # only stored and MSZIP folders can be read
        if comp_type not in (COMPRESSION_TYPE_NONE, COMPRESSION_TYPE_MSZIP):
            if comp_type == COMPRESSION_TYPE_QUANTUM:
                reason = "Quantum compression not supported"
            elif comp_type == COMPRESSION_TYPE_LZX:
                reason = "LZX compression not supported"
            else:
                reason = "Compression type 0x{:x} not supported".format(
                    comp_type)
            raise NotSupportedError(reason)

        # walk the CFDATA blocks, each call reports how many bytes it used
        self._folder_data.append(bytearray())
        cursor = data_offset
        for _ in range(nr_blocks):
            cursor += self.parse_cfdata(idx, cursor, comp_type)
示例#2
0
    def parse_cfdata(self, idx: int, offset: int, compression: int) -> int:
        """Parse a CFDATA entry and append its payload to folder ``idx``.

        Args:
            idx: index into ``self._folder_data`` that receives the data.
            offset: position of the CFDATA header inside ``self._buf``.
            compression: COMPRESSION_TYPE_MSZIP selects "CK" + deflate
                decoding; any other value copies the payload unchanged.

        Returns:
            Number of bytes consumed: the header, the per-block reserved
            area (``self._rsvd_block``) and the stored payload.

        Raises:
            CorruptionError: truncated header, stored/declared size mismatch,
                checksum mismatch, missing "CK" signature, deflate failure,
                or a payload whose length differs from the declared
                uncompressed size.
        """
        fmt = "<I"  # checksum
        fmt += "H"  # compressed bytes
        fmt += "H"  # uncompressed bytes
        try:
            (checksum, blob_comp,
             blob_uncomp) = struct.unpack_from(fmt, self._buf, offset)
        except struct.error as e:
            raise CorruptionError from e
        if compression == COMPRESSION_TYPE_NONE and blob_comp != blob_uncomp:
            raise CorruptionError("Mismatched data %i != %i" %
                                  (blob_comp, blob_uncomp))
        hdr_sz = struct.calcsize(fmt) + self._rsvd_block
        buf_cfdata = self._buf[offset + hdr_sz:offset + hdr_sz + blob_comp]

        # verify checksum (a stored value of zero means "not checksummed")
        if checksum != 0:
            checksum_actual = _checksum_compute(buf_cfdata)
            hdr = bytearray(struct.pack("<HH", blob_comp, blob_uncomp))
            checksum_actual = _checksum_compute(hdr, checksum_actual)
            if checksum_actual != checksum:
                raise CorruptionError(
                    "Invalid checksum at {:x}, expected {:x}, got {:x}".format(
                        offset, checksum, checksum_actual))

        # decompress Zlib data after removing *another* header...
        if compression == COMPRESSION_TYPE_MSZIP:
            if buf_cfdata[:2] != b"CK":
                # the signature is arbitrary bytes at this point, so decode
                # leniently: a strict decode could raise UnicodeDecodeError
                # and hide the real problem
                raise CorruptionError("Compression header invalid {}".format(
                    buf_cfdata[:2].decode(errors="replace")))
            assert self._zdict is not None
            # negative wbits: headerless deflate stream; the previous block's
            # output is used as the preset dictionary
            decompress = zlib.decompressobj(-zlib.MAX_WBITS, zdict=self._zdict)
            try:
                buf = decompress.decompress(buf_cfdata[2:])
                buf += decompress.flush()
            except zlib.error as e:
                raise CorruptionError("Failed to decompress") from e
            self._zdict = buf
        else:
            buf = buf_cfdata

        # the sizes come from the archive, so validate them with a real
        # exception; an assert would vanish under ``python -O``
        if len(buf) != blob_uncomp:
            raise CorruptionError(
                "Expected {} bytes of data at {:x}, got {}".format(
                    blob_uncomp, offset, len(buf)))
        self._folder_data[idx] += buf
        return blob_comp + hdr_sz
示例#3
0
    def _parse_cfdata(self, idx, offset, is_zlib):
        """ Parse a CFDATA entry """
        fmt = '<I'  # checksum
        fmt += 'H'  # compressed bytes
        fmt += 'H'  # uncompressed bytes
        try:
            vals = struct.unpack_from(fmt, self._buf_file, offset)
        except struct.error as e:
            raise CorruptionError(str(e))
        # debugging
        if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
            print("CFDATA", vals)
        if not is_zlib and vals[1] != vals[2]:
            raise CorruptionError('Mismatched data %i != %i' %
                                  (vals[1], vals[2]))
        hdr_sz = struct.calcsize(fmt)
        newbuf = self._buf_file[offset + hdr_sz:offset + hdr_sz + vals[1]]

        # decompress Zlib data after removing *another* header...
        if is_zlib:
            if newbuf[0] != 'C' or newbuf[1] != 'K':
                raise CorruptionError('Compression header invalid')
            decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                buf = decompress.decompress(newbuf[2:])
                buf += decompress.flush()
            except zlib.error as e:
                raise CorruptionError('Failed to decompress: ' + str(e))
        else:
            buf = newbuf

        # check checksum
        if vals[0] != 0:
            checksum = _checksum_compute(newbuf)
            hdr = bytearray(struct.pack('<HH', len(newbuf), len(buf)))
            checksum = _checksum_compute(hdr, checksum)
            if checksum != vals[0]:
                raise CorruptionError('Invalid checksum', offset, vals[0],
                                      checksum)

        assert len(buf) == vals[2]
        self._folder_data[idx] += buf
        return vals[1] + hdr_sz
示例#4
0
    def parse_cffile(self, offset: int) -> int:
        """Parse a CFFILE entry and register the file in ``self.cfarchive``.

        Args:
            offset: position of the CFFILE header inside ``self._buf``.

        Returns:
            Length of the entry in bytes (fixed header, filename and its NUL
            terminator), i.e. the distance to the next CFFILE entry.

        Raises:
            CorruptionError: truncated header, filename without a NUL
                terminator inside the scanned window, unknown folder index,
                or file contents shorter than the declared size.
        """
        fmt = "<I"  # uncompressed size
        fmt += "I"  # uncompressed offset of this file in the folder
        fmt += "H"  # index into the CFFOLDER area
        fmt += "H"  # date
        fmt += "H"  # time
        fmt += "H"  # attribs
        try:
            (usize, uoffset, index, date, time,
             fattr) = struct.unpack_from(fmt, self._buf, offset)
        except struct.error as e:
            raise CorruptionError from e

        # parse filename: look for the terminator in the same 255-byte window
        # as before, but without indexing past the end of the buffer and
        # without silently accepting an unterminated name
        hdr_sz = struct.calcsize(fmt)
        name_start = offset + hdr_sz
        name_end = self._buf.find(b"\x00", name_start, name_start + 255)
        if name_end == -1:
            raise CorruptionError(
                "Filename at {:x} is not NUL-terminated".format(name_start))
        # NOTE(review): a name that is not valid UTF-8 still raises
        # UnicodeDecodeError here -- confirm which fallback codec is wanted
        filename = self._buf[name_start:name_end].decode()

        # add file
        f = CabFile()
        f._date_decode(date)
        f._time_decode(time)
        f._attr_decode(fattr)
        try:
            f.buf = bytes(self._folder_data[index][uoffset:uoffset + usize])
        except IndexError as e:
            raise CorruptionError(
                "Failed to get buf for {}".format(filename)) from e
        if len(f) != usize:
            raise CorruptionError(
                "Corruption inside archive, %s is size %i but "
                "expected size %i" % (filename, len(f), usize))
        if self.flattern:
            filename = ntpath.basename(filename)
        self.cfarchive[filename] = f

        # return offset to next entry
        return hdr_sz + (name_end - name_start) + 1
示例#5
0
    def _parse_cffile(self, offset):
        """ Parse a CFFILE entry

        Reads the CFFILE header at ``offset`` in ``self._buf_file`` together
        with the NUL-terminated filename that follows it, slices the file
        contents out of the already decoded folder data and appends the
        resulting CabFile to ``self.files``.

        Returns the length of the entry in bytes (fixed header, filename and
        its NUL terminator).

        Raises CorruptionError for a truncated header, a filename without a
        terminator, an unknown folder index or contents shorter than the
        declared size.
        """
        fmt = '<I'  # uncompressed size
        fmt += 'I'  # uncompressed offset of this file in the folder
        fmt += 'H'  # index into the CFFOLDER area
        fmt += 'H'  # date
        fmt += 'H'  # time
        fmt += 'H'  # attribs
        try:
            vals = struct.unpack_from(fmt, self._buf_file, offset)
        except struct.error as e:
            raise CorruptionError(str(e))

        # debugging
        if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
            print("CFFILE", vals)

        # parse filename: names of up to 255 bytes are accepted as before,
        # so the terminator may sit anywhere in the first 256 bytes; a
        # missing terminator or a truncated buffer is reported as corruption
        # rather than surfacing as IndexError
        hdr_sz = struct.calcsize(fmt)
        name_start = offset + hdr_sz
        name_end = self._buf_file.find(b'\x00', name_start, name_start + 256)
        if name_end == -1:
            raise CorruptionError(
                'Filename at 0x%x is not NUL-terminated' % name_start)
        # one character per byte, same mapping as chr() on each byte value
        filename = ''.join(
            map(chr, bytearray(self._buf_file[name_start:name_end])))

        # add file
        f = CabFile(filename)
        f._date_decode(vals[3])
        f._time_decode(vals[4])
        f._attr_decode(vals[5])
        try:
            folder = self._folder_data[vals[2]]
        except IndexError:
            raise CorruptionError(
                'Invalid folder index %i for %s' % (vals[2], filename))
        f.contents = folder[vals[1]:vals[1] + vals[0]]
        if len(f.contents) != vals[0]:
            raise CorruptionError(
                "Corruption inside archive, %s is size %i but "
                "expected size %i" % (filename, len(f.contents), vals[0]))
        self.files.append(f)

        # return offset to next entry
        return hdr_sz + len(filename) + 1
示例#6
0
    def _parse_cffolder(self, idx, offset):
        """ Parse a CFFOLDER entry """
        fmt = '<I'  # offset to CFDATA
        fmt += 'H'  # number of CFDATA blocks
        fmt += 'H'  # compression type
        try:
            vals = struct.unpack_from(fmt, self._buf_file, offset)
        except struct.error as e:
            raise CorruptionError(str(e))

        # debugging
        if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
            print("CFFOLDER", vals)

        # no data blocks?
        if vals[1] == 0:
            raise CorruptionError('No CFDATA blocks')

        # no compression is supported
        if vals[2] == 0:
            is_zlib = False
        elif vals[2] == 1:
            is_zlib = True
        else:
            raise NotSupportedError('Compression type not supported')

        # not supported
        if is_zlib and self._is_multi_folder:
            raise NotSupportedError(
                "Compression unsupported in multi-folder archive: "
                "set FolderSizeThreshold=0 in the .ddf file")

        # parse CDATA
        self._folder_data.append(bytearray())
        offset = vals[0]
        for _ in range(vals[1]):
            offset += self._parse_cfdata(idx, offset, is_zlib)
示例#7
0
    def parse(self, buf):
        """ Parse .cab data

        Validates the CFHEADER at the start of ``buf``, stores the set ID on
        ``self.set_id`` and then hands every CFFOLDER and CFFILE record to
        ``_parse_cffolder`` / ``_parse_cffile``.

        Raises NotSupportedError for a wrong signature, a version other than
        1.3 or a chained cabinet, and CorruptionError for a truncated header,
        a size mismatch, an archive without files, an out-of-range CFFILE
        offset or non-zero header flags.
        """

        # keep the raw archive; the record parsers read from this attribute
        self._buf_file = buf

        # CFHEADER layout, little-endian
        fmt = ''.join([
            '<4s',   # signature
            'xxxx',  # reserved1
            'I',     # size
            'xxxx',  # reserved2
            'I',     # offset to CFFILE
            'xxxx',  # reserved3
            'BB',    # version minor, major
            'H',     # no of CFFOLDERs
            'H',     # no of CFFILEs
            'H',     # flags
            'H',     # setID
            'H',     # cnt of cabs in set
        ])
        try:
            vals = struct.unpack_from(fmt, self._buf_file, 0)
        except struct.error as e:
            raise CorruptionError(str(e))
        (signature, size, off_cffile, ver_minor, ver_major, nr_folders,
         nr_files, flags, set_id, idx_cabinet) = vals

        # optional trace output
        if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
            print("CFHEADER", vals)

        # magic bytes
        if signature != b'MSCF':
            raise NotSupportedError(
                'Data is not application/vnd.ms-cab-compressed')

        # the header has to describe exactly the data we were given
        buf_len = len(self._buf_file)
        if size != buf_len:
            raise CorruptionError('Cab file internal size does not match data')

        # only format version 1.3 is understood
        if (ver_major, ver_minor) != (1, 3):
            raise NotSupportedError('Version %i.%i not supported' %
                                    (ver_major, ver_minor))

        # only the first cabinet of a set is accepted
        if idx_cabinet != 0:
            raise NotSupportedError('Chained cab file not supported')

        # an archive without files is treated as broken
        if nr_files == 0:
            raise CorruptionError('The cab file is empty')

        # the CFFILE area has to start inside the buffer
        if off_cffile > buf_len:
            raise CorruptionError('Cab file corrupt')

        # every header flag has to be clear
        if flags != 0:
            raise CorruptionError('Expected header flags to be cleared')

        # remembered so the archive can be written back unchanged
        self.set_id = set_id

        # compressed folders are refused later when there are several folders
        if nr_folders > 1:
            self._is_multi_folder = True

        # CFFOLDER records follow the header back to back
        offset = struct.calcsize(fmt)
        for folder_idx in range(nr_folders):
            self._parse_cffolder(folder_idx, offset)
            offset += struct.calcsize(FMT_CFFOLDER)

        # CFFILE records are variable-length; each call returns its size
        for _ in range(nr_files):
            off_cffile += self._parse_cffile(off_cffile)
示例#8
0
    def parse(self, buf: bytes) -> None:
        """Parse a complete cabinet held in ``buf`` into ``self.cfarchive``.

        The CFHEADER is validated first, then each CFFOLDER and CFFILE record
        is handed to ``parse_cffolder`` / ``parse_cffile``.

        Args:
            buf: the raw bytes of the .cab file.

        Raises:
            NotSupportedError: wrong signature, version other than 1.3, or a
                chained cabinet.
            CorruptionError: truncated header, declared size larger than
                ``buf``, no files, or a CFFILE offset outside ``buf``.
        """

        # used as internal state
        self._buf = buf
        if self._zdict is None:
            self._zdict = b""

        try:
            offset = 0

            # read the file header
            fmt = "<4s"  # signature
            fmt += "xxxx"  # reserved1
            fmt += "I"  # size
            fmt += "xxxx"  # reserved2
            fmt += "I"  # offset to CFFILE
            fmt += "xxxx"  # reserved3
            fmt += "BB"  # version minor, major
            fmt += "H"  # no of CFFOLDERs
            fmt += "H"  # no of CFFILEs
            fmt += "H"  # flags
            fmt += "H"  # setID
            fmt += "H"  # cnt of cabs in set
            try:
                (
                    signature,
                    size,
                    off_cffile,
                    version_minor,
                    version_major,
                    nr_folders,
                    nr_files,
                    flags,
                    set_id,
                    idx_cabinet,
                ) = struct.unpack_from(fmt, self._buf, 0)
            except struct.error as e:
                raise CorruptionError from e
            offset += struct.calcsize(fmt)

            # check magic bytes
            if signature != b"MSCF":
                raise NotSupportedError(
                    "Data is not application/vnd.ms-cab-compressed")

            # check size matches; trailing bytes after the cabinet are
            # tolerated, a cabinet longer than the buffer is not
            if size > len(self._buf):
                raise CorruptionError(
                    "File size 0x{:x} does not match header 0x{:x} "
                    "(delta 0x{:x})".format(len(self._buf), size,
                                            len(self._buf) - size))

            # check version
            if version_major != 1 or version_minor != 3:
                raise NotSupportedError("Version {}.{} not supported".format(
                    version_major, version_minor))

            # chained cabs not supported
            if idx_cabinet != 0:
                raise NotSupportedError("Chained cab file not supported")

            # verify we actually have data
            if nr_files == 0:
                raise CorruptionError("The cab file is empty")

            # verify we got complete data
            if off_cffile > len(self._buf):
                raise CorruptionError("Cab file corrupt")

            # reserved sizes: when flag bit 0x0004 is set the header is
            # followed by the sizes of the per-header, per-folder and
            # per-block reserved areas, then by the header reserved area
            if flags & 0x0004:
                try:
                    (rsvd_hdr, rsvd_folder,
                     rsvd_block) = struct.unpack_from(FMT_CFHEADER_RESERVE,
                                                      self._buf, offset)
                except struct.error as e:
                    raise CorruptionError from e
                offset += struct.calcsize(FMT_CFHEADER_RESERVE)
                self._header_reserved = self._buf[offset:offset + rsvd_hdr]
                offset += rsvd_hdr
                self._rsvd_block = rsvd_block
            else:
                rsvd_folder = 0
                self._rsvd_block = 0

            # read this so we can do round-trip
            self.cfarchive.set_id = set_id

            # parse CFFOLDER; every record is followed by its reserved area
            for i in range(nr_folders):
                self.parse_cffolder(i, offset)
                offset += struct.calcsize(FMT_CFFOLDER) + rsvd_folder

            # parse CFFILEs; each call returns the length of its entry
            for _ in range(nr_files):
                off_cffile += self.parse_cffile(off_cffile)
        finally:
            # allow reuse, also after a failed parse: otherwise the next
            # call would start from this archive's leftover dictionary
            self._zdict = None