def parse_cffolder(self, idx: int, offset: int) -> None:
    """Parse a CFFOLDER entry and load all of its CFDATA blocks.

    Reads the folder record found at ``offset`` in ``self._buf``, checks that
    its compression type is one this parser handles, appends a fresh
    ``bytearray`` to ``self._folder_data`` and then walks the folder's CFDATA
    blocks with ``parse_cfdata``, passing ``idx`` as the folder to fill.

    Raises:
        CorruptionError: the record cannot be unpacked or lists no blocks.
        NotSupportedError: the compression type is neither "none" nor MSZIP.
    """
    # u32 offset to CFDATA, u16 number of CFDATA blocks, u16 compression type
    record_fmt = "<IHH"
    try:
        data_offset, block_count, raw_type = struct.unpack_from(
            record_fmt, self._buf, offset)
    except struct.error as e:
        raise CorruptionError from e
    # only the bits selected by the mask identify the compression type
    comp_type = raw_type & COMPRESSION_MASK_TYPE

    # a folder without data blocks cannot hold any file
    if block_count == 0:
        raise CorruptionError("No CFDATA blocks")

    # only uncompressed and MSZIP folders can be read
    supported = (COMPRESSION_TYPE_NONE, COMPRESSION_TYPE_MSZIP)
    if comp_type not in supported:
        # known-but-unsupported types get a friendlier message
        named_types = (
            (COMPRESSION_TYPE_QUANTUM, "Quantum compression not supported"),
            (COMPRESSION_TYPE_LZX, "LZX compression not supported"),
        )
        for known_type, message in named_types:
            if comp_type == known_type:
                raise NotSupportedError(message)
        raise NotSupportedError(
            "Compression type 0x{:x} not supported".format(comp_type))

    # each block reports how many bytes it used, which moves us to the next
    self._folder_data.append(bytearray())
    cursor = data_offset
    remaining = block_count
    while remaining > 0:
        cursor += self.parse_cfdata(idx, cursor, comp_type)
        remaining -= 1
def parse_cfdata(self, idx: int, offset: int, compression: int) -> int:
    """Parse a CFDATA entry and append its payload to folder ``idx``.

    Args:
        idx: index into ``self._folder_data`` of the folder being filled.
        offset: position of the CFDATA header inside ``self._buf``.
        compression: compression type of the owning folder.

    Returns:
        The number of bytes this entry occupies in ``self._buf`` (header,
        per-block reserved area and compressed payload), i.e. the distance
        to the next CFDATA entry.

    Raises:
        CorruptionError: the header cannot be unpacked, the payload is
            truncated, the checksum or MSZIP signature is wrong, the data
            fails to decompress, or the decoded size does not match the
            header.
    """
    fmt = "<I"  # checksum
    fmt += "H"  # compressed bytes
    fmt += "H"  # uncompressed bytes
    try:
        (checksum, blob_comp, blob_uncomp) = struct.unpack_from(
            fmt, self._buf, offset)
    except struct.error as e:
        raise CorruptionError from e
    if compression == COMPRESSION_TYPE_NONE and blob_comp != blob_uncomp:
        raise CorruptionError("Mismatched data %i != %i" %
                              (blob_comp, blob_uncomp))
    hdr_sz = struct.calcsize(fmt) + self._rsvd_block
    buf_cfdata = self._buf[offset + hdr_sz:offset + hdr_sz + blob_comp]

    # slicing past the end of the buffer silently yields fewer bytes, so a
    # truncated archive has to be detected explicitly
    if len(buf_cfdata) != blob_comp:
        raise CorruptionError(
            "Truncated CFDATA at {:x}, expected {} bytes, got {}".format(
                offset, blob_comp, len(buf_cfdata)))

    # verify checksum; a stored value of zero means "not checksummed"
    if checksum != 0:
        checksum_actual = _checksum_compute(buf_cfdata)
        hdr = bytearray(struct.pack("<HH", blob_comp, blob_uncomp))
        checksum_actual = _checksum_compute(hdr, checksum_actual)
        if checksum_actual != checksum:
            raise CorruptionError(
                "Invalid checksum at {:x}, expected {:x}, got {:x}".format(
                    offset, checksum, checksum_actual))

    # decompress Zlib data after removing *another* header...
    if compression == COMPRESSION_TYPE_MSZIP:
        if buf_cfdata[:2] != b"CK":
            # the bad signature may be arbitrary bytes, so decode leniently
            # rather than letting UnicodeDecodeError mask the real problem
            raise CorruptionError("Compression header invalid {}".format(
                bytes(buf_cfdata[:2]).decode(errors="replace")))
        # internal invariant (also narrows the Optional for type checkers)
        assert self._zdict is not None
        decompress = zlib.decompressobj(-zlib.MAX_WBITS, zdict=self._zdict)
        try:
            buf = decompress.decompress(buf_cfdata[2:])
            buf += decompress.flush()
        except zlib.error as e:
            raise CorruptionError("Failed to decompress") from e
        # the output of this block is the dictionary for the next one
        self._zdict = buf
    else:
        buf = buf_cfdata

    # validate with a real exception: asserts vanish under ``python -O``
    if len(buf) != blob_uncomp:
        raise CorruptionError(
            "CFDATA at {:x} decoded to {} bytes, expected {}".format(
                offset, len(buf), blob_uncomp))
    self._folder_data[idx] += buf
    return blob_comp + hdr_sz
def _parse_cfdata(self, idx, offset, is_zlib):
    """Parse a CFDATA entry and append its payload to folder ``idx``.

    Args:
        idx: index into ``self._folder_data`` of the folder being filled.
        offset: position of the CFDATA header inside ``self._buf_file``.
        is_zlib: True when the payload is "CK"-prefixed raw deflate data,
            False when it is stored uncompressed.

    Returns:
        The number of bytes this entry occupies in ``self._buf_file``
        (header plus compressed payload).

    Raises:
        CorruptionError: the header cannot be unpacked, the sizes disagree,
            the compression signature or checksum is wrong, or the data
            fails to decompress.
    """
    fmt = '<I'  # checksum
    fmt += 'H'  # compressed bytes
    fmt += 'H'  # uncompressed bytes
    try:
        vals = struct.unpack_from(fmt, self._buf_file, offset)
    except struct.error as e:
        raise CorruptionError(str(e)) from e
    checksum_expected, blob_comp, blob_uncomp = vals

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFDATA", vals)
    if not is_zlib and blob_comp != blob_uncomp:
        raise CorruptionError('Mismatched data %i != %i' %
                              (blob_comp, blob_uncomp))
    hdr_sz = struct.calcsize(fmt)
    newbuf = self._buf_file[offset + hdr_sz:offset + hdr_sz + blob_comp]

    # decompress Zlib data after removing *another* header...
    if is_zlib:
        # compare a slice against bytes: indexing a bytes object yields
        # ints, which never equal 'C' / 'K', so the old per-character test
        # rejected every compressed block
        if newbuf[:2] != b'CK':
            raise CorruptionError('Compression header invalid')
        decompress = zlib.decompressobj(-zlib.MAX_WBITS)
        try:
            buf = decompress.decompress(newbuf[2:])
            buf += decompress.flush()
        except zlib.error as e:
            raise CorruptionError('Failed to decompress: ' + str(e)) from e
    else:
        buf = newbuf

    # check checksum; a stored value of zero means "not checksummed"
    if checksum_expected != 0:
        checksum = _checksum_compute(newbuf)
        hdr = bytearray(struct.pack('<HH', len(newbuf), len(buf)))
        checksum = _checksum_compute(hdr, checksum)
        if checksum != checksum_expected:
            raise CorruptionError('Invalid checksum', offset,
                                  checksum_expected, checksum)

    # validate with a real exception: asserts vanish under ``python -O``
    if len(buf) != blob_uncomp:
        raise CorruptionError('Mismatched data %i != %i' %
                              (len(buf), blob_uncomp))
    self._folder_data[idx] += buf
    return blob_comp + hdr_sz
def parse_cffile(self, offset: int) -> int:
    """Parse a CFFILE entry and register the file it describes.

    The fixed-size header at ``offset`` in ``self._buf`` is followed by a
    NUL-terminated filename. The file contents are sliced out of the
    already-decoded folder data and stored in ``self.cfarchive`` under the
    filename (reduced to its basename when ``self.flattern`` is set).

    Args:
        offset: position of the CFFILE header inside ``self._buf``.

    Returns:
        The size of this entry in bytes (header, filename and terminator),
        i.e. the distance to the next CFFILE entry.

    Raises:
        CorruptionError: the header cannot be unpacked, the filename is not
            NUL-terminated, or the referenced folder data is missing or too
            short.
    """
    fmt = "<I"  # uncompressed size
    fmt += "I"  # uncompressed offset of this file in the folder
    fmt += "H"  # index into the CFFOLDER area
    fmt += "H"  # date
    fmt += "H"  # time
    fmt += "H"  # attribs
    try:
        (usize, uoffset, index, fdate, ftime, fattr) = struct.unpack_from(
            fmt, self._buf, offset)
    except struct.error as e:
        raise CorruptionError from e
    hdr_sz = struct.calcsize(fmt)

    # parse filename: look for the terminator in the same 255-byte window
    # the format allows; bytes.find() never reads past the end of the buffer
    name_start = offset + hdr_sz
    name_end = self._buf.find(b"\x00", name_start, name_start + 255)
    if name_end == -1:
        raise CorruptionError(
            "Filename at 0x{:x} is not NUL-terminated".format(name_start))
    filename = self._buf[name_start:name_end].decode()

    # add file
    f = CabFile()
    f._date_decode(fdate)
    f._time_decode(ftime)
    f._attr_decode(fattr)
    try:
        f.buf = bytes(self._folder_data[index][uoffset:uoffset + usize])
    except IndexError as e:
        raise CorruptionError(
            "Failed to get buf for {}".format(filename)) from e
    # slicing truncates silently, so a short folder shows up as a size gap
    if len(f) != usize:
        raise CorruptionError(
            "Corruption inside archive, %s is size %i but "
            "expected size %i" % (filename, len(f), usize))
    if self.flattern:
        filename = ntpath.basename(filename)
    self.cfarchive[filename] = f

    # return offset to next entry: header + raw filename bytes + terminator
    return hdr_sz + (name_end - name_start) + 1
def _parse_cffile(self, offset):
    """Parse a CFFILE entry and append the file it describes.

    The fixed-size header at ``offset`` in ``self._buf_file`` is followed by
    a NUL-terminated filename. The file contents are sliced out of the
    already-decoded folder data and the resulting ``CabFile`` is appended to
    ``self.files``.

    Args:
        offset: position of the CFFILE header inside ``self._buf_file``.

    Returns:
        The size of this entry in bytes (header, filename and terminator).

    Raises:
        CorruptionError: the header cannot be unpacked, the filename is not
            NUL-terminated, or the referenced folder data is missing or too
            short.
    """
    fmt = '<I'  # uncompressed size
    fmt += 'I'  # uncompressed offset of this file in the folder
    fmt += 'H'  # index into the CFFOLDER area
    fmt += 'H'  # date
    fmt += 'H'  # time
    fmt += 'H'  # attribs
    try:
        vals = struct.unpack_from(fmt, self._buf_file, offset)
    except struct.error as e:
        raise CorruptionError(str(e)) from e
    usize, uoffset, folder_idx, fdate, ftime, fattr = vals
    hdr_sz = struct.calcsize(fmt)

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFFILE", vals)

    # parse filename: find() stays inside the buffer and inside the 255-byte
    # window, so a missing terminator is reported instead of crashing
    name_start = offset + hdr_sz
    name_end = self._buf_file.find(b'\x00', name_start, name_start + 255)
    if name_end == -1:
        raise CorruptionError('Filename is not NUL-terminated')
    # latin-1 maps every byte to the code point of the same value, which is
    # what the previous per-byte chr() loop produced
    filename = bytes(self._buf_file[name_start:name_end]).decode('latin-1')

    # add file
    f = CabFile(filename)
    f._date_decode(fdate)
    f._time_decode(ftime)
    f._attr_decode(fattr)
    try:
        f.contents = self._folder_data[folder_idx][uoffset:uoffset + usize]
    except IndexError as e:
        raise CorruptionError(
            'Failed to get contents for %s' % filename) from e
    # slicing truncates silently, so a short folder shows up as a size gap
    if len(f.contents) != usize:
        raise CorruptionError(
            "Corruption inside archive, %s is size %i but "
            "expected size %i" % (filename, len(f.contents), usize))
    self.files.append(f)

    # return offset to next entry
    return hdr_sz + len(filename) + 1
def _parse_cffolder(self, idx, offset):
    """Parse a CFFOLDER entry and load all of its CFDATA blocks.

    Reads the folder record at ``offset`` in ``self._buf_file``, works out
    whether its blocks are stored or zlib-compressed, appends a fresh
    ``bytearray`` to ``self._folder_data`` and feeds every block of the
    folder to ``_parse_cfdata``.

    Raises:
        CorruptionError: the record cannot be unpacked or lists no blocks.
        NotSupportedError: the compression type is unknown, or the folder is
            compressed while the archive holds more than one folder.
    """
    # u32 offset to CFDATA, u16 number of CFDATA blocks, u16 compression type
    record_fmt = '<IHH'
    try:
        record = struct.unpack_from(record_fmt, self._buf_file, offset)
    except struct.error as e:
        raise CorruptionError(str(e))
    data_offset, block_count, comp_type = record

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFFOLDER", record)

    # a folder without data blocks cannot hold any file
    if block_count == 0:
        raise CorruptionError('No CFDATA blocks')

    # type 0 is stored data, type 1 is zlib; nothing else is handled
    zlib_by_type = {0: False, 1: True}
    if comp_type not in zlib_by_type:
        raise NotSupportedError('Compression type not supported')
    is_zlib = zlib_by_type[comp_type]

    # compressed folders are only handled in single-folder archives
    if is_zlib and self._is_multi_folder:
        raise NotSupportedError(
            "Compression unsupported in multi-folder archive: "
            "set FolderSizeThreshold=0 in the .ddf file")

    # each block reports how many bytes it used, which moves us to the next
    self._folder_data.append(bytearray())
    cursor = data_offset
    remaining = block_count
    while remaining > 0:
        cursor += self._parse_cfdata(idx, cursor, is_zlib)
        remaining -= 1
def parse(self, buf):
    """Parse .cab data held entirely in ``buf``.

    Validates the CFHEADER, then parses every CFFOLDER entry (which loads
    the folder data) followed by every CFFILE entry. The set ID is stored in
    ``self.set_id`` and ``self._is_multi_folder`` is set when the archive
    has more than one folder.

    Raises:
        CorruptionError: the header cannot be unpacked, the recorded size
            differs from ``len(buf)``, the archive has no files, the CFFILE
            offset lies past the end, or header flags are set.
        NotSupportedError: the signature or version is wrong, or the cabinet
            is part of a chain.
    """
    # slurp the whole buffer at once
    self._buf_file = buf
    total_len = len(self._buf_file)

    # CFHEADER layout: signature, reserved1, size, reserved2, offset to
    # CFFILE, reserved3, version minor, version major, no of CFFOLDERs,
    # no of CFFILEs, flags, setID, cnt of cabs in set.  The optional
    # reserved-size fields that may follow are not read here.
    header_fmt = '<4sxxxxIxxxxIxxxxBBHHHHH'
    try:
        header = struct.unpack_from(header_fmt, self._buf_file, 0)
    except struct.error as e:
        raise CorruptionError(str(e))
    (signature, cab_size, off_cffile, ver_minor, ver_major,
     nr_folders, nr_files, flags, set_id, idx_cabinet) = header

    # debugging
    if os.getenv('PYTHON_CABARCHIVE_DEBUG'):
        print("CFHEADER", header)

    # check magic bytes
    if signature != b'MSCF':
        raise NotSupportedError(
            'Data is not application/vnd.ms-cab-compressed')

    # the size recorded in the header must equal the buffer length
    if cab_size != total_len:
        raise CorruptionError('Cab file internal size does not match data')

    # only version 1.3 is understood
    if not (ver_major == 1 and ver_minor == 3):
        raise NotSupportedError('Version %i.%i not supported' %
                                (ver_major, ver_minor))

    # chained cabs not supported
    if idx_cabinet != 0:
        raise NotSupportedError('Chained cab file not supported')

    # verify we actually have data
    if nr_files == 0:
        raise CorruptionError('The cab file is empty')

    # the CFFILE area must start inside the buffer
    if off_cffile > total_len:
        raise CorruptionError('Cab file corrupt')

    # no header flag is handled, so every bit has to be clear
    if flags != 0:
        raise CorruptionError('Expected header flags to be cleared')

    # read this so we can do round-trip
    self.set_id = set_id

    # we don't support compressed folders in multi-folder archives
    if nr_folders > 1:
        self._is_multi_folder = True

    # CFFOLDER entries sit directly after the header, back to back
    folder_offset = struct.calcsize(header_fmt)
    folder_idx = 0
    while folder_idx < nr_folders:
        self._parse_cffolder(folder_idx, folder_offset)
        folder_offset += struct.calcsize(FMT_CFFOLDER)
        folder_idx += 1

    # each CFFILE entry reports its own size, which leads to the next one
    files_left = nr_files
    while files_left > 0:
        off_cffile += self._parse_cffile(off_cffile)
        files_left -= 1
def parse(self, buf: bytes) -> None:
    """Parse .cab data held entirely in ``buf``.

    Validates the CFHEADER, reads the optional reserved-size fields, then
    parses every CFFOLDER entry followed by every CFFILE entry. The set ID
    is stored on ``self.cfarchive`` and the parsed files are added by
    ``parse_cffile``.

    The decompression dictionary ``self._zdict`` is only valid for the
    duration of one call; it is cleared on the way out even when parsing
    fails, so that a later call on the same object does not start from the
    dictionary left behind by a broken archive.

    Raises:
        CorruptionError: a header cannot be unpacked, the recorded size is
            larger than ``buf``, the archive has no files, or the CFFILE
            offset lies past the end.
        NotSupportedError: the signature or version is wrong, or the cabinet
            is part of a chain.
    """
    # used as internal state
    self._buf = buf
    if self._zdict is None:
        self._zdict = b""

    try:
        # read the file header
        fmt = "<4s"  # signature
        fmt += "xxxx"  # reserved1
        fmt += "I"  # size
        fmt += "xxxx"  # reserved2
        fmt += "I"  # offset to CFFILE
        fmt += "xxxx"  # reserved3
        fmt += "BB"  # version minor, major
        fmt += "H"  # no of CFFOLDERs
        fmt += "H"  # no of CFFILEs
        fmt += "H"  # flags
        fmt += "H"  # setID
        fmt += "H"  # cnt of cabs in set
        try:
            (
                signature,
                size,
                off_cffile,
                version_minor,
                version_major,
                nr_folders,
                nr_files,
                flags,
                set_id,
                idx_cabinet,
            ) = struct.unpack_from(fmt, self._buf, 0)
        except struct.error as e:
            raise CorruptionError from e
        offset = struct.calcsize(fmt)

        # check magic bytes
        if signature != b"MSCF":
            raise NotSupportedError(
                "Data is not application/vnd.ms-cab-compressed")

        # check size matches; data beyond the recorded size is tolerated
        if size > len(self._buf):
            raise CorruptionError(
                "File size 0x{:x} does not match header 0x{:x} (delta 0x{:x})".
                format(len(self._buf), size, len(self._buf) - size))

        # check version
        if version_major != 1 or version_minor != 3:
            raise NotSupportedError("Version {}.{} not supported".format(
                version_major, version_minor))

        # chained cabs not supported
        if idx_cabinet != 0:
            raise NotSupportedError("Chained cab file not supported")

        # verify we actually have data
        if nr_files == 0:
            raise CorruptionError("The cab file is empty")

        # verify we got complete data
        if off_cffile > len(self._buf):
            raise CorruptionError("Cab file corrupt")

        # reserved sizes: when this flag is set the header carries the sizes
        # of the per-header, per-folder and per-block reserved areas
        if flags & 0x0004:
            try:
                (rsvd_hdr, rsvd_folder, rsvd_block) = struct.unpack_from(
                    FMT_CFHEADER_RESERVE, self._buf, offset)
            except struct.error as e:
                raise CorruptionError from e
            offset += struct.calcsize(FMT_CFHEADER_RESERVE)
            self._header_reserved = buf[offset:offset + rsvd_hdr]
            offset += rsvd_hdr
            self._rsvd_block = rsvd_block
        else:
            rsvd_folder = 0
            self._rsvd_block = 0

        # read this so we can do round-trip
        self.cfarchive.set_id = set_id

        # parse CFFOLDER; entries are separated by the folder reserved area
        for i in range(nr_folders):
            self.parse_cffolder(i, offset)
            offset += struct.calcsize(FMT_CFFOLDER) + rsvd_folder

        # parse CFFILEs; each entry reports its own size
        for _ in range(nr_files):
            off_cffile += self.parse_cffile(off_cffile)
    finally:
        # allow reuse, also after a failed parse
        self._zdict = None