def _read(self, fp: BinaryIO):
    """Read the file count and the per-file property records from ``fp``.

    Records are consumed until ``Property.END``. Each record is a property
    id, a size and ``size`` bytes of payload; the payload is wrapped in an
    in-memory buffer before it is handed to the matching handler.

    :param fp: binary stream positioned at the number of files.
    :raises Bad7zFile: if a property id exceeds 255 or is not handled here.
    """
    self.numfiles = read_uint64(fp)
    self.files = [{'emptystream': False} for _ in range(self.numfiles)]
    numemptystreams = 0
    while True:
        typ = read_uint64(fp)
        if typ > 255:
            raise Bad7zFile('invalid type, must be below 256, is %d' % typ)
        prop = struct.pack(b'B', typ)
        if prop == Property.END:
            break
        size = read_uint64(fp)
        if prop == Property.DUMMY:
            # Added by newer versions of 7z to adjust padding.
            fp.seek(size, os.SEEK_CUR)
            continue
        buffer = io.BytesIO(fp.read(size))
        if prop == Property.EMPTY_STREAM:
            isempty = read_boolean(buffer, self.numfiles)
            for entry, flag in zip(self.files, isempty):
                entry['emptystream'] = flag
            numemptystreams += sum(1 for flag in isempty if flag)
            # Defaults, in case no EMPTY_FILE / ANTI record follows.
            self.emptyfiles = [False] * numemptystreams
            self.antifiles = [False] * numemptystreams
        elif prop == Property.EMPTY_FILE:
            self.emptyfiles = read_boolean(buffer, numemptystreams)
        elif prop == Property.ANTI:
            self.antifiles = read_boolean(buffer, numemptystreams)
        elif prop == Property.NAME:
            external = buffer.read(1)
            if external == b'\x00':
                self._read_name(buffer)
            else:
                self.dataindex = read_uint64(buffer)
                # External data: the names live at dataindex in fp, so they
                # must be parsed from fp (not from the record buffer) after
                # seeking, exactly as the ATTRIBUTES branch does.
                current_pos = fp.tell()
                fp.seek(self.dataindex, 0)
                self._read_name(fp)
                fp.seek(current_pos, 0)
        elif prop == Property.CREATION_TIME:
            self._readTimes(buffer, self.files, 'creationtime')
        elif prop == Property.LAST_ACCESS_TIME:
            self._readTimes(buffer, self.files, 'lastaccesstime')
        elif prop == Property.LAST_WRITE_TIME:
            self._readTimes(buffer, self.files, 'lastwritetime')
        elif prop == Property.ATTRIBUTES:
            defined = read_boolean(buffer, self.numfiles, checkall=1)
            external = buffer.read(1)
            if external == b'\x00':
                self._read_attributes(buffer, defined)
            else:
                self.dataindex = read_uint64(buffer)
                # try to read external data
                current_pos = fp.tell()
                fp.seek(self.dataindex, 0)
                self._read_attributes(fp, defined)
                fp.seek(current_pos, 0)
        else:
            raise Bad7zFile('invalid type %r' % (prop))
def _read(self, fp: BinaryIO):
    """Read the file count and the per-file property records from ``fp``.

    Loops over one-byte property ids until ``Property.END``. Every record
    other than padding is copied into an in-memory buffer and dispatched
    on its id.

    :param fp: binary stream positioned at the number of files.
    :raises Bad7zFile: if a property id is not handled here.
    """

    def parse_inline_or_external(record, parse):
        # A zero flag byte means the payload follows inside the record;
        # otherwise the record carries an offset into fp to parse from,
        # and fp is put back where it was afterwards.
        if record.read(1) == b'\x00':
            parse(record)
            return
        dataindex = read_uint64(record)
        resume_at = fp.tell()
        fp.seek(dataindex, 0)
        parse(fp)
        fp.seek(resume_at, 0)

    numfiles = read_uint64(fp)
    self.files = [{'emptystream': False} for _ in range(numfiles)]
    numemptystreams = 0
    while True:
        prop = fp.read(1)
        if prop == Property.END:
            break
        size = read_uint64(fp)
        if prop == Property.DUMMY:
            # Added by newer versions of 7z to adjust padding.
            fp.seek(size, os.SEEK_CUR)
            continue
        buffer = io.BytesIO(fp.read(size))
        if prop == Property.EMPTY_STREAM:
            isempty = read_boolean(buffer, numfiles, checkall=False)
            for entry, flag in zip(self.files, isempty):
                entry.update({'emptystream': flag})
            numemptystreams += isempty.count(True)
        elif prop == Property.EMPTY_FILE:
            self.emptyfiles = read_boolean(buffer, numemptystreams, checkall=False)
        elif prop == Property.ANTI:
            self.antifiles = read_boolean(buffer, numemptystreams, checkall=False)
        elif prop == Property.NAME:
            parse_inline_or_external(buffer, self._read_name)
        elif prop == Property.CREATION_TIME:
            self._read_times(buffer, 'creationtime')
        elif prop == Property.LAST_ACCESS_TIME:
            self._read_times(buffer, 'lastaccesstime')
        elif prop == Property.LAST_WRITE_TIME:
            self._read_times(buffer, 'lastwritetime')
        elif prop == Property.ATTRIBUTES:
            defined = read_boolean(buffer, numfiles, checkall=True)
            parse_inline_or_external(
                buffer, lambda source: self._read_attributes(source, defined))
        elif prop == Property.START_POS:
            self._read_start_pos(buffer)
        else:
            raise Bad7zFile('invalid type %r' % prop)
def _get_headerdata_from_streams(self, fp: BinaryIO, streams: 'StreamsInfo') -> BytesIO:
    """Decompress the header folders described by ``streams`` into a buffer.

    The packed data is read from ``fp`` starting at ``self._start_pos`` plus
    the pack position recorded in ``streams.packinfo``.

    :param fp: binary stream holding the packed header data.
    :param streams: stream description providing ``unpackinfo.folders`` and
        ``packinfo`` (``packsizes``, ``packpos``).
    :return: buffer with the concatenated folder data, rewound to offset 0.
    :raises UnsupportedCompressionMethodError: if a folder is encrypted.
    :raises Bad7zFile: if a folder defines a CRC that does not match its data.
    """
    buffer = io.BytesIO()
    src_start = self._start_pos
    for folder in streams.unpackinfo.folders:
        if folder.is_encrypted():
            raise UnsupportedCompressionMethodError()
        uncompressed = folder.unpacksizes
        if not isinstance(uncompressed, (list, tuple)):
            # A scalar size is repeated once per coder so that [-1] below works.
            uncompressed = [uncompressed] * len(folder.coders)
        compressed_size = streams.packinfo.packsizes[0]
        uncompressed_size = uncompressed[-1]

        src_start += streams.packinfo.packpos
        fp.seek(src_start, 0)
        decompressor = folder.get_decompressor(compressed_size)
        folder_data = decompressor.decompress(
            fp.read(compressed_size))[:uncompressed_size]
        # compressed_size bytes were consumed from fp, so the source offset
        # advances by the packed size, not by the unpacked size.
        src_start += compressed_size
        if folder.digestdefined:
            if folder.crc != calculate_crc32(folder_data):
                raise Bad7zFile('invalid block data')
        buffer.write(folder_data)
    buffer.seek(0, 0)
    return buffer
def _retrieve_coders_info(self, file: BinaryIO):
    """Read the unpack sizes and the optional CRC record of every folder.

    :param file: binary stream positioned at the coders-unpack-size id.
    :raises Bad7zFile: if the unpack-size id or the closing end id is missing.
    """
    marker = file.read(1)
    if marker != Property.CODERS_UNPACK_SIZE:
        raise Bad7zFile('coders unpack size id expected but %s found' % repr(marker))

    # One size per output stream of each folder, in folder order.
    for folder in self.folders:
        sizes = []
        for _ in range(folder.totalout):
            sizes.append(read_uint64(file))
        folder.unpacksizes = sizes

    marker = file.read(1)
    if marker == Property.CRC:
        defined = read_boolean(file, self.numfolders, checkall=True)
        crcs = read_crcs(file, self.numfolders)
        position = 0
        for folder in self.folders:
            folder.digestdefined = defined[position]
            folder.crc = crcs[position]
            position += 1
        marker = file.read(1)

    if marker == Property.END:
        return
    raise Bad7zFile('end id expected but %s found' % repr(marker))
def read(self, file: BinaryIO) -> None:
    """Read the pack, unpack and substreams records, each of them optional.

    :param file: binary stream positioned at the first record id.
    :raises Bad7zFile: if a substreams record appears while ``unpackinfo``
        is ``None``, or if the closing end id is missing.
    """
    marker = file.read(1)
    if marker == PROPERTY.PACK_INFO:
        self.packinfo = PackInfo.retrieve(file)
        marker = file.read(1)
    if marker == PROPERTY.UNPACK_INFO:
        self.unpackinfo = UnpackInfo.retrieve(file)
        marker = file.read(1)
    if marker == PROPERTY.SUBSTREAMS_INFO:
        unpack = self.unpackinfo
        if unpack is None:
            # The substreams record is parsed against the folder list.
            raise Bad7zFile("Header is broken")
        self.substreamsinfo = SubstreamsInfo.retrieve(
            file, unpack.numfolders, unpack.folders)
        marker = file.read(1)
    if marker == PROPERTY.END:
        return
    raise Bad7zFile("end id expected but %s found" % repr(marker))  # pragma: no-cover
def _retrieve_coders_info(self, file: BinaryIO):
    """Read the unpack sizes and the optional CRC record of every folder.

    Sizes are appended to each folder's existing ``unpacksizes`` list, one
    per output stream of each of its coders.

    :param file: binary stream positioned at the coders-unpack-size id.
    :raises Bad7zFile: if the unpack-size id or the closing end id is
        missing, including when the stream ends early.
    """
    pid = file.read(1)
    if pid != Property.CODERS_UNPACK_SIZE:
        raise Bad7zFile('coders unpack size id expected but %s found' % repr(pid))  # pragma: no-cover
    for folder in self.folders:
        for c in folder.coders:
            for _ in range(c['numoutstreams']):
                folder.unpacksizes.append(read_uint64(file))
    pid = file.read(1)
    if pid == Property.CRC:
        defined = read_boolean(file, self.numfolders, checkall=True)
        crcs = read_crcs(file, self.numfolders)
        for idx, folder in enumerate(self.folders):
            folder.digestdefined = defined[idx]
            folder.crc = crcs[idx]
        pid = file.read(1)
    if pid != Property.END:
        # read(1) returns b'' at end of stream and ord(b'') raises TypeError,
        # which would hide the Bad7zFile the caller expects.
        found = '0x{:02x}'.format(ord(pid)) if pid else 'end of stream'
        raise Bad7zFile('end id expected but {} found at 0x{:08x}'.format(found, file.tell()))  # pragma: no-cover  # noqa
def _extract_header_info(self, fp: BinaryIO) -> None:
    """Read the optional main-streams and files records from a header.

    :param fp: binary stream positioned at the first record id.
    :raises Bad7zFile: if the records are not followed by the end id.
    """
    marker = fp.read(1)
    if marker == Property.MAIN_STREAMS_INFO:
        self.main_streams = StreamsInfo.retrieve(fp)
        marker = fp.read(1)
    if marker == Property.FILES_INFO:
        self.files_info = FilesInfo.retrieve(fp)
        marker = fp.read(1)
    if marker == Property.END:
        return
    raise Bad7zFile('end id expected but %s found' % (repr(marker)))  # pragma: no-cover
def _read(self, file: BinaryIO) -> None:
    """Read the signature header fields that follow the magic bytes.

    Stores ``version``, ``startheadercrc``, ``nextheaderofs``,
    ``nextheadersize`` and ``nextheadercrc`` on ``self`` and checks that the
    CRC over the last three fields equals ``startheadercrc``.

    :param file: binary stream; it is repositioned to just after the magic.
    :raises Bad7zFile: if the computed CRC differs from ``startheadercrc``.
    """
    file.seek(len(MAGIC_7Z), 0)
    self.version = read_bytes(file, 2)
    self.startheadercrc, _ = read_uint32(file)

    # Each reader returns (value, raw bytes); the raw bytes feed the CRC.
    self.nextheaderofs, ofs_raw = read_real_uint64(file)
    self.nextheadersize, size_raw = read_real_uint64(file)
    self.nextheadercrc, crc_raw = read_uint32(file)

    running = calculate_crc32(ofs_raw)
    for chunk in (size_raw, crc_raw):
        running = calculate_crc32(chunk, running)
    if running != self.startheadercrc:
        raise Bad7zFile('invalid header data')
def read(self, file: BinaryIO) -> None:
    """Read the pack, unpack and substreams records, each of them optional.

    :param file: binary stream positioned at the first record id.
    :raises Bad7zFile: if a substreams record appears without unpack
        information, or if the closing end id is missing.
    """
    pid = file.read(1)
    if pid == Property.PACK_INFO:
        self.packinfo = PackInfo.retrieve(file)
        pid = file.read(1)
    if pid == Property.UNPACK_INFO:
        self.unpackinfo = UnpackInfo.retrieve(file)
        pid = file.read(1)
    if pid == Property.SUBSTREAMS_INFO:
        # The substreams record is parsed against the folder list; without
        # unpack information report a broken header instead of failing
        # with AttributeError.
        unpackinfo = getattr(self, 'unpackinfo', None)
        if unpackinfo is None:
            raise Bad7zFile('Header is broken')
        self.substreamsinfo = SubstreamsInfo.retrieve(file, unpackinfo.numfolders, unpackinfo.folders)
        pid = file.read(1)
    if pid != Property.END:
        raise Bad7zFile('end id expected but %s found' % repr(pid))
def _read(self, file: BinaryIO):
    """Read pack position, stream count, pack sizes and optional pack CRCs.

    :param file: binary stream positioned at the pack position.
    :return: ``self``.
    :raises Bad7zFile: if the closing end id is missing.
    """
    self.packpos = read_uint64(file)
    self.numstreams = read_uint64(file)
    pid = file.read(1)
    if pid == Property.SIZE:
        self.packsizes = [read_uint64(file) for _ in range(self.numstreams)]
        pid = file.read(1)
        if pid == Property.CRC:
            # Per the 7z format, digests are a "defined" flag vector followed
            # by one fixed-width 32-bit CRC per defined stream; decoding them
            # as variable-length numbers misparses the record.
            self.digestdefined = read_boolean(file, self.numstreams, True)
            self.crcs = [read_uint32(file)[0] for crc_present in self.digestdefined if crc_present]
            pid = file.read(1)
    if pid != Property.END:
        raise Bad7zFile('end id expected but %s found' % repr(pid))
    # Offset of each stream relative to the first packed byte.
    self.packpositions = [sum(self.packsizes[:i]) for i in range(self.numstreams)]  # type: List[int]
    return self
def _real_get_contents(self, fp: BinaryIO) -> None:
    """Parse the signature header and archive header and build the file list.

    ``fp`` is used only for the signature check; all later reads go
    through ``self.fp``.

    :param fp: binary stream to check for the 7z signature.
    :raises Bad7zFile: if the signature check fails.
    """
    is_7z = self._check_7zfile(fp)
    if not is_7z:
        raise Bad7zFile('not a 7z file')

    self.sig_header = SignatureHeader.retrieve(self.fp)
    # Stream position right after the signature header.
    self.afterheader = self.fp.tell()

    header_buffer = self._read_header_data()
    parsed = Header.retrieve(self.fp, header_buffer, self.afterheader)
    if parsed is None:
        return
    self.header = parsed
    header_buffer.close()

    self.files = ArchiveFileList()
    files_info = getattr(self.header, 'files_info', None)
    if files_info is not None:
        self._filelist_retrieve()
def _read(self, file: BinaryIO) -> None:
    """Read the signature header fields that follow the magic bytes.

    Stores ``version`` as a ``(major, minor)`` pair of single bytes, plus
    ``startheadercrc``, ``nextheaderofs``, ``nextheadersize`` and
    ``nextheadercrc``, then checks the CRC over the last three fields.

    :param file: binary stream; it is repositioned to just after the magic.
    :raises Bad7zFile: if the computed CRC differs from ``startheadercrc``.
    """
    file.seek(len(MAGIC_7Z), 0)
    # Tuple elements are evaluated left to right: major byte, then minor.
    self.version = (file.read(1), file.read(1))
    self.startheadercrc, _ = read_uint32(file)

    self.nextheaderofs, raw = read_real_uint64(file)
    checksum = calculate_crc32(raw)
    self.nextheadersize, raw = read_real_uint64(file)
    checksum = calculate_crc32(raw, checksum)
    self.nextheadercrc, raw = read_uint32(file)
    checksum = calculate_crc32(raw, checksum)

    if checksum == self.startheadercrc:
        return
    raise Bad7zFile("invalid header data")
def _read(self, fp: BinaryIO, buffer: BytesIO, start_pos: int, password) -> None:
    """Decode a plain or an encoded header from ``buffer``.

    A plain header is parsed directly. For an encoded header the stream
    description is read from ``buffer``, the folders are decompressed from
    ``fp`` into a second buffer, and that buffer is parsed as a plain header.

    :param fp: archive stream the packed header folders are read from.
    :param buffer: in-memory header data as stored in the archive.
    :param start_pos: offset in ``fp`` that pack positions are relative to.
    :param password: value assigned to each folder's ``password`` attribute
        before its decompressor is requested.
    :raises TypeError: if a leading id is neither header nor encoded header.
    :raises Bad7zFile: if a folder defines a CRC that does not match its data.
    """
    self._start_pos = start_pos
    fp.seek(self._start_pos)
    pid = buffer.read(1)
    if not pid:
        # empty archive
        return
    if pid == PROPERTY.HEADER:
        self._extract_header_info(buffer)
        return
    if pid != PROPERTY.ENCODED_HEADER:
        # Report the offending id that was read, not the ``id`` builtin.
        raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
    # get from encoded header
    streams = HeaderStreamsInfo.retrieve(buffer)
    buffer2 = io.BytesIO()
    src_start = self._start_pos
    for folder in streams.unpackinfo.folders:
        uncompressed = folder.unpacksizes
        if not isinstance(uncompressed, (list, tuple)):
            # A scalar size is repeated once per coder so that [-1] below works.
            uncompressed = [uncompressed] * len(folder.coders)
        compressed_size = streams.packinfo.packsizes[0]
        uncompressed_size = uncompressed[-1]
        folder.password = password
        src_start += streams.packinfo.packpos
        fp.seek(src_start, 0)
        decompressor = folder.get_decompressor(compressed_size)
        remaining = uncompressed_size
        folder_data = bytearray()
        # Keep asking the decompressor until the whole folder is produced.
        while remaining > 0:
            folder_data += decompressor.decompress(fp, max_length=remaining)
            remaining = uncompressed_size - len(folder_data)
        self.size += compressed_size
        src_start += compressed_size
        if folder.digestdefined:
            streams.packinfo.enable_digests = True
            if folder.crc != calculate_crc32(folder_data):
                raise Bad7zFile("invalid block data")
        buffer2.write(folder_data)
    buffer2.seek(0, 0)
    pid = buffer2.read(1)
    if pid != PROPERTY.HEADER:
        raise TypeError("Unknown field: %r" % pid)  # pragma: no-cover
    self._extract_header_info(buffer2)
def _read(self, file: BinaryIO, numfolders: int, folders: List[Folder]):
    """Read per-folder substream counts, substream sizes and digests.

    :param file: binary stream positioned at the first record id.
    :param numfolders: number of folders the records describe.
    :param folders: folders, indexed in step with the substream counts.
    :raises Bad7zFile: if the closing end id is missing.
    """
    marker = file.read(1)
    if marker == PROPERTY.NUM_UNPACK_STREAM:
        self.num_unpackstreams_folders = [
            read_uint64(file) for _ in range(numfolders)
        ]
        marker = file.read(1)
    else:
        # No count record: every folder holds exactly one substream.
        self.num_unpackstreams_folders = [1] * numfolders

    if marker == PROPERTY.SIZE:
        self.unpacksizes = []
        for idx, stream_count in enumerate(self.num_unpackstreams_folders):
            explicit_total = 0  # type: int
            # All but the last size are stored; the last one is whatever
            # remains of the folder's unpack size.
            for _ in range(1, stream_count):
                size = read_uint64(file)
                self.unpacksizes.append(size)
                explicit_total += size
            self.unpacksizes.append(folders[idx].get_unpack_size() - explicit_total)
        marker = file.read(1)

    num_digests = 0
    num_digests_total = 0
    for idx in range(numfolders):
        stream_count = self.num_unpackstreams_folders[idx]
        # A single-substream folder with its own digest needs none here.
        if not (stream_count == 1 and folders[idx].digestdefined):
            num_digests += stream_count
        num_digests_total += stream_count

    if marker == PROPERTY.CRC:
        defined = read_boolean(file, num_digests, checkall=True)
        crcs = read_crcs(file, num_digests)
        cursor = 0
        for idx in range(numfolders):
            folder = folders[idx]
            stream_count = self.num_unpackstreams_folders[idx]
            reuse_folder_crc = (
                stream_count == 1 and folder.digestdefined and folder.crc is not None
            )
            if reuse_folder_crc:
                self.digestsdefined.append(True)
                self.digests.append(folder.crc)
                continue
            for _ in range(stream_count):
                self.digestsdefined.append(defined[cursor])
                self.digests.append(crcs[cursor])
                cursor += 1
        marker = file.read(1)

    if marker != PROPERTY.END:
        raise Bad7zFile("end id expected but %r found" % marker)  # pragma: no-cover
    if not self.digestsdefined:
        self.digestsdefined = [False] * num_digests_total
        self.digests = [0] * num_digests_total
def _extract_header_info(self, fp: BinaryIO) -> None:
    """Read the optional header sections, in their fixed order, from ``fp``.

    :param fp: binary stream positioned at the first section id.
    :raises Bad7zFile: if the sections are not followed by the end id.
    """
    # (section id, attribute that receives the result, parser class)
    sections = (
        (Property.ARCHIVE_PROPERTIES, 'properties', ArchiveProperties),
        (Property.ADDITIONAL_STREAMS_INFO, 'additional_streams', StreamsInfo),
        (Property.MAIN_STREAMS_INFO, 'main_streams', StreamsInfo),
        (Property.FILES_INFO, 'files_info', FilesInfo),
    )
    marker = fp.read(1)
    for section_id, attribute, parser in sections:
        if marker == section_id:
            setattr(self, attribute, parser.retrieve(fp))
            marker = fp.read(1)
    if marker != Property.END:
        raise Bad7zFile('end id expected but %s found' % (repr(marker)))
def _read(self, file: BinaryIO):
    """Read the folder list, inline or from an external offset, then the
    coders information.

    :param file: binary stream positioned at the folder id.
    :raises Bad7zFile: if the folder id is missing.
    """
    marker = file.read(1)
    if marker != Property.FOLDER:
        raise Bad7zFile('folder id expected but %s found' % repr(marker))
    self.numfolders = read_uint64(file)
    self.folders = []

    external = read_byte(file)
    resume_at = None
    if external != 0x00:
        # Folder definitions live elsewhere in the stream: jump there and
        # remember where to come back to.
        datastreamidx = read_uint64(file)
        resume_at = file.tell()
        file.seek(datastreamidx, 0)
    self.folders = [Folder.retrieve(file) for _ in range(self.numfolders)]
    if resume_at is not None:
        file.seek(resume_at, 0)

    self._retrieve_coders_info(file)
def _read(self, file: BinaryIO):
    """Read pack position, stream count, pack sizes and optional pack CRCs.

    The CRC record is only looked for directly after the size record.

    :param file: binary stream positioned at the pack position.
    :return: ``self``.
    :raises Bad7zFile: if the closing end id is missing.
    """
    self.packpos = read_uint64(file)
    self.numstreams = read_uint64(file)
    marker = file.read(1)
    if marker == Property.SIZE:
        self.packsizes = [read_uint64(file) for _ in range(self.numstreams)]
        marker = file.read(1)
        if marker == Property.CRC:
            self.enable_digests = True
            self.digestdefined = read_boolean(file, self.numstreams, True)
            # Only streams flagged as defined have a stored CRC.
            self.crcs.extend(
                read_uint32(file)[0] for present in self.digestdefined if present)
            marker = file.read(1)
    if marker != Property.END:
        raise Bad7zFile('end id expected but %s found' % repr(marker))  # pragma: no-cover  # noqa
    # Running offsets of the streams, plus one final entry for the total.
    offsets = [sum(self.packsizes[:upto]) for upto in range(self.numstreams + 1)]
    self.packpositions = offsets  # type: List[int]
    return self
def _read(self, file: BinaryIO):
    """Read the folder list, inline or from an external offset, then the
    coders information.

    :param file: binary stream positioned at the folder id.
    :raises Bad7zFile: if the folder id is missing.
    """
    marker = file.read(1)
    if marker != PROPERTY.FOLDER:
        raise Bad7zFile("folder id expected but %s found" % repr(marker))  # pragma: no-cover
    self.numfolders = read_uint64(file)
    self.folders = []
    external = read_byte(file)
    if external != 0x00:  # pragma: no-cover  # there is no live example
        # Folder definitions are stored at an offset: read them there and
        # return to the current position afterwards.
        datastreamidx = read_uint64(file)
        resume_at = file.tell()
        file.seek(datastreamidx, 0)
        self.folders = [
            Folder.retrieve(file) for _ in range(self.numfolders)
        ]
        file.seek(resume_at, 0)
    else:
        self.folders = [
            Folder.retrieve(file) for _ in range(self.numfolders)
        ]
    self._retrieve_coders_info(file)
def _read_header_data(self) -> BytesIO:
    """Read the next-header bytes from ``self.fp`` and verify their CRC.

    The seek is relative to the current position of ``self.fp``.

    :return: in-memory buffer holding ``nextheadersize`` bytes of header data.
    :raises Bad7zFile: if the CRC of the data differs from ``nextheadercrc``.
    """
    signature = self.sig_header
    self.fp.seek(signature.nextheaderofs, os.SEEK_CUR)
    raw = self.fp.read(signature.nextheadersize)
    buffer = io.BytesIO(raw)
    actual_crc = calculate_crc32(buffer.getvalue())
    if actual_crc != signature.nextheadercrc:
        raise Bad7zFile('invalid header data')
    return buffer