def base_test_failure_bad_zip(self):
    """conduct_load.load must fail (falsy result, error on stderr) when the
    bundle file is not a valid zip archive.

    The resolver is mocked to return the bundle, and open_bundle is mocked
    to raise BadZipFile, simulating a corrupt download.
    """
    resolve_bundle_mock = MagicMock(return_value=(self.bundle_file_name, self.bundle_file))
    stderr = MagicMock()
    # open_bundle fails as if the bundle file were a corrupt zip.
    bundle_open_mock = MagicMock(
        side_effect=BadZipFile('test bad zip error'))
    with patch('conductr_cli.resolver.resolve_bundle', resolve_bundle_mock), \
            patch('conductr_cli.conduct_load.open_bundle', bundle_open_mock):
        logging_setup.configure_logging(MagicMock(**self.default_args), err_output=stderr)
        result = conduct_load.load(MagicMock(**self.default_args))
        self.assertFalse(result)
        # The resolver and open_bundle must have been driven with the
        # configured settings/paths before the failure surfaced.
        resolve_bundle_mock.assert_called_with(self.custom_settings, self.bundle_resolve_cache_dir,
                                               self.bundle_file, self.offline_mode)
        bundle_open_mock.assert_called_with(
            self.bundle_file_name, self.bundle_file, bundle_utils.conf(self.bundle_file))
        # The BadZipFile message is surfaced to the user verbatim.
        self.assertEqual(
            as_error(
                strip_margin(
                    """|Error: Problem with the bundle: test bad zip error
                       |""")),
            self.output(stderr))
def test_parse_when_zipfile_fails(self, mock_file_parser, mock_zipfile):
    """ApkParser.parse must propagate a BadZipFile raised by the zip layer."""
    # Arrange: zip handling blows up even though the file looks like a zip.
    mock_zipfile.side_effect = BadZipFile()
    mock_file_parser.is_zip_file.return_value = True
    mock_file_parser.return_value.parse.return_value = any_file()
    # Act / assert: the error is not swallowed by the parser.
    with self.assertRaises(BadZipFile):
        ApkParser().parse("any-file-path", extended_processing=False)
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a DWD archive and return the raw bytes of its 'produkt' file.

    Raises InvalidURL / FailedDownload on download problems,
    ProductFileNotFound when no 'produkt' member exists, and BadZipFile
    when the archive is corrupt.
    """
    try:
        payload = download_file_from_dwd(remote_file)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} could not be reached."
        ) from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")

    try:
        archive = ZipFile(payload)
        for member in archive.namelist():
            if not member.startswith(PRODUCT_FILE_IDENTIFIER):
                continue
            # Found the product file: grab its bytes and release the handle.
            data = archive.open(member).read()
            archive.close()
            return data
        raise ProductFileNotFound(
            f"The archive of {remote_file} does not hold a 'produkt' file.")
    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e
def extract_zip(self):
    """Extract every file from self.archive_path into self.ROM_PATH.

    Validates that the archive holds exactly self.FILE_COUNT entries,
    renames each entry via self.file_rename() and runs
    self.post_processing() on every written file.

    Raises:
        RuntimeError: archive content does not match FILE_COUNT.
        BadZipFile: archive is corrupt (original error is chained).
    """
    assert self.FILE_COUNT > 0
    try:
        # 'archive' instead of 'zip' — don't shadow the builtin.
        with zipfile.ZipFile(self.archive_path, "r") as archive:
            namelist = archive.namelist()
            print("namelist():", namelist)
            if len(namelist) != self.FILE_COUNT:
                # Fixed: the original implicit string concatenation was
                # missing the space before "Existing", running two
                # sentences together in the error message.
                msg = (
                    "Wrong archive content?!?"
                    " There exists %i files, but it should exist %i."
                    " Existing names are: %r"
                ) % (len(namelist), self.FILE_COUNT, namelist)
                log.error(msg)
                raise RuntimeError(msg)
            for filename in namelist:
                content = archive.read(filename)
                dst = self.file_rename(filename)
                out_filename = os.path.join(self.ROM_PATH, dst)
                with open(out_filename, "wb") as f:
                    f.write(content)
                if dst == filename:
                    print("%r extracted" % out_filename)
                else:
                    print("%r extracted to %r" % (filename, out_filename))
                self.post_processing(out_filename)
    except BadZipFile as err:
        msg = "Error extracting archive %r: %s" % (self.archive_path, err)
        log.error(msg)
        # Chain the original error so the root cause is preserved.
        raise BadZipFile(msg) from err
def open(self, zinfo):
    """Return a file-like object for the archive member described by *zinfo*.

    Seeks the underlying file to the member's local header, validates the
    header against *zinfo*, and returns a ZipExtFile (or legacy
    _ZipExtFile on Python < 2.7) positioned at the member's data.

    Raises:
        RuntimeError: the archive was already closed.
        BadZipFile: header truncated, bad magic, or name mismatch.
        NotImplementedError: unsupported zip features (flag bits 5/6).
    """
    zef_file = self.fp
    if not zef_file:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")
    zef_file.seek(zinfo.header_offset, 0)
    # Skip the file header:
    fheader = zef_file.read(sizeFileHeader)
    if len(fheader) != sizeFileHeader:
        raise BadZipFile("Truncated file header")
    fheader = struct.unpack(structFileHeader, fheader)
    if fheader[_FH_SIGNATURE] != stringFileHeader:
        raise BadZipFile("Bad magic number for file header")
    fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
    # Consume (and discard) the extra field so the stream is positioned
    # at the start of the member's compressed data.
    if fheader[_FH_EXTRA_FIELD_LENGTH]:
        zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
    if zinfo.flag_bits & 0x20:
        # Zip 2.7: compressed patched data
        raise NotImplementedError("compressed patched data (flag bit 5)")
    if zinfo.flag_bits & 0x40:
        # strong encryption
        raise NotImplementedError("strong encryption (flag bit 6)")
    if zinfo.flag_bits & _UTF8_EXTENSION_FLAG:
        # UTF-8 filename
        fname_str = fname.decode("utf-8")
    else:
        # Historical ZIP filename encoding
        fname_str = fname.decode("cp437")
    # The local header's name must agree with the central directory entry.
    if fname_str != zinfo.orig_filename:
        raise BadZipFile(
            'File name in directory %r and header %r differ.'
            % (zinfo.orig_filename, fname))
    # Pick the reader implementation matching the running interpreter.
    if sys.version_info[:2] < (2, 7):
        return _ZipExtFile(zef_file, zinfo)
    elif sys.version_info[:2] < (3, 4) and sys.platform == 'win32':
        return ZipExtFile(zef_file, 'r', zinfo)
    else:
        # close_fileobj=False: the shared archive handle must stay open.
        return ZipExtFile(zef_file, 'r', zinfo, None, close_fileobj=False)
def load(self, filepath: PathLike, pwd: OptBytes = None) -> None:
    """Load Workspace from file.

    Args:
        filepath: String or :term:`path-like object`, that points to a valid
            workspace file. If the filepath points to a valid workspace
            file, then the class instance is initialized with a memory copy
            of the file. If the given file, however, does not exist, isn't a
            valid ZipFile, or does not contain a workspace configuration,
            respectively one of the errors FileNotFoundError, BadZipFile or
            BadWsFile is raised.
        pwd: Bytes representing password of workspace file.

    """
    # Initialize instance Variables, Buffer and buffered ZipFile.
    # The workspace is held fully in memory and mutated via self._file.
    self._changed = False
    self._path = env.expand(filepath)
    self._pwd = pwd
    self._buffer = BytesIO()
    self._file = ZipFile(self._buffer, mode='w')

    # Copy contents from the on-disk ZipFile into the buffered ZipFile.
    # UserWarnings (e.g. about duplicate entries) are deliberately muted.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        try:
            with ZipFile(self.path, mode='r') as fh:
                for zinfo in fh.infolist():
                    data = fh.read(zinfo, pwd=pwd)
                    # TODO ([email protected]): The zipfile standard
                    # module currently does not support encryption in write
                    # mode of new ZipFiles. See:
                    # https://docs.python.org/3/library/zipfile.html
                    # When support is provided, the below line for writing
                    # files shall be replaced by:
                    # self._file.writestr(zinfo, data, pwd=pwd)
                    self._file.writestr(zinfo, data)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                f"file '{self.path}' does not exist") from err
        except BadZipFile as err:
            raise BadZipFile(
                f"file '{self.path}' is not a valid ZIP file") from err

    # Try to open and load workspace configuration from buffer.
    structure = {
        'dc': self._get_attr_types(group='dc'),
        'hooks': self._get_attr_types(category='hooks')}
    try:
        with self.open(self._config_file) as file:
            cfg = inifile.load(file, structure=structure)
    except KeyError as err:
        # KeyError here means the config member is absent from the archive.
        raise BadWsFile(
            f"workspace '{self.path}' is not valid: "
            f"file '{self._config_file}' could not be loaded") from err

    # Link configuration
    self._set_attr_values(cfg.get('dc', {}), group='dc')  # type: ignore
def perform_download():
    """Stream a Taskcluster task artifact to artifact_path.

    Raises:
        requests.HTTPError: the artifact request returned a non-2xx status.
        BadZipFile: a downloaded ``.zip`` artifact is not a valid zip file.
    """
    r = requests.get(queue_base + 'task/%s/artifacts/%s' % (task_id, artifact_name),
                     stream=True)
    # Fail fast on HTTP errors instead of silently writing an error page
    # to disk as if it were the artifact (matches the sibling
    # perform_download implementations in this file).
    r.raise_for_status()
    with open(artifact_path, 'wb') as f:
        r.raw.decode_content = True
        shutil.copyfileobj(r.raw, f)
    if artifact_path.endswith('.zip') and not is_zipfile(artifact_path):
        raise BadZipFile('File is not a zip file')
def unzip(self, overwrite: bool = False):
    """Extract the MIZ archive into the temp dir and validate its content.

    Args:
        overwrite: allow re-extracting over a previously unzipped archive.

    Raises:
        FileExistsError: already unzipped and overwrite is False.
        BadZipFile: the miz file is not a valid zip (original error chained).
        FileNotFoundError: a mandatory miz member is missing after extraction.
    """
    if self.zip_content and not overwrite:
        raise FileExistsError(self.tmpdir.abspath())
    logger.debug('unzipping miz to temp dir')
    try:
        with ZipFile(self.miz_path.abspath()) as zip_file:
            logger.debug('reading infolist')
            self.zip_content = [f.filename for f in zip_file.infolist()]
            for item in zip_file.infolist():
                # not using ZipFile.extractall() for security reasons
                assert isinstance(item, ZipInfo)
                logger.debug('unzipping item: {}'.format(item))
                try:
                    zip_file.extract(item, self.tmpdir.abspath())
                except Exception:
                    # narrowed from a bare 'except:' — still logs and re-raises
                    logger.error('failed to extract archive member: {}'.format(item))
                    raise
    except BadZipFile as err:
        # Chain so the zipfile diagnostic is preserved in the traceback.
        raise BadZipFile(self.miz_path.abspath()) from err
    except Exception:
        logger.exception('error while unzipping miz file: {}'.format(self.miz_path.abspath()))
        raise
    logger.debug('checking miz content')
    # BUGFIX: the original used map(join, [dir], [5 names]); map() stops at
    # the shortest iterable, so only 'mission' was ever checked. Join each
    # mandatory member explicitly so all five are validated.
    for member in ('mission', 'options', 'warehouses',
                   'l10n/DEFAULT/dictionary', 'l10n/DEFAULT/mapResource'):
        miz_item = join(self.tmpdir.abspath(), member)
        if not exists(miz_item):
            logger.error('missing file in miz: {}'.format(miz_item))
            raise FileNotFoundError(miz_item)
    for filename in self.zip_content:
        p = self.tmpdir.joinpath(filename)
        if not p.exists():
            raise FileNotFoundError(p.abspath())
    logger.debug('all files have been found, miz successfully unzipped')
def perform_download() -> None:
    """Stream `url` to `path` in 1 MiB chunks, then sanity-check zips."""
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open(path, "wb") as out:
        for block in response.iter_content(chunk_size=1048576):
            out.write(block)
    # A .zip download that isn't actually a zip means a broken transfer.
    if path.endswith(".zip") and not is_zipfile(path):
        raise BadZipFile("File is not a zip file")
def _open_zip(self, filepath: str) -> ZipFile: try: return ZipFile(filepath, mode='r') except FileNotFoundError: raise FileNotFoundError(f'Zip archive {filepath} was not found') except BadZipFile: raise BadZipFile(f'File: {filepath} is not ZIP archive') except LargeZipFile: raise LargeZipFile( f'ZIP file {filepath} Too large for open, MAX size 4GB')
def perform_download():
    """Download `url` to `artifact_path`, verifying zip integrity."""
    response = requests.get(url, stream=True)
    response.raise_for_status()
    response.raw.decode_content = True
    with open(artifact_path, "wb") as destination:
        shutil.copyfileobj(response.raw, destination)
    # Catch truncated/invalid downloads for zip artifacts early.
    if artifact_path.endswith(".zip") and not is_zipfile(artifact_path):
        raise BadZipFile("File is not a zip file")
def perform_download():
    """Fetch a Taskcluster task artifact and store it at artifact_path."""
    artifact_url = queue_base + "task/{}/artifacts/{}".format(task_id, artifact_name)
    resp = requests.get(artifact_url, stream=True)
    resp.raise_for_status()
    resp.raw.decode_content = True
    with open(artifact_path, "wb") as out:
        shutil.copyfileobj(resp.raw, out)
    # Guard against saving a corrupt/partial zip artifact.
    if artifact_path.endswith(".zip") and not is_zipfile(artifact_path):
        raise BadZipFile("File is not a zip file")
def _extract_zip(self) -> None:
    """Extract the current document's zip file into the documents dir,
    using the document's guid as the subfolder name.

    Skips extraction when the target folder already exists.

    Raises:
        BadZipFile: the zip is invalid (possibly password protected);
            the original error is chained.
    """
    self.note_extract_dir = self.documents_dir.joinpath(self.guid)
    # If the target folder already exists, skip extraction.
    if self.note_extract_dir.exists():
        # logger.info(f'{self.note_extract_dir!s} |{self.title}| 已经存在,跳过。')
        return
    try:
        # Context manager guarantees the handle is closed — the original
        # leaked the open ZipFile on both success and failure.
        with ZipFile(self.note_file) as zip_file:
            zip_file.extractall(self.note_extract_dir)
    except BadZipFile as e:
        msg = f'ZIP 文件错误,可能是需要密码。 {self.note_file!s} |{self.title}|'
        # Chain the original exception so the root cause is preserved.
        raise BadZipFile(msg) from e
def _download_climate_observations_data_parallel(
        remote_file: Union[str, Path]) -> BytesIO:
    """Download one DWD climate-observations archive and return its
    'produkt' member as an in-memory buffer.

    Args:
        remote_file: path of the file to download (as provided by the
            'select_dwd' file listing).

    Returns:
        BytesIO holding the raw bytes of the archive's 'produkt' file.
    """
    try:
        archive_bytes = download_file_from_dwd(remote_file,
                                               DWDCDCBase.CLIMATE_OBSERVATIONS)
    except InvalidURL as e:
        raise InvalidURL(
            f"Error: the station data {remote_file} couldn't be reached."
        ) from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")

    try:
        archive = ZipFile(archive_bytes)
        for member in archive.namelist():
            if not member.startswith(PRODUCT_FILE_IDENTIFIER):
                continue
            # Found the product file: wrap its bytes and release the handle.
            product = BytesIO(archive.open(member).read())
            archive.close()
            return product
        raise ProductFileNotFound(
            f"The archive of {remote_file} does not hold a 'produkt' file.")
    except BadZipFile as e:
        raise BadZipFile(
            f"The archive of {remote_file} seems to be corrupted.") from e
def extract_zip(self):
    """Extract the extended-BASIC hex image from the ROM archive.

    Reads ROMS/6809/EXT_BASIC_NO_USING.hex from self.archive_path, writes
    it into self.ROM_PATH and runs self.post_processing on the result.

    Raises:
        BadZipFile: archive is corrupt (original error is chained).
    """
    assert self.FILE_COUNT > 0
    try:
        # 'archive' instead of 'zip' — don't shadow the builtin.
        with zipfile.ZipFile(self.archive_path, "r") as archive:
            content = archive.read("ROMS/6809/EXT_BASIC_NO_USING.hex")
        out_filename = os.path.join(self.ROM_PATH, "EXT_BASIC_NO_USING.hex")
        with open(out_filename, "wb") as f:
            f.write(content)
        print("%r extracted" % out_filename)
        self.post_processing(out_filename)
    except BadZipFile as err:
        msg = "Error extracting archive %r: %s" % (self.archive_path, err)
        log.error(msg)
        # Chain the original error so the root cause is preserved.
        raise BadZipFile(msg) from err
def convert_to_cbz(self) -> None:
    """
    Bundle every previously downloaded page image into a <name>.cbz
    archive, deleting the images and their directory afterwards.

    Raises:
        BadZipFile: the finished archive fails its integrity test.
    """
    with ZipFile(f"{self.name}.cbz", mode="a") as archive:
        for page in os.listdir(self.name):
            page_path = f"{self.name}/{page}"
            archive.write(page_path, page)
            # Page is now inside the archive; the loose file can go.
            os.remove(page_path)
        os.rmdir(self.name)
        if archive.testzip() is not None:
            raise BadZipFile(
                "Error while testing the archive; it might be corrupted."
            )
def _add(self, zipinfo):
    """Insert *zipinfo* into the tree, creating intermediate directory
    nodes as needed; duplicate file entries raise BadZipFile."""
    node = self
    if zipinfo.filename.endswith('/'):
        # Directory entry: create/descend one ZipTree node per segment,
        # tracking the cumulative path for each node's constructor.
        path_so_far = ''
        for segment in zipinfo.filename.rstrip('/').split('/'):
            path_so_far += segment + '/'
            node = node.directories.setdefault(segment, ZipTree(path_so_far))
    else:
        # File entry: descend through the parent directories first.
        *dir_parts, basename = zipinfo.filename.split('/')
        for segment in dir_parts:
            node = node.directories.setdefault(segment, ZipTree())
        if basename in node.files:
            raise BadZipFile('duplicate file entry in zipfile')
        node.files[basename] = zipinfo
def clean_file(self):
    """Form-field validator: the upload must be a zip containing exactly
    one .shp, one .shx and one .dbf file."""
    data = self.cleaned_data["file"]
    try:
        archive = ZipFile(data)
        shapefile_parts = {
            member for member in archive.namelist()
            if member.lower()[-4:] in (".shp", ".shx", ".dbf")
        }
        suffixes = sorted(member.lower()[-4:] for member in shapefile_parts)
        # Exactly one of each extension must be present.
        if suffixes != [".dbf", ".shp", ".shx"]:
            raise BadZipFile()
    except BadZipFile:
        # Covers both "not a zip" and "wrong contents" via the raise above.
        raise forms.ValidationError(
            "This is not a zip file, or it doesn't contain exactly one .shp, .shx "
            "and .dbf file.")
    return data
def unzip(self, overwrite: bool = False):
    """
    Flattens a MIZ file into the temp dir

    Args:
        overwrite: allow overwriting exiting files

    Raises:
        FileExistsError: already unzipped and overwrite is False.
        BadZipFile: miz is not a valid zip (original error now chained).
        FileNotFoundError: a mandatory miz member is missing.
    """
    if self.zip_content and not overwrite:
        raise FileExistsError(str(self.temp_dir))
    LOGGER.debug('unzipping miz to temp dir')
    try:
        with ZipFile(str(self.miz_path)) as zip_file:
            LOGGER.debug('reading infolist')
            self.zip_content = [f.filename for f in zip_file.infolist()]
            self._extract_files_from_zip(zip_file)
    except BadZipFile as err:
        # Chain so the zipfile diagnostic survives in the traceback.
        raise BadZipFile(str(self.miz_path)) from err
    except Exception:
        # Narrowed from a bare 'except:' — still logs and re-raises.
        LOGGER.exception('error while unzipping miz file: %s', self.miz_path)
        raise
    LOGGER.debug('checking miz content')
    # noinspection PyTypeChecker
    for miz_item in [
            'mission', 'options', 'warehouses',
            'l10n/DEFAULT/dictionary', 'l10n/DEFAULT/mapResource']:
        if not Path(self.temp_dir.joinpath(miz_item)).exists():
            LOGGER.error('missing file in miz: %s', miz_item)
            raise FileNotFoundError(miz_item)
    self._check_extracted_content()
    LOGGER.debug('all files have been found, miz successfully unzipped')
def most_recent_tfidf():
    """Load the most recently written tfidf matrix, retrying on failure.

    A concurrent writer can leave the newest ``.npz`` momentarily
    incomplete, which surfaces as AttributeError/BadZipFile; retry up to
    5 times with a 10 second pause between attempts.

    Returns:
        The scipy sparse matrix loaded from the newest tfidf_matrix_*.npz.

    Raises:
        BadZipFile: the matrix could not be loaded after 5 retries
            (original load error is chained).
    """
    list_of_files = glob.glob(
        res_path + '/models/tfidf/tfidf_matrix_*.npz'
    )  # * means all if need specific format then *.csv
    latest_file = max(list_of_files, key=os.path.getmtime)
    retries = 0
    while True:
        retries += 1
        try:
            npz = scipy.sparse.load_npz(latest_file)
            break
        except (AttributeError, BadZipFile) as e:
            # BUGFIX: check the retry limit BEFORE sleeping — the original
            # slept a full 10 s and only then raised on the final failure.
            if retries > 5:
                raise BadZipFile(
                    'Issue when loading tfidf matrix, over limit retry counter'
                ) from e
            print('Issue when loading tfidf matrix, retrying in 10 sec', e)
            sleep(10)
    return npz
def load(self, filepath: PathLike, pwd: OptBytes = None) -> None: """Load Workspace from file. Args: filepath: String or :term:`path-like object`, that points to a valid ZipFile file. If the filepath points to a valid ZipFile, then the class instance is initialized with a memory copy of the file. If the given file, however, does not exist or isn't a valid ZipFile respectively one of the errors FileNotFoundError or BadZipFile is raised. pwd: Bytes representing password of ZipFile. """ # Initialize instance Variables, Buffer and buffered ZipFile self._changed = False self._path = env.expand(filepath) self._pwd = pwd self._buffer = io.BytesIO() self._file = ZipFile(self._buffer, mode='w') if not self._path: raise ValueError('') # TODO # Copy contents from ZipFile to buffered ZipFile with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) try: with ZipFile(self._path, mode='r') as fh: for zinfo in fh.infolist(): data = fh.read(zinfo, pwd=pwd) # TODO ([email protected]): The zipfile standard # module currently does not support encryption in write # mode of new ZipFiles. See: # https://docs.python.org/3/library/zipfile.html # When support is provided, the below line for writing # files shall be replaced by: # self._file.writestr(zinfo, data, pwd=pwd) self._file.writestr(zinfo, data) except FileNotFoundError as err: raise FileNotFoundError( f"file '{self.path}' does not exist") from err except BadZipFile as err: raise BadZipFile( f"file '{self.path}' is not a valid ZIP file") from err
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a DWD climate-observations archive and return the raw
    bytes of its single 'produkt' member."""
    try:
        payload = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
    except InvalidURL as e:
        raise InvalidURL(f"Error: the station data {remote_file} could not be reached.") from e
    except Exception:
        raise FailedDownload(f"Download failed for {remote_file}")
    try:
        filesystem = ZipFileSystem(payload)
    except BadZipFile as e:
        raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e
    matches = filesystem.glob("produkt*")
    # Exactly one product file is expected per archive.
    if len(matches) != 1:
        raise ProductFileNotFound(f"The archive of {remote_file} does not hold a 'produkt' file.")
    return filesystem.open(matches[0]).read()
def extract_zip(self):
    """Extract the ROM archive into self.ROM_PATH and convert the hex image.

    Validates the archive's name list against self.ARCHIVE_NAMES, extracts
    everything, then converts ExBasROM.hex to the binary self.rom_path.

    Raises:
        RuntimeError: archive content does not match ARCHIVE_NAMES.
        BadZipFile: archive is corrupt (original error is chained).
    """
    assert self.FILE_COUNT > 0
    try:
        # 'archive' instead of 'zip' — don't shadow the builtin.
        with zipfile.ZipFile(self.archive_path, "r") as archive:
            namelist = archive.namelist()
            print("namelist():", namelist)
            if namelist != self.ARCHIVE_NAMES:
                msg = ("Wrong archive content?!?"
                       " namelist should be: %r") % self.ARCHIVE_NAMES
                log.error(msg)
                raise RuntimeError(msg)
            archive.extractall(path=self.ROM_PATH)
    except BadZipFile as err:
        msg = "Error extracting archive %r: %s" % (self.archive_path, err)
        log.error(msg)
        # Chain the original error so the root cause is preserved.
        raise BadZipFile(msg) from err
    hex2bin(src=os.path.join(self.ROM_PATH, "ExBasROM.hex"),
            dst=self.rom_path,
            verbose=False)
def get_zip_infos(self, *filenames):
    """Read in the table of contents for the ZIP file.

    Generator: yields a ZipInfo for each requested *filenames* entry found
    in the central directory, stopping early once all names are found.
    Enforces self.max_file_count while scanning (raises TooManyFiles).

    Raises:
        RuntimeError: the archive was already closed.
        BadZipFile: missing/truncated/corrupt central directory.
        NotImplementedError: an entry needs an unsupported zip version.
    """
    fp = self.fp
    max_file_count = self.max_file_count
    if not fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")
    filenames = set(filenames)
    # Nothing requested — nothing to scan.
    if len(filenames) == 0:
        return
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    size_cd = endrec[_ECD_SIZE]        # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]    # offset of central directory
    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    # start_dir: Position of start of central directory
    start_dir = offset_cd + concat
    fp.seek(start_dir, 0)
    # Read the whole central directory into memory and parse from there.
    data = fp.read(size_cd)
    fp = BytesIO(data)
    total = 0
    file_count = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & _UTF8_EXTENSION_FLAG:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        # Local header offsets are relative to the concatenation prefix.
        x.header_offset = x.header_offset + concat
        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        file_count += 1
        if max_file_count is not None and file_count > max_file_count:
            # Guard against zip-bomb style archives with huge entry counts.
            raise TooManyFiles('Too many files in egg')
        if x.filename in filenames:
            filenames.discard(x.filename)
            yield x
        # All requested entries found — stop scanning early.
        if len(filenames) == 0:
            return
def open(self, name, mode="r", pwd=None, *, force_zip64=False):
    """Returns file-like object for 'name'.

    @param name is a string for the file name within the ZIP file, or a
    ZipInfo object.

    @param mode should be 'r' to read a file already in the ZIP file, or
    'w' to write to a file newly added to the archive.

    @param pwd is the password to decrypt files (only used for reading).

    When writing, if the file size is not known in advance but may exceed
    2 GiB, pass force_zip64 to use the ZIP64 format, which can handle large
    files. If the size is known in advance, it is best to pass a ZipInfo
    instance for name, with zinfo.file_size set.
    """
    # Validate arguments and archive state up front.
    if mode not in {"r", "w"}:
        raise ValueError('open() requires mode "r" or "w"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    if pwd and (mode == "w"):
        raise ValueError("pwd is only supported for reading files")
    if not self.fp:
        raise ValueError(
            "Attempt to use ZIP archive that was already closed")

    # Make sure we have an info object
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    elif mode == 'w':
        zinfo = ZipInfo(name)
        zinfo.compress_type = self.compression
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    if mode == 'w':
        return self._open_to_write(zinfo, force_zip64=force_zip64)

    if hasattr(self, "_writing") and self._writing:
        raise ValueError("Can't read from the ZIP file while there "
                         "is an open writing handle on it. "
                         "Close the writing handle before trying to read.")

    # Open for reading:
    self._fileRefCnt += 1
    if sys.version_info[:2] <= (3, 5):
        zef_file = _SharedFile(  # pylint: disable=E1120
            self.fp, zinfo.header_offset, self._fpclose, self._lock)
    # NOTE(review): this second assignment unconditionally overwrites the
    # <= 3.5 branch above — it looks like a missing 'else'; confirm against
    # upstream history before relying on the 3.5 code path.
    zef_file = _SharedFile(
        self.fp, zinfo.header_offset, self._fpclose, self._lock,
        lambda: hasattr(self, "_writing") and self._writing)
    try:
        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if len(fheader) != sizeFileHeader:
            raise BadZipFile("Truncated file header")
        fheader = struct.unpack(structFileHeader, fheader)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        # Consume the extra field so the stream sits at the member data.
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x20:
            # Zip 2.7: compressed patched data
            raise NotImplementedError(
                "compressed patched data (flag bit 5)")

        if zinfo.flag_bits & 0x40:
            # strong encryption
            raise NotImplementedError("strong encryption (flag bit 6)")

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        # On Windows, compare names with normalized path separators so a
        # backslash/slash mismatch is not treated as corruption.
        if sys.platform.startswith("win"):
            if fname_str.replace("\\", "/") != zinfo.orig_filename.replace(
                    "\\", "/"):
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))
        else:
            if fname_str != zinfo.orig_filename:
                raise BadZipFile(
                    'File name in directory %r and header %r differ.'
                    % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %r is encrypted, password "
                                   "required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption
            # header used to strengthen the algorithm. The first 11 bytes
            # are completely random, while the 12th contains the MSB of
            # the CRC, or the MSB of the file time depending on the header
            # type and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file %r" % name)

        return ZipExtFile(zef_file, mode, zinfo, zd, True)
    except Exception:
        # Release the shared handle on any failure path.
        zef_file.close()
        raise
def decompress(self, chunk):
    """Consume one chunk of a zip byte stream and append the members it
    completes to the output tarfile (streaming zip -> tar conversion).

    Incrementally parses local file headers across chunk boundaries using
    self.header_buf / self.header_buf_2, then forwards compressed member
    bytes through a StreamingZipFile reader into self.output (a tarfile).
    Stops permanently once the central directory signature is seen.
    """
    chunkbuffer = BytesBuffer()
    chunkbuffer.write(chunk)
    output = BytesBuffer()
    while not self.finished and len(chunkbuffer) > 0:
        if not self.current_fheader:
            # Read the first part of the header (constant size of sizeFileHeader).
            if len(self.header_buf) <= sizeFileHeader:
                bytes_remaining = sizeFileHeader - len(self.header_buf)
                self.header_buf.write(chunkbuffer.read(bytes_remaining))
            if len(self.header_buf) >= sizeFileHeader:
                fheader = self.header_buf.read(sizeFileHeader)
                self.current_fheader = struct.unpack(
                    structFileHeader, fheader)
                if self.current_fheader[_FH_SIGNATURE] == stringCentralDir:
                    # We've reached the central directory. This means that
                    # we've finished iterating through all entries in the
                    # zip file. We can do this check because the file header
                    # signature and central directory signature are stored
                    # in the same spot (index 0) and with the same format.
                    self.finished = True
                    break
                if self.current_fheader[_FH_SIGNATURE] != stringFileHeader:
                    raise BadZipFile("Bad magic number for file header")
                self.current_file_compressed_size = self.current_fheader[
                    _FH_COMPRESSED_SIZE]
                self.current_file_uncompressed_size = self.current_fheader[
                    _FH_UNCOMPRESSED_SIZE]
                # Finished reading the first part of the header.
                self.buf.write(fheader)
        if self.current_fheader and not self.current_zipinfo:
            # Read the second part of the header (variable size
            # sizeFileHeader2 obtained by reading self.current_fheader).
            sizeFileHeader2 = (
                self.current_fheader[_FH_FILENAME_LENGTH] +
                self.current_fheader[_FH_EXTRA_FIELD_LENGTH])
            if len(self.header_buf_2) <= sizeFileHeader2:
                bytes_remaining = sizeFileHeader2 - len(self.header_buf_2)
                self.header_buf_2.write(chunkbuffer.read(bytes_remaining))
            if len(self.header_buf_2) >= sizeFileHeader2:
                fheader_2 = self.header_buf_2.read(sizeFileHeader2)
                # Finished reading the entire header.
                self.buf.write(fheader_2)
                with StreamingZipFile(self.buf) as zf:
                    # Header finished; write the tarfile header now.
                    zinfo = zf.next()
                    self.current_zipinfo = zinfo
                    tarinfo = tarfile.TarInfo(name=zinfo.filename)
                    tarinfo.size = zinfo.file_size
                    # ZipFile doesn't store permissions, so we just set it
                    # to a sensible default.
                    tarinfo.mode = 0o755
                    tarinfo.type = tarfile.DIRTYPE if zinfo.is_dir(
                    ) else tarfile.REGTYPE
                    # From https://fossies.org/linux/littleutils/scripts/zip2tarcat.in
                    tarinfo.mtime = time.mktime(
                        datetime.datetime(*zinfo.date_time).timetuple())
                    self.output.addfile(tarinfo)
        if self.current_zipinfo:
            # Header finished; write (up to) the entire body of the
            # current member.
            bytes_remaining = (self.current_file_compressed_size -
                               self.current_file_compressed_bytes_obtained)
            remaining = chunkbuffer.read(bytes_remaining)
            self.current_file_compressed_bytes_obtained += len(remaining)
            self.buf.write(remaining)
            # Extract what's remaining from the zipfile and write it to
            # the tarfile.
            if not self.current_zef:
                with StreamingZipFile(self.buf) as zf:
                    self.current_zef = zf.open(self.current_zipinfo)
            # ZipExtFile._read1(n) reads up to n compressed bytes. We use
            # this rather than ZipExtFile.read(n), which takes in
            # uncompressed bytes, because we only know how much compressed
            # bytes we have added so far.
            uncompressed = self.current_zef._read1(len(remaining))
            self.output.fileobj.write(uncompressed)
            self.output.offset += len(uncompressed)
            if self.current_file_compressed_bytes_obtained == self.current_file_compressed_size:
                # We've obtained the entire file. Write the remainder of
                # the block, if needed, and then reset the current
                # information. This code for writing the remainder of the
                # block is taken from
                # https://github.com/python/cpython/blob/9d2c2a8e3b8fe18ee1568bfa4a419847b3e78575/Lib/tarfile.py#L2008-L2012.
                blocks, remainder = divmod(
                    self.current_file_compressed_size, tarfile.BLOCKSIZE)
                if remainder > 0:
                    assert self.output.fileobj is not None
                    self.output.fileobj.write(
                        tarfile.NUL * (tarfile.BLOCKSIZE - remainder))
                    blocks += 1
                self.output.offset += blocks * tarfile.BLOCKSIZE  # type: ignore
                # Reset the current information.
                self.reset_info()
    return output.read()
def next(self):
    """Return the next member of the archive as a ZipInfo object. Returns
    None if there is no more available. This method is analogous to
    TarFile.next().

    We construct a ZipInfo object using the information stored in the next
    file header. The logic here is based on the implementation of
    ZipFile._RealGetContents(), which constructs a ZipInfo object from
    information in a central directory file header, but modified to work
    with the file-header-specific struct (for the implementation of
    ZipFile._RealGetContents(), see
    https://github.com/python/cpython/blob/048f54dc75d51e8a1c5822ab7b2828295192aaa5/Lib/zipfile.py#L1316).
    """
    fp = self.fp
    # First, advance to the next header, if needed.
    fp.read(self._next_header_pos - fp.tell())
    # Read the next header.
    fheader = fp.read(sizeFileHeader)
    if len(fheader) != sizeFileHeader:
        raise BadZipFile("Truncated file header")
    fheader = struct.unpack(structFileHeader, fheader)
    if fheader[_FH_SIGNATURE] == stringCentralDir:
        # We've reached the central directory. This means that we've
        # finished iterating through all entries in the zip file. We can do
        # this check because the file header signature and central
        # directory signature are stored in the same spot (index 0) and
        # with the same format.
        self._loaded = True
        return None
    if fheader[_FH_SIGNATURE] != stringFileHeader:
        raise BadZipFile("Bad magic number for file header")
    filename = fp.read(fheader[_FH_FILENAME_LENGTH])
    flags = fheader[_FH_GENERAL_PURPOSE_FLAG_BITS]
    if flags & 0x800:
        # UTF-8 file names extension
        filename = filename.decode('utf-8')
    else:
        # Historical ZIP filename encoding
        filename = filename.decode('cp437')
    # Create ZipInfo instance to store file information
    x = ZipInfo(filename)
    x.extra = fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
    x.header_offset = self._next_header_pos
    # The file header stores nearly all the same information needed for
    # ZipInfo as what the central directory file header stores, except for
    # a couple of missing fields. We just set them to 0 here.
    x.comment = 0
    x.create_version, x.create_system = 0, 0
    x.volume, x.internal_attr, x.external_attr = 0, 0, 0
    (x.extract_version, x.reserved, x.flag_bits, x.compress_type,
     t, d, x.CRC, x.compress_size, x.file_size) = fheader[1:10]
    if x.extract_version > MAX_EXTRACT_VERSION:
        raise NotImplementedError("zip file version %.1f" %
                                  (x.extract_version / 10))
    # Convert date/time code to (year, month, day, hour, min, sec)
    # This comes from the original cpython code.
    x._raw_time = t
    x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                   t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
    x._decodeExtra()
    self.filelist.append(x)
    self.NameToInfo[x.filename] = x
    # Beginning of the next file's header.
    self._next_header_pos = (fp.tell() + x.compress_size)
    return x