def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
    """Yield ``(inner_pathname, file_object)`` for every regular file in each tar stream.

    Each item of ``self.datapipe`` is passed to
    ``validate_pathname_binary_tuple`` and then unpacked as
    ``(pathname, data_stream)``. The stream is opened with ``tarfile.open``
    using ``self.mode``; members for which ``isfile()`` is false are skipped.

    Yields:
        A tuple of the normalized path obtained by joining the directory part
        of ``pathname`` with the member name, and the object returned by
        ``TarFile.extractfile`` for that member.

    Raises:
        tarfile.ExtractError: if ``extractfile`` returns ``None`` for a member.
        Exception: any other error raised while processing the archive is
            re-raised unchanged after a warning has been emitted.
    """
    for data in self.datapipe:
        validate_pathname_binary_tuple(data)
        pathname, data_stream = data
        folder_name = os.path.dirname(pathname)
        try:
            # typing.cast is used here to silence mypy's type checker
            # NOTE(review): the TarFile is never closed here; presumably the
            # yielded file objects still read from it lazily - confirm.
            tar = tarfile.open(fileobj=cast(Optional[IO[bytes]], data_stream), mode=self.mode)
            for tarinfo in tar:
                if not tarinfo.isfile():
                    continue
                extracted_fobj = tar.extractfile(tarinfo)
                if extracted_fobj is None:
                    message = "failed to extract file {} from source tarfile {}".format(
                        tarinfo.name, pathname)
                    warnings.warn(message)
                    # Attach the message to the exception so the handler below
                    # reports a non-empty cause in its own warning.
                    raise tarfile.ExtractError(message)
                inner_pathname = os.path.normpath(os.path.join(folder_name, tarinfo.name))
                yield (inner_pathname, extracted_fobj)  # type: ignore[misc]
        except Exception as e:
            warnings.warn(
                "Unable to extract files from corrupted tarfile stream {} due to: {}, abort!"
                .format(pathname, e))
            # Re-raise the active exception as-is.
            raise
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
    """Yield ``(inner_pathname, file_object)`` for every regular file in each tar stream.

    ``self.datapipe`` must be an ``Iterable``. Each of its items is passed to
    ``validate_pathname_binary_tuple`` and then unpacked as
    ``(pathname, data_stream)``. The stream is opened with ``tarfile.open``
    in mode ``"r:*"``; members for which ``isfile()`` is false are skipped.

    Yields:
        A tuple of the normalized path obtained by joining ``pathname`` with
        the member name, and the object returned by ``TarFile.extractfile``
        for that member (cast to ``BufferedIOBase`` for the type checker).

    Raises:
        TypeError: if ``self.datapipe`` is not an ``Iterable``.
        tarfile.ExtractError: if ``extractfile`` returns ``None`` for a member.
        Exception: any other error raised while processing the archive is
            re-raised unchanged after a warning has been emitted.
    """
    if not isinstance(self.datapipe, Iterable):
        raise TypeError("datapipe must be Iterable type but got {}".format(
            type(self.datapipe)))
    for data in self.datapipe:
        validate_pathname_binary_tuple(data)
        pathname, data_stream = data
        try:
            # typing.cast is used here to silence mypy's type checker
            tar = tarfile.open(fileobj=cast(Optional[IO[bytes]], data_stream), mode="r:*")
            for tarinfo in tar:
                if not tarinfo.isfile():
                    continue
                extracted_fobj = tar.extractfile(tarinfo)
                if extracted_fobj is None:
                    message = "failed to extract file {} from source tarfile {}".format(
                        tarinfo.name, pathname)
                    warnings.warn(message)
                    # Attach the message to the exception so the handler below
                    # reports a non-empty cause in its own warning.
                    raise tarfile.ExtractError(message)
                inner_pathname = os.path.normpath(os.path.join(pathname, tarinfo.name))
                # Add a reference of the source tarfile into extracted_fobj, so the source
                # tarfile handle won't be released until all the extracted file objs are destroyed.
                # Add `# type: ignore` to silence mypy's type checker
                extracted_fobj.source_tarfile_ref = tar  # type: ignore
                # typing.cast is used here to silence mypy's type checker
                yield (inner_pathname, cast(BufferedIOBase, extracted_fobj))
        except Exception as e:
            warnings.warn(
                "Unable to extract files from corrupted tarfile stream {} due to: {}, abort!"
                .format(pathname, e))
            # Re-raise the active exception as-is.
            raise
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
    """Iterate over the file entries of every zip stream in ``self.datapipe``.

    ``self.datapipe`` must be an ``Iterable``; each item is checked with
    ``validate_pathname_binary_tuple`` and unpacked into a pathname and a
    binary stream, which is opened as a ``zipfile.ZipFile``. Directory
    entries are skipped.

    Yields:
        ``(inner_pathname, extracted_file)`` where ``inner_pathname`` is the
        normalized join of the archive pathname and the entry's filename, and
        ``extracted_file`` is the object returned by ``ZipFile.open``.

    Raises:
        TypeError: if ``self.datapipe`` is not an ``Iterable``.
        Exception: any error raised while processing an archive is re-raised
            after a warning has been emitted.
    """
    if not isinstance(self.datapipe, Iterable):
        raise TypeError(
            "datapipe must be Iterable type but got {}".format(type(self.datapipe)))

    for item in self.datapipe:
        validate_pathname_binary_tuple(item)
        archive_path, archive_stream = item
        try:
            # The cast only exists to satisfy mypy.
            archive = zipfile.ZipFile(cast(IO[bytes], archive_stream))
            for entry in archive.infolist():
                # Only the minor version is inspected; the major version is
                # expected to always be 3 here.
                if sys.version_info[1] >= 6:
                    skip_entry = entry.is_dir()
                else:
                    skip_entry = entry.filename.endswith('/')
                if skip_entry:
                    continue

                entry_stream = archive.open(entry)
                joined = os.path.join(archive_path, entry.filename)
                entry_path = os.path.normpath(joined)
                # Keep the source zipfile referenced from the extracted file object so
                # its handle is not released while extracted file objects are still alive.
                entry_stream.source_zipfile_ref = archive  # type: ignore
                # The cast only exists to satisfy mypy.
                yield (entry_path, cast(BufferedIOBase, entry_stream))
        except Exception as e:
            template = "Unable to extract files from corrupted zipfile stream {} due to: {}, abort!"
            warnings.warn(template.format(archive_path, e))
            raise e
def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
    """Iterate over the file entries of every zip stream in ``self.datapipe``.

    Each item is checked with ``validate_pathname_binary_tuple`` and unpacked
    into a pathname and a binary stream, which is opened as a
    ``zipfile.ZipFile``. Directory entries are skipped.

    Yields:
        ``(inner_pathname, wrapped_file)`` where ``inner_pathname`` is the
        normalized join of the directory part of the archive pathname and the
        entry's filename, and ``wrapped_file`` is the ``ZipFile.open`` result
        wrapped in ``StreamWrapper``.

    Raises:
        Exception: any error raised while processing an archive is re-raised
            after a warning has been emitted.
    """
    for item in self.datapipe:
        validate_pathname_binary_tuple(item)
        pathname, archive_stream = item
        parent_dir = os.path.dirname(pathname)
        try:
            # The cast only exists to satisfy mypy.
            archive = zipfile.ZipFile(cast(IO[bytes], archive_stream))
            for entry in archive.infolist():
                # Only the minor version is inspected; the major version is
                # expected to always be 3 here.
                if sys.version_info[1] >= 6:
                    skip_entry = entry.is_dir()
                else:
                    skip_entry = entry.filename.endswith('/')
                if skip_entry:
                    continue

                entry_stream = archive.open(entry)
                joined = os.path.join(parent_dir, entry.filename)
                entry_path = os.path.normpath(joined)
                yield entry_path, StreamWrapper(entry_stream)  # type: ignore[misc]
        except Exception as e:
            warnings.warn(
                f"Unable to extract files from corrupted zipfile stream {pathname} due to: {e}, abort!"
            )
            raise e
def __iter__(self) -> Iterator[Tuple[str, io.BufferedIOBase]]:
    """Iterate over the entries of every rar stream in ``self.datapipe``.

    Each item is checked with ``validate_pathname_binary_tuple`` and unpacked
    into a path and a stream, which is opened with ``self._rarfile.RarFile``.
    Entries whose filename ends with ``"/"`` are skipped.

    Yields:
        ``(inner_path, file_obj)`` where ``inner_path`` is the archive path
        joined with the entry's filename (as a string) and ``file_obj`` is the
        object returned by ``RarFile.open`` for that entry.
    """
    for item in self.datapipe:
        validate_pathname_binary_tuple(item)
        archive_path, archive_stream = item
        archive = self._rarfile.RarFile(archive_stream)
        for member in archive.infolist():
            if not member.filename.endswith("/"):
                member_path = str(pathlib.Path(archive_path) / member.filename)
                member_stream = archive.open(member)
                # Attach the archive to the stream; presumably this keeps the
                # archive alive while the stream is in use - confirm.
                member_stream.source_rar = archive
                yield member_path, member_stream