示例#1
0
 def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
     """Yield ``(path, file object)`` pairs for the file members of tar archives.

     Each item of ``self.datapipe`` is passed to
     ``validate_pathname_binary_tuple`` and unpacked into an archive pathname
     and a binary stream. The stream is opened with ``tarfile.open`` using
     ``self.mode``; members for which ``isfile()`` is false are skipped.

     Yields:
         A tuple of the member name joined onto the directory part of the
         archive pathname (normalized with ``os.path.normpath``) and the
         object returned by ``extractfile`` for that member.

     Raises:
         Exception: any exception raised while opening or walking an archive
             is reported through ``warnings.warn`` and then re-raised.
     """
     for item in self.datapipe:
         validate_pathname_binary_tuple(item)
         archive_path, archive_stream = item
         parent_dir = os.path.dirname(archive_path)
         try:
             # typing.cast only satisfies mypy here; it has no runtime effect.
             archive = tarfile.open(
                 fileobj=cast(Optional[IO[bytes]], archive_stream),
                 mode=self.mode,
             )
             for member in archive:
                 if member.isfile():
                     member_fobj = archive.extractfile(member)
                     if member_fobj is None:
                         failure = "failed to extract file {} from source tarfile {}".format(
                             member.name, archive_path)
                         warnings.warn(failure)
                         raise tarfile.ExtractError
                     joined = os.path.join(parent_dir, member.name)
                     member_path = os.path.normpath(joined)
                     yield member_path, member_fobj  # type: ignore[misc]
         except Exception as e:
             # Report which archive failed before propagating the error.
             abort_note = "Unable to extract files from corrupted tarfile stream {} due to: {}, abort!".format(
                 archive_path, e)
             warnings.warn(abort_note)
             raise e
示例#2
0
 def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
     """Yield ``(path, file object)`` pairs for the file members of tar archives.

     ``self.datapipe`` must be an ``Iterable``. Each of its items is passed
     to ``validate_pathname_binary_tuple`` and unpacked into an archive
     pathname and a binary stream, which is opened with ``tarfile.open`` in
     mode ``"r:*"``. Members for which ``isfile()`` is false are skipped.

     Yields:
         A tuple of the member name joined onto the archive pathname
         (normalized with ``os.path.normpath``) and the extracted file
         object, which carries a ``source_tarfile_ref`` attribute pointing
         at the opened archive.

     Raises:
         TypeError: if ``self.datapipe`` is not an ``Iterable``.
         Exception: any exception raised while opening or walking an archive
             is reported through ``warnings.warn`` and then re-raised.
     """
     if not isinstance(self.datapipe, Iterable):
         type_note = "datapipe must be Iterable type but got {}".format(
             type(self.datapipe))
         raise TypeError(type_note)
     for item in self.datapipe:
         validate_pathname_binary_tuple(item)
         archive_path, archive_stream = item
         try:
             # typing.cast only satisfies mypy here; it has no runtime effect.
             archive = tarfile.open(
                 fileobj=cast(Optional[IO[bytes]], archive_stream),
                 mode="r:*",
             )
             for member in archive:
                 if member.isfile():
                     member_fobj = archive.extractfile(member)
                     if member_fobj is None:
                         failure = "failed to extract file {} from source tarfile {}".format(
                             member.name, archive_path)
                         warnings.warn(failure)
                         raise tarfile.ExtractError
                     joined = os.path.join(archive_path, member.name)
                     member_path = os.path.normpath(joined)
                     # Keep the archive referenced from the extracted file object so the
                     # tarfile handle is not released while that object is still alive.
                     member_fobj.source_tarfile_ref = archive  # type: ignore
                     # typing.cast again only satisfies mypy.
                     yield member_path, cast(BufferedIOBase, member_fobj)
         except Exception as e:
             # Report which archive failed before propagating the error.
             abort_note = "Unable to extract files from corrupted tarfile stream {} due to: {}, abort!".format(
                 archive_path, e)
             warnings.warn(abort_note)
             raise e
    def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
        """Yield ``(path, file object)`` pairs for the file entries of zip archives.

        ``self.datapipe`` must be an ``Iterable``. Each of its items is passed
        to ``validate_pathname_binary_tuple`` and unpacked into an archive
        pathname and a binary stream, which is opened with ``zipfile.ZipFile``.
        Directory entries are skipped.

        Yields:
            A tuple of the entry filename joined onto the archive pathname
            (normalized with ``os.path.normpath``) and the opened entry, which
            carries a ``source_zipfile_ref`` attribute pointing at the opened
            archive.

        Raises:
            TypeError: if ``self.datapipe`` is not an ``Iterable``.
            Exception: any exception raised while opening or walking an archive
                is reported through ``warnings.warn`` and then re-raised.
        """
        if not isinstance(self.datapipe, Iterable):
            type_note = "datapipe must be Iterable type but got {}".format(
                type(self.datapipe))
            raise TypeError(type_note)
        for item in self.datapipe:
            validate_pathname_binary_tuple(item)
            archive_path, archive_stream = item
            try:
                # typing.cast only satisfies mypy here; it has no runtime effect.
                archive = zipfile.ZipFile(cast(IO[bytes], archive_stream))
                for entry in archive.infolist():
                    # Only the minor version is inspected; the major version is
                    # expected to always be 3 here. ZipInfo.is_dir() is used from
                    # 3.6 on, with a trailing-slash check as the fallback.
                    if sys.version_info[1] >= 6:
                        is_directory = entry.is_dir()
                    else:
                        is_directory = entry.filename.endswith('/')
                    if is_directory:
                        continue

                    entry_fobj = archive.open(entry)
                    joined = os.path.join(archive_path, entry.filename)
                    entry_path = os.path.normpath(joined)
                    # Keep the archive referenced from the opened entry so the zipfile
                    # handle is not released while that object is still alive.
                    entry_fobj.source_zipfile_ref = archive  # type: ignore
                    # typing.cast again only satisfies mypy.
                    yield entry_path, cast(BufferedIOBase, entry_fobj)
            except Exception as e:
                # Report which archive failed before propagating the error.
                abort_note = "Unable to extract files from corrupted zipfile stream {} due to: {}, abort!".format(
                    archive_path, e)
                warnings.warn(abort_note)
                raise e
示例#4
0
 def __iter__(self) -> Iterator[Tuple[str, BufferedIOBase]]:
     """Yield ``(path, wrapped file object)`` pairs for the file entries of zip archives.

     Each item of ``self.datapipe`` is passed to
     ``validate_pathname_binary_tuple`` and unpacked into an archive pathname
     and a binary stream, which is opened with ``zipfile.ZipFile``. Directory
     entries are skipped.

     Yields:
         A tuple of the entry filename joined onto the directory part of the
         archive pathname (normalized with ``os.path.normpath``) and the
         opened entry wrapped in ``StreamWrapper``.

     Raises:
         Exception: any exception raised while opening or walking an archive
             is reported through ``warnings.warn`` and then re-raised.
     """
     for record in self.datapipe:
         validate_pathname_binary_tuple(record)
         pathname, data_stream = record
         parent_dir = os.path.dirname(pathname)
         try:
             # typing.cast only satisfies mypy here; it has no runtime effect.
             archive = zipfile.ZipFile(cast(IO[bytes], data_stream))
             for entry in archive.infolist():
                 # Only the minor version is inspected; the major version is
                 # expected to always be 3 here.
                 if sys.version_info[1] >= 6:
                     is_directory = entry.is_dir()
                 else:
                     is_directory = entry.filename.endswith('/')
                 if is_directory:
                     continue
                 entry_fobj = archive.open(entry)
                 joined = os.path.join(parent_dir, entry.filename)
                 entry_path = os.path.normpath(joined)
                 wrapped = StreamWrapper(entry_fobj)
                 yield entry_path, wrapped  # type: ignore[misc]
         except Exception as e:
             # Report which archive failed before propagating the error.
             warnings.warn(
                 f"Unable to extract files from corrupted zipfile stream {pathname} due to: {e}, abort!"
             )
             raise e
示例#5
0
    def __iter__(self) -> Iterator[Tuple[str, io.BufferedIOBase]]:
        """Yield ``(path, file object)`` pairs for the entries of rar archives.

        Each item of ``self.datapipe`` is passed to
        ``validate_pathname_binary_tuple`` and unpacked into an archive path
        and a stream, which is opened with ``self._rarfile.RarFile``. Entries
        whose filename ends with ``"/"`` are skipped.

        Yields:
            A tuple of the archive path joined with the entry filename (as a
            string) and the opened entry, which carries a ``source_rar``
            attribute pointing at the opened archive.
        """
        for item in self.datapipe:
            validate_pathname_binary_tuple(item)
            archive_path, archive_stream = item
            archive = self._rarfile.RarFile(archive_stream)
            for member in archive.infolist():
                if not member.filename.endswith("/"):
                    member_path = str(pathlib.Path(archive_path) / member.filename)

                    member_fobj = archive.open(member)
                    # Keep the archive referenced from the opened entry.
                    member_fobj.source_rar = archive

                    yield member_path, member_fobj