示例#1
0
 def _guess_archive_loader(
     self, path: pathlib.Path
 ) -> Optional[Callable[[IterDataPipe[Tuple[str, IO]]], IterDataPipe[Tuple[str, IO]]]]:
     """Look up the archive loader registered for ``path``'s file type.

     Only ``path.name`` is inspected. Returns ``None`` when the file type
     cannot be detected (``_detect_file_type`` raises ``RuntimeError``) or
     when ``_ARCHIVE_LOADERS`` has no entry for the detected archive type.
     """
     try:
         detected = _detect_file_type(path.name)
     except RuntimeError:
         # Unrecognised file type: there is nothing to load it with.
         return None

     # Only the middle element (the archive type) selects the loader.
     _suffix, kind, _compression = detected
     loaders = self._ARCHIVE_LOADERS
     return loaders.get(kind)  # type: ignore[arg-type]
示例#2
0
 def test_detect_file_type(self):
     # Maps each file name to the tuple utils._detect_file_type must return
     # for it; every entry runs as its own sub-test so one failure does not
     # hide the others.
     cases = {
         "foo.tar.xz": (".tar.xz", ".tar", ".xz"),
         "foo.tar": (".tar", ".tar", None),
         "foo.tar.gz": (".tar.gz", ".tar", ".gz"),
         "foo.tgz": (".tgz", ".tar", ".gz"),
         "foo.gz": (".gz", None, ".gz"),
         "foo.zip": (".zip", ".zip", None),
         "foo.xz": (".xz", None, ".xz"),
     }
     for name, want in cases.items():
         with self.subTest(file=name):
             got = utils._detect_file_type(name)
             self.assertSequenceEqual(got, want)
def download_from_url(
    *,
    file_info: Union[UrlFileInfo, List[UrlFileInfo]],
    root: Union[Path, str],
    logger: Optional[logging.Logger] = None,
    remove_finished: bool = True,
) -> None:
    """Download and extract one or more archives into ``root``.

    For every entry, the archive is expected at ``root / info.name`` and is
    extracted to that same path with the detected archive suffix removed.
    Steps that are already done are skipped: if the extraction target exists
    nothing happens, and if only the archive exists it is not downloaded again.

    Args:
        file_info: A single file description or a list of them. Each entry
            must provide ``name``, ``url`` and ``md5`` attributes.
        root: Directory to download into; created if missing. A ``str`` is
            converted to a ``Path`` with ``~`` expanded.
        logger: Logger for progress messages. Defaults to this module's logger.
        remove_finished: Forwarded unchanged to ``extract_archive``.

    Raises:
        RuntimeError: Presumably propagated from ``_detect_file_type`` when
            the file extension is not recognised (confirm against that helper).
    """
    logger = logging.getLogger(__name__) if logger is None else logger
    file_info_ls = file_info if isinstance(file_info, list) else [file_info]
    if not isinstance(root, Path):
        root = Path(root).expanduser()
    # Create the specified root directory if it doesn't already exist
    root.mkdir(parents=True, exist_ok=True)

    for info in file_info_ls:
        filepath = root / info.name

        filepath_str = str(filepath)
        suffix = _detect_file_type(filepath_str)[0]
        # Drop only the trailing suffix. Splitting on the suffix would cut the
        # path at its first occurrence, which is wrong whenever the suffix
        # also appears earlier (e.g. a root directory named "data.zip").
        extracted_filepath = Path(filepath_str[: len(filepath_str) - len(suffix)])

        if extracted_filepath.exists():
            logger.info(f"File '{info.name}' already downloaded and extracted.")
            continue

        if filepath.exists():
            logger.info(f"File '{info.name}' already downloaded.")
        else:
            logger.info(f"Downloading file '{info.name}' from address '{info.url}'.")
            download_url(url=info.url, filename=info.name, root=str(root), md5=info.md5)

        logger.info(f"Extracting '{filepath.resolve()}' to '{root.resolve()}'")
        try:
            extract_archive(
                from_path=str(filepath),
                to_path=str(extracted_filepath),
                remove_finished=remove_finished,
            )
        # Fall back on using jar to unzip the archive
        except BadZipFile:
            # NOTE(review): the fallback unpacks into ``root`` rather than
            # ``extracted_filepath``, so the "already extracted" check above
            # may not be satisfied on a later call -- confirm this is intended.
            try:
                # ``jar`` runs with ``cwd=root``, so the archive path must be
                # absolute; a root-relative path would be resolved against
                # ``root`` a second time.
                subprocess.run(
                    ["jar", "-xvf", str(filepath.resolve())], check=True, cwd=root
                )
            # FileNotFoundError is what a missing ``jar`` executable raises;
            # without it the Java hint below would never be shown.
            except (subprocess.CalledProcessError, FileNotFoundError):
                logger.info(
                    "Attempted to fall back on using Java to extract malformed .zip file; "
                    "however, there was a problem. Try redownloading the zip file or "
                    "checking that Java has been properly added to your system variables."
                )
示例#4
0
 def test_detect_file_type_unknown_partial_ext(self):
     # A single unrecognised extension ("foo.bar") must raise RuntimeError.
     self.assertRaises(RuntimeError, utils._detect_file_type, "foo.bar")
示例#5
0
 def test_detect_file_type_unknown_compression(self):
     # ".tar" followed by an unrecognised trailing extension must raise
     # RuntimeError.
     self.assertRaises(RuntimeError, utils._detect_file_type, "foo.tar.baz")
示例#6
0
 def test_detect_file_type_unknown_archive_type(self):
     # ".gz" preceded by an unrecognised extension ("foo.bar.gz") must raise
     # RuntimeError.
     self.assertRaises(RuntimeError, utils._detect_file_type, "foo.bar.gz")
示例#7
0
 def test_detect_file_type_to_many_exts(self):
     # A name carrying three extensions ("foo.bar.tar.gz") must raise
     # RuntimeError.
     self.assertRaises(RuntimeError, utils._detect_file_type, "foo.bar.tar.gz")
示例#8
0
 def test_detect_file_type_no_ext(self):
     # A name without any extension ("foo") must raise RuntimeError.
     self.assertRaises(RuntimeError, utils._detect_file_type, "foo")
示例#9
0
 def test_detect_file_type_incompatible(self, file):
     # Negative cases (no extension, unknown compression, unknown partial
     # extension): each supplied file name must make detection raise
     # RuntimeError.
     detect = utils._detect_file_type
     with pytest.raises(RuntimeError):
         detect(file)
示例#10
0
 def test_detect_file_type(self, file, expected):
     # The detected file type for ``file`` must equal the supplied
     # ``expected`` value.
     detected = utils._detect_file_type(file)
     assert detected == expected