def _get_local_path(self, path: str, force: bool = False, **kwargs: Any) -> str:
    """
    This implementation downloads the remote resource and caches it locally.
    The resource will only be downloaded if not previously requested.
    """
    self._check_kwargs(kwargs)
    if (
        force
        or path not in self.cache_map
        or not os.path.exists(self.cache_map[path])
    ):
        logger = logging.getLogger(__name__)
        parsed_url = urlparse(path)
        dirname = os.path.join(
            get_cache_dir(), os.path.dirname(parsed_url.path.lstrip("/"))
        )
        filename = path.split("/")[-1]
        cached = os.path.join(dirname, filename)
        with file_lock(cached):
            if not os.path.isfile(cached):
                logger.info("Downloading {} ...".format(path))
                cached = download(path, dirname, filename=filename)
        logger.info("URL {} cached in {}".format(path, cached))
        self.cache_map[path] = cached
    return self.cache_map[path]
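# Usage sketch (hypothetical): assuming this method lives on an iopath-style
# HTTPURLHandler registered with a PathManager, the first call downloads the
# file and later calls return the cached local path from self.cache_map:
#
#   path_manager = PathManager()
#   path_manager.register_handler(HTTPURLHandler())
#   local = path_manager.get_local_path("https://example.com/model.pth")
#   # Same path, no second download (unless force=True is passed):
#   assert path_manager.get_local_path("https://example.com/model.pth") == local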
def cache_url(url: str, cache_dir: str) -> str:
    """
    This implementation downloads the remote resource and caches it locally.
    The resource will only be downloaded if not previously requested.
    """
    parsed_url = urlparse(url)
    dirname = os.path.join(cache_dir, os.path.dirname(parsed_url.path.lstrip("/")))
    makedir(dirname)
    filename = url.split("/")[-1]
    cached = os.path.join(dirname, filename)
    with file_lock(cached):
        if not os.path.isfile(cached):
            logging.info(f"Downloading {url} to {cached} ...")
            cached = download(url, dirname, filename=filename)
    logging.info(f"URL {url} cached in {cached}")
    return cached
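# Usage sketch (hypothetical URL and cache_dir): cache_url mirrors the URL's
# path under cache_dir, so repeated calls for the same URL become no-ops once
# the file exists, and file_lock serializes concurrent callers (e.g. several
# workers starting at once) so only one of them performs the download:
#
#   weights = cache_url("https://dl.example.com/models/resnet50.pkl", "/tmp/cache")
#   # -> "/tmp/cache/models/resnet50.pkl"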
def test_download(self) -> None:
    download(
        "https://www.facebook.com", ".", filename=self._filename, progress=False
    )
    self.assertTrue(os.path.isfile(self._filename))
def _get_local_path(
    self,
    path: str,
    force: bool = False,
    cache_dir: Optional[str] = None,
    **kwargs: Any,
) -> str:
    """
    As paddle models store all of their files in tar archives, we need to
    extract them and return the newly extracted folder path. This function
    rewrites the base function to support the following situations:

    1. If the tar file is not downloaded, it will download the tar file,
       extract it to the target folder, delete the downloaded tar file, and
       return the folder path.
    2. If the extracted target folder is present and all the necessary model
       files are present (specified in _TAR_FILE_NAME_LIST), it will return
       the folder path.
    3. If the tar file is downloaded but the extracted target folder is not
       present (or it doesn't contain the necessary files in
       _TAR_FILE_NAME_LIST), it will extract the tar file to the target
       folder, delete the tar file, and return the folder path.
    """
    self._check_kwargs(kwargs)
    if (
        force
        or path not in self.cache_map
        or not os.path.exists(self.cache_map[path])
    ):
        logger = logging.getLogger(__name__)
        parsed_url = urlparse(path)
        dirname = os.path.join(
            get_cache_dir(cache_dir), os.path.dirname(parsed_url.path.lstrip("/"))
        )
        filename = path.split("/")[-1]
        if len(filename) > self.MAX_FILENAME_LEN:
            filename = filename[:100] + "_" + uuid.uuid4().hex
        cached = os.path.join(dirname, filename)

        if is_cached_folder_exists_and_valid(cached):
            # When the cached folder exists and is valid, we don't need to
            # re-download the tar file.
            self.cache_map[path] = _get_untar_directory(cached)
        else:
            with file_lock(cached):
                if not os.path.isfile(cached):
                    logger.info("Downloading {} ...".format(path))
                    cached = download(path, dirname, filename=filename)
                if path.endswith(".tar"):
                    model_dir = _untar_model_weights(cached)
                    try:
                        # Remove the redundant tar file.
                        # TODO: remove the .lock file as well.
                        os.remove(cached)
                    except OSError:
                        logger.warning(
                            f"Not able to remove the cached tar file {cached}"
                        )
                else:
                    # Non-tar downloads are cached as-is; without this branch
                    # model_dir would be undefined below.
                    model_dir = cached
            logger.info("URL {} cached in {}".format(path, model_dir))
            self.cache_map[path] = model_dir
    return self.cache_map[path]
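# The two helpers above are referenced but not shown; a minimal sketch of what
# they might look like, assuming _TAR_FILE_NAME_LIST enumerates the model
# files every valid cache must contain and that "<name>.tar" is extracted
# into a sibling "<name>/" folder (both assumptions, not the actual code):


def _get_untar_directory(tar_path: str) -> str:
    # Map "<dir>/<name>.tar" to its extraction folder "<dir>/<name>".
    return os.path.splitext(tar_path)[0]


def is_cached_folder_exists_and_valid(tar_path: str) -> bool:
    # The cache is valid only if the extracted folder exists and contains
    # every required model file (e.g. "model.pdmodel", "model.pdiparams").
    folder = _get_untar_directory(tar_path)
    return os.path.isdir(folder) and all(
        os.path.isfile(os.path.join(folder, name)) for name in _TAR_FILE_NAME_LIST
    )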