def upload(url: Union[str, Path], path: Union[str, Path]) -> None:
    """
    Upload a single local file to a remote URL.

    Only ``s3://`` destinations are handled; the actual transfer is delegated
    to ``s3_upload_file``.

    Args:
        url: Destination URL (normalised with ``to_str``).
        path: Local file to upload (normalised with ``to_str``).

    Raises:
        NotImplementedError: If the URL scheme is not ``s3``, or if ``path``
            is a directory.
    """
    destination = to_str(url)
    source = to_str(path)

    # Anything other than S3 is rejected up front.
    if urlparse(destination).scheme != "s3":
        raise NotImplementedError("Uploading except S3 is not implemented.")

    # Directory uploads are not supported.
    if Path(source).is_dir():
        raise NotImplementedError

    s3_upload_file(url=destination, filename=source)
def test_to_str():
    """
    Check that ``to_str`` gives back the original URL string after the URL
    has been wrapped in a ``Path``.
    """
    sample_urls = (
        "http://hoge/fuga.txt",
        "https://hoge/fuga.txt",
        "s3://hoge/fuga.txt",
        "https://elyza-sandbox.s3.amazonaws.com/liz_ocr/shadow_sample1.jpg",
    )
    for expected in sample_urls:
        assert to_str(Path(expected)) == expected
def cached_path(
    url_or_filename: Union[str, Path],
    cache_dir: Union[str, None] = None,
) -> str:
    """
    Resolve a URL or local path to a path on the local filesystem.

    If the input is an ``http``, ``https`` or ``s3`` URL, it is fetched through
    ``get_from_cache`` and the value returned by that call is returned. If it
    is an existing local path, the (user-expanded) path itself is returned.

    Args:
        url_or_filename: URL or local path; normalised with ``to_str`` and
            ``os.path.expanduser``.
        cache_dir: Directory handed to ``get_from_cache``. When ``None`` it is
            read from ``get_config("cache_dir")``.

    Returns:
        The local path as a string.

    Raises:
        FileNotFoundError: If the input has no URL scheme and does not exist.
        ValueError: If the input has an unsupported scheme and does not exist
            as a local path.
    """
    url_or_filename = to_str(url_or_filename)
    # NOTE: the config lookup happens before the URL/local decision, so it is
    # performed for local paths too whenever cache_dir is omitted.
    if cache_dir is None:
        cache_dir = get_config("cache_dir")

    url_or_filename = os.path.expanduser(url_or_filename)
    scheme = urlparse(url_or_filename).scheme

    if scheme in ("http", "https", "s3"):
        # URL, so get it from the cache (downloading if necessary).
        return get_from_cache(url_or_filename, cache_dir)

    if os.path.exists(url_or_filename):
        # Local file, and it exists.
        return url_or_filename

    if scheme == "":
        # Looks like a local path, but nothing is there.
        raise FileNotFoundError(f"file {url_or_filename} not found")

    # Has a scheme we do not handle and is not an existing path either.
    raise ValueError(
        f"unable to parse {url_or_filename} as a URL or as a local path"
    )
def is_url_or_existing_file(url_or_filename: Union[str, Path, None]) -> bool:
    """
    Tell whether the argument is a supported URL or an existing local path.

    Returns ``False`` for ``None``. Otherwise the value is normalised with
    ``to_str`` and ``os.path.expanduser``; the result is ``True`` when its
    scheme is ``http``, ``https`` or ``s3``, or when the path exists locally.
    """
    if url_or_filename is None:
        return False

    candidate = os.path.expanduser(to_str(url_or_filename))

    # A recognised URL scheme is enough; no existence check is made for URLs.
    if urlparse(candidate).scheme in ("http", "https", "s3"):
        return True
    return os.path.exists(candidate)
def upload_later(cloud_or_local_path: Union[str, Path]) -> Generator[str, None, None]:
    """
    Yield a local path to write to; for cloud destinations, upload the written
    file once the generator is resumed.

    * If the parent directory of the given path exists locally, the path is
      yielded unchanged and nothing else happens.
    * If the scheme is ``s3`` or ``gs``, a temporary path
      ``<upload_tmp_dir>/<uuid4>/<file name>`` is yielded. After the consumer
      resumes the generator, that file is passed to ``upload``. On success the
      temp file and its per-call directory are removed; on failure the temp
      file is kept and the exception is re-raised.

    NOTE(review): presumably meant to be driven as a context manager (e.g. via
    ``contextlib.contextmanager``); no decorator is visible on this block, so
    confirm at the definition/call sites.

    NOTE(review): ``gs`` is accepted here, but ``upload`` in this file only
    implements ``s3``, so a ``gs`` destination fails at upload time with
    ``NotImplementedError`` after the temp file has been written.

    Args:
        cloud_or_local_path: Local path or cloud URL (normalised with
            ``to_str``).

    Yields:
        The path the consumer should write to, as a string.

    Raises:
        ValueError: If the input is neither a usable local path nor an
            ``s3``/``gs`` URL.
        Exception: Whatever ``upload`` raises is logged and re-raised.
    """
    cloud_or_local_path = to_str(cloud_or_local_path)
    parsed = urlparse(cloud_or_local_path)

    # Local destination: the parent directory exists, so write there directly.
    if Path(cloud_or_local_path).parent.exists():
        yield cloud_or_local_path

    # Cloud destination: write to a temp file first, upload afterwards.
    elif parsed.scheme in ("s3", "gs"):
        # A fresh uuid4 directory per call keeps temp files from colliding.
        temp_path = (
            Path(get_config("upload_tmp_dir"))
            / str(uuid4())
            / Path(cloud_or_local_path).name
        )
        temp_path.parent.mkdir(parents=True, exist_ok=True)

        yield str(temp_path)

        # Reached only when the consumer resumed the generator normally,
        # i.e. the temp file is expected to have been written.
        try:
            upload(cloud_or_local_path, temp_path)
        except Exception:
            # Keep the temp file so the data can be recovered manually.
            logger.error(
                f"Upload to {cloud_or_local_path} failed. You can accesss {temp_path}"
            )
            raise
        else:
            logger.info(
                f"Upload to {cloud_or_local_path} succeeded. Deleting temp file..."
            )
            os.remove(str(temp_path))
            # Also drop the per-call uuid directory so empty directories do
            # not pile up under upload_tmp_dir. Best effort: rmdir refuses a
            # non-empty directory (e.g. the consumer wrote extra files there),
            # in which case it is left in place.
            try:
                temp_path.parent.rmdir()
            except OSError:
                logger.debug(
                    f"Could not remove temp directory {temp_path.parent}; leaving it."
                )

    else:
        raise ValueError(f"Given url_or_path {cloud_or_local_path} is invalid.")