Пример #1
0
def upload(url: str, path: Union[str, Path]) -> None:
    """Upload a single local file to a remote URL.

    Only ``s3://`` destinations are handled; the transfer itself is delegated
    to ``s3_upload_file``.

    Args:
        url: Destination URL. Normalised with ``to_str`` before parsing.
        path: Local file to upload. Normalised with ``to_str`` as well.

    Raises:
        NotImplementedError: If the URL scheme is not ``s3``, or if ``path``
            points at a directory (directory uploads are not supported).
    """
    url = to_str(url)
    path = to_str(path)

    # The scheme is checked first so an unsupported destination is reported
    # before the local filesystem is touched.
    if urlparse(url).scheme != "s3":
        raise NotImplementedError("Uploading except S3 is not implemented.")

    if Path(path).is_dir():
        # Say what is unsupported instead of raising a bare exception.
        raise NotImplementedError(
            f"Uploading a directory is not implemented: {path}"
        )

    s3_upload_file(url=url, filename=path)
Пример #2
0
def test_to_str():
    """Check that ``to_str`` returns the original URL after a ``Path`` round trip."""
    samples = (
        "http://hoge/fuga.txt",
        "https://hoge/fuga.txt",
        "s3://hoge/fuga.txt",
        "https://elyza-sandbox.s3.amazonaws.com/liz_ocr/shadow_sample1.jpg",
    )
    # Each URL is wrapped in a Path first; to_str must give back the exact
    # original string.
    for expected in samples:
        assert to_str(Path(expected)) == expected
Пример #3
0
def cached_path(url_or_filename: Union[str, Path],
                cache_dir: str = None) -> str:
    """
    Resolve a URL or a local path to a string path.

    The argument is normalised with ``to_str`` and ``~`` is expanded. If its
    scheme is http, https or s3, the result of ``get_from_cache`` is returned
    (using the configured "cache_dir" when ``cache_dir`` is None). Otherwise
    the expanded value itself is returned, provided it exists on disk.

    Raises FileNotFoundError when the value has no scheme and does not exist,
    and ValueError when it has an unsupported scheme and does not exist.
    """
    target = to_str(url_or_filename)
    if cache_dir is None:
        cache_dir = get_config("cache_dir")

    target = os.path.expanduser(target)
    scheme = urlparse(target).scheme

    # Supported remote schemes are resolved through the cache helper.
    if scheme in ("http", "https", "s3"):
        return get_from_cache(target, cache_dir)

    # Anything that exists on disk is returned as is.
    if os.path.exists(target):
        return target

    # No scheme at all: it was meant as a local path, but nothing is there.
    if not scheme:
        raise FileNotFoundError("file {} not found".format(target))

    # A scheme we do not handle, and no such local file either.
    raise ValueError(
        "unable to parse {} as a URL or as a local path".format(target))
Пример #4
0
def is_url_or_existing_file(url_or_filename: Union[str, Path, None]) -> bool:
    """
    Tell whether the argument is a supported URL or an existing local path.

    Returns False for None. Otherwise the value is normalised with ``to_str``,
    ``~`` is expanded, and the result is True when its scheme is http, https
    or s3, or when the path exists on disk.
    """
    if url_or_filename is None:
        return False

    candidate = os.path.expanduser(to_str(url_or_filename))

    # A recognised URL scheme is enough; the filesystem is not consulted.
    if urlparse(candidate).scheme in ("http", "https", "s3"):
        return True
    return os.path.exists(candidate)
Пример #5
0
def upload_later(cloud_or_local_path: Union[str, Path]) -> Generator[str, None, None]:
    """Yield a local path to write to, then upload the written file if needed.

    NOTE(review): the single ``yield`` followed by post-yield work suggests
    this generator is meant to be wrapped with ``contextlib.contextmanager``;
    the decorator is not visible here -- confirm.

    Behaviour by input:

    * If the parent directory of the given path exists, the path itself is
      yielded and nothing happens afterwards.
    * Otherwise, if the scheme is ``s3`` or ``gs``, a temporary path
      ``<upload_tmp_dir>/<uuid4>/<file name>`` is yielded. Once the generator
      is resumed, that file is passed to ``upload``. On success the temporary
      file and its per-call directory are removed; on failure they are kept
      so the data can be recovered, and the exception is re-raised.

    Args:
        cloud_or_local_path: Local path or cloud URL; normalised with
            ``to_str``.

    Yields:
        The path the caller should write to, as a string.

    Raises:
        ValueError: If the path is neither under an existing local directory
            nor an ``s3``/``gs`` URL.
        Exception: Whatever ``upload`` raises is logged and re-raised.
    """
    cloud_or_local_path = to_str(cloud_or_local_path)
    parsed = urlparse(cloud_or_local_path)

    # If path is local path, then yield it
    if Path(cloud_or_local_path).parent.exists():
        yield cloud_or_local_path

    # Upload to some cloud storage
    # NOTE(review): "gs" is accepted here, but whether ``upload`` can handle
    # it is decided inside ``upload`` -- verify.
    elif parsed.scheme in ("s3", "gs"):

        # A fresh uuid4 directory per call keeps files with the same name
        # from colliding.
        temp_path = (
            Path(get_config("upload_tmp_dir"))
            / str(uuid4())
            / Path(cloud_or_local_path).name
        )
        temp_path.parent.mkdir(parents=True, exist_ok=True)
        yield str(temp_path)

        # Execution resumes here once the caller has finished with the file.
        try:
            upload(cloud_or_local_path, temp_path)
        except Exception:
            # Keep the temp file so the data is not lost.
            logger.error(
                f"Upload to {cloud_or_local_path} failed. You can accesss {temp_path}"
            )
            raise
        else:
            logger.info(
                f"Upload to {cloud_or_local_path} succeeded. Deleting temp file..."
            )
            os.remove(str(temp_path))
            # Also drop the per-call uuid directory so empty directories do
            # not pile up under upload_tmp_dir. Best effort: if something
            # else was written there, rmdir fails and the directory stays.
            try:
                temp_path.parent.rmdir()
            except OSError:
                pass

    else:
        raise ValueError(f"Given url_or_path {cloud_or_local_path} is invalid.")