Example #1
import hashlib
import os
import shutil
import tempfile

import filelock
import gdown
from chainer.dataset import download


def cached_gdown_download(url, cached_path=None):
    """Downloads a file via :mod:`gdown` and caches it under the dataset root."""
    cache_root = os.path.join(download.get_dataset_root(), '_dl_cache')
    try:
        # Create the cache directory; an already-existing directory is fine.
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise

    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    if cached_path is None:
        cached_path = os.path.join(cache_root, urlhash)
    lock_path = cached_path + ".lock"

    with filelock.FileLock(lock_path):
        # Another process may have finished the download already.
        if os.path.exists(cached_path):
            return cached_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'download.cache')
        gdown.download(url, temp_path, quiet=False)
        # Move the finished file into place under the lock so that readers
        # never observe a partially written cache entry.
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cached_path)
    finally:
        shutil.rmtree(temp_root)

    return cached_path
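
A minimal usage sketch for the helper above. The URL is a placeholder, not a
real Google Drive file id:

url = 'https://drive.google.com/uc?id=FILE_ID'  # placeholder id

# The first call downloads and caches the file; later calls (from any
# process) return the cached path immediately without re-downloading.
path = cached_gdown_download(url)
print(path)  # <dataset root>/_dl_cache/<md5 hexdigest of the url>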
Example #2
import hashlib
import os
import shutil
import tempfile
from distutils.util import strtobool
from urllib import request

import filelock
from chainer.dataset import get_dataset_root


def cached_download(url):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download
    progress is reported. Note that this progress report can be disabled
    by setting the environment variable `CHAINERCV_DOWNLOAD_REPORT` to `'OFF'`.

    It downloads a file from the URL if there is no corresponding cache.
    After the download, this function stores the file as a cache in a
    directory under the dataset root (see :func:`set_dataset_root`). If a
    cache already exists for the given URL, it just returns the path to the
    cache without downloading the same file again.

    Args:
        url (string): URL to download from.

    Returns:
        string: Path to the downloaded file.

    """
    cache_root = os.path.join(get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise

    lock_path = os.path.join(cache_root, '_dl_lock')
    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_path = os.path.join(cache_root, urlhash)

    with filelock.FileLock(lock_path):
        # Another process may have finished the download already.
        if os.path.exists(cache_path):
            return cache_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'dl')
        if strtobool(os.getenv('CHAINERCV_DOWNLOAD_REPORT', 'ON')):
            print('Downloading ...')
            print('From: {:s}'.format(url))
            print('To: {:s}'.format(cache_path))
            # _reporthook is a progress callback defined elsewhere in the
            # module; see the stand-in sketch below.
            request.urlretrieve(url, temp_path, _reporthook)
        else:
            request.urlretrieve(url, temp_path)
        # Move the finished file into place under the lock so that readers
        # never observe a partially written cache entry.
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cache_path)
    finally:
        shutil.rmtree(temp_root)

    return cache_path
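
This example and the next one pass a `_reporthook` callback that is defined
elsewhere in the same module. A minimal stand-in matching the callback
signature `urlretrieve` expects (block count, block size, total size) could
look like this; it is a sketch, not ChainerCV's actual implementation:

import sys

def _reporthook(count, block_size, total_size):
    # urlretrieve calls this repeatedly with the number of blocks fetched
    # so far, the block size in bytes, and the total size reported by the
    # server (-1 if unknown).
    if total_size > 0:
        percent = min(count * block_size * 100 // total_size, 100)
        sys.stdout.write('\r  {:d}%'.format(percent))
        sys.stdout.flush()

To silence the progress report, set the environment variable before the
call, e.g. os.environ['CHAINERCV_DOWNLOAD_REPORT'] = 'OFF'.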
Example #3
import hashlib
import os
import shutil
import tempfile
from urllib import request

import filelock
from chainer.dataset import download


def cached_download(url, cached_path=None):
    """Downloads a file and caches it.

    This is different from the original
    :func:`~chainer.dataset.cached_download` in that the download
    progress is reported.

    It downloads a file from the URL if there is no corresponding cache.
    After the download, this function stores the file as a cache in a
    directory under the dataset root (see :func:`set_dataset_root`). If a
    cache already exists for the given URL, it just returns the path to the
    cache without downloading the same file again.

    Args:
        url (string): URL to download from.
        cached_path (string): Path to store the cache at. If :obj:`None`,
            a path under the dataset root derived from the MD5 hash of the
            URL is used.

    Returns:
        string: Path to the downloaded file.

    """
    cache_root = os.path.join(download.get_dataset_root(), '_dl_cache')
    try:
        os.makedirs(cache_root)
    except OSError:
        if not os.path.exists(cache_root):
            raise

    urlhash = hashlib.md5(url.encode('utf-8')).hexdigest()
    if cached_path is None:
        cached_path = os.path.join(cache_root, urlhash)
    lock_path = cached_path + ".lock"

    with filelock.FileLock(lock_path):
        if os.path.exists(cached_path):
            return cached_path

    temp_root = tempfile.mkdtemp(dir=cache_root)
    try:
        temp_path = os.path.join(temp_root, 'download.cache')
        print('Downloading ...')
        print('From: {:s}'.format(url))
        print('To: {:s}'.format(cached_path))
        # _reporthook is the progress callback; see the stand-in sketch
        # after Example #2.
        request.urlretrieve(url, temp_path, _reporthook)
        # Move the finished file into place under the lock so that readers
        # never observe a partially written cache entry.
        with filelock.FileLock(lock_path):
            shutil.move(temp_path, cached_path)
    finally:
        shutil.rmtree(temp_root)

    return cached_path
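
A usage sketch for the variant above, assuming a writable destination path
(both the URL and the path are placeholders):

# Cache under an explicit, human-readable path instead of the URL hash.
path = cached_download('https://example.com/model.npz',
                       cached_path='/tmp/model.npz')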