Example No. 1
import os
import requests
# fastai's progress bars; the exact import path can vary across fastprogress versions
from fastprogress.fastprogress import progress_bar, ProgressBar

def download_url(url:str, dest:str, overwrite:bool=False, pbar:ProgressBar=None,
                 show_progress=True, chunk_size=1024*1024, timeout=4, retries=5)->None:
    "Download `url` to `dest` unless it already exists and `overwrite` is False."
    if os.path.exists(dest) and not overwrite: return

    s = requests.Session()
    # retry transient connection failures up to `retries` times
    s.mount('http://',requests.adapters.HTTPAdapter(max_retries=retries))
    # stream the body so large files are not held in memory all at once
    u = s.get(url, stream=True, timeout=timeout)
    try: file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError): show_progress = False  # no usable Content-Length header

    with open(dest, 'wb') as f:
        nbytes = 0
        # progress is measured in bytes, so the bar ranges over the full file size
        if show_progress: pbar = progress_bar(range(file_size), auto_update=False, leave=False, parent=pbar)
        try:
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress: pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError:
            fname = url.split('/')[-1]
            from fastai.datasets import Config
            data_dir = Config().data_path()
            timeout_txt =(f'\n Download of {url} has failed after {retries} retries\n'
                          f' Fix the download manually:\n'
                          f'$ mkdir -p {data_dir}\n'
                          f'$ cd {data_dir}\n'
                          f'$ wget -c {url}\n'
                          f'$ tar -zxvf {fname}\n\n'
                          f'And re-run your code once the download is successful\n')
            print(timeout_txt)
            import sys
            sys.exit(1)
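
A minimal usage sketch, assuming the definition and imports above are available; the URL and destination filename are placeholders for illustration, not paths from the original project:

# Hypothetical call: fetch an archive once, skipping the download
# if `sample.tgz` already exists on disk (placeholder URL and filename).
download_url('https://example.com/sample.tgz', 'sample.tgz', overwrite=False)
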
Example No. 2
import os
import requests
# fastai's progress bars; the exact import path can vary across fastprogress versions
from fastprogress.fastprogress import progress_bar, ProgressBar


def download_url(
    url: str,
    dest: str,
    overwrite: bool = False,
    pbar: ProgressBar = None,
    show_progress=True,
    chunk_size=1024 * 1024,
    timeout=4,
    retries=5,
) -> None:
    "Download `url` to `dest` unless it already exists and `overwrite` is False."
    if os.path.exists(dest) and not overwrite:
        return

    s = requests.Session()
    # retry transient connection failures up to `retries` times
    s.mount("http://", requests.adapters.HTTPAdapter(max_retries=retries))
    # additional line to identify as a Firefox browser, see #2438
    s.headers.update(
        {
            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0"
        }
    )
    # stream the body so large files are not held in memory all at once
    u = s.get(url, stream=True, timeout=timeout)
    try:
        file_size = int(u.headers["Content-Length"])
    except (KeyError, ValueError):
        # no usable Content-Length header, so the progress bar cannot be sized
        show_progress = False

    with open(dest, "wb") as f:
        nbytes = 0
        if show_progress:
            pbar = progress_bar(range(file_size), leave=False, parent=pbar)
        try:
            if show_progress:
                pbar.update(0)
            for chunk in u.iter_content(chunk_size=chunk_size):
                nbytes += len(chunk)
                if show_progress:
                    pbar.update(nbytes)
                f.write(chunk)
        except requests.exceptions.ConnectionError:
            fname = url.split("/")[-1]
            from fastai.datasets import Config

            data_dir = Config().data_path()
            timeout_txt = (
                f"\n Download of {url} has failed after {retries} retries\n"
                f" Fix the download manually:\n"
                f"$ mkdir -p {data_dir}\n"
                f"$ cd {data_dir}\n"
                f"$ wget -c {url}\n"
                f"$ tar -zxvf {fname}\n\n"
                f"And re-run your code once the download is successful\n"
            )
            print(timeout_txt)
            import sys

            sys.exit(1)
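
In both examples, `Session.mount` registers the retrying adapter only for URLs that start with the given prefix, so the `max_retries` setting applies to plain `http://` downloads. A small sketch of how the same adapter could also be registered for `https://` URLs; this is an illustration, not part of either example:

import requests

retries = 5
s = requests.Session()
adapter = requests.adapters.HTTPAdapter(max_retries=retries)
# register the retrying adapter for both URL schemes
s.mount("http://", adapter)
s.mount("https://", adapter)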