Exemplo n.º 1
0
def download(url, save_path=None, save_dir=hanlp_home(), prefix=HANLP_URL, append_location=True, verbose=HANLP_VERBOSE):
    """Download ``url`` to a local file unless that file already exists.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file. When falsy it is derived with
            ``path_from_url(url, save_dir, prefix, append_location)``.
        save_dir: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        prefix: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        append_location: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        verbose: When truthy, progress and failure messages are printed.

    Returns:
        ``save_path``, either the pre-existing file or the freshly downloaded one.

    Raises:
        BaseException: Whatever the download raised is re-raised after the
            partial file has been removed; a plain-text hint is appended to
            its ``msg`` attribute or to its first string argument.
    """
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        if verbose:
            eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path
    else:
        makedirs(parent_dir(save_path))
        if verbose:
            eprint('Downloading {} to {}'.format(url, save_path))
        # Download into a side file so an interrupted transfer never looks like a finished one.
        tmp_path = '{}.downloading'.format(save_path)
        remove_file(tmp_path)
        try:
            # NOTE(review): the meaning of the positional 4 is defined by Downloader,
            # which is not visible here - presumably a worker/connection count; confirm.
            downloader = Downloader(url, tmp_path, 4, headers={
                'User-agent': f'HanLP/{__version__} ({platform.platform()})'})
            if verbose:
                downloader.subscribe(DownloadCallback(show_header=False))
            downloader.start_sync()
        except BaseException as e:
            # BaseException on purpose: the partial file is cleaned up on Ctrl-C as well.
            remove_file(tmp_path)
            url = url.split('#')[0]
            try:
                installed_version, latest_version = check_outdated()
            except Exception:
                # Best effort only (e.g. no Internet). Narrowed from a bare ``except`` so that
                # KeyboardInterrupt/SystemExit raised during the check are not swallowed.
                installed_version, latest_version = None, None
            if installed_version != latest_version:
                # Always prompt user to upgrade whenever a new version is available
                hints = f'[green]Please upgrade to the latest version ({latest_version}) with:[/green]' \
                        f'\n\n\t[yellow]pip install -U hanlp[/yellow]\n'
            else:  # Otherwise, prompt user to re-try
                hints = f'[green]Please re-try or download it to {save_path} by yourself '
                if not windows():
                    hints += f'with:[/green]\n\n\t[yellow]wget {url} -O {save_path}[/yellow]\n\n'
                else:
                    hints += 'using some decent downloading tools.[/green]\n'
                if not url.startswith(HANLP_URL):
                    hints += 'For third party data, you may find it on our mirror site:\n' \
                             'https://od.hankcs.com/hanlp/data/\n'
                hints += 'See also https://hanlp.hankcs.com/docs/install.html#install-models for instructions.'
            message = f'Download failed due to [red]{repr(e)}[/red].\n' \
                      f'{hints}'
            if verbose:
                cprint(message)
            # Attach the hint to the exception itself so it is visible even when verbose is off.
            if hasattr(e, 'msg'):
                e.msg += '\n' + remove_color_tag(message)
            elif hasattr(e, 'args') and e.args and isinstance(e.args, tuple) and isinstance(e.args[0], str):
                e.args = (e.args[0] + '\n' + remove_color_tag(message),) + e.args[1:]
            raise e from None
        remove_file(save_path)
        os.rename(tmp_path, save_path)
    return save_path
Exemplo n.º 2
0
def download(url,
             save_path=None,
             save_dir=hanlp_home(),
             prefix=HANLP_URL,
             append_location=True,
             verbose=HANLP_VERBOSE):
    """Download ``url`` to a local file unless that file already exists.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file. When falsy it is derived with
            ``path_from_url(url, save_dir, prefix, append_location)``.
        save_dir: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        prefix: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        append_location: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        verbose: When truthy, progress and failure messages are printed.

    Returns:
        ``save_path``, either the pre-existing file or the freshly downloaded one.

    Raises:
        BaseException: Whatever the download raised is re-raised after the
            partial file has been removed; when the exception has a ``msg``
            attribute a plain-text hint is appended to it.
    """
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        if verbose:
            eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path
    else:
        makedirs(parent_dir(save_path))
        if verbose:
            eprint('Downloading {} to {}'.format(url, save_path))
        # Download into a side file so an interrupted transfer never looks like a finished one.
        tmp_path = '{}.downloading'.format(save_path)
        remove_file(tmp_path)
        try:
            # NOTE(review): the meaning of the positional 4 is defined by Downloader,
            # which is not visible here - presumably a worker/connection count; confirm.
            downloader = Downloader(
                url,
                tmp_path,
                4,
                headers={
                    'User-agent':
                    f'HanLP/{__version__} ({platform.platform()})'
                })
            if verbose:
                downloader.subscribe(DownloadCallback(show_header=False))
            downloader.start_sync()
        except BaseException as e:
            # BaseException on purpose: the partial file is cleaned up on Ctrl-C as well.
            remove_file(tmp_path)
            url = url.split('#')[0]
            if not windows():
                hints_for_download = f'e.g. \nwget {url} -O {save_path}\n'
            else:
                hints_for_download = ' Use some decent downloading tools.\n'
            if not url.startswith(HANLP_URL):
                hints_for_download += 'For third party data, you may find it on our mirror site:\n' \
                                      'https://od.hankcs.com/hanlp/data/\n'
            try:
                installed_version, latest_version = check_outdated()
            except Exception:
                # The version check is best effort: if it fails too (e.g. no Internet) it must
                # not replace the original download error or suppress the hint below.
                installed_version, latest_version = None, None
            if installed_version != latest_version:
                hints_for_download += f'Or upgrade to the latest version({latest_version}):\npip install -U hanlp'
            message = f'Download failed due to [red]{repr(e)}[/red]. Please download it to {save_path} by yourself. ' \
                      f'[yellow]{hints_for_download}[/yellow]' \
                      'See https://hanlp.hankcs.com/docs/install.html#install-models for instructions.'
            if verbose:
                cprint(message)
            # Attach the hint to the exception itself so it is visible even when verbose is off.
            if hasattr(e, 'msg'):
                e.msg += '\n' + remove_color_tag(message)
            raise e
        remove_file(save_path)
        os.rename(tmp_path, save_path)
    return save_path
Exemplo n.º 3
0
def download(url,
             save_path=None,
             save_dir=hanlp_home(),
             prefix=HANLP_URL,
             append_location=True,
             verbose=HANLP_VERBOSE):
    """Download ``url`` to a local file with ``urlretrieve`` unless it already exists.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file. When falsy it is derived with
            ``path_from_url(url, save_dir, prefix, append_location)``.
        save_dir: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        prefix: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        append_location: Forwarded to ``path_from_url`` when ``save_path`` is not given.
        verbose: When truthy, a progress line and failure messages are written.

    Returns:
        ``save_path``, either the pre-existing file or the freshly downloaded one.

    Raises:
        BaseException: Whatever the download raised is re-raised after the
            partial file has been removed; when the exception has a ``msg``
            attribute a plain-text hint is appended to it.
    """
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        if verbose:
            eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path
    else:
        makedirs(parent_dir(save_path))
        if verbose:
            eprint('Downloading {} to {}'.format(url, save_path))
        # Download into a side file so an interrupted transfer never looks like a finished one.
        tmp_path = '{}.downloading'.format(save_path)
        remove_file(tmp_path)
        try:
            # Timer state lives in this call's closure rather than in module globals,
            # so separate downloads do not interfere with each other.
            start_time = time.time()

            def reporthook(count, block_size, total_size):
                """Write a one-line progress report to stderr for ``urlretrieve``."""
                nonlocal start_time
                if count == 0:
                    # First call happens before any data is read: (re)start the clock.
                    start_time = time.time()
                    return
                if not verbose:
                    return
                duration = max(1e-8, time.time() - start_time)
                downloaded = int(count * block_size)
                if total_size > 0:
                    # The last block is usually partial, so clamp to the real size.
                    downloaded = min(downloaded, total_size)
                    speed = human_bytes(int(downloaded / duration))
                    ratio = max(1e-8, downloaded / total_size)
                    eta = duration / ratio * (1 - ratio)
                    sys.stderr.write(
                        "\r%.2f%%, %s/%s, %s/s, ETA %s      " %
                        (ratio * 100, human_bytes(downloaded), human_bytes(total_size),
                         speed, time_util.report_time_delta(eta)))
                else:
                    # Total size unknown (urlretrieve passes -1): a percentage or ETA would
                    # be meaningless, so only report what has been transferred.
                    speed = human_bytes(int(downloaded / duration))
                    sys.stderr.write("\r%s, %s/s      " % (human_bytes(downloaded), speed))
                sys.stderr.flush()

            import socket
            # NOTE: both the default socket timeout and the installed opener are
            # process-wide settings and stay in effect after this function returns.
            socket.setdefaulttimeout(10)
            opener = urllib.request.build_opener()
            opener.addheaders = [('User-agent', f'HanLP/{__version__}')]
            urllib.request.install_opener(opener)
            urlretrieve(url, tmp_path, reporthook)
            eprint()
        except BaseException as e:
            # BaseException on purpose: the partial file is cleaned up on Ctrl-C as well.
            remove_file(tmp_path)
            url = url.split('#')[0]
            if not windows():
                hints_for_download = f'e.g. \nwget {url} -O {save_path}\n'
            else:
                hints_for_download = ' Use some decent downloading tools.\n'
            if not url.startswith(HANLP_URL):
                hints_for_download += 'For third party data, you may find it on our mirror site:\n' \
                                      'https://od.hankcs.com/hanlp/data/\n'
            try:
                installed_version, latest_version = check_outdated()
            except Exception:
                # The version check is best effort: if it fails too (e.g. no Internet) it must
                # not replace the original download error or suppress the hint below.
                installed_version, latest_version = None, None
            if installed_version != latest_version:
                hints_for_download += f'Or upgrade to the latest version({latest_version}):\npip install -U hanlp'
            message = f'Download failed due to [red]{repr(e)}[/red]. Please download it to {save_path} by yourself. ' \
                      f'[yellow]{hints_for_download}[/yellow]'
            if verbose:
                cprint(message)
            # Attach the hint to the exception itself so it is visible even when verbose is off.
            if hasattr(e, 'msg'):
                e.msg += '\n' + remove_color_tag(message)
            raise e
        remove_file(save_path)
        os.rename(tmp_path, save_path)
    return save_path