Exemplo n.º 1
0
def download(url, path):
    """Fetch ``url`` into ``path`` unless ``path`` already exists as a file.

    The payload is first written to ``<path>.downloading`` and only renamed
    onto ``path`` once the transfer has finished, so a failed transfer does
    not leave a partial file under the final name.

    Args:
        url: Address of the resource to fetch.
        path: Destination file path.

    Returns:
        ``True`` when ``path`` was already present or has been downloaded.

    Raises:
        SystemExit: With status 1 when the transfer fails for any reason
            (``BaseException`` is caught, so this includes Ctrl-C), after
            printing manual-installation hints and opening the docs page.
    """
    if os.path.isfile(path):
        print('使用本地 {}, 忽略 {}'.format(path, url))
        return True

    print('下载 {} 到 {}'.format(url, path))
    tmp_path = '{}.downloading'.format(path)
    # Presumably clears leftovers of an earlier aborted attempt; depends on
    # remove_file tolerating a missing file -- confirm against its definition.
    remove_file(tmp_path)
    try:
        # NOTE(review): the positional 4 looks like a worker/connection
        # count -- confirm against the Downloader signature.
        downloader = Downloader(
            url,
            tmp_path,
            4,
            headers={
                'User-agent': 'pyhanlp (' + platform.platform() + ')'
            })
        downloader.subscribe(
            DownloadCallback(show_header=False, out=sys.stdout))
        downloader.start_sync()
    except BaseException as e:
        eprint('下载失败 {} 由于 {}'.format(url, repr(e)))
        doc_url = 'https://od.hankcs.com/book/intro_nlp/'
        eprint('请参考 %s 执行手动安装.' % doc_url)
        eprint('或手动下载 {} 到 {}'.format(url, path))
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)
        browser_open(doc_url)
        # sys.exit instead of the builtin exit(): the latter is injected by
        # the site module and is missing under `python -S` or when frozen.
        sys.exit(1)
    remove_file(path)
    os.rename(tmp_path, path)
    return True
Exemplo n.º 2
0
def download(url,
             save_path=None,
             save_dir=hanlp_home(),
             prefix=HANLP_URL,
             append_location=True,
             verbose=HANLP_VERBOSE):
    """Download ``url`` to a local file, reusing an existing local copy.

    The data is written to ``<save_path>.downloading`` and renamed onto
    ``save_path`` only after the transfer completes.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file. When falsy it is derived with
            ``path_from_url(url, save_dir, prefix, append_location)``.
        save_dir: Forwarded to ``path_from_url``. The default is computed
            once, when this function is defined.
        prefix: Forwarded to ``path_from_url``.
        append_location: Forwarded to ``path_from_url``.
        verbose: When truthy, progress and failure hints are printed.

    Returns:
        The path of the local file.

    Raises:
        BaseException: Whatever the downloader raised is re-raised after the
            temporary file is removed. If the exception carries a string
            ``msg`` attribute, plain-text download hints are appended to it.
    """
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        if verbose:
            eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path

    makedirs(parent_dir(save_path))
    if verbose:
        eprint('Downloading {} to {}'.format(url, save_path))
    tmp_path = '{}.downloading'.format(save_path)
    remove_file(tmp_path)
    try:
        downloader = Downloader(
            url,
            tmp_path,
            4,
            headers={
                'User-agent':
                f'HanLP/{__version__} ({platform.platform()})'
            })
        if verbose:
            downloader.subscribe(DownloadCallback(show_header=False))
        downloader.start_sync()
    except BaseException as e:
        remove_file(tmp_path)
        # Drop the URL fragment so the suggested wget command is usable.
        url = url.split('#')[0]
        if not windows():
            hints_for_download = f'e.g. \nwget {url} -O {save_path}\n'
        else:
            hints_for_download = ' Use some decent downloading tools.\n'
        if not url.startswith(HANLP_URL):
            hints_for_download += 'For third party data, you may find it on our mirror site:\n' \
                                  'https://od.hankcs.com/hanlp/data/\n'
        # The version check may itself fail (e.g. when the download failed
        # because there is no network); it must not mask the original error.
        try:
            installed_version, latest_version = check_outdated()
        except Exception:
            installed_version, latest_version = None, None
        if installed_version != latest_version:
            # Trailing newline keeps the pip command separate from the
            # "See ..." sentence that follows it in the message.
            hints_for_download += f'Or upgrade to the latest version({latest_version}):\npip install -U hanlp\n'
        message = f'Download failed due to [red]{repr(e)}[/red]. Please download it to {save_path} by yourself. ' \
                  f'[yellow]{hints_for_download}[/yellow]' \
                  'See https://hanlp.hankcs.com/docs/install.html#install-models for instructions.'
        if verbose:
            cprint(message)
        # Only extend msg when it really is text; a non-str msg would make
        # `+=` raise TypeError and hide the download failure.
        if isinstance(getattr(e, 'msg', None), str):
            e.msg += '\n' + remove_color_tag(message)
        raise e
    remove_file(save_path)
    os.rename(tmp_path, save_path)
    return save_path
Exemplo n.º 3
0
def download(url, save_path=None, save_dir=hanlp_home(), prefix=HANLP_URL, append_location=True, verbose=HANLP_VERBOSE):
    """Download ``url`` to a local file, reusing an existing local copy.

    The data is written to ``<save_path>.downloading`` and renamed onto
    ``save_path`` only after the transfer completes.

    Args:
        url: Address of the resource to fetch.
        save_path: Destination file. When falsy it is derived with
            ``path_from_url(url, save_dir, prefix, append_location)``.
        save_dir: Forwarded to ``path_from_url``. The default is computed
            once, when this function is defined.
        prefix: Forwarded to ``path_from_url``.
        append_location: Forwarded to ``path_from_url``.
        verbose: When truthy, progress and failure hints are printed.

    Returns:
        The path of the local file.

    Raises:
        BaseException: Whatever the downloader raised is re-raised (with
            implicit exception context suppressed) after the temporary file
            is removed. Plain-text hints are appended to the exception's
            ``msg`` or, failing that, to its first string argument.
    """
    if not save_path:
        save_path = path_from_url(url, save_dir, prefix, append_location)
    if os.path.isfile(save_path):
        if verbose:
            eprint('Using local {}, ignore {}'.format(save_path, url))
        return save_path

    makedirs(parent_dir(save_path))
    if verbose:
        eprint('Downloading {} to {}'.format(url, save_path))
    tmp_path = '{}.downloading'.format(save_path)
    remove_file(tmp_path)
    try:
        downloader = Downloader(url, tmp_path, 4, headers={
            'User-agent': f'HanLP/{__version__} ({platform.platform()})'})
        if verbose:
            downloader.subscribe(DownloadCallback(show_header=False))
        downloader.start_sync()
    except BaseException as e:
        remove_file(tmp_path)
        # Drop the URL fragment so the suggested wget command is usable.
        url = url.split('#')[0]
        try:
            installed_version, latest_version = check_outdated()
        except Exception:
            # Best effort only (e.g. no Internet). Exception rather than a
            # bare except so Ctrl-C / SystemExit are not swallowed here.
            installed_version, latest_version = None, None
        if installed_version != latest_version:
            # Always prompt user to upgrade whenever a new version is available
            hints = f'[green]Please upgrade to the latest version ({latest_version}) with:[/green]' \
                    f'\n\n\t[yellow]pip install -U hanlp[/yellow]\n'
        else:  # Otherwise, prompt user to re-try
            hints = f'[green]Please re-try or download it to {save_path} by yourself '
            if not windows():
                hints += f'with:[/green]\n\n\t[yellow]wget {url} -O {save_path}[/yellow]\n\n'
            else:
                hints += 'using some decent downloading tools.[/green]\n'
            if not url.startswith(HANLP_URL):
                hints += 'For third party data, you may find it on our mirror site:\n' \
                         'https://od.hankcs.com/hanlp/data/\n'
            hints += 'See also https://hanlp.hankcs.com/docs/install.html#install-models for instructions.'
        message = f'Download failed due to [red]{repr(e)}[/red].\n' \
                  f'{hints}'
        if verbose:
            cprint(message)
        # Attach the hints to the exception text. A non-str msg falls through
        # to the args branch instead of making `+=` raise TypeError.
        if isinstance(getattr(e, 'msg', None), str):
            e.msg += '\n' + remove_color_tag(message)
        elif e.args and isinstance(e.args, tuple) and isinstance(e.args[0], str):
            e.args = (e.args[0] + '\n' + remove_color_tag(message),) + e.args[1:]
        raise e from None
    remove_file(save_path)
    os.rename(tmp_path, save_path)
    return save_path
Exemplo n.º 4
0
def download(url, path):
    """Fetch ``url`` into ``path`` unless ``path`` already exists as a file.

    A process-wide urllib opener with a browser-like User-Agent is installed
    on every call, before the local-file check. The payload is written to
    ``<path>.downloading`` and renamed onto ``path`` only after the transfer
    has finished.

    Args:
        url: Address of the resource to fetch.
        path: Destination file path.

    Returns:
        ``True`` when ``path`` was already present or has been downloaded.

    Raises:
        SystemExit: With status 1 when the transfer fails for any reason
            (``BaseException`` is caught, so this includes Ctrl-C), after
            printing manual-installation hints and opening the docs page.
    """
    # Single definition of the header value used for both the global opener
    # and the Downloader request.
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.162 Safari/537.36'
    # NOTE(review): `urllib` must be bound to a module exposing
    # build_opener/install_opener (e.g. urllib.request) -- confirm against
    # the file's imports.
    opener = urllib.build_opener()
    opener.addheaders = [('User-agent', user_agent)]
    urllib.install_opener(opener)

    if os.path.isfile(path):
        print('使用本地 {}, 忽略 {}'.format(path, url))
        return True

    print('下载 {} 到 {}'.format(url, path))
    tmp_path = '{}.downloading'.format(path)
    # Presumably clears leftovers of an earlier aborted attempt; depends on
    # remove_file tolerating a missing file -- confirm against its definition.
    remove_file(tmp_path)
    try:
        # NOTE(review): the positional 4 looks like a worker/connection
        # count -- confirm against the Downloader signature.
        downloader = Downloader(
            url,
            tmp_path,
            4,
            headers={'User-agent': user_agent})
        downloader.subscribe(
            DownloadCallback(show_header=False, out=sys.stdout))
        downloader.start_sync()
    except BaseException as e:
        eprint('下载失败 {} 由于 {}'.format(url, repr(e)))
        doc_url = 'https://od.hankcs.com/book/intro_nlp/'
        eprint('请参考 %s 执行手动安装.' % doc_url)
        eprint('或手动下载 {} 到 {}'.format(url, path))
        if os.path.isfile(tmp_path):
            os.remove(tmp_path)
        browser_open(doc_url)
        # sys.exit instead of the builtin exit(): the latter is injected by
        # the site module and is missing under `python -S` or when frozen.
        sys.exit(1)
    remove_file(path)
    os.rename(tmp_path, path)
    return True