def mixed_segmentation(in_str, rm_punc=False):
    # Make sure the NLTK punkt tokenizer data is available before tokenizing.
    nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data")
    tokenizers_path = os.path.join(nltk_path, "tokenizers")
    punkt_path = os.path.join(tokenizers_path, "punkt")
    if not os.path.exists(punkt_path):
        default_downloader.download_file_and_uncompress(
            url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True)
    nltk.data.path.append(nltk_path)

    in_str = str(in_str).lower().strip()
    segs_out = []
    temp_str = ""
    sp_char = [
        '-', ':', '_', '*', '^', '/', '\\', '~', '`', '+', '=', ',', '。', ':',
        '?', '!', '“', '”', ';', '’', '《', '》', '……', '·', '、', '「', '」', '(',
        ')', '-', '~', '『', '』'
    ]
    for char in in_str:
        if rm_punc and char in sp_char:
            continue
        if re.search(r'[\u4e00-\u9fa5]', char) or char in sp_char:
            # Flush the buffered non-Chinese substring through NLTK first,
            # then emit the Chinese character (or special symbol) as its own token.
            if temp_str != "":
                ss = nltk.word_tokenize(temp_str)
                segs_out.extend(ss)
                temp_str = ""
            segs_out.append(char)
        else:
            temp_str += char
    # Handle the trailing non-Chinese substring.
    if temp_str != "":
        ss = nltk.word_tokenize(temp_str)
        segs_out.extend(ss)
    return segs_out
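# A minimal usage sketch for mixed_segmentation (the sample sentence below is
# illustrative, not from the source). Chinese characters come out one token
# each, the embedded English span goes through nltk.word_tokenize, and
# rm_punc=True drops the punctuation listed in sp_char.
if __name__ == "__main__":
    print(mixed_segmentation("北京是China的首都。", rm_punc=True))
    # expected: ['北', '京', '是', 'china', '的', '首', '都']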
def __init__(self, sub_dataset='SST-2'):
    # sub_dataset: CoLA, MNLI, MNLI_m, MNLI_mm, MRPC, QNLI, QQP, RTE, SST-2, STS-B
    if sub_dataset not in [
            'CoLA', 'MNLI', 'MNLI_m', 'MNLI_mm', 'MRPC', 'QNLI', 'QQP', 'RTE',
            'SST-2', 'STS-B'
    ]:
        raise Exception(
            sub_dataset +
            " is not in the GLUE benchmark. Please check the dataset name.")
    # MNLI_m and MNLI_mm share the MNLI download; only the dev/test split differs.
    self.mismatch = False
    if sub_dataset == 'MNLI_mm':
        sub_dataset = 'MNLI'
        self.mismatch = True
    elif sub_dataset == 'MNLI_m':
        sub_dataset = 'MNLI'
    self.sub_dataset = sub_dataset
    self.dataset_dir = os.path.join(DATA_HOME, "glue_data")
    if not os.path.exists(self.dataset_dir):
        ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
            url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
    else:
        logger.info("Dataset {} already cached.".format(self.dataset_dir))
    self._load_train_examples()
    self._load_dev_examples()
    self._load_test_examples()
    self._load_predict_examples()
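# Hypothetical usage sketch (the class name GLUE is an assumption; only the
# __init__ above appears in the source). Selecting 'MNLI_mm' downloads the
# shared glue_data archive once and flags the mismatched evaluation split:
#
#     dataset = GLUE(sub_dataset='MNLI_mm')
#     assert dataset.sub_dataset == 'MNLI' and dataset.mismatch is True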
def _init_with_url(self, url):
    utils.check_url(url)
    result, tips, module_dir = default_downloader.download_file_and_uncompress(
        url, save_path=".")
    if not result:
        logger.error(tips)
        exit(1)
    self._init_with_module_file(module_dir)
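# Hypothetical call sketch (the URL below is a placeholder, not from the
# source). A failed download logs the downloader's tips and exits with a
# non-zero status; on success the uncompressed directory is handed to
# _init_with_module_file:
#
#     self._init_with_url("https://example.com/some_module.tar.gz")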
def __init__(self, version_2_with_negative=False):
    self.dataset_dir = os.path.join(DATA_HOME, "squad_data")
    if not os.path.exists(self.dataset_dir):
        ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
            url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
    else:
        logger.info("Dataset {} already cached.".format(self.dataset_dir))
    self.version_2_with_negative = version_2_with_negative
    self._load_train_examples(version_2_with_negative, if_has_answer=True)
    self._load_dev_examples(version_2_with_negative, if_has_answer=True)
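# Hypothetical usage sketch (the class name SQuAD is an assumption).
# version_2_with_negative=True loads the SQuAD 2.0 variant, whose examples
# include unanswerable questions; False keeps the original SQuAD 1.1 files:
#
#     dataset = SQuAD(version_2_with_negative=True)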
def _download_dataset(self, dataset_path, url):
    if not os.path.exists(dataset_path):
        result, tips, dataset_path = default_downloader.download_file_and_uncompress(
            url=url,
            save_path=hub.common.dir.DATA_HOME,
            print_progress=True,
            replace=True)
        if not result:
            print(tips)
            exit(1)
    return dataset_path
def __init__(self):
    self.dataset_dir = os.path.join(DATA_HOME, "msra_ner")
    if not os.path.exists(self.dataset_dir):
        ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
            url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
    else:
        logger.info("Dataset {} already cached.".format(self.dataset_dir))
    self._load_train_examples()
    self._load_test_examples()
    self._load_dev_examples()
def _download_dataset(self, dataset_path, url):
    if not os.path.exists(dataset_path):
        result, tips, dataset_path = default_downloader.download_file_and_uncompress(
            url=url,
            save_path=hub.common.dir.DATA_HOME,
            print_progress=True,
            replace=True)
        if not result:
            raise Exception(tips)
    else:
        logger.info("Dataset {} already cached.".format(dataset_path))
    return dataset_path
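# Hypothetical call sketch (the dataset name and URL below are placeholders,
# not from the source). On a cache hit the path is returned untouched; on a
# miss the archive is fetched into DATA_HOME, uncompressed, and the extracted
# path is returned:
#
#     path = self._download_dataset(
#         dataset_path=os.path.join(hub.common.dir.DATA_HOME, "some_dataset"),
#         url="https://example.com/some_dataset.tar.gz")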
def __init__(self, language='zh'):
    if language not in [
            "ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th",
            "tr", "ur", "vi", "zh"
    ]:
        raise Exception(
            language +
            " is not a supported XNLI language. Please check the language code.")
    self.language = language
    self.dataset_dir = os.path.join(DATA_HOME, "XNLI-lan")
    if not os.path.exists(self.dataset_dir):
        ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
            url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
    else:
        logger.info("Dataset {} already cached.".format(self.dataset_dir))
    self._load_train_examples()
    self._load_test_examples()
    self._load_dev_examples()
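# Hypothetical usage sketch (the class name XNLI is an assumption; only the
# __init__ above appears in the source). All 15 languages share the same
# XNLI-lan download; the language code only selects which files the _load_*
# helpers read:
#
#     dataset = XNLI(language='fr')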
# Collect the optional dependencies that are missing from the environment.
lack_dependency = []
for dependency in ["ruamel", "parakeet", "soundfile", "librosa"]:
    if not importlib.util.find_spec(dependency):
        lack_dependency.append(dependency)

# Download the NLTK data packages through PaddleHub's mirror to speed things
# up; 'import parakeet' expects them to be present.
_PUNKT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/punkt.tar.gz"
_CMUDICT_URL = "https://paddlehub.bj.bcebos.com/paddlehub-thirdparty/cmudict.tar.gz"

nltk_path = os.path.join(THIRD_PARTY_HOME, "nltk_data")
tokenizers_path = os.path.join(nltk_path, "tokenizers")
corpora_path = os.path.join(nltk_path, "corpora")
punkt_path = os.path.join(tokenizers_path, "punkt")
cmudict_path = os.path.join(corpora_path, "cmudict")

if not os.path.exists(punkt_path):
    default_downloader.download_file_and_uncompress(
        url=_PUNKT_URL, save_path=tokenizers_path, print_progress=True)
if not os.path.exists(cmudict_path):
    default_downloader.download_file_and_uncompress(
        url=_CMUDICT_URL, save_path=corpora_path, print_progress=True)
nltk.data.path.append(nltk_path)

# Only import the heavy optional packages when everything is installed.
if not lack_dependency:
    import soundfile as sf
    import librosa
    from ruamel import yaml
    from parakeet.models.fastspeech.fastspeech import FastSpeech as FastSpeechModel
    from parakeet.g2p.en import text_to_sequence
    from parakeet.models.transformer_tts.utils import *
    from parakeet.utils import io
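# A minimal sketch of how the collected lack_dependency list might be
# surfaced to the user (the wording and pip hint are assumptions, not from
# the source):
#
#     if lack_dependency:
#         raise ImportError(
#             "This module requires extra packages, please install them first: "
#             "pip install " + " ".join(lack_dependency))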