def set_up(self):
    try:
        self.nlp = spacy.load(self.lang_model)
    except OSError:
        from spacy.cli.download import download
        download(self.lang_model)
        self.nlp = spacy.load(self.lang_model)
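# The load-or-download pattern above recurs throughout this collection. A minimal
# standalone sketch, assuming only that spacy and spacy.cli.download are available;
# ensure_pipeline is a hypothetical helper name, not part of spaCy's API.
import spacy
from spacy.cli.download import download

def ensure_pipeline(name: str):
    """Load a spaCy pipeline, downloading the package first if it is missing."""
    try:
        return spacy.load(name)
    except OSError:
        download(name)
        return spacy.load(name)

# nlp = ensure_pipeline("en_core_web_sm")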
def _load_lang_model(self):
    # pylint: disable=import-outside-toplevel
    # download ScispaCy model using URL
    if self.lang_model in SCISPACYMODEL_URL:
        import subprocess
        import sys
        import os
        import importlib

        download_url = SCISPACYMODEL_URL[self.lang_model]
        command = [sys.executable, "-m", "pip", "install", download_url]
        subprocess.run(command, env=os.environ.copy(), encoding="utf8", check=False)
        cls = importlib.import_module(self.lang_model)
        self.nlp = cls.load()
    else:
        # use spaCy download
        try:
            self.nlp = spacy.load(self.lang_model)
        except OSError:
            download(self.lang_model)
            self.nlp = spacy.load(self.lang_model)
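# For context, SCISPACYMODEL_URL is expected to map ScispaCy model names to
# pip-installable archive URLs. The entry below is an illustrative placeholder,
# not a verified URL or version.
# SCISPACYMODEL_URL = {
#     "en_core_sci_sm": "https://.../en_core_sci_sm-<version>.tar.gz",
# }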
def _setup(args):
    try:
        import spacy
        from spacy.cli.download import download
    except ImportError:
        print('error: required package "spacy" is not installed', file=sys.stderr)
        exit(1)

    if not args:
        print('error: you must pass a list of two-letter ISO 639-1 language codes to install the respective '
              'language models or the string "all" to install all available language models', file=sys.stderr)
        exit(2)
    else:
        try:
            args.pop(args.index('--no-update'))
            no_update = True
        except ValueError:
            no_update = False

        if args == ['all']:
            install_languages = list(DEFAULT_LANGUAGE_MODELS.keys())
        else:
            install_languages = []
            for arg in args:
                install_languages.extend([l for l in map(str.strip, arg.split(',')) if l])

    print('checking if required spaCy data packages are installed...')

    try:
        piplist_str = subprocess.check_output([sys.executable, '-m', 'pip', 'list',
                                               '--disable-pip-version-check',
                                               '--format', 'json'])
    except subprocess.CalledProcessError as exc:
        print('error: calling pip failed with the following error message\n' + str(exc), file=sys.stderr)
        exit(3)

    piplist = json.loads(piplist_str)
    installed_pkgs = set(item['name'] for item in piplist)
    model_pkgs = dict(zip(DEFAULT_LANGUAGE_MODELS.keys(),
                          map(lambda x: x.replace('_', '-') + '-sm', DEFAULT_LANGUAGE_MODELS.values())))

    for lang in install_languages:
        if lang not in DEFAULT_LANGUAGE_MODELS.keys():
            print('error: no language model for language code "%s"' % lang, file=sys.stderr)
            exit(4)

        lang_model_pkg = model_pkgs[lang]

        if no_update and lang_model_pkg in installed_pkgs:
            print('language model package "%s" for language code "%s" is already installed -- skipping'
                  % (lang_model_pkg, lang))
            continue

        lang_model = DEFAULT_LANGUAGE_MODELS[lang] + '_sm'
        print('installing language model "%s" for language code "%s"...' % (lang_model, lang))
        download(lang_model)

    print('done.')
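# Hedged example of invoking _setup(); the argument list mirrors what a CLI entry
# point would pass (comma-separated ISO 639-1 codes plus the optional flag).
# _setup(['en,de', '--no-update'])   # install English and German small models, skipping any already present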
def load(self):
    """
    If the pre-trained `en_vectors_web_lg` model is not already stored on disk,
    it will be automatically downloaded as part of :func:`load()`. Note that the
    download process may require sudo permissions depending on your python
    package settings.
    """
    try:
        self.nlp = spacy.load('en_vectors_web_lg')
    except OSError:
        download('en_vectors_web_lg')
        self.nlp = spacy.load('en_vectors_web_lg')
def add_language(self, lang):
    print('Loading the %s model' % lang)
    lang_model = self.lang_dict.get(lang, lang)
    if lang == "ja":
        import ja_sudachipy
        self.nlp_dict[lang] = ja_sudachipy.Japanese().load(lang_model)
        return
    if info():
        if lang_model not in info()['Models'] and lang in self.lang_dict:
            download(lang_model)
    self.nlp_dict[lang] = spacy.load(lang_model)
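# Hedged usage sketch for add_language(); `pipeline` stands in for an instance of
# the surrounding class and the language code is illustrative.
# pipeline.add_language("en")          # downloads the model on first use if missing
# nlp = pipeline.nlp_dict["en"]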
def setup(parser: ArgumentParser, args: Namespace) -> None:
    api_key = password('Enter your Google API key:').ask()
    config['google_api_key'] = api_key
    config.save()

    with progress_bar(transient=True) as progress:
        progress.add_task('[green]Downloading spaCy models[/]', start=False)
        download('pt_core_news_md', False, False, '-q')
        download('en_core_web_sm', False, False, '-q')

    print('\n[green]✔ You can now use Qualichat.[/green]')
def download_model(model_name: str):
    """
    Downloads and links a trained language model.

    >>> from nerd import ner
    >>> ner.download_model(model_name='en_core_web_sm')

    Supported models: 'en_core_web_sm', 'de_core_news_sm', 'fr_core_news_sm',
    'es_core_news_sm', 'pt_core_news_sm', 'it_core_news_sm',
    'nl_core_news_sm', 'el_core_news_sm', 'xx_ent_wiki_sm'

    :param model_name: Model package name.
    :type model_name: str
    """
    download(model_name)
    package_path = get_package_path(model_name)
    link(model_name, model_name, force=True, model_path=package_path)
def main():
    # check that the language add-ons required by nltk are present
    try:
        nltk.tokenize.word_tokenize('Existe nltk punkt')
    except LookupError:
        nltk.download('punkt')

    try:
        spacy.load('pt')
    except IOError:
        download('pt')

    config = Config('data/configuration/', 'config.json')

    # run the main functions of each class, reading the input files and creating the model
    parser = run_data_parse(config)
    # model = run_model(config)

    # save the main information about the dataset
    create_dataset_info(parser)
def load_model(
    *,
    name_or_nlp: str | Language,
    vocab: Union[Vocab, bool] = True,
    disable: Iterable[str] = None,
    exclude: Iterable[str] = None,
    keep_hyphens: bool = False,
    remove_whitespace_ents: bool = False,
) -> Language:
    if remove_whitespace_ents:
        Language.factories['remove_whitespace_entities'] = lambda _nlp, **_cfg: remove_whitespace_entities

    args: dict = skip_none_values(dict(vocab=vocab, disable=disable, exclude=exclude))

    if isinstance(name_or_nlp, Language):
        return name_or_nlp

    if isinstance(name_or_nlp, str):
        try:
            nlp: Language = load(name_or_nlp, **args)
        except OSError:
            logger.info(f"not found: {name_or_nlp}, downloading...")
            download(name_or_nlp)
            nlp: Language = load(name_or_nlp, **args)

    # try:
    #     name: Union[str, Language] = prepend_spacy_path(name_or_nlp)
    #     nlp: Language = load(name, **args)
    # except OSError:
    #     ...

    if keep_hyphens:
        nlp.tokenizer = keep_hyphen_tokenizer(nlp)

    return nlp
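# Hedged usage sketch for load_model(); its arguments are keyword-only and the
# model name shown here is an assumption.
# nlp = load_model(name_or_nlp="en_core_web_sm", disable=["ner"], keep_hyphens=True)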
def run(
    model_path: Optional[str] = None,
    ensure_model: Optional[str] = None,
    split_mode: Optional[str] = None,
    hash_comment: str = "print",
    output_path: Optional[Path] = None,
    output_format: str = "0",
    require_gpu: int = -1,
    disable_sentencizer: bool = False,
    use_normalized_form: bool = False,
    parallel_level: int = 1,
    files: List[str] = None,
):
    if output_format in ["3", "json"] and hash_comment != "analyze":
        print(
            f'hash_comment="{hash_comment}" not permitted for JSON output. Forced to use hash_comment="analyze".',
            file=sys.stderr)

    assert parallel_level == 1 or require_gpu == -1, "require_gpu not allowed for multi-processing. https://github.com/explosion/spaCy/issues/5507"

    if parallel_level <= 0:
        level = max(1, cpu_count() + parallel_level)

    if output_format in [2, "mecab"]:
        if require_gpu >= 0:
            print("GPU not used for mecab mode", file=sys.stderr)
        require_gpu = False
    elif parallel_level <= 0:
        if require_gpu >= 0:
            if level < 4:
                print(
                    f"GPU #{require_gpu} enabled: 'parallel_level' set to {level}",
                    end="", file=sys.stderr)
            else:
                print(
                    f"GPU #{require_gpu} enabled: 'parallel_level' set to {level} but seems it's too much",
                    end="", file=sys.stderr)
        else:
            print(f"'parallel_level' set to {level}", file=sys.stderr)
        parallel_level = level
    elif require_gpu:
        print(f"GPU #{require_gpu} enabled", file=sys.stderr)

    assert model_path is None or ensure_model is None

    if ensure_model:
        ensure_model = ensure_model.replace("-", "_")
        try:
            from importlib import import_module
            import_module(ensure_model)
        except ModuleNotFoundError:
            if GINZA_MODEL_PATTERN.match(ensure_model):
                print("Installing", ensure_model, file=sys.stderr)
                import pip
                pip.main(["install", ensure_model])
                print("Successfully installed", ensure_model, file=sys.stderr)
            elif SPACY_MODEL_PATTERN.match(ensure_model):
                print("Installing", ensure_model, file=sys.stderr)
                from spacy.cli.download import download
                download(ensure_model)
                print("Successfully installed", ensure_model, file=sys.stderr)
            else:
                raise OSError(
                    "E050", f'You need to install "{ensure_model}" before executing ginza.'
                )
        model_name_or_path = ensure_model
    else:
        model_name_or_path = model_path

    analyzer = Analyzer(
        model_name_or_path,
        split_mode,
        hash_comment,
        output_format,
        require_gpu,
        disable_sentencizer,
        use_normalized_form,
    )

    output = _OutputWrapper(output_path, output_format)
    output.open()
    try:
        if not files and sys.stdin.isatty():
            _analyze_tty(analyzer, output)
        else:
            if not files:
                files = [0]
            if parallel_level == 1:
                _analyze_single(analyzer, output, files)
            else:
                _analyze_parallel(analyzer, output, files, parallel_level)
    finally:
        output.close()
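# Hedged usage sketch for run(); "ja_ginza" is the standard GiNZA model package
# name, and "input.txt" is an illustrative file name.
# run(ensure_model="ja_ginza", files=["input.txt"])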
def download_model_by_name(*, model_name: str):
    download(model_name)
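# Hedged example call; note that model_name is keyword-only.
# download_model_by_name(model_name="en_core_web_sm")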
#!/usr/bin/env python
# coding: utf-8

from spacy.cli.download import download, link

download(model='en_core_web_sm')
link(origin='en_core_web_sm', link_name='en')

import nltk

nltk.download('punkt')
import string

from chatterbot import languages
import spacy
from spacy.cli.download import download

download(model="en_core_web_sm")


class LowercaseTagger(object):
    """
    Returns the text in lowercase.
    """

    def __init__(self, language=None):
        self.language = language or languages.ENG

    def get_text_index_string(self, text):
        return text.lower()


class PosLemmaTagger(object):

    def __init__(self, language=None):
        import spacy

        self.language = language or languages.ENG
        self.punctuation_table = str.maketrans(dict.fromkeys(string.punctuation))

        if self.language.ISO_639_1.lower() == 'en':
            self.nlp = spacy.load('en_core_web_sm')
        else:
            self.nlp = spacy.load(self.language.ISO_639_1.lower())
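# Hedged usage sketch for PosLemmaTagger; it assumes languages.ENG exposes an
# ISO_639_1 attribute as in ChatterBot, and the input sentence is illustrative.
# tagger = PosLemmaTagger()
# doc = tagger.nlp("Hello there, how are you today?")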
class NoTokenizer(object):
    def __init__(self, vocab):
        self.vocab = vocab

    def __call__(self, tokens):
        spaces = [True] * len(tokens)
        return Doc(self.vocab, words=tokens, spaces=spaces)


try:
    lemmatizer = spacy.load('en', disable=['parser', 'ner'])
except OSError:
    # Assume the problem was that the spaCy models were not downloaded
    from spacy.cli.download import download
    download('en')
    lemmatizer = spacy.load('en', disable=['parser', 'ner'])

lemmatizer.tokenizer = NoTokenizer(lemmatizer.vocab)


def get_sparse_prob_indices(probs, alpha=0.0):
    """
    Here alpha is used to make a probability distribution sparse by limiting
    the maximum allowed relative decay between sorted probabilities.
    """
    if probs.shape[0] == 1:
        return np.array([0])
    # sort the probabilities and compute the ratio from larger to smaller
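# Hedged example of running the pre-tokenized lemmatizer defined above; the token
# list is illustrative only.
# doc = lemmatizer(["The", "cats", "were", "running"])
# lemmas = [token.lemma_ for token in doc]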
def init(
    self,
    snips_path: Optional[str] = os.path.join(HERE, "engine.tar.gz"),
    transformations: Optional[str] = os.path.join(HERE, "transformations.json"),
    srl_predictor_path: Optional[str] = "https://s3-us-west-2.amazonaws.com/allennlp/models/srl-model-2018.05.25.tar.gz",
    spacy_lang: Optional[str] = "en_core_web_md",
    neuralcoref: bool = True,
    core_nlp_server_url: Optional[str] = "http://localhost:9000",
) -> None:
    logger.debug("Initializing shared resources")

    if core_nlp_server_url:
        logger.debug("Connecting to Stanford CoreNLP server %s", core_nlp_server_url)
        import requests

        try:
            requests.head(core_nlp_server_url).ok
            self._core_nlp_server_url = core_nlp_server_url
        except IOError:
            logger.exception("During HEAD request:")
            logger.warning(
                "Failed to load CoreNLP server %s, make sure it is live and ready,"
                " continuing without it.",
                core_nlp_server_url,
            )

    if srl_predictor_path:
        logger.debug("Initiating allennlp SRL server with model from %s", srl_predictor_path)
        from multiprocessing import Process, Queue

        self._srl_qi = Queue()
        self._srl_qo = Queue()
        self._srl_count = 0
        p = Process(
            target=_predictor_server,
            args=(srl_predictor_path, self._srl_qi, self._srl_qo),
            daemon=True,
        )
        p.start()

        @atexit.register
        def cleanup():
            logger.debug("Calling cleanup")
            self._srl_qi.put(None, timeout=0.5)
            self._srl_qi.close()
            self._srl_qo.close()
            p.join(timeout=5)
            if p.is_alive():
                logger.error("Killing SRL server")
                p.kill()

    if snips_path:
        logger.debug("Loading snips engine from %s", snips_path)
        from snips_nlu import SnipsNLUEngine

        if os.path.isdir(snips_path):
            logger.debug("%s is a directory, loading directly", snips_path)
            self._engine = SnipsNLUEngine.from_path(snips_path)
        else:
            with tarfile.open(snips_path, "r:gz") as archive:
                with TemporaryDirectory() as tmp:
                    archive.extractall(tmp)
                    logger.debug("Extracted to temporary dir %s, loading from there", tmp)
                    self._engine = SnipsNLUEngine.from_path(os.path.join(tmp, "engine"))

    if transformations:
        logger.debug("Loading transformations file from %s", transformations)
        with open(transformations) as f:
            self._transformations = json.load(f)

    if spacy_lang:
        logger.debug("Loading spacy lang %s", spacy_lang)
        try:
            module = importlib.import_module(spacy_lang)
        except ModuleNotFoundError:
            from spacy.cli.download import download
            download(spacy_lang)
            module = importlib.import_module(spacy_lang)
        self._spacy = module.load()

    if neuralcoref:
        if self._spacy is None:
            raise ValueError("neuralcoref is set but no spacy model is loaded")
        import neuralcoref
        neuralcoref.add_to_pipe(self._spacy)

    if self._spacy and self._core_nlp_server_url:
        from spacy.tokens import Token
        Token.set_extension("quote", default=None, force=True)
def set_up(self):
    try:
        self.nlp = spacy.load(self.lang_model)
    except OSError:
        download(self.lang_model)
        self.nlp = spacy.load(self.lang_model)
# Hyperparameters

# amount of entities to keep for each article
amount_of_entities = 3

# names of the csv files with the data
PATH_LABELED_DATA = './data_files/labeled_data.csv'
PATH_UNLABELED_DATA = './data_files/data.csv'

# index name for Elasticsearch
INDEX = 'labeled-news-english'

# length of the character window around each entity
char_length = 20

# logging.basicConfig(level=logging.ERROR)

# checks whether the corpus is present in the container and downloads it if needed
download('en_core_web_sm')
# loads the spaCy corpus
nlp = spacy.load('en_core_web_sm')

# connects to Elasticsearch
es = connect_elasticsearch()

# ensures that the connection succeeded before performing the following steps
if es is not None:
    # loads the data set from which the entities should be taken
    dataset = get_labeled_data(PATH_LABELED_DATA)
    # creates the requested number of entities for each article
    entities = NER_spacy(dataset, amount_of_entities, char_length)
def load(
    tag: t.Union[str, Tag],
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
    vocab: t.Union["Vocab", bool] = True,  # type: ignore[reportUnknownParameterType]
    disable: t.Sequence[str] = tuple(),
    exclude: t.Sequence[str] = tuple(),
    config: t.Union[t.Dict[str, t.Any], "Config", None] = None,
) -> "spacy.language.Language":
    """
    Load a model from the BentoML local modelstore with the given name.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in the BentoML local modelstore.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.
        vocab (:code:`Union[spacy.vocab.Vocab, bool]`, `optional`, defaults to `True`):
            Optional vocab to pass in on initialization. If True, a new Vocab object will be created.
        disable (`Sequence[str]`, `optional`):
            Names of pipeline components to disable.
        exclude (`Sequence[str]`, `optional`):
            Names of pipeline components to exclude. Excluded components won't be loaded.
        config (:code:`Union[Dict[str, Any], spacy.Config]`, `optional`):
            Config overrides as nested dict or dict keyed by section values in dot notation.

    Returns:
        :obj:`spacy.language.Language`: an instance of :obj:`spacy.Language` from the BentoML modelstore.

    Examples:

    .. code-block:: python

        import bentoml

        model = bentoml.spacy.load('custom_roberta')
    """
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )
    if "projects_uri" in model.info.options:
        raise BentoMLException(
            "Cannot use `bentoml.spacy.load()` to load Spacy Projects. Use"
            " `bentoml.spacy.load_project()` instead."
        )

    required = model.info.options["pip_package"]
    try:
        _ = importlib.import_module(required)
    except ModuleNotFoundError:
        try:
            from spacy.cli.download import download

            # TODO: move this to runner on startup hook
            download(required)
        except (SystemExit, Exception):  # pylint: disable=broad-except
            logger.warning(
                f"{required} cannot be downloaded as pip package. If this"
                " is a custom pipeline there is nothing to worry about."
                " If this is a pretrained model provided by Explosion make"
                " sure that you save the correct package and model to BentoML"
                " via `bentoml.spacy.save()`"
            )
    try:
        # check whether the pipeline has additional requirements and whether all
        # related pip packages have been installed correctly.
        additional = model.info.options["additional_requirements"]
        not_existed = list()  # type: t.List[str]
        dists = packages_distributions()
        for module_name in additional:
            mod, _ = split_requirement(module_name)
            if mod not in dists:
                not_existed.append(module_name)
        if len(not_existed) > 0:
            raise MissingDependencyException(
                f"`{','.join(not_existed)}` is required by `{tag}`."
            )
    except KeyError:
        pass

    import spacy.util

    return spacy.util.load_model(
        model.path,
        vocab=vocab,
        disable=disable,
        exclude=exclude,
        config=config if config else {},
    )
import json
import ibm_watson
import db
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

INTENT_DOCTOR = 'Register_Agent'
INTENT_COMMUNITY = 'Register_Community'

model_name = "en_core_web_sm"

import spacy
import csv
from spacy.cli.download import download

download(model_name)

# from spacy.cli import link
# from spacy.util import get_package_path
# package_path = get_package_path(model_name)
# link(model_name, model_name, force=True, package_path=package_path)

nlp = spacy.load('en_core_web_sm')

import logging

logging.getLogger("scapy.runtime").setLevel(logging.ERROR)

replytext = 'empty msg'

with open('config.json') as config_file:
    config = json.load(config_file)

authenticator = IAMAuthenticator(config["ibm_assistant"]["iam_apikey"])
from flask import Flask
from chatterbot import ChatBot
from chatterbot.trainers import ChatterBotCorpusTrainer
import os

import spacy
from spacy.cli.download import download

download(model="en")

app = Flask(__name__)

chatterbot = ChatBot(
    "Jarvis",
    storage_adapter="chatterbot.storage.SQLStorageAdapter",
    read_only=False,
    database_uri=os.environ['DATABASE_URL'])

# To train with the default English corpus
# chatterbot.set_trainer(ChatterBotCorpusTrainer)
# chatterbot.train(
#     "chatterbot.corpus.english"
# )

# Create a new trainer for the chatbot
trainer = ChatterBotCorpusTrainer(chatterbot)

# Train the chatbot based on the english corpus
trainer.train(
    "chatterbot.corpus.english.ai",
    "chatterbot.corpus.english.botprofile",
    "chatterbot.corpus.english.computers",
    "chatterbot.corpus.english.conversations",