def get_config():
    """
    Get key configuration info about dev environment: OS, python, spacy, and textacy.

    Returns:
        dict: With keys "platform", "python", "spacy", "spacy_models" and
        "textacy". "spacy_models" holds the names of the directories (or
        symlinks) found directly inside spaCy's data path, skipping the
        "__cache__" and "__pycache__" entries. It is an empty list when
        spaCy reports no data path.
    """
    from spacy.about import __version__ as spacy_version
    from spacy.util import get_data_path

    from .about import __version__ as textacy_version

    # spaCy's get_data_path() can return None (e.g. when the data directory
    # is missing). Report "no models" rather than failing on None.iterdir().
    data_path = get_data_path()
    if data_path is None:
        spacy_models = []
    else:
        ignored = {"__cache__", "__pycache__"}
        spacy_models = [
            d.name
            for d in data_path.iterdir()
            if (d.is_dir() or d.is_symlink()) and d.name not in ignored
        ]

    return {
        "platform": sys.platform,
        "python": sys.version,
        "spacy": spacy_version,
        "spacy_models": spacy_models,
        "textacy": textacy_version,
    }
def model_installed(name):
    """Check if spaCy language model is installed.

    :param name: name of the entry to look for directly inside spaCy's data path
    :return: True if ``<data path>/<name>`` exists; False otherwise, including
        when spaCy reports no data path at all
    """
    data_path = util.get_data_path()
    # get_data_path() can return None (e.g. when the data directory is
    # missing). With no data directory there is nothing installed, and
    # `None / name` would raise TypeError.
    if data_path is None:
        return False
    return (data_path / name).exists()
def model_installed(name):
    """Check if spaCy language model is installed.

    :param name: model name; it is passed through ``resolve_model_name``
        before being looked up inside spaCy's data path
    :return: True if ``<data path>/<resolved name>`` exists; False otherwise,
        including when spaCy reports no data path at all
    """
    data_path = util.get_data_path()
    model_name = resolve_model_name(name)
    # get_data_path() can return None (e.g. when the data directory is
    # missing); treat that the same as "model directory not found".
    if data_path is None:
        return False
    # NOTE(review): an earlier version also evaluated
    # util.get_lang_class(name).lang here and discarded the result. That
    # lookup could only raise, which is surprising for a yes/no check, so it
    # was dropped - confirm no caller relied on the exception.
    return (data_path / model_name).exists()
def vocab():
    """Create an English vocab and look up a few words in it.

    The data location is taken from the ``SPACY_DATA`` environment variable
    when set, otherwise from ``util.get_data_path()``; either way it is passed
    through ``util.match_best_version('en', None, ...)`` first.

    :return: the vocab built by ``English.Defaults('en', path).Vocab()``
    """
    data_dir = os.environ.get('SPACY_DATA')
    if data_dir is None:
        data_dir = util.get_data_path()
    model_dir = util.match_best_version('en', None, data_dir)

    en_vocab = English.Defaults('en', model_dir).Vocab()

    # The lookups below discard their results; presumably they exist to make
    # sure these words are present in the vocab - TODO confirm.
    en_vocab['dog']
    # Round trip: string -> id -> lexeme must give back the same text.
    assert en_vocab[en_vocab.strings['dog']].orth_ == 'dog'
    for word in ('the', 'quick', 'jumped'):
        en_vocab[word]
    return en_vocab
def vocab():
    """Create an English vocab and look up a few words in it.

    :return: the vocab built by ``English.Defaults.create_vocab()``
    """
    data_dir = os.environ.get('SPACY_DATA')
    if data_dir is None:
        data_dir = util.get_data_path()
    # NOTE(review): the matched path is never used below; the call is kept
    # only so this function does exactly what it did before.
    util.match_best_version('en', None, data_dir)

    en_vocab = English.Defaults.create_vocab()

    # The lookups below discard their results; presumably they exist to make
    # sure these words are present in the vocab - TODO confirm.
    en_vocab['dog']
    # Round trip: string -> id -> lexeme must give back the same text.
    assert en_vocab[en_vocab.strings['dog']].orth_ == 'dog'
    for word in ('the', 'quick', 'jumped'):
        en_vocab[word]
    return en_vocab
def get_config():
    """Helper function to get relevant config info, especially when debugging.

    Returns:
        dict: With keys 'python', 'platform', 'textacy', 'spacy' and
        'spacy_models'. 'spacy_models' holds the names of the directories
        (or symlinks) found directly inside spaCy's data path, skipping the
        '__cache__' and '__pycache__' entries. It is an empty list when
        spaCy reports no data path.
    """
    from spacy.about import __version__ as spacy_version
    from spacy.util import get_data_path
    from textacy import __version__ as textacy_version

    ignored = {'__cache__', '__pycache__'}
    # spaCy's get_data_path() can return None (e.g. when the data directory
    # is missing). Fall back to "no entries" rather than failing on
    # None.iterdir().
    data_path = get_data_path()
    entries = data_path.iterdir() if data_path is not None else ()

    return {
        'python': sys.version,
        'platform': sys.platform,
        'textacy': textacy_version,
        'spacy': spacy_version,
        'spacy_models': [
            d.name
            for d in entries
            if (d.is_dir() or d.is_symlink()) and d.name not in ignored
        ],
    }
def model_installed(name):
    """Check if spaCy language model is installed.

    From https://github.com/explosion/spaCy/blob/master/spacy/util.py

    The checks run in this order and stop at the first hit:
    an entry called ``name`` inside spaCy's data path, then
    ``Spacy.is_package(name)``, then ``name`` as an existing filesystem path.

    :param name: model name, package name, or path to a model data directory
    :return: True if any of the checks succeeds, False otherwise
    :raises IOError: if spaCy's data path is unset or does not exist
    """
    data_path = util.get_data_path()
    if not (data_path and data_path.exists()):
        raise IOError(f"Can't find spaCy data path: {data_path}")

    entry_names = {entry.name for entry in data_path.iterdir()}
    return bool(
        name in entry_names  # shortcut link or directory in the data path
        or Spacy.is_package(name)  # installed as package
        or Path(name).exists()  # path to model data directory
    )
def model_installed(name):
    """Check if spaCy language model is installed.

    From https://github.com/explosion/spaCy/blob/master/spacy/util.py

    :param name: model name, package name, or path to a model data directory
    :return: True if ``name`` is an entry in spaCy's data path, if
        ``SpacyTokenizer.is_package(name)`` is truthy, or if ``name`` is an
        existing filesystem path; False otherwise
    :raises IOError: if spaCy's data path is unset or does not exist
    """
    data_path = util.get_data_path()
    data_path_ok = data_path and data_path.exists()
    if not data_path_ok:
        raise IOError("Can't find spaCy data path: %s" % str(data_path))

    entry_names = {entry.name for entry in data_path.iterdir()}
    # First the data directory, then "installed as package".
    if name in entry_names or SpacyTokenizer.is_package(name):
        return True
    # Last resort: `name` is itself a path to a model data directory.
    return Path(name).exists()
def model_installed(name: str):
    """Check if spaCy language model is installed.

    From https://github.com/explosion/spaCy/blob/master/spacy/util.py

    Arguments:
        name {str} -- Name of package, or path to a model data directory

    Returns:
        [bool] -- True if ``name`` is an entry in spaCy's data path, if
        ``SpacyAnnotator.is_package(name)`` is truthy, or if ``name`` is an
        existing filesystem path. False otherwise.

    Raises:
        IOError -- if spaCy's data path is unset or does not exist.
    """
    data_path = util.get_data_path()
    if not (data_path and data_path.exists()):
        raise IOError("Can't find spaCy data path: %s" % str(data_path))

    installed = {child.name for child in data_path.iterdir()}
    if name in installed:
        return True
    elif SpacyAnnotator.is_package(name):
        # installed as package
        return True
    # path to model data directory
    return Path(name).exists()
def path():
    """Return ``util.match_best_version('en', None, <data dir>)``.

    ``<data dir>`` is the value of the ``SPACY_DATA`` environment variable
    when it is set, otherwise the result of ``util.get_data_path()``.
    """
    # Evaluated unconditionally, exactly as a dict.get() default would be.
    fallback_dir = util.get_data_path()
    data_dir = os.environ.get('SPACY_DATA', fallback_dir)
    return util.match_best_version('en', None, data_dir)
def path():
    """Return the location of the English model data.

    When the ``SPACY_DATA`` environment variable is set, its value is returned
    as a ``pathlib.Path`` without further checks. Otherwise the result of
    ``util.match_best_version('en', None, util.get_data_path())`` is returned.
    """
    env_dir = os.environ.get('SPACY_DATA')
    if env_dir is not None:
        return pathlib.Path(env_dir)
    return util.match_best_version('en', None, util.get_data_path())
from spacy.util import get_data_path

# Scratch script: show the contents of spaCy's data directory four ways.
data_path = get_data_path()

# Every entry, as a full path.
print(list(data_path.iterdir()))
# Directories only, as full paths.
print([entry for entry in data_path.iterdir() if entry.is_dir()])
# Every entry, final path component only.
print([entry.name for entry in data_path.iterdir()])
# Directories only, final path component only.
print([entry.name for entry in data_path.iterdir() if entry.is_dir()])