def from_path(cls, path, **shared):
    """Builds a :class:`CRFSlotFiller` instance from a persisted directory

    The directory at the given path must have been generated using
    :func:`~CRFSlotFiller.persist`

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "slot_filler.json" file
        **shared: keyword arguments forwarded to the class constructor

    Raises:
        LoadingError: when "slot_filler.json" is not found in the directory
    """
    root_dir = Path(path)
    json_path = root_dir / "slot_filler.json"
    if not json_path.exists():
        raise LoadingError("Missing slot filler model file: %s"
                           % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        persisted = json.load(json_file)

    config = cls.config_type.from_dict(persisted["config"])
    loaded = cls(config=config, **shared)
    loaded.language = persisted["language_code"]
    loaded.intent = persisted["intent"]
    loaded.slot_name_mapping = persisted["slot_name_mapping"]

    # The CRF model file is optional: without it the instance is returned
    # with whatever crf_model the constructor left in place
    crf_file_name = persisted["crf_model_file"]
    if crf_file_name is None:
        return loaded

    loaded.crf_model = _crf_model_from_path(root_dir / crf_file_name)
    return loaded
def from_path(cls, path, **shared):
    """Builds a vectorizer instance from a persisted directory

    Reads "vectorizer.json" in the given directory and restores the
    language, the builtin entity scope and the word pairs from it.

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "vectorizer.json" file
        **shared: keyword arguments forwarded to the class constructor

    Raises:
        LoadingError: when "vectorizer.json" is not found in the directory
    """
    root_dir = Path(path)
    json_path = root_dir / "vectorizer.json"
    if not json_path.exists():
        raise LoadingError("Missing vectorizer model file: %s"
                           % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        persisted = json.load(json_file)

    vectorizer = cls(persisted.pop("config"), **shared)
    vectorizer._language = persisted["language_code"]
    vectorizer._word_pairs = None

    # JSON stores the scope as a list (or null); it is restored as a set
    scope = persisted["builtin_entity_scope"]
    vectorizer.builtin_entity_scope = None if scope is None else set(scope)

    # Keys of the persisted mapping are converted to int indices and the
    # values (JSON lists) to tuples, swapping their roles
    raw_pairs = persisted["word_pairs"]
    if raw_pairs:
        vectorizer._word_pairs = {
            tuple(pair): int(index)
            for index, pair in iteritems(raw_pairs)
        }
    return vectorizer
def from_path(cls, path, **shared):
    """Builds a featurizer instance from a persisted directory

    Reads "featurizer.json" in the given directory, then loads the TF-IDF
    and co-occurrence vectorizers from the sub-paths it references, when
    they are set.

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "featurizer.json" file
        **shared: keyword arguments forwarded to the class constructor and
            to the vectorizer loaders

    Raises:
        LoadingError: when "featurizer.json" is not found in the directory
    """
    root_dir = Path(path)
    json_path = root_dir / "featurizer.json"
    if not json_path.exists():
        raise LoadingError("Missing featurizer model file: %s"
                           % json_path.name)

    with json_path.open("r", encoding="utf-8") as json_file:
        persisted = json.load(json_file)

    loaded = cls(persisted["config"], **shared)
    loaded.language = persisted["language_code"]

    # A falsy entry means there is nothing to load: the raw value is kept
    tfidf_entry = persisted["tfidf_vectorizer"]
    if tfidf_entry:
        loaded.tfidf_vectorizer = TfidfVectorizer.from_path(
            root_dir / tfidf_entry, **shared)
    else:
        loaded.tfidf_vectorizer = tfidf_entry

    cooccurrence_entry = persisted["cooccurrence_vectorizer"]
    if cooccurrence_entry:
        loaded.cooccurrence_vectorizer = CooccurrenceVectorizer.from_path(
            root_dir / cooccurrence_entry, **shared)
    else:
        loaded.cooccurrence_vectorizer = cooccurrence_entry

    return loaded
def load_from_path(cls, unit_path, unit_name=None, **shared):
    """Load a :class:`ProcessingUnit` from a persisted processing unit
    directory

    The concrete unit class is resolved with ``cls.by_name`` and its
    ``from_path`` method performs the actual loading.

    Args:
        unit_path (str or :class:`pathlib.Path`): path to the persisted
            processing unit
        unit_name (str, optional): Name of the processing unit to load.
            When omitted, the name is read from the "unit_name" entry of
            the "metadata.json" file located in the directory at unit_path.
        **shared: keyword arguments forwarded to the unit's ``from_path``

    Raises:
        LoadingError: when unit_name is None and no metadata file is found
            in the processing unit directory
    """
    unit_dir = Path(unit_path)

    if unit_name is None:
        metadata_file = unit_dir / "metadata.json"
        if not metadata_file.exists():
            raise LoadingError(
                "Missing metadata for processing unit at path %s"
                % str(unit_dir))
        with metadata_file.open(encoding="utf8") as stream:
            unit_name = json.load(stream)["unit_name"]

    unit_cls = cls.by_name(unit_name)
    return unit_cls.from_path(unit_dir, **shared)
def from_path(cls, path, **shared):
    """Builds a :class:`SnipsNLUEngine` instance from a directory path

    The data at the given path must have been generated using
    :func:`~SnipsNLUEngine.persist`. Resources and entity parsers which
    are not already provided through ``shared`` are loaded from the
    directory when the persisted model references them.

    Args:
        path (str): The path where the nlu engine is stored
        **shared: shared objects forwarded to the engine constructor and
            to each intent parser loader

    Raises:
        LoadingError: when the "nlu_engine.json" file is missing
        IncompatibleModelError: when trying to load an engine model whose
            version is absent or differs from the current model version
            of the lib
    """
    engine_dir = Path(path)
    json_path = engine_dir / "nlu_engine.json"
    if not json_path.exists():
        raise LoadingError("Missing nlu engine model file: %s"
                           % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        model = json.load(json_file)

    model_version = model.get("model_version")
    if model_version is None or model_version != __model_version__:
        raise IncompatibleModelError(model_version)

    dataset_metadata = model["dataset_metadata"]

    # Resources are only read from disk when the caller did not supply them
    # and the directory actually ships them for the dataset language
    if shared.get(RESOURCES) is None and dataset_metadata is not None:
        resources_dir = (
            engine_dir / "resources" / dataset_metadata["language_code"])
        if resources_dir.is_dir():
            shared[RESOURCES] = load_resources_from_dir(resources_dir)

    # Same policy for both entity parsers: a parser given by the caller
    # wins, otherwise it is loaded from the path stored in the model
    entity_parsers = (
        (BUILTIN_ENTITY_PARSER, "builtin_entity_parser",
         BuiltinEntityParser),
        (CUSTOM_ENTITY_PARSER, "custom_entity_parser", CustomEntityParser),
    )
    for shared_key, model_key, parser_cls in entity_parsers:
        if shared.get(shared_key) is not None:
            continue
        relative_path = model[model_key]
        if relative_path is not None:
            shared[shared_key] = parser_cls.from_path(
                engine_dir / relative_path)

    config = cls.config_type.from_dict(model["config"])
    nlu_engine = cls(config=config, **shared)
    nlu_engine.dataset_metadata = dataset_metadata

    # Persisted parser names and parser configs are matched by position
    loaded_parsers = []
    for position, parser_dir_name in enumerate(model["intent_parsers"]):
        unit_config = config.intent_parsers_configs[position]
        loaded_parsers.append(
            IntentParser.load_from_path(
                engine_dir / parser_dir_name, unit_config.unit_name,
                **shared))
    nlu_engine.intent_parsers = loaded_parsers
    return nlu_engine
def from_path(cls, path, **shared):
    """Builds a :class:`DeterministicIntentParser` instance from a path

    The data at the given path must have been generated using
    :func:`~DeterministicIntentParser.persist`

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "intent_parser.json" file
        **shared: keyword arguments forwarded to ``cls.from_dict``

    Raises:
        LoadingError: when "intent_parser.json" is not found in the
            directory
    """
    json_path = Path(path) / "intent_parser.json"
    if not json_path.exists():
        raise LoadingError(
            "Missing deterministic intent parser metadata file: %s"
            % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        parser_dict = json.load(json_file)

    return cls.from_dict(parser_dict, **shared)
def from_path(cls, path, **shared):
    """Builds a TF-IDF vectorizer instance from a persisted directory

    Reads "vectorizer.json" in the given directory and, when it holds a
    persisted sklearn state, rebuilds the underlying sklearn vectorizer
    from its vocabulary and idf diagonal.

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "vectorizer.json" file
        **shared: keyword arguments forwarded to the class constructor

    Raises:
        LoadingError: when "vectorizer.json" is not found in the directory
    """
    import numpy as np
    import scipy.sparse as sp
    from sklearn.feature_extraction.text import (
        TfidfTransformer, TfidfVectorizer as SklearnTfidfVectorizer)

    root_dir = Path(path)
    json_path = root_dir / "vectorizer.json"
    if not json_path.exists():
        raise LoadingError("Missing vectorizer model file: %s"
                           % json_path.name)

    with json_path.open("r", encoding="utf-8") as json_file:
        persisted = json.load(json_file)

    loaded = cls(persisted["config"], **shared)
    loaded._language = persisted["language_code"]

    # JSON stores the scope as a list (or null); it is restored as a set
    scope = persisted["builtin_entity_scope"]
    loaded.builtin_entity_scope = None if scope is None else set(scope)

    # When no sklearn state was persisted, the raw falsy value is stored
    sklearn_state = persisted["vectorizer"]
    sklearn_vectorizer = sklearn_state
    if sklearn_state:
        vocabulary = sklearn_state["vocab"]
        idf_values = np.array(sklearn_state["idf_diag"])

        # Rebuild the square sparse matrix carrying the idf values on its
        # diagonal
        size = len(idf_values)
        diagonal = list(range(size))
        idf_matrix = sp.csr_matrix(
            (idf_values, (diagonal, diagonal)), shape=(size, size))

        transformer = TfidfTransformer()
        transformer._idf_diag = idf_matrix

        # The tokenizer reads the language from the loaded instance each
        # time it is called
        sklearn_vectorizer = SklearnTfidfVectorizer(
            tokenizer=lambda x: tokenize_light(x, loaded._language))
        sklearn_vectorizer.vocabulary_ = vocabulary
        sklearn_vectorizer._tfidf = transformer

    loaded._tfidf_vectorizer = sklearn_vectorizer
    return loaded
def from_path(cls, path, **shared):
    """Builds a :class:`LogRegIntentClassifier` instance from a path

    The data at the given path must have been generated using
    :func:`~LogRegIntentClassifier.persist`

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "intent_classifier.json" file
        **shared: keyword arguments forwarded to the class constructor and
            to the featurizer loader

    Raises:
        LoadingError: when "intent_classifier.json" is not found in the
            directory
    """
    import numpy as np
    from sklearn.linear_model import SGDClassifier

    root_dir = Path(path)
    json_path = root_dir / "intent_classifier.json"
    if not json_path.exists():
        raise LoadingError("Missing intent classifier model file: %s"
                           % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        persisted = json.load(json_file)

    # Instantiate the classifier itself
    classifier_config = LogRegIntentClassifierConfig.from_dict(
        persisted["config"])
    loaded = cls(config=classifier_config, **shared)
    loaded.intent_list = persisted['intent_list']

    # Restore the underlying SGD classifier, which requires both the
    # coefficients and the intercept to have been persisted
    coeffs = persisted['coeffs']
    intercept = persisted['intercept']
    t_ = persisted["t_"]
    if coeffs is None or intercept is None:
        loaded.classifier = None
    else:
        sgd = SGDClassifier(**LOG_REG_ARGS)
        sgd.coef_ = np.array(coeffs)
        sgd.intercept_ = np.array(intercept)
        sgd.t_ = t_
        loaded.classifier = sgd

    # Attach the featurizer when one was persisted
    featurizer_dir_name = persisted['featurizer']
    if featurizer_dir_name is not None:
        loaded.featurizer = Featurizer.from_path(
            root_dir / featurizer_dir_name, **shared)

    return loaded
def from_path(cls, path, **shared):
    """Builds a :class:`ProbabilisticIntentParser` instance from a path

    The data at the given path must have been generated using
    :func:`~ProbabilisticIntentParser.persist`

    Args:
        path (str or :class:`pathlib.Path`): directory which contains the
            "intent_parser.json" file
        **shared: keyword arguments forwarded to the class constructor and
            to the intent classifier and slot filler loaders

    Raises:
        LoadingError: when "intent_parser.json" is not found in the
            directory
    """
    root_dir = Path(path)
    json_path = root_dir / "intent_parser.json"
    if not json_path.exists():
        raise LoadingError(
            "Missing probabilistic intent parser model file: %s"
            % json_path.name)

    with json_path.open(encoding="utf8") as json_file:
        persisted = json.load(json_file)

    config = cls.config_type.from_dict(persisted["config"])
    parser = cls(config=config, **shared)

    # The classifier is optional: it is only loaded when its directory
    # exists next to the model file
    loaded_classifier = None
    classifier_dir = root_dir / "intent_classifier"
    if classifier_dir.exists():
        loaded_classifier = IntentClassifier.load_from_path(
            classifier_dir, config.intent_classifier_config.unit_name,
            **shared)

    # Every slot filler shares the unit name from the parser config
    filler_unit_name = config.slot_filler_config.unit_name
    fillers_by_intent = {}
    for filler_entry in persisted["slot_fillers"]:
        intent_name = filler_entry["intent"]
        filler_dir = root_dir / filler_entry["slot_filler_name"]
        fillers_by_intent[intent_name] = SlotFiller.load_from_path(
            filler_dir, filler_unit_name, **shared)

    parser.intent_classifier = loaded_classifier
    parser.slot_fillers = fillers_by_intent
    return parser