Example #1
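An async server endpoint that trains an NLU model from the request payload, packages it as a tar.gz, loads it into the data router, and returns the model file, mapping failures to structured error responses.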
    async def train(request):
        # If a model name was passed, use it instead of generating one
        model_name = request.args.get("model", None)

        try:
            model_config, data_dict = extract_data_and_config(request)
        except Exception as e:
            logger.debug(traceback.format_exc())
            raise ErrorResponse(
                500,
                "ServerError",
                "An unexpected error occurred.",
                details={"error": str(e)},
            )

        data_file = dump_to_data_file(data_dict)
        config_file = dump_to_data_file(model_config, "_config")

        try:
            path_to_model = await data_router.start_train_process(
                data_file, RasaNLUModelConfig(model_config), model_name)

            # store trained model as tar.gz file
            output_path = create_model_path(model_name, path_to_model)

            nlu_data = data.get_nlu_directory(data_file)
            new_fingerprint = model.model_fingerprint(config_file,
                                                      nlu_data=nlu_data)
            model.create_package_rasa(path_to_model, output_path,
                                      new_fingerprint)
            logger.info("Rasa NLU model trained and persisted to '{}'.".format(
                output_path))

            await data_router.load_model(output_path)

            return await response.file(output_path)
        except MaxWorkerProcessError as e:
            raise ErrorResponse(
                403,
                "NoFreeProcess",
                "No process available for training.",
                details={"error": str(e)},
            )
        except InvalidModelError as e:
            raise ErrorResponse(
                404,
                "ModelNotFound",
                "Model '{}' not found.".format(model_name),
                details={"error": str(e)},
            )
        except TrainingException as e:
            logger.debug(traceback.format_exc())
            raise ErrorResponse(
                500,
                "ServerError",
                "An unexpected error occurred.",
                details={"error": str(e)},
            )
Example #2
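A test that trains CountVectorsFeaturizer on a single tokenized message and checks the sparse sequence and sentence features produced for the TEXT, RESPONSE, and INTENT attributes.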
def test_count_vectors_featurizer_train():
    featurizer = CountVectorsFeaturizer.create({}, RasaNLUModelConfig())

    sentence = "Hey how are you today ?"
    message = Message(data={TEXT: sentence})
    message.set(RESPONSE, sentence)
    message.set(INTENT, "intent")
    WhitespaceTokenizer().train(TrainingData([message]))

    featurizer.train(TrainingData([message]), RasaNLUModelConfig())

    expected = np.array([0, 1, 0, 0, 0])
    expected_cls = np.array([1, 1, 1, 1, 1])

    seq_vec, sen_vec = message.get_sparse_features(TEXT, [])
    if seq_vec:
        seq_vec = seq_vec.features
    if sen_vec:
        sen_vec = sen_vec.features

    assert (5, 5) == seq_vec.shape
    assert (1, 5) == sen_vec.shape
    assert np.all(seq_vec.toarray()[0] == expected)
    assert np.all(sen_vec.toarray()[-1] == expected_cls)

    seq_vec, sen_vec = message.get_sparse_features(RESPONSE, [])
    if seq_vec:
        seq_vec = seq_vec.features
    if sen_vec:
        sen_vec = sen_vec.features

    assert (5, 5) == seq_vec.shape
    assert (1, 5) == sen_vec.shape
    assert np.all(seq_vec.toarray()[0] == expected)
    assert np.all(sen_vec.toarray()[-1] == expected_cls)

    seq_vec, sen_vec = message.get_sparse_features(INTENT, [])
    if seq_vec:
        seq_vec = seq_vec.features
    if sen_vec:
        sen_vec = sen_vec.features

    assert sen_vec is None
    assert (1, 1) == seq_vec.shape
    assert np.all(seq_vec.toarray()[0] == np.array([1]))
Example #3
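A fixture that builds an interpreter whose pipeline contains only DucklingHTTPExtractor.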
def duckling_interpreter(component_builder, tmpdir_factory):
    conf = RasaNLUModelConfig(
        {"pipeline": [{"name": "DucklingHTTPExtractor"}]}
    )
    return utilities.interpreter_for(
        component_builder,
        data="./data/examples/rasa/demo-rasa.json",
        path=tmpdir_factory.mktemp("projects").strpath,
        config=conf)
Example #4
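A test that SpacyFeaturizer reproduces the token vectors of the underlying spaCy doc.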
def test_spacy_featurizer(sentence, spacy_nlp):

    ftr = SpacyFeaturizer.create({}, RasaNLUModelConfig())

    doc = spacy_nlp(sentence)
    vecs = ftr._features_for_doc(doc)
    expected = [t.vector for t in doc]

    assert np.allclose(vecs, expected, atol=1e-5)
Example #5
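A ComponentBuilder method that instantiates a component class through the cached create_component path by wrapping the keyword arguments in a RasaNLUModelConfig.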
    def create_component_from_class(self, component_class: Type[C],
                                    **cfg: Any) -> C:
        """Create a component based on a class and a configuration.

        Mainly used to make use of caching when instantiating component classes."""

        component_config = {"name": component_class.name}

        return self.create_component(component_config, RasaNLUModelConfig(cfg))
Example #6
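A cross-validation test over a DIETClassifier and ResponseSelector pipeline, asserting that per-fold accuracy, precision, and F1 scores are reported for intents, response selection, and entities.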
def test_run_cv_evaluation_with_response_selector():
    training_data_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa.md")
    training_data_responses_obj = training_data.load_data(
        "data/examples/rasa/demo-rasa-responses.md")
    training_data_obj = training_data_obj.merge(training_data_responses_obj)
    training_data_obj.fill_response_phrases()

    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 2},
                {"name": "ResponseSelector", EPOCHS: 2},
            ],
        }
    )

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        training_data_obj,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert len(response_selection_results.train["Accuracy"]) == n_folds
    assert len(response_selection_results.train["Precision"]) == n_folds
    assert len(response_selection_results.train["F1-score"]) == n_folds
    assert len(response_selection_results.test["Accuracy"]) == n_folds
    assert len(response_selection_results.test["Precision"]) == n_folds
    assert len(response_selection_results.test["F1-score"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.train["DIETClassifier"]["F1-score"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["Accuracy"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["Precision"]) == n_folds
    assert len(entity_results.test["DIETClassifier"]["F1-score"]) == n_folds
Example #7
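A test that persists a pipeline without training it and verifies the interpreter still loads and parses.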
def test_load_and_persist_without_train(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    persistor = create_persistor(_config)
    persisted_path = trainer.persist(tmpdir.strpath, persistor)
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example #8
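A test that WhitespaceTokenizer raises UnsupportedLanguageError for languages it does not support.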
def test_whitespace_language_support(language, error, component_builder):
    config = RasaNLUModelConfig(
        {"language": language, "pipeline": [{"name": "WhitespaceTokenizer"}]}
    )

    if error:
        with pytest.raises(UnsupportedLanguageError):
            component_builder.create_component({"name": "WhitespaceTokenizer"}, config)
    else:
        component_builder.create_component({"name": "WhitespaceTokenizer"}, config)
Example #9
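A helper, apparently from an older Rasa version (note the project_name argument), that trains a keyword pipeline on the default data and returns the persisted model path.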
def trained_nlu_model():
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    model_path = trainer.persist("test_models",
                                 project_name="test_model_keyword")

    return model_path
Example #10
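Another version of the load-and-persist-without-train test from Example #7; this one persists without a custom persistor.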
def test_load_and_persist_without_train(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})

    trainer = Trainer(_config, component_builder)
    persisted_path = trainer.persist(tmpdir.strpath)

    loaded = Interpreter.load(persisted_path, component_builder)

    assert loaded.pipeline
    assert loaded.parse("Rasa is great!") is not None
Example #11
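A variant of Example #4 that creates the featurizer with the return_sequence option enabled.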
def test_spacy_featurizer(sentence, spacy_nlp):
    from rasa.nlu.featurizers.dense_featurizer.spacy_featurizer import SpacyFeaturizer

    ftr = SpacyFeaturizer.create({"return_sequence": True},
                                 RasaNLUModelConfig())

    doc = spacy_nlp(sentence)
    vecs = ftr._features_for_doc(doc)
    expected = [t.vector for t in doc]
    assert np.allclose(vecs, expected, atol=1e-5)
Example #12
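A test that compares the first five dimensions of MitieFeaturizer token vectors against expected values.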
def test_mitie_featurizer(mitie_feature_extractor, default_config):
    from rasa.nlu.featurizers.mitie_featurizer import MitieFeaturizer

    mitie_component_config = {"name": "MitieFeaturizer"}
    ftr = MitieFeaturizer.create(mitie_component_config, RasaNLUModelConfig())
    sentence = "Hey how are you today"
    tokens = MitieTokenizer().tokenize(sentence)
    vecs = ftr.features_for_tokens(tokens, mitie_feature_extractor)
    expected = np.array([0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(vecs[:5], expected, atol=1e-5)
Example #13
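A test that trains a pipeline on empty TrainingData, persists it under a project name, and verifies the loaded interpreter still parses.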
def test_train_with_empty_data(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    trainer = Trainer(_config, component_builder)
    trainer.train(TrainingData())
    persistor = create_persistor(_config)
    persisted_path = trainer.persist(tmpdir.strpath, persistor,
                                     project_name="my_project")
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example #14
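A cross-validation test that mocks Trainer.train, then checks the per-fold intent metrics and the structure of the intent and entity evaluation reports.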
def test_run_cv_evaluation(
        pretrained_embeddings_spacy_config: RasaNLUModelConfig,
        monkeypatch: MonkeyPatch):
    td = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.json")

    nlu_config = RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 2},
            ],
        }
    )

    # mock training
    trainer = Trainer(nlu_config)
    trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
    mock = Mock(return_value=Interpreter(trainer.pipeline, None))
    monkeypatch.setattr(Trainer, "train", mock)

    n_folds = 2
    intent_results, entity_results, response_selection_results = cross_validate(
        td,
        n_folds,
        nlu_config,
        successes=False,
        errors=False,
        disable_plotting=True,
        report_as_dict=True,
    )

    assert len(intent_results.train["Accuracy"]) == n_folds
    assert len(intent_results.train["Precision"]) == n_folds
    assert len(intent_results.train["F1-score"]) == n_folds
    assert len(intent_results.test["Accuracy"]) == n_folds
    assert len(intent_results.test["Precision"]) == n_folds
    assert len(intent_results.test["F1-score"]) == n_folds
    assert all(key in intent_results.evaluation
               for key in ["errors", "report"])
    assert any(
        isinstance(intent_report, dict)
        and intent_report.get("confused_with") is not None
        for intent_report in intent_results.evaluation["report"].values())
    for extractor_evaluation in entity_results.evaluation.values():
        assert all(key in extractor_evaluation for key in ["errors", "report"])
Example #15
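A config factory for a ConveRT-based pipeline with a fixed random seed for the DIETClassifier.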
def pretrained_embeddings_convert_config() -> RasaNLUModelConfig:
    return RasaNLUModelConfig(
        {
            "language": "en",
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "ConveRTFeaturizer"},
                {"name": "DIETClassifier", EPOCHS: 1, RANDOM_SEED: 42},
            ],
        }
    )
Example #16
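A test that trains ConveRTFeaturizer against a patched (restricted-access) model URL and checks the dense features produced for TEXT and RESPONSE, but not INTENT.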
def test_convert_featurizer_train(monkeypatch: MonkeyPatch):
    tokenizer = WhitespaceTokenizer()

    monkeypatch.setattr(ConveRTFeaturizer, "_get_validated_model_url",
                        lambda x: RESTRICTED_ACCESS_URL)
    component_config = {
        "name": "ConveRTFeaturizer",
        "model_url": RESTRICTED_ACCESS_URL
    }
    featurizer = ConveRTFeaturizer(component_config)

    sentence = "Hey how are you today ?"
    message = Message(data={TEXT: sentence})
    message.set(RESPONSE, sentence)

    td = TrainingData([message])
    tokenizer.train(td)

    tokens = featurizer.tokenize(message, attribute=TEXT)

    message.set(TOKENS_NAMES[TEXT], tokens)
    message.set(TOKENS_NAMES[RESPONSE], tokens)

    featurizer.train(TrainingData([message]),
                     RasaNLUModelConfig(),
                     tf_hub_module=featurizer.module)

    expected = np.array(
        [2.2636216, -0.26475656, -1.1358104, -0.49751878, -1.3946456])
    expected_cls = np.array(
        [1.0251294, -0.04053932, -0.7018805, -0.82054937, -0.75054353])

    seq_vecs, sent_vecs = message.get_dense_features(TEXT, [])

    seq_vecs = seq_vecs.features
    sent_vecs = sent_vecs.features

    assert len(tokens) == len(seq_vecs)
    assert np.allclose(seq_vecs[0][:5], expected, atol=1e-5)
    assert np.allclose(sent_vecs[-1][:5], expected_cls, atol=1e-5)

    seq_vecs, sent_vecs = message.get_dense_features(RESPONSE, [])

    seq_vecs = seq_vecs.features
    sent_vecs = sent_vecs.features

    assert len(tokens) == len(seq_vecs)
    assert np.allclose(seq_vecs[0][:5], expected, atol=1e-5)
    assert np.allclose(sent_vecs[-1][:5], expected_cls, atol=1e-5)

    seq_vecs, sent_vecs = message.get_dense_features(INTENT, [])

    assert seq_vecs is None
    assert sent_vecs is None
Example #17
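A test that trains on data without entities and verifies the persisted model loads and parses.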
def test_train_model_noents(language, pipeline, component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    (trained, _, persisted_path) = train(
        _config,
        path=tmpdir.strpath,
        data="./data/test/demo-rasa-noents.json",
        component_builder=component_builder)
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example #18
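A test that an unknown key in a component config raises a UserWarning during training.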
async def test_validate_component_keys_raises_warning_on_invalid_key(tmp_path: Path):
    _config = RasaNLUModelConfig(
        # config with a component that does not have a `confidence_threshold` property
        {"pipeline": [{"name": "WhitespaceTokenizer", "confidence_threshold": 0.7}]}
    )

    with pytest.warns(UserWarning) as record:
        await train(
            _config, data=DEFAULT_DATA_PATH, path=str(tmp_path),
        )

    assert "You have provided an invalid key" in record[0].message.args[0]
Example #19
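A test that trains a ResponseSelector with model checkpointing enabled and verifies that the checkpointed model files are written.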
async def test_train_model_checkpointing(
    component_builder: ComponentBuilder, tmpdir: Path
):
    model_name = "rs-checkpointed-model"
    best_model_file = Path(str(tmpdir), model_name)
    assert not best_model_file.exists()

    _config = RasaNLUModelConfig(
        {
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {
                    "name": "CountVectorsFeaturizer",
                    "analyzer": "char_wb",
                    "min_ngram": 3,
                    "max_ngram": 17,
                    "max_features": 10,
                    "min_df": 5,
                },
                {
                    "name": "ResponseSelector",
                    EPOCHS: 5,
                    MODEL_CONFIDENCE: "linear_norm",
                    CONSTRAIN_SIMILARITIES: True,
                    CHECKPOINT_MODEL: True,
                },
            ],
            "language": "en",
        }
    )

    await rasa.nlu.train.train(
        _config,
        path=str(tmpdir),
        data="data/test_selectors",
        component_builder=component_builder,
        fixed_model_name=model_name,
    )

    assert best_model_file.exists()

    """
    Tricky to validate the *exact* number of files that should be there, however there
    must be at least the following:
        - metadata.json
        - checkpoint
        - component_1_CountVectorsFeaturizer (as per the pipeline above)
        - component_2_ResponseSelector files (more than 1 file)
    """
    all_files = list(best_model_file.rglob("*.*"))
    assert len(all_files) > 4
Example #20
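A test that a pipeline component without a name raises InvalidConfigException during training.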
async def test_validate_requirements_raises_exception_on_component_without_name(
    tmp_path: Path,
):
    _config = RasaNLUModelConfig(
        # config with a component that does not have a `name` property
        {"pipeline": [{"parameter": 4}]}
    )

    with pytest.raises(InvalidConfigException):
        await train(
            _config, data=DEFAULT_DATA_PATH, path=str(tmp_path),
        )
Example #21
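An engine-training routine from an assistant project: it generates stories, trains Rasa NLU, then trains and persists a Rasa Core policy ensemble and domain.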
    async def train(self):
        """Train the engine.
        """
        nltk.download('punkt')
        lang = self.config['language']
        if not os.path.exists('data/' + self.config['skill-id']):
            _LOGGER.info("Starting Skill training.")
            _LOGGER.info("Generating stories.")
            data, domain_data, stories = await GenerateStories.run(
                self.config['skill-id'], self.config['language'], self.asm)
            training_data = TrainingData(training_examples=data)
            nlu_config = RasaNLUModelConfig({
                "language": lang,
                "pipeline": self.config['pipeline'],
                "data": None
            })

            trainer = Trainer(nlu_config, None, True)
            _LOGGER.info("Training Arcus NLU")
            trainer.train(training_data)
            trainer.persist("data/" + self.config['skill-id'], None, 'nlu')

            # Rasa core
            domain = Domain.from_dict(domain_data)

            reader = StoryFileReader(domain, RegexInterpreter(), None, False)
            story_steps = await reader.process_lines(stories)
            graph = StoryGraph(story_steps)

            g = TrainingDataGenerator(
                graph,
                domain,
                remove_duplicates=True,
                unique_last_num_states=None,
                augmentation_factor=20,
                tracker_limit=None,
                use_story_concatenation=True,
                debug_plots=False,
            )

            training_trackers = g.generate()
            policy_list = SimplePolicyEnsemble.from_dict(
                {"policies": self.config['policies']})
            policy_ensemble = SimplePolicyEnsemble(policy_list)

            _LOGGER.info("Training Arcus Core")
            policy_ensemble.train(training_trackers, domain)
            policy_ensemble.persist(
                "data/" + self.config['skill-id'] + "/core", False)
            domain.persist("data/" + self.config['skill-id'] + "/core/model")
            domain.persist_specification("data/" + self.config['skill-id'] +
                                         "/core")
Example #22
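A test that trains a ResponseSelector pipeline on data with responses and inspects the response_selector section of a parsed message.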
def test_train_selector(pipeline, component_builder, tmpdir):
    # use data that include some responses
    training_data = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa.md"
    )
    training_data_responses = rasa.shared.nlu.training_data.loading.load_data(
        "data/examples/rasa/demo-rasa-responses.md"
    )
    training_data = training_data.merge(training_data_responses)

    nlu_config = RasaNLUModelConfig({"language": "en", "pipeline": pipeline})

    trainer = Trainer(nlu_config)
    trainer.train(training_data)

    persisted_path = trainer.persist(tmpdir)

    assert trainer.pipeline

    loaded = Interpreter.load(persisted_path, component_builder)
    parsed = loaded.parse("hello")

    assert loaded.pipeline
    assert parsed is not None
    assert (parsed.get("response_selector").get("all_retrieval_intents")) == [
        "chitchat"
    ]
    assert (
        parsed.get("response_selector")
        .get("default")
        .get("response")
        .get("intent_response_key")
    ) is not None
    assert (
        parsed.get("response_selector")
        .get("default")
        .get("response")
        .get("template_name")
    ) is not None
    assert (
        parsed.get("response_selector")
        .get("default")
        .get("response")
        .get("response_templates")
    ) is not None

    ranking = parsed.get("response_selector").get("default").get("ranking")
    assert ranking is not None

    for rank in ranking:
        assert rank.get("confidence") is not None
        assert rank.get("intent_response_key") is not None
Example #23
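A validation helper that instantiates every pipeline component from a bot's config.yml content to surface invalid entries.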
    def validate_rasa_config(config: Dict):
        """
        validates bot config.yml content for invalid entries
        :param config: configuration
        :return: None
        """
        rasa_config = RasaNLUModelConfig(config)
        component_builder = ComponentBuilder()
        for i in range(len(rasa_config.pipeline)):
            component_cfg = rasa_config.for_component(i)
            component_builder.create_component(component_cfg, rasa_config)

        configuration.load(config)
Example #24
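A test that trains MitieFeaturizer on a single message and checks its dense features for TEXT, RESPONSE, and INTENT.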
def test_mitie_featurizer_train(mitie_feature_extractor):

    featurizer = MitieFeaturizer.create({}, RasaNLUModelConfig())

    sentence = "Hey how are you today"
    message = Message(sentence)
    message.set(RESPONSE, sentence)
    message.set(INTENT, "intent")
    MitieTokenizer().train(TrainingData([message]))

    featurizer.train(
        TrainingData([message]),
        RasaNLUModelConfig(),
        mitie_feature_extractor=mitie_feature_extractor,
    )

    expected = np.array([
        0.00000000e00, -5.12735510e00, 4.39929873e-01, -5.60760403e00,
        -8.26445103e00
    ])
    expected_cls = np.array(
        [0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])

    seq_vec, sen_vec = message.get_dense_features(TEXT, [])

    assert len(message.get(TOKENS_NAMES[TEXT])) == len(seq_vec)
    assert np.allclose(seq_vec[0][:5], expected, atol=1e-5)
    assert np.allclose(sen_vec[-1][:5], expected_cls, atol=1e-5)

    seq_vec, sen_vec = message.get_dense_features(RESPONSE, [])

    assert len(message.get(TOKENS_NAMES[RESPONSE])) == len(seq_vec)
    assert np.allclose(seq_vec[0][:5], expected, atol=1e-5)
    assert np.allclose(sen_vec[-1][:5], expected_cls, atol=1e-5)

    seq_vec, sen_vec = message.get_dense_features(INTENT, [])

    assert seq_vec is None
    assert sen_vec is None
Example #25
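A parametrized test that trains each test pipeline and verifies the persisted model loads and parses.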
def test_train_model_on_test_pipelines(language, pipeline,
                                       component_builder, tmpdir):
    _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
    (trained, _, persisted_path) = train(
        _config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder)
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
Example #26
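A test that trains a DIETClassifier and ResponseSelector pipeline through a TrainingDataImporter and checks the sizes of the extracted evaluation data.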
async def test_eval_data(component_builder, tmpdir, project):
    _config = RasaNLUModelConfig(
        {
            "pipeline": [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
                {"name": "DIETClassifier", "epochs": 2},
                {"name": "ResponseSelector", "epochs": 2},
            ],
            "language": "en",
        }
    )

    config_path = os.path.join(project, "config.yml")
    data_importer = TrainingDataImporter.load_nlu_importer_from_config(
        config_path,
        training_data_paths=[
            "data/examples/rasa/demo-rasa.md",
            "data/examples/rasa/demo-rasa-responses.md",
        ],
    )

    (_, _, persisted_path) = await train(
        _config,
        path=tmpdir.strpath,
        data=data_importer,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )

    interpreter = Interpreter.load(persisted_path, component_builder)

    data = await data_importer.get_nlu_data()
    (
        intent_results,
        response_selection_results,
        entity_results,
    ) = get_eval_data(interpreter, data)

    assert len(intent_results) == 46
    assert len(response_selection_results) == 46
    assert len(entity_results) == 46
Example #27
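A test that warn_of_competing_extractors emits a UserWarning only for pipelines with competing entity extractors.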
def test_warn_of_competing_extractors(
    pipeline_template: List[Dict[Text, Text]], should_warn: bool
):
    config = RasaNLUModelConfig({"pipeline": pipeline_template})
    trainer = Trainer(config)

    if should_warn:
        with pytest.warns(UserWarning):
            rasa.nlu.components.warn_of_competing_extractors(trainer.pipeline)
    else:
        with pytest.warns(None) as records:
            rasa.nlu.components.warn_of_competing_extractors(trainer.pipeline)

        assert len(records) == 0
Example #28
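A standalone script that trains a model from project files, loads it, parses sample utterances, and displays the result without the intent ranking.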
def train():
    td = load_data("{}/demo_rasa.json".format(prj_dir))
    _config = RasaNLUModelConfig(load_json("{}/config.json".format(prj_dir)))
    trainer = Trainer(_config)
    trainer.train(td)
    persisted_path = trainer.persist("{}/models".format(prj_dir))
    loaded = Interpreter.load(persisted_path)
    assert loaded.pipeline

    # Inference: parse two sample utterances; only the second result is displayed
    loaded.parse("i'm looking for a place in the north of town")
    result = loaded.parse("show me chinese restaurants")
    result = dict(filter(lambda item: item[0] not in ["intent_ranking"], result.items()))
    show_dict(result)
Example #29
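An older variant of Example #20 that expects InvalidConfigError instead of InvalidConfigException.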
async def test_validate_requirements_raises_exception_on_component_without_name(
    tmp_path: Path,
):
    _config = RasaNLUModelConfig(
        # config with a component that does not have a `name` property
        {"pipeline": [{"parameter": 4}]}
    )

    with pytest.raises(InvalidConfigError):
        await train(
            _config,
            data="./data/examples/rasa/demo-rasa.json",
            path=str(tmp_path),
        )
Example #30
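A test that MitieFeaturizer with return_sequence disabled returns a single vector, checked against expected values.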
def test_mitie_featurizer_no_sequence(mitie_feature_extractor, default_config):
    from rasa.nlu.featurizers.dense_featurizer.mitie_featurizer import MitieFeaturizer

    component_config = {"name": "MitieFeaturizer", "return_sequence": False}
    featurizer = MitieFeaturizer.create(component_config, RasaNLUModelConfig())

    sentence = f"Hey how are you today {CLS_TOKEN}"
    tokens = MitieTokenizer().tokenize(sentence)

    vecs = featurizer.features_for_tokens(tokens, mitie_feature_extractor)[0]

    expected = np.array(
        [0.0, -4.4551446, 0.26073121, -1.46632245, -1.84205751])
    assert np.allclose(vecs[:5], expected, atol=1e-5)