Пример #1
0
async def test_train_model_empty_pipeline(component_builder):
    """Training must raise ``ValueError`` for a config built with no pipeline template."""
    empty_config = utilities.base_test_conf(pipeline_template=None)
    train_kwargs = {
        "data": DEFAULT_DATA_PATH,
        "component_builder": component_builder,
    }
    # The call itself is expected to fail; nothing is returned to inspect.
    with pytest.raises(ValueError):
        await train(empty_config, **train_kwargs)
Пример #2
0
async def test_random_seed(component_builder, tmpdir):
    """Check that two training runs with the same fixed seed agree.

    The same config is trained twice into separate directories; both
    persisted models must report an identical intent confidence for the
    message "hello".
    """
    seeded_config = utilities.base_test_conf("supervised_embeddings")
    # Pin random_seed=1 on pipeline component 6 (the embedding intent
    # classifier according to the original comment).
    seeded_config.set_component_attr(6, random_seed=1)

    async def _train_into(suffix):
        # Train once and hand back only the directory the model was saved to.
        _trained, _, persisted_path = await train(
            seeded_config,
            path=tmpdir.strpath + suffix,
            data=DEFAULT_DATA_PATH,
            component_builder=component_builder,
        )
        return persisted_path

    # Keep the original ordering: train both, then load both, then parse both.
    persisted_paths = [await _train_into("_a"), await _train_into("_b")]
    interpreters = [
        Interpreter.load(model_dir, component_builder)
        for model_dir in persisted_paths
    ]
    confidence_a, confidence_b = [
        loaded.parse("hello")["intent"]["confidence"] for loaded in interpreters
    ]
    assert confidence_a == confidence_b
Пример #3
0
def test_unintentional_synonyms_capitalized(component_builder):
    """Check which synonyms the component at pipeline index 5 learns.

    After training on two examples whose entity value is "Mexican", the test
    expects no synonym entry for "mexican" (the surface text matches the value
    apart from case) and expects "tacos" to map to "Mexican".
    """
    spacy_config = utilities.base_test_conf("pretrained_embeddings_spacy")
    synonym_component = component_builder.create_component(
        spacy_config.for_component(5), spacy_config
    )

    def _cuisine_example(text, start, end):
        # Both examples share intent, entity type and value; only the text
        # and the character span of the entity differ.
        entity = {
            "start": start,
            "end": end,
            "value": "Mexican",
            "entity": "cuisine",
        }
        return Message(
            text, {"intent": "restaurant_search", "entities": [entity]}
        )

    training_examples = [
        _cuisine_example("Any Mexican restaurant will do", 4, 11),
        _cuisine_example("I want Tacos!", 7, 12),
    ]
    synonym_component.train(
        TrainingData(training_examples=training_examples), spacy_config
    )

    learned = synonym_component.synonyms
    assert learned.get("mexican") is None
    assert learned.get("tacos") == "Mexican"
Пример #4
0
def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training a pipeline that names an unknown component must fail clearly."""
    broken_config = utilities.base_test_conf("pretrained_embeddings_spacy")
    broken_config.pipeline.append({"name": "my_made_up_component"})

    # NOTE(review): other tests in this file `await train(...)`; confirm that
    # `train` is synchronous here, otherwise nothing is raised inside the block.
    with pytest.raises(Exception) as raised:
        train(
            broken_config,
            data=DEFAULT_DATA_PATH,
            component_builder=component_builder,
        )

    error_text = str(raised.value)
    assert "Failed to find component" in error_text
Пример #5
0
async def test_interpreter_on_pipeline_templates(pipeline_template,
                                                 component_builder, tmpdir):
    """Parse sample texts with an interpreter built for ``pipeline_template``.

    For each text the result must echo the text, name either no intent or one
    present in the training data, carry a non-negative confidence, and only
    report entity types that occur in the training data.
    """
    test_data = "data/examples/rasa/demo-rasa.json"

    template_config = utilities.base_test_conf(pipeline_template)
    template_config["data"] = test_data

    loaded_training_data = training_data.load_data(test_data)

    interpreter = await utilities.interpreter_for(
        component_builder, test_data, tmpdir.strpath, template_config
    )

    for sample in ("good bye", "i am looking for an indian spot"):
        parsed = interpreter.parse(sample, time=None)
        assert parsed["text"] == sample

        intent_name = parsed["intent"]["name"]
        assert not intent_name or intent_name in loaded_training_data.intents
        assert parsed["intent"]["confidence"] >= 0

        # Only entity *types* are checked: models trained on this small data
        # set are not stable enough to demand the exact entities.
        for found in parsed["entities"]:
            assert found["entity"] in loaded_training_data.entities
Пример #6
0
async def test_train_named_model(component_builder, tmpdir):
    """The persisted model must be written directly inside the requested path."""
    keyword_config = utilities.base_test_conf("keyword")
    trained_model, _, model_path = await train(
        keyword_config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    assert trained_model.pipeline

    # Normalise first so a trailing separator does not affect dirname().
    cleaned_path = os.path.normpath(model_path)
    parent_dir = os.path.dirname(cleaned_path)
    assert parent_dir == tmpdir.strpath
Пример #7
0
def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train ``pipeline_template``, reload the persisted model and parse with it."""
    template_config = utilities.base_test_conf(pipeline_template)
    # NOTE(review): other tests in this file `await train(...)`; confirm that
    # `train` returns the result tuple directly in this version.
    trained_model, _, model_path = train(
        template_config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    assert trained_model.pipeline

    reloaded = Interpreter.load(model_path, component_builder)
    assert reloaded.pipeline
    # Parsing must produce some result for both a short and a longer message.
    for utterance in ("hello", "Hello today is Monday, again!"):
        assert reloaded.parse(utterance) is not None
Пример #8
0
async def test_train_model_training_data_persisted(component_builder, tmpdir):
    """With ``persist_nlu_training_data=True`` the metadata must reference training data."""
    keyword_config = utilities.base_test_conf("keyword")
    training_result = await train(
        keyword_config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
        persist_nlu_training_data=True,
    )
    trained_model, _, model_path = training_result
    assert trained_model.pipeline

    reloaded = Interpreter.load(model_path, component_builder)
    assert reloaded.pipeline
    persisted_entry = reloaded.model_metadata.get("training_data")
    assert persisted_entry is not None
Пример #9
0
def test_interpreter(pipeline_template, component_builder, tmpdir):
    """Parse sample texts with an interpreter built for ``pipeline_template``.

    Each result must echo the text, name either no intent or one from the
    training data, have a non-negative confidence, and report only entity
    types that occur in the training data.
    """
    test_data = "data/examples/rasa/demo-rasa.json"
    template_config = utilities.base_test_conf(pipeline_template)
    template_config["data"] = test_data
    loaded_training_data = training_data.load_data(test_data)
    # NOTE(review): a sibling test awaits `utilities.interpreter_for(...)`;
    # confirm it is synchronous in this version.
    interpreter = utilities.interpreter_for(
        component_builder, test_data, tmpdir.strpath, template_config
    )

    for sample in ("good bye", "i am looking for an indian spot"):
        parsed = interpreter.parse(sample, time=None)
        assert parsed['text'] == sample

        intent_name = parsed['intent']['name']
        assert not intent_name or intent_name in loaded_training_data.intents
        assert parsed['intent']['confidence'] >= 0

        # Only entity *types* are checked: models trained on this small data
        # set are not stable enough to demand the exact entities.
        for found in parsed['entities']:
            assert found['entity'] in loaded_training_data.entities
Пример #10
0
def test_whitespace_training():
    """Training a case-insensitive WhitespaceTokenizer must store lower-cased tokens.

    After ``train`` each example is expected to carry a "tokens" entry whose
    leading tokens match the lower-cased words of its text.
    """
    embeddings_config = utilities.base_test_conf("supervised_embeddings")

    def _cuisine_example(text, start, end):
        # The two examples differ only in text and entity character span.
        entity = {
            "start": start,
            "end": end,
            "value": "Mexican",
            "entity": "cuisine",
        }
        return Message(
            text, {"intent": "restaurant_search", "entities": [entity]}
        )

    examples = [
        _cuisine_example("Any Mexican restaurant will do", 4, 11),
        _cuisine_example("I want Tacos!", 7, 12),
    ]

    tokenizer = WhitespaceTokenizer({"case_sensitive": False})
    tokenizer.train(TrainingData(training_examples=examples), embeddings_config)

    expected_per_example = (
        ["any", "mexican", "restaurant", "will", "do"],
        ["i", "want", "tacos"],
    )
    for example, expected_texts in zip(examples, expected_per_example):
        tokens = example.data.get("tokens")
        # Index-based checks: only the listed leading positions are verified.
        for position, expected in enumerate(expected_texts):
            assert tokens[position].text == expected
Пример #11
0
def test_whitespace_with_case():
    """Exercise the ``case_sensitive`` option of WhitespaceTokenizer.

    Covers three entry points: ``tokenize`` (with the option False, True and
    absent), ``process`` on a single message, and ``train`` on training
    examples. The test expects lower-cased tokens when the option is False
    and unchanged casing when it is True or missing.
    """
    from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer

    sentence = "Forecast for LUNCH"
    lowered = ["forecast", "for", "lunch"]
    unchanged = ["Forecast", "for", "LUNCH"]

    def _tokenized_texts(component_config):
        # Build a fresh tokenizer per configuration and return the token texts.
        tokenizer = WhitespaceTokenizer(component_config)
        return [token.text for token in tokenizer.tokenize(sentence)]

    def _assert_leading_tokens(tokens, expected_texts):
        # Index-based checks: only the listed leading positions are verified.
        for position, expected in enumerate(expected_texts):
            assert tokens[position].text == expected

    # --- tokenize() ---
    assert _tokenized_texts({"case_sensitive": False}) == lowered
    assert _tokenized_texts({"case_sensitive": True}) == unchanged
    assert _tokenized_texts({}) == unchanged

    # --- process() ---
    tk = WhitespaceTokenizer({"case_sensitive": False})
    message = Message(sentence)
    tk.process(message)
    _assert_leading_tokens(message.data.get("tokens"), lowered)

    # --- train() ---
    embeddings_config = utilities.base_test_conf("supervised_embeddings")

    def _cuisine_example(text, start, end):
        # The two examples differ only in text and entity character span.
        entity = {
            "start": start,
            "end": end,
            "value": "Mexican",
            "entity": "cuisine",
        }
        return Message(
            text, {"intent": "restaurant_search", "entities": [entity]}
        )

    examples = [
        _cuisine_example("Any Mexican restaurant will do", 4, 11),
        _cuisine_example("I want Tacos!", 7, 12),
    ]

    tk = WhitespaceTokenizer({"case_sensitive": False})
    tk.train(TrainingData(training_examples=examples), embeddings_config)

    expected_per_example = (
        ["any", "mexican", "restaurant", "will", "do"],
        ["i", "want", "tacos"],
    )
    for example, expected_texts in zip(examples, expected_per_example):
        _assert_leading_tokens(example.data.get("tokens"), expected_texts)