Пример #1
0
def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training must fail when the pipeline names an unknown component."""
    config = utilities.base_test_conf("pretrained_embeddings_spacy")
    unknown_component = {"name": "my_made_up_component"}
    config.pipeline.append(unknown_component)

    with pytest.raises(Exception) as excinfo:
        train(config,
              data=DEFAULT_DATA_PATH,
              component_builder=component_builder)

    # The raised error has to point at the missing component.
    error_text = str(excinfo.value)
    assert "Failed to find component" in error_text
Пример #2
0
def test_train_model_empty_pipeline(component_builder):
    """``train.do_train`` is expected to raise ValueError without a pipeline template."""
    # No template is given, so the configuration should hold an empty pipeline.
    empty_config = utilities.base_test_conf(pipeline_template=None)

    with pytest.raises(ValueError):
        train.do_train(empty_config,
                       data=DEFAULT_DATA_PATH,
                       component_builder=component_builder)
Пример #3
0
def test_train_model(pipeline_template, component_builder):
    """Train with the given template, reload the stored model and parse two texts."""
    config = utilities.base_test_conf(pipeline_template)
    trained, model_dir = utilities.run_train(config, component_builder)
    assert trained.pipeline

    reloaded = utilities.load_interpreter_for_model(
        config, model_dir, component_builder)
    assert reloaded.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert reloaded.parse(text) is not None
Пример #4
0
def test_train_named_model(component_builder):
    """A model trained under a project name lands in a directory of that name."""
    project_name = "my_keyword_model"
    config = utilities.base_test_conf("keyword")
    config['project'] = project_name

    trained, model_dir = utilities.run_train(config, component_builder)
    assert trained.pipeline

    # The parent directory of the persisted model should carry the project name.
    project_dir = os.path.dirname(os.path.normpath(model_dir))
    assert os.path.basename(project_dir) == project_name
Пример #5
0
def test_load_and_persist_without_train(component_builder):
    """Persist a Trainer that never trained and check the result loads and parses."""
    config = utilities.base_test_conf("all_components")
    untrained = Trainer(config, component_builder)
    persistor = create_persistor(config)
    model_dir = untrained.persist(
        config['path'], persistor, project_name=config['name'])

    interpreter = utilities.load_interpreter_for_model(
        config, model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #6
0
def test_train_model_noents(component_builder):
    """Train on the ``demo-rasa-noents.json`` data set, then reload and parse."""
    config = utilities.base_test_conf("all_components")
    config['data'] = "./data/test/demo-rasa-noents.json"

    trained, model_dir = utilities.run_train(config, component_builder)
    assert trained.pipeline

    interpreter = utilities.load_interpreter_for_model(
        config, model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Run duckling and then the synonym component over a message with a number."""
    config = utilities.base_test_conf("all_components")
    config["duckling_dimensions"] = ["number"]
    duckling, synonyms = [
        component_builder.create_component(component_name, config)
        for component_name in ("ner_duckling", "ner_synonyms")
    ]

    message = Message("He was 6 feet away")
    # The synonym processor runs second to check that it can handle entities
    # that have int values.
    for component in (duckling, synonyms):
        component.process(message)

    # Trivially true for a locally built message; the real check is that
    # neither process() call raised.
    assert message is not None
Пример #8
0
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """Feed one message through ``ner_duckling`` followed by ``ner_synonyms``."""
    conf = utilities.base_test_conf("all_components")
    conf["duckling_dimensions"] = ["number"]

    extractor = component_builder.create_component("ner_duckling", conf)
    synonym_mapper = component_builder.create_component("ner_synonyms", conf)

    msg = Message("He was 6 feet away")
    extractor.process(msg)
    # Checks that the synonym processor can handle entities that have
    # int values.
    synonym_mapper.process(msg)

    # This assertion cannot fail on its own; reaching it means no
    # component raised while processing.
    assert msg is not None
Пример #9
0
def test_handles_pipeline_with_non_existing_component(component_builder):
    """``train.do_train`` must report a pipeline component it cannot find."""
    config = utilities.base_test_conf("spacy_sklearn")
    bogus_component = {"name": "my_made_up_component"}
    config.pipeline.append(bogus_component)

    with pytest.raises(Exception) as excinfo:
        train.do_train(config,
                       data=DEFAULT_DATA_PATH,
                       component_builder=component_builder)

    failure_text = str(excinfo.value)
    assert "Failed to find component" in failure_text
Пример #10
0
def test_train_model_multithread(component_builder):
    """Train with ``num_threads`` set to 2, then reload the model and parse."""
    config = utilities.base_test_conf("all_components")
    config['num_threads'] = 2

    trained, model_dir = utilities.run_train(config, component_builder)
    assert trained.pipeline

    interpreter = utilities.load_interpreter_for_model(
        config, model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #11
0
def test_load_and_persist_without_train(component_builder):
    """Persist an untrained Trainer under the configured project and reload it."""
    config = utilities.base_test_conf("all_components")
    untrained = Trainer(config, component_builder)
    persistor = create_persistor(config)
    model_dir = untrained.persist(
        config['path'], persistor, project_name=config['project'])

    interpreter = utilities.load_interpreter_for_model(
        config, model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #12
0
def test_load_and_persist_without_train(component_builder, tmpdir):
    """Persist an untrained Trainer into ``tmpdir`` and load it with Interpreter."""
    config = utilities.base_test_conf("all_components")
    untrained = Trainer(config, component_builder)
    persistor = create_persistor(config)
    model_dir = untrained.persist(
        tmpdir.strpath, persistor, project_name="my_project")

    interpreter = Interpreter.load(model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #13
0
def test_train_named_model(component_builder, tmpdir):
    """``train`` with a project name persists into a directory of that name."""
    project_name = "my_keyword_model"
    config = utilities.base_test_conf("keyword")

    training_result = train(
        config,
        path=tmpdir.strpath,
        project=project_name,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    trained, _, model_dir = training_result
    assert trained.pipeline

    # The parent directory of the persisted model should carry the project name.
    project_dir = os.path.dirname(os.path.normpath(model_dir))
    assert os.path.basename(project_dir) == project_name
Пример #14
0
def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train into ``tmpdir`` via ``train``, reload the model and parse two texts."""
    config = utilities.base_test_conf(pipeline_template)

    training_result = train(
        config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    trained, _, model_dir = training_result
    assert trained.pipeline

    interpreter = Interpreter.load(model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #15
0
def test_train_model_noents(component_builder, tmpdir):
    """Train on ``demo-rasa-noents.json`` into ``tmpdir``, then reload and parse."""
    config = utilities.base_test_conf("all_components")

    training_result = train.do_train(
        config,
        path=tmpdir.strpath,
        data="./data/test/demo-rasa-noents.json",
        component_builder=component_builder,
    )
    trained, _, model_dir = training_result
    assert trained.pipeline

    interpreter = Interpreter.load(model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
Пример #16
0
def test_train_named_model(component_builder, tmpdir):
    """``train.do_train`` with a project name persists into a same-named directory."""
    project_name = "my_keyword_model"
    config = utilities.base_test_conf("keyword")

    training_result = train.do_train(
        config,
        path=tmpdir.strpath,
        project=project_name,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    trained, _, model_dir = training_result
    assert trained.pipeline

    # The directory that contains the model should be named after the project.
    containing_dir = os.path.dirname(os.path.normpath(model_dir))
    assert os.path.basename(containing_dir) == project_name
Пример #17
0
def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train via ``train.do_train`` into ``tmpdir``, reload the model and parse."""
    config = utilities.base_test_conf(pipeline_template)

    training_result = train.do_train(
        config,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder,
    )
    trained, _, model_dir = training_result
    assert trained.pipeline

    interpreter = Interpreter.load(model_dir, component_builder)
    assert interpreter.pipeline
    for text in ("hello", "Hello today is Monday, again!"):
        assert interpreter.parse(text) is not None
def test_duckling_entity_extractor(component_builder):
    """Check duckling time extraction, with and without a reference time."""
    config = utilities.base_test_conf("all_components")
    config["duckling_dimensions"] = ["time"]
    duckling = component_builder.create_component("ner_duckling", config)

    # No reference time: three entities are expected in this text.
    undated = Message("Today is the 5th of May. Let us meet tomorrow.")
    duckling.process(undated)
    assert len(undated.get("entities")) == 3

    # Defined reference time: 1381536182000 == 2013/10/12 02:03:02
    dated = Message("Let us meet tomorrow.", time="1381536182000")
    duckling.process(dated)
    found = dated.get("entities")
    assert len(found) == 1
    tomorrow = found[0]
    assert tomorrow["text"] == "tomorrow"
    assert tomorrow["value"] == "2013-10-13T00:00:00.000Z"
Пример #19
0
def test_unintentional_synonyms_capitalized(component_builder):
    """Training must not map "mexican" to itself but must map "tacos" to "Mexican"."""
    config = utilities.base_test_conf("all_components")
    ner_syn = component_builder.create_component("ner_synonyms", config)

    # Entity text equals the value apart from case: no synonym expected.
    same_word = Message(
        "Any Mexican restaurant will do",
        {
            "intent": "restaurant_search",
            "entities": [
                {"start": 4, "end": 11, "value": "Mexican", "entity": "cuisine"},
            ],
        },
    )
    # Entity text differs from the value: a synonym entry is expected.
    different_word = Message(
        "I want Tacos!",
        {
            "intent": "restaurant_search",
            "entities": [
                {"start": 7, "end": 12, "value": "Mexican", "entity": "cuisine"},
            ],
        },
    )

    training_set = TrainingData(training_examples=[same_word, different_word])
    ner_syn.train(training_set, config)

    learned = ner_syn.synonyms
    assert learned.get("mexican") is None
    assert learned.get("tacos") == "Mexican"
Пример #20
0
def test_duckling_entity_extractor(component_builder):
    """Exercise ``ner_duckling`` on time expressions in two messages."""
    conf = utilities.base_test_conf("all_components")
    conf["duckling_dimensions"] = ["time"]
    extractor = component_builder.create_component("ner_duckling", conf)

    first_message = Message("Today is the 5th of May. Let us meet tomorrow.")
    extractor.process(first_message)
    first_entities = first_message.get("entities")
    assert len(first_entities) == 3

    # Second pass uses a defined date as the reference time.
    # 1381536182000 == 2013/10/12 02:03:02
    second_message = Message("Let us meet tomorrow.", time="1381536182000")
    extractor.process(second_message)
    second_entities = second_message.get("entities")
    assert len(second_entities) == 1
    only_entity = second_entities[0]
    assert only_entity["text"] == "tomorrow"
    assert only_entity["value"] == "2013-10-13T00:00:00.000Z"
Пример #21
0
def test_unintentional_synonyms_capitalized(component_builder):
    """A capitalised entity text must not become a synonym of its own value."""
    conf = utilities.base_test_conf("all_components")
    synonym_mapper = component_builder.create_component("ner_synonyms", conf)

    mexican_entity = {"start": 4, "end": 11, "value": "Mexican", "entity": "cuisine"}
    tacos_entity = {"start": 7, "end": 12, "value": "Mexican", "entity": "cuisine"}
    examples = [
        Message("Any Mexican restaurant will do",
                {"intent": "restaurant_search", "entities": [mexican_entity]}),
        Message("I want Tacos!",
                {"intent": "restaurant_search", "entities": [tacos_entity]}),
    ]

    synonym_mapper.train(TrainingData(training_examples=examples), conf)

    # "mexican" only differs from its value by case, so nothing is stored for it;
    # "tacos" is a different word and has to map to the entity value.
    assert synonym_mapper.synonyms.get("mexican") is None
    assert synonym_mapper.synonyms.get("tacos") == "Mexican"
Пример #22
0
def test_interpreter(pipeline_template, component_builder):
    """Parse sample texts and check results against the training data labels."""
    data_path = "data/examples/rasa/demo-rasa.json"
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = data_path
    td = training_data.load_data(data_path)
    interpreter = utilities.interpreter_for(component_builder, conf)

    for text in ("good bye", "i am looking for an indian spot"):
        result = interpreter.parse(text, time=None)
        assert result['text'] == text

        intent = result['intent']
        # An empty intent name is accepted; otherwise it must be a known intent.
        assert not intent['name'] or intent['name'] in td.intents
        assert intent['confidence'] >= 0

        # Only entity types present in the training data may be detected.
        # Models on our test data set are not stable enough to require the
        # exact entities to be found.
        for found in result['entities']:
            assert found['entity'] in td.entities
Пример #23
0
def test_samples(pipeline_template, component_builder):
    """Parse known samples and validate text, intent, confidence and entities."""
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = "./data/examples/rasa/demo-rasa.json"
    interpreter = utilities.interpreter_for(component_builder, conf)

    available_intents = [
        "greet", "restaurant_search", "affirm", "goodbye", "None"
    ]

    # Entities that are acceptable for the "indian spot" sample.
    indian_cuisine = {"start": 20, "end": 26, "value": "indian", "entity": "cuisine"}
    indian_norp = {"end": 26, "entity": "NORP", "start": 20, "value": "indian"}
    samples = [
        ("good bye", {'intent': 'goodbye', 'entities': []}),
        ("i am looking for an indian spot", {
            'intent': 'restaurant_search',
            'entities': [indian_cuisine, indian_norp],
        }),
    ]

    for text, gold in samples:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text, \
            "Wrong text for sample '{}'".format(text)

        intent = result['intent']
        assert intent['name'] in available_intents, \
            "Wrong intent for sample '{}'".format(text)
        assert intent['confidence'] >= 0, \
            "Low confidence for sample '{}'".format(text)

        # Detected entities must all be expected ones; the models on our test
        # data set are not stable enough to require that entities are found.
        for found in result['entities']:
            # Gold entities carry no extractor key, so drop it before comparing.
            del found["extractor"]
            assert found in gold['entities'], \
                "Wrong entities for sample '{}'".format(text)
Пример #24
0
def test_samples(pipeline_template, component_builder):
    """Check parse results for two sample texts against expected annotations."""
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = "./data/examples/rasa/demo-rasa.json"
    interpreter = utilities.interpreter_for(component_builder, conf)

    available_intents = ["greet", "restaurant_search", "affirm", "goodbye", "None"]
    goodbye_gold = {'intent': 'goodbye', 'entities': []}
    search_gold = {
        'intent': 'restaurant_search',
        'entities': [{"start": 20, "end": 26, "value": "indian", "entity": "cuisine"}],
    }
    samples = [
        ("good bye", goodbye_gold),
        ("i am looking for an indian spot", search_gold),
    ]

    for text, gold in samples:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text, \
            "Wrong text for sample '{}'".format(text)

        parsed_intent = result['intent']
        assert parsed_intent['name'] in available_intents, \
            "Wrong intent for sample '{}'".format(text)
        assert parsed_intent['confidence'] >= 0, \
            "Low confidence for sample '{}'".format(text)

        # Every detected entity has to be an expected one. Finding them is not
        # required, as models on our test data set are not stable enough.
        expected_entities = gold['entities']
        for found in result['entities']:
            # Expected entities carry no extractor key; remove it to compare.
            del found["extractor"]
            assert found in expected_entities, \
                "Wrong entities for sample '{}'".format(text)
Пример #25
0
def test_random_seed(component_builder, tmpdir):
    """Two training runs with the same fixed seed must give equal confidence."""
    config = utilities.base_test_conf("supervised_embeddings")
    # Fix the random seed to 1 on the component addressed by index 5.
    config.set_component_attr(5, random_seed=1)

    # Train twice with the same configuration, each run into its own path.
    model_dirs = []
    for suffix in ("_a", "_b"):
        _, _, model_dir = train(config,
                                path=tmpdir.strpath + suffix,
                                data=DEFAULT_DATA_PATH,
                                component_builder=component_builder)
        model_dirs.append(model_dir)

    # Load both models first, then parse with each of them.
    interpreters = [Interpreter.load(model_dir, component_builder)
                    for model_dir in model_dirs]
    confidence_a, confidence_b = [
        interpreter.parse("hello")["intent"]["confidence"]
        for interpreter in interpreters
    ]
    assert confidence_a == confidence_b
Пример #26
0
def test_random_seed(component_builder, tmpdir):
    """Train twice with random seed 1 and compare the confidence for "hello"."""
    config = utilities.base_test_conf("supervised_embeddings")
    # Set a fixed random seed of 1 on the component at index 5.
    config.set_component_attr(5, random_seed=1)

    first_path = tmpdir.strpath + "_a"
    second_path = tmpdir.strpath + "_b"

    # First run.
    first_result = train(config,
                         path=first_path,
                         data=DEFAULT_DATA_PATH,
                         component_builder=component_builder)
    _, _, first_model_dir = first_result
    # Second run.
    second_result = train(config,
                          path=second_path,
                          data=DEFAULT_DATA_PATH,
                          component_builder=component_builder)
    _, _, second_model_dir = second_result

    first_interpreter = Interpreter.load(first_model_dir, component_builder)
    second_interpreter = Interpreter.load(second_model_dir, component_builder)
    first_confidence = first_interpreter.parse("hello")["intent"]["confidence"]
    second_confidence = second_interpreter.parse("hello")["intent"]["confidence"]
    assert first_confidence == second_confidence
Пример #27
0
def duckling_interpreter(component_builder):
    """Return an interpreter whose pipeline consists only of ``ner_duckling``."""
    conf = utilities.base_test_conf("")
    overrides = (
        ("pipeline", ["ner_duckling"]),
        ("data", "./data/examples/rasa/demo-rasa.json"),
    )
    for key, value in overrides:
        conf[key] = value
    return utilities.interpreter_for(component_builder, conf)
Пример #28
0
def test_train_model_empty_pipeline(component_builder):
    """``utilities.run_train`` is expected to raise ValueError without a template."""
    # No template is given, so the configuration should hold an empty pipeline.
    empty_config = utilities.base_test_conf(pipeline_template=None)

    with pytest.raises(ValueError):
        utilities.run_train(empty_config, component_builder)
def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training must fail when the pipeline lists an unknown component name."""
    config = utilities.base_test_conf("spacy_sklearn")
    unknown_name = "my_made_up_component"
    config['pipeline'].append(unknown_name)

    with pytest.raises(Exception) as excinfo:
        utilities.run_train(config, component_builder)

    error_text = str(excinfo.value)
    assert "Failed to find component" in error_text
def test_train_model_empty_pipeline(component_builder):
    """Running training on a config built without a template raises ValueError."""
    # Should return an empty pipeline.
    config_without_pipeline = utilities.base_test_conf(pipeline_template=None)

    with pytest.raises(ValueError):
        utilities.run_train(config_without_pipeline, component_builder)
Пример #31
0
def test_handles_pipeline_with_non_existing_component(component_builder):
    """An unknown pipeline entry must make training fail with a clear message."""
    conf = utilities.base_test_conf("spacy_sklearn")
    pipeline = conf['pipeline']
    pipeline.append("my_made_up_component")

    with pytest.raises(Exception) as excinfo:
        utilities.run_train(conf, component_builder)

    # The error has to name the problem: the component could not be found.
    assert "Failed to find component" in str(excinfo.value)
Пример #32
0
def duckling_interpreter(component_builder):
    """Build an interpreter from a config restricted to the duckling extractor."""
    duckling_conf = utilities.base_test_conf("")
    # Replace the pipeline with the single duckling component.
    duckling_conf["pipeline"] = ["ner_duckling"]
    duckling_conf["data"] = "./data/examples/rasa/demo-rasa.json"

    interpreter = utilities.interpreter_for(component_builder, duckling_conf)
    return interpreter