def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training must fail loudly when the pipeline names an unknown component."""
    cfg = utilities.base_test_conf("pretrained_embeddings_spacy")
    cfg.pipeline.append({"name": "my_made_up_component"})
    with pytest.raises(Exception) as execinfo:
        train(cfg,
              data=DEFAULT_DATA_PATH,
              component_builder=component_builder)
    # the error message should point at the unresolvable component
    assert "Failed to find component" in str(execinfo.value)
def test_train_model_empty_pipeline(component_builder):
    """A config without a pipeline template produces an empty pipeline,
    which training must reject with a ValueError."""
    cfg = utilities.base_test_conf(pipeline_template=None)
    with pytest.raises(ValueError):
        train.do_train(cfg,
                       data=DEFAULT_DATA_PATH,
                       component_builder=component_builder)
def test_train_model(pipeline_template, component_builder):
    """Train, persist, reload and parse with the given pipeline template."""
    cfg = utilities.base_test_conf(pipeline_template)
    trained, persisted_path = utilities.run_train(cfg, component_builder)
    assert trained.pipeline
    loaded = utilities.load_interpreter_for_model(cfg, persisted_path,
                                                  component_builder)
    assert loaded.pipeline
    # parsing should succeed on both a trivial and a longer utterance
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_train_named_model(component_builder):
    """A model trained under a project name must be persisted inside a
    directory named after that project."""
    cfg = utilities.base_test_conf("keyword")
    cfg['project'] = "my_keyword_model"
    trained, persisted_path = utilities.run_train(cfg, component_builder)
    assert trained.pipeline
    project_dir = os.path.dirname(os.path.normpath(persisted_path))
    # the model's parent directory carries the project name
    assert os.path.basename(project_dir) == "my_keyword_model"
def test_load_and_persist_without_train(component_builder):
    """An untrained trainer can still persist its (empty) model, and that
    persisted model can be loaded back and used for parsing."""
    cfg = utilities.base_test_conf("all_components")
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(cfg)
    persisted_path = trainer.persist(cfg['path'], persistor,
                                     project_name=cfg['name'])
    loaded = utilities.load_interpreter_for_model(cfg, persisted_path,
                                                  component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_train_model_noents(component_builder):
    """Training on data that contains no entities must still yield a
    usable, parseable model."""
    cfg = utilities.base_test_conf("all_components")
    cfg['data'] = "./data/test/demo-rasa-noents.json"
    trained, persisted_path = utilities.run_train(cfg, component_builder)
    assert trained.pipeline
    loaded = utilities.load_interpreter_for_model(cfg, persisted_path,
                                                  component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_duckling_entity_extractor_and_synonyms(component_builder):
    """The synonym mapper must not choke on duckling entities whose
    values are ints rather than strings."""
    cfg = utilities.base_test_conf("all_components")
    cfg["duckling_dimensions"] = ["number"]
    duckling = component_builder.create_component("ner_duckling", cfg)
    synonyms = component_builder.create_component("ner_synonyms", cfg)
    msg = Message("He was 6 feet away")
    duckling.process(msg)
    # processing the int-valued "6" entity must not raise
    synonyms.process(msg)
    assert msg is not None
def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training must fail loudly when the pipeline names an unknown component."""
    cfg = utilities.base_test_conf("spacy_sklearn")
    cfg.pipeline.append({"name": "my_made_up_component"})
    with pytest.raises(Exception) as execinfo:
        train.do_train(cfg,
                       data=DEFAULT_DATA_PATH,
                       component_builder=component_builder)
    # the error message should point at the unresolvable component
    assert "Failed to find component" in str(execinfo.value)
def test_train_model_multithread(component_builder):
    """Training with more than one thread must still produce a model
    that loads and parses correctly."""
    cfg = utilities.base_test_conf("all_components")
    cfg['num_threads'] = 2
    trained, persisted_path = utilities.run_train(cfg, component_builder)
    assert trained.pipeline
    loaded = utilities.load_interpreter_for_model(cfg, persisted_path,
                                                  component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_load_and_persist_without_train(component_builder):
    """An untrained trainer can still persist its (empty) model, and that
    persisted model can be loaded back and used for parsing."""
    cfg = utilities.base_test_conf("all_components")
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(cfg)
    persisted_path = trainer.persist(cfg['path'], persistor,
                                     project_name=cfg['project'])
    loaded = utilities.load_interpreter_for_model(cfg, persisted_path,
                                                  component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_load_and_persist_without_train(component_builder, tmpdir):
    """Persisting an untrained trainer into a temp dir must yield a model
    that Interpreter.load can restore and use."""
    cfg = utilities.base_test_conf("all_components")
    trainer = Trainer(cfg, component_builder)
    persistor = create_persistor(cfg)
    persisted_path = trainer.persist(tmpdir.strpath, persistor,
                                     project_name="my_project")
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_train_named_model(component_builder, tmpdir):
    """A model trained under an explicit project must be persisted inside
    a directory named after that project."""
    cfg = utilities.base_test_conf("keyword")
    trained, _, persisted_path = train(cfg,
                                       path=tmpdir.strpath,
                                       project="my_keyword_model",
                                       data=DEFAULT_DATA_PATH,
                                       component_builder=component_builder)
    assert trained.pipeline
    project_dir = os.path.dirname(os.path.normpath(persisted_path))
    # the model's parent directory carries the project name
    assert os.path.basename(project_dir) == "my_keyword_model"
def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train, persist, reload and parse with the given pipeline template."""
    cfg = utilities.base_test_conf(pipeline_template)
    trained, _, persisted_path = train(cfg,
                                       path=tmpdir.strpath,
                                       data=DEFAULT_DATA_PATH,
                                       component_builder=component_builder)
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_train_model_noents(component_builder, tmpdir):
    """Training on data that contains no entities must still yield a
    usable, parseable model."""
    cfg = utilities.base_test_conf("all_components")
    trained, _, persisted_path = train.do_train(
        cfg,
        path=tmpdir.strpath,
        data="./data/test/demo-rasa-noents.json",
        component_builder=component_builder)
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_train_named_model(component_builder, tmpdir):
    """A model trained under an explicit project must be persisted inside
    a directory named after that project."""
    cfg = utilities.base_test_conf("keyword")
    trained, _, persisted_path = train.do_train(
        cfg,
        path=tmpdir.strpath,
        project="my_keyword_model",
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder)
    assert trained.pipeline
    project_dir = os.path.dirname(os.path.normpath(persisted_path))
    # the model's parent directory carries the project name
    assert os.path.basename(project_dir) == "my_keyword_model"
def test_train_model(pipeline_template, component_builder, tmpdir):
    """Train, persist, reload and parse with the given pipeline template."""
    cfg = utilities.base_test_conf(pipeline_template)
    trained, _, persisted_path = train.do_train(
        cfg,
        path=tmpdir.strpath,
        data=DEFAULT_DATA_PATH,
        component_builder=component_builder)
    assert trained.pipeline
    loaded = Interpreter.load(persisted_path, component_builder)
    assert loaded.pipeline
    assert loaded.parse("hello") is not None
    assert loaded.parse("Hello today is Monday, again!") is not None
def test_duckling_entity_extractor(component_builder):
    """Duckling extracts time entities and honours an explicit reference
    time supplied with the message."""
    cfg = utilities.base_test_conf("all_components")
    cfg["duckling_dimensions"] = ["time"]
    duckling = component_builder.create_component("ner_duckling", cfg)
    msg = Message("Today is the 5th of May. Let us meet tomorrow.")
    duckling.process(msg)
    assert len(msg.get("entities")) == 3
    # With a fixed reference time "tomorrow" must resolve relative to it.
    # 1381536182000 == 2013/10/12 02:03:02
    msg = Message("Let us meet tomorrow.", time="1381536182000")
    duckling.process(msg)
    entities = msg.get("entities")
    assert len(entities) == 1
    assert entities[0]["text"] == "tomorrow"
    assert entities[0]["value"] == "2013-10-13T00:00:00.000Z"
def test_unintentional_synonyms_capitalized(component_builder):
    """Synonym training lowercases lookup keys and must not create a
    mapping for an entity whose text already equals its value."""
    cfg = utilities.base_test_conf("all_components")
    ner_syn = component_builder.create_component("ner_synonyms", cfg)
    examples = [
        Message("Any Mexican restaurant will do", {
            "intent": "restaurant_search",
            "entities": [{"start": 4, "end": 11,
                          "value": "Mexican", "entity": "cuisine"}]
        }),
        Message("I want Tacos!", {
            "intent": "restaurant_search",
            "entities": [{"start": 7, "end": 12,
                          "value": "Mexican", "entity": "cuisine"}]
        }),
    ]
    ner_syn.train(TrainingData(training_examples=examples), cfg)
    # "Mexican" maps onto itself, so no synonym entry should exist for it
    assert ner_syn.synonyms.get("mexican") is None
    assert ner_syn.synonyms.get("tacos") == "Mexican"
def test_interpreter(pipeline_template, component_builder):
    """Parse results must only contain intents and entity types that are
    present in the training data."""
    test_data = "data/examples/rasa/demo-rasa.json"
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = test_data
    td = training_data.load_data(test_data)
    interpreter = utilities.interpreter_for(component_builder, conf)
    for text in ("good bye", "i am looking for an indian spot"):
        result = interpreter.parse(text, time=None)
        assert result['text'] == text
        assert (not result['intent']['name']
                or result['intent']['name'] in td.intents)
        assert result['intent']['confidence'] >= 0
        # Ensure the model doesn't detect entity types that are not present.
        # Models on our test data set are not stable enough to require the
        # exact entities to be found.
        for entity in result['entities']:
            assert entity['entity'] in td.entities
def test_samples(pipeline_template, component_builder):
    """Parsed intents must come from the known set, and every detected
    entity must appear in the gold annotations for its sample."""
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = "./data/examples/rasa/demo-rasa.json"
    interpreter = utilities.interpreter_for(component_builder, conf)
    available_intents = [
        "greet",
        "restaurant_search",
        "affirm",
        "goodbye",
        "None",
    ]
    samples = [
        ("good bye",
         {'intent': 'goodbye',
          'entities': []}),
        ("i am looking for an indian spot",
         {'intent': 'restaurant_search',
          'entities': [{"start": 20, "end": 26,
                        "value": "indian", "entity": "cuisine"},
                       {"end": 26, "entity": "NORP",
                        "start": 20, "value": "indian"}]}),
    ]
    for text, gold in samples:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text, \
            "Wrong text for sample '{}'".format(text)
        assert result['intent']['name'] in available_intents, \
            "Wrong intent for sample '{}'".format(text)
        assert result['intent']['confidence'] >= 0, \
            "Low confidence for sample '{}'".format(text)
        # This ensures the model doesn't detect entities that are not
        # present. Models on our test data set are not stable enough to
        # require the entities to be found.
        for entity in result['entities']:
            del entity["extractor"]
            assert entity in gold['entities'], \
                "Wrong entities for sample '{}'".format(text)
def test_samples(pipeline_template, component_builder):
    """Parsed intents must come from the known set, and every detected
    entity must appear in the gold annotations for its sample."""
    conf = utilities.base_test_conf(pipeline_template)
    conf["data"] = "./data/examples/rasa/demo-rasa.json"
    interpreter = utilities.interpreter_for(component_builder, conf)
    available_intents = ["greet", "restaurant_search", "affirm",
                         "goodbye", "None"]
    samples = [
        ("good bye",
         {'intent': 'goodbye',
          'entities': []}),
        ("i am looking for an indian spot",
         {'intent': 'restaurant_search',
          'entities': [{"start": 20, "end": 26,
                        "value": "indian", "entity": "cuisine"}]}),
    ]
    for text, gold in samples:
        result = interpreter.parse(text, time=None)
        assert result['text'] == text, \
            "Wrong text for sample '{}'".format(text)
        assert result['intent']['name'] in available_intents, \
            "Wrong intent for sample '{}'".format(text)
        assert result['intent']['confidence'] >= 0, \
            "Low confidence for sample '{}'".format(text)
        # This ensures the model doesn't detect entities that are not
        # present. Models on our test data set are not stable enough to
        # require the entities to be found.
        for entity in result['entities']:
            del entity["extractor"]
            assert entity in gold['entities'], \
                "Wrong entities for sample '{}'".format(text)
def test_random_seed(component_builder, tmpdir):
    """Two tf-embedding training runs with the same fixed seed must
    produce identical confidences."""
    cfg = utilities.base_test_conf("supervised_embeddings")
    # pin the classifier's random seed so both runs are deterministic
    cfg.set_component_attr(5, random_seed=1)
    paths = []
    for suffix in ("_a", "_b"):
        _, _, persisted = train(cfg,
                                path=tmpdir.strpath + suffix,
                                data=DEFAULT_DATA_PATH,
                                component_builder=component_builder)
        paths.append(persisted)
    confidences = [
        Interpreter.load(p, component_builder)
        .parse("hello")["intent"]["confidence"]
        for p in paths
    ]
    assert confidences[0] == confidences[1]
def test_random_seed(component_builder, tmpdir):
    """Two tf-embedding training runs with the same fixed seed must
    produce identical confidences."""
    cfg = utilities.base_test_conf("supervised_embeddings")
    # pin the classifier's random seed so both runs are deterministic
    cfg.set_component_attr(5, random_seed=1)
    paths = []
    for suffix in ("_a", "_b"):
        _, _, persisted = train.do_train(
            cfg,
            path=tmpdir.strpath + suffix,
            data=DEFAULT_DATA_PATH,
            component_builder=component_builder)
        paths.append(persisted)
    confidences = [
        Interpreter.load(p, component_builder)
        .parse("hello")["intent"]["confidence"]
        for p in paths
    ]
    assert confidences[0] == confidences[1]
def duckling_interpreter(component_builder):
    """Build an interpreter whose pipeline consists solely of the
    duckling entity extractor."""
    conf = utilities.base_test_conf("")
    conf["pipeline"] = ["ner_duckling"]
    conf["data"] = "./data/examples/rasa/demo-rasa.json"
    return utilities.interpreter_for(component_builder, conf)
def test_train_model_empty_pipeline(component_builder):
    """A config without a pipeline template yields an empty pipeline,
    which training must reject with a ValueError."""
    cfg = utilities.base_test_conf(pipeline_template=None)
    with pytest.raises(ValueError):
        utilities.run_train(cfg, component_builder)
def test_handles_pipeline_with_non_existing_component(component_builder):
    """Training must fail loudly when the pipeline names an unknown component."""
    cfg = utilities.base_test_conf("spacy_sklearn")
    cfg['pipeline'].append("my_made_up_component")
    with pytest.raises(Exception) as execinfo:
        utilities.run_train(cfg, component_builder)
    # the error message should point at the unresolvable component
    assert "Failed to find component" in str(execinfo.value)
def test_train_model_empty_pipeline(component_builder):
    """A config without a pipeline template yields an empty pipeline,
    which training must reject with a ValueError."""
    cfg = utilities.base_test_conf(pipeline_template=None)
    with pytest.raises(ValueError):
        utilities.run_train(cfg, component_builder)