def test_should_not_build_custom_parser_when_provided(self):
    """Check that a caller-supplied custom entity parser is reused.

    A custom entity parser is built up front and handed to the engine. The
    engine is then created and fitted while ``CustomEntityParser.build`` is
    patched, and the test asserts the patched builder was never called.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    # Built before the patch is active, so this call uses the real builder.
    provided_parser = CustomEntityParser.build(
        dataset,
        CustomEntityParserUsage.WITH_AND_WITHOUT_STEMS,
        load_resources("en"))
    build_target = ("snips_nlu.entity_parser.custom_entity_parser"
                    ".CustomEntityParser.build")

    # When
    with patch(build_target) as mocked_build_parser:
        nlu_engine = SnipsNLUEngine(custom_entity_parser=provided_parser)
        nlu_engine.fit(dataset)

    # Then
    mocked_build_parser.assert_not_called()
def test_should_not_load_resources_when_provided(
        self, mocked_load_resources):
    """Check that resources passed to the engine are not loaded again.

    Args:
        mocked_load_resources: mock whose call count is asserted; presumably
            injected by a ``patch`` decorator that is outside this view.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    preloaded_resources = load_resources("en")

    # When
    nlu_engine = SnipsNLUEngine(resources=preloaded_resources)
    nlu_engine.fit(dataset)

    # Then
    mocked_load_resources.assert_not_called()
def test_should_not_build_builtin_parser_when_provided(self):
    """Check that a caller-supplied builtin entity parser is reused.

    The engine is created and fitted while ``BuiltinEntityParser.build`` is
    patched, and the test asserts the patched builder was never called.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    raw_dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    dataset = validate_and_format_dataset(raw_dataset)
    # Built before the patch is active, so this call uses the real builder.
    provided_parser = BuiltinEntityParser.build(language="en")
    build_target = ("snips_nlu.entity_parser.builtin_entity_parser"
                    ".BuiltinEntityParser.build")

    # When
    with patch(build_target) as mocked_build_parser:
        nlu_engine = SnipsNLUEngine(builtin_entity_parser=provided_parser)
        nlu_engine.fit(dataset)

    # Then
    mocked_build_parser.assert_not_called()
def test_training_should_be_reproducible(self):
    """Check that two engines fitted with the same seed persist identically.

    Both engines are fitted on the same dataset with ``random_state=42``,
    persisted to sibling directories, and the sha256 directory hashes are
    compared.
    """
    # Given
    seed = 42
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me one cup of coffee please
- brew two cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json

    # When
    first_engine = SnipsNLUEngine(random_state=seed)
    first_engine.fit(dataset)
    second_engine = SnipsNLUEngine(random_state=seed)
    second_engine.fit(dataset)

    # Then
    with temp_dir() as tmp_dir:
        first_dir = tmp_dir / "engine1"
        second_dir = tmp_dir / "engine2"
        first_engine.persist(first_dir)
        second_engine.persist(second_dir)
        first_hash = dirhash(str(first_dir), 'sha256')
        second_hash = dirhash(str(second_dir), 'sha256')
        self.assertEqual(first_hash, second_hash)
def test_should_fit_and_parse_empty_intent(self):
    """Smoke-test fitting and parsing with a whitespace-only utterance.

    There is no explicit assertion: the test passes when neither ``fit`` nor
    ``parse`` raises.
    """
    # Given
    blank_utterance = {"data": [{"text": " "}]}
    dataset = {
        "intents": {
            "dummy_intent": {
                "utterances": [blank_utterance]
            }
        },
        "language": "en",
        "entities": {}
    }
    nlu_engine = SnipsNLUEngine(resources=self.get_resources("en"))

    # When / Then
    nlu_engine.fit(dataset)
    nlu_engine.parse("ya", intents=["dummy_intent"])
def test_should_retrain_only_non_trained_subunits(self):
    """Check that ``fit(force_retrain=False)`` skips already-fitted sub-units.

    A mock intent parser tracks two sub-units as ``fitted``/``calls`` dicts.
    The first is marked fitted before training; after fitting the engine,
    only the second sub-unit must have recorded a call.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json

    @IntentParser.register("test_intent_parser", True)
    class TestIntentParser(MockIntentParser):
        def __init__(self, config=None, **shared):
            super(TestIntentParser, self).__init__(config, **shared)
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            # A sub-unit is (re)trained when retraining is forced or when
            # it has not been fitted yet.
            for sub_unit in (self.sub_unit_1, self.sub_unit_2):
                if force_retrain or not sub_unit["fitted"]:
                    sub_unit["fitted"] = True
                    sub_unit["calls"] += 1
            return self

        @property
        def fitted(self):
            return (self.sub_unit_1["fitted"]
                    and self.sub_unit_2["fitted"])

    engine_config = NLUEngineConfig(["test_intent_parser"])
    nlu_engine = SnipsNLUEngine(engine_config)

    # Pre-mark the first sub-unit as fitted without counting a call.
    partially_fitted_parser = TestIntentParser()
    partially_fitted_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(partially_fitted_parser)

    # When
    nlu_engine.fit(dataset, force_retrain=False)

    # Then
    self.assertDictEqual(dict(fitted=True, calls=0),
                         partially_fitted_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         partially_fitted_parser.sub_unit_2)
def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    """Fit and parse with a fresh engine for every supported language.

    The same YAML-defined dataset is reused for each language, with only its
    language field overwritten. Fitting and parsing must not raise, and the
    parsed intent name must be ``MakeCoffee``.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    query = "please brew me a cup of coffee"

    for language in get_all_languages():
        dataset[LANGUAGE] = language
        nlu_engine = SnipsNLUEngine()

        # When / Then
        fit_failure = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(fit_failure):
            nlu_engine = nlu_engine.fit(dataset)

        parse_failure = "Could not parse in '%s'" % language
        with self.fail_if_exception(parse_failure):
            parsing = nlu_engine.parse(query)
        self.assertEqual("MakeCoffee",
                         parsing[RES_INTENT][RES_INTENT_NAME])
def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    """Fit and parse with a fresh engine for every supported language.

    Each iteration works on its own deep copy of ``BEVERAGE_DATASET`` with
    the language field overwritten. The test passes when neither fitting
    nor parsing raises.
    """
    # Given
    query = "brew me an espresso"
    for language in get_all_languages():
        # Copy so the shared module-level dataset is never mutated.
        localized_dataset = deepcopy(BEVERAGE_DATASET)
        localized_dataset[LANGUAGE] = language
        nlu_engine = SnipsNLUEngine()

        # When / Then
        fit_failure = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(fit_failure):
            nlu_engine = nlu_engine.fit(localized_dataset)

        parse_failure = "Could not parse in '%s'" % language
        with self.fail_if_exception(parse_failure):
            nlu_engine.parse(query)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check which mocked slots survive in the engine's parsing result.

    The mocked second parser returns two slots. The expected result keeps
    only the one whose entity (``dummy_entity_2``) is automatically
    extensible, wrapped by ``custom_slot``.

    Args:
        mocked_regex_parse: mock configured to return an empty result.
        mocked_crf_parse: mock configured to return the intent and two slots.
            Both mocks are presumably injected by ``patch`` decorators that
            are outside this view.
    """
    # Given
    intents = {
        "dummy_intent_1": {
            "utterances": [{
                "data": [
                    {
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    },
                    {
                        "text": " dummy_2",
                        "entity": "dummy_entity_2",
                        "slot_name": "other_dummy_slot_name"
                    }
                ]
            }]
        }
    }
    entities = {
        "dummy_entity_1": {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
                {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]}
            ]
        },
        "dummy_entity_2": {
            "use_synonyms": False,
            "automatically_extensible": True,
            "data": [
                {"value": "dummy2", "synonyms": ["dummy2"]}
            ]
        }
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": intents,
        "entities": entities,
        "language": "en"
    }

    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    # Shared keyword arguments for the slot that is expected to be kept.
    kept_slot_kwargs = dict(match_range=(8, 15), value="dummy_4",
                            entity="dummy_entity_2",
                            slot_name="other_dummy_slot_name")
    mocked_crf_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(**kept_slot_kwargs)
    ]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)

    nlu_engine = SnipsNLUEngine()

    # When
    nlu_engine = nlu_engine.fit(dataset)
    actual_result = nlu_engine.parse(text)

    # Then
    expected_slot = custom_slot(unresolved_slot(**kept_slot_kwargs))
    expected_result = parsing_result(text, intent=mocked_crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, actual_result)
def test_should_retrain_only_non_trained_subunits(self):
    """Check that ``fit(force_retrain=False)`` skips already-fitted sub-units.

    A test intent parser (with its own config type) tracks two sub-units as
    ``fitted``/``calls`` dicts. The first is marked fitted before training;
    after fitting the engine, only the second must have recorded a call.
    """
    # Given
    class TestIntentParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParserConfig()

    class TestIntentParser(IntentParser):
        unit_name = "test_intent_parser"
        config_type = TestIntentParserConfig

        def __init__(self, config):
            super(TestIntentParser, self).__init__(config)
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            # A sub-unit is (re)trained when retraining is forced or when
            # it has not been fitted yet.
            for sub_unit in (self.sub_unit_1, self.sub_unit_2):
                if force_retrain or not sub_unit["fitted"]:
                    sub_unit["fitted"] = True
                    sub_unit["calls"] += 1
            return self

        @property
        def fitted(self):
            return (self.sub_unit_1["fitted"]
                    and self.sub_unit_2["fitted"])

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {
                "unit_name": self.unit_name,
            }

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser(cls.config_type())

    register_processing_unit(TestIntentParser)

    parser_config = TestIntentParserConfig()
    engine_config = NLUEngineConfig([parser_config])
    nlu_engine = SnipsNLUEngine(engine_config)

    # Pre-mark the first sub-unit as fitted without counting a call.
    partially_fitted_parser = TestIntentParser(parser_config)
    partially_fitted_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(partially_fitted_parser)

    # When
    nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

    # Then
    self.assertDictEqual(dict(fitted=True, calls=0),
                         partially_fitted_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         partially_fitted_parser.sub_unit_2)
def test_should_retrain_only_non_trained_subunits(self):
    """Check that ``fit(force_retrain=False)`` skips already-fitted sub-units.

    A test intent parser (persistable to a directory, with its own config
    type) tracks two sub-units as ``fitted``/``calls`` dicts. The first is
    marked fitted before training; after fitting the engine, only the second
    must have recorded a call.
    """
    # Given
    class TestIntentParserConfig(ProcessingUnitConfig):
        unit_name = "test_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParserConfig()

        def get_required_resources(self):
            return None

    class TestIntentParser(IntentParser):
        unit_name = "test_intent_parser"
        config_type = TestIntentParserConfig

        def __init__(self, config):
            super(TestIntentParser, self).__init__(config)
            self.sub_unit_1 = dict(fitted=False, calls=0)
            self.sub_unit_2 = dict(fitted=False, calls=0)

        def fit(self, dataset, force_retrain):
            # A sub-unit is (re)trained when retraining is forced or when
            # it has not been fitted yet.
            for sub_unit in (self.sub_unit_1, self.sub_unit_2):
                if force_retrain or not sub_unit["fitted"]:
                    sub_unit["fitted"] = True
                    sub_unit["calls"] += 1
            return self

        @property
        def fitted(self):
            return (self.sub_unit_1["fitted"]
                    and self.sub_unit_2["fitted"])

        def parse(self, text, intents):
            return empty_result(text)

        def persist(self, path):
            # Create the unit directory, then write its metadata file.
            unit_dir = Path(path)
            unit_dir.mkdir()
            metadata = json_string({"unit_name": self.unit_name})
            with (unit_dir / "metadata.json").open(mode="w") as out:
                out.write(metadata)

        @classmethod
        def from_path(cls, path):
            return cls(cls.config_type())

    register_processing_unit(TestIntentParser)

    parser_config = TestIntentParserConfig()
    engine_config = NLUEngineConfig([parser_config])
    nlu_engine = SnipsNLUEngine(engine_config)

    # Pre-mark the first sub-unit as fitted without counting a call.
    partially_fitted_parser = TestIntentParser(parser_config)
    partially_fitted_parser.sub_unit_1.update(dict(fitted=True, calls=0))
    nlu_engine.intent_parsers.append(partially_fitted_parser)

    # When
    nlu_engine.fit(SAMPLE_DATASET, force_retrain=False)

    # Then
    self.assertDictEqual(dict(fitted=True, calls=0),
                         partially_fitted_parser.sub_unit_1)
    self.assertDictEqual(dict(fitted=True, calls=1),
                         partially_fitted_parser.sub_unit_2)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check which mocked slots survive in the engine's parsing result.

    The mocked second parser returns two slots. The expected result keeps
    only the one whose entity (``dummy_entity_2``) is automatically
    extensible, wrapped by ``custom_slot``.

    Args:
        mocked_regex_parse: mock configured to return an empty result.
        mocked_crf_parse: mock configured to return the intent and two slots.
            Both mocks are presumably injected by ``patch`` decorators that
            are outside this view.
    """
    # Given
    utterance = {
        "data": [
            {
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            },
            {
                "text": " dummy_2",
                "entity": "dummy_entity_2",
                "slot_name": "other_dummy_slot_name"
            }
        ]
    }
    closed_entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [
            {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
            {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]}
        ]
    }
    extensible_entity = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "data": [
            {"value": "dummy2", "synonyms": ["dummy2"]}
        ]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {"utterances": [utterance]}
        },
        "entities": {
            "dummy_entity_1": closed_entity,
            "dummy_entity_2": extensible_entity
        },
        "language": "en"
    }

    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    closed_entity_slot = unresolved_slot(
        match_range=(0, 7), value="dummy_3", entity="dummy_entity_1",
        slot_name="dummy_slot_name")
    extensible_entity_slot = unresolved_slot(
        match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
        slot_name="other_dummy_slot_name")
    mocked_crf_slots = [closed_entity_slot, extensible_entity_slot]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)

    nlu_engine = SnipsNLUEngine()

    # When
    nlu_engine = nlu_engine.fit(dataset)
    actual_result = nlu_engine.parse(text)

    # Then
    # Build a fresh slot rather than reusing the mocked one, as the
    # original test does.
    expected_slot = custom_slot(unresolved_slot(
        match_range=(8, 15), value="dummy_4", entity="dummy_entity_2",
        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(text, intent=mocked_crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, actual_result)