def test_should_parse_after_deserialization(self):
    """Parse with an engine rebuilt through ``to_dict`` / ``from_dict``.

    Fits an engine on ``BEVERAGE_DATASET``, round-trips it through its
    dict form, checks that this dict can be dumped to JSON and encoded as
    UTF-8, and compares the parse of a sample sentence (input, intent
    name and slots) against hard-coded expectations.
    """
    # Given
    sentence = "Give me 3 cups of hot tea please"
    trained_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

    # When
    serialized = trained_engine.to_dict()
    restored_engine = SnipsNLUEngine.from_dict(serialized)
    parsing = restored_engine.parse(sentence)

    # Then
    failure_msg = "SnipsNLUEngine dict should be json serializable to utf-8"
    with self.fail_if_exception(failure_msg):
        json.dumps(serialized).encode("utf-8")

    cups_slot = resolved_slot(
        {START: 8, END: 9},
        "3",
        {"kind": "Number", "value": 3.0},
        "snips/number",
        "number_of_cups")
    temperature_slot = custom_slot(
        unresolved_slot(
            {START: 18, END: 21},
            "hot",
            "Temperature",
            "beverage_temperature"))

    self.assertEqual(parsing[RES_INPUT], sentence)
    self.assertEqual(parsing[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(parsing[RES_SLOTS], [cups_slot, temperature_slot])
def resolve_slots(input, slots, dataset_entities, language, scope):
    """Turn raw slots into resolved builtin / custom slots.

    Builtin slots are matched against the builtin entities extracted from
    the whole ``input`` (same entity kind and same match range). When no
    such entity exists, extraction is retried on the slot's raw value
    alone, restricted to the slot's entity. Custom slots are resolved
    through the ``UTTERANCES`` mapping of their dataset entity, falling
    back to the raw value when the entity is flagged
    ``AUTOMATICALLY_EXTENSIBLE``. Slots that cannot be resolved are left
    out of the result.

    Args:
        input: text handed to ``get_builtin_entities``.
        slots: iterable of slot mappings, read through ``RES_ENTITY``,
            ``RES_VALUE`` and ``RES_MATCH_RANGE``.
        dataset_entities: mapping from custom entity name to its entity
            data.
        language: forwarded to ``get_builtin_entities``.
        scope: forwarded to the sentence-level ``get_builtin_entities``
            call.

    Returns:
        list: the resolved slots, in the order of ``slots``.
    """
    sentence_entities = get_builtin_entities(input, language, scope)
    output = []

    for slot in slots:
        kind = slot[RES_ENTITY]
        text = slot[RES_VALUE]

        if not is_builtin_entity(kind):
            # Custom slot: known utterance first, then the extensible
            # fallback; anything else is skipped.
            entity = dataset_entities[kind]
            if text in entity[UTTERANCES]:
                value = entity[UTTERANCES][text]
            elif entity[AUTOMATICALLY_EXTENSIBLE]:
                value = text
            else:
                continue
            if value is not None:
                output.append(custom_slot(slot, value))
            continue

        # Builtin slot: look for an entity of the same kind covering
        # exactly the same range in the full sentence.
        for candidate in sentence_entities:
            same_kind = candidate[ENTITY_KIND] == kind
            if same_kind and \
                    candidate[RES_MATCH_RANGE] == slot[RES_MATCH_RANGE]:
                output.append(builtin_slot(slot, candidate[ENTITY]))
                break
        else:
            # Nothing lined up: retry on the raw value only.
            fallback = get_builtin_entities(text, language, scope=[kind])
            if fallback:
                output.append(builtin_slot(slot, fallback[0][VALUE]))

    return output
def test_should_parse_after_deserialization_from_dir(self):
    """Parse with an engine reloaded from ``self.tmp_file_path``.

    Fits an engine on ``BEVERAGE_DATASET``, persists it, loads it back
    with ``SnipsNLUEngine.from_path`` and compares the parse of a sample
    sentence (input, intent name and slots) against hard-coded
    expectations.
    """
    # Given
    sentence = "Give me 3 cups of hot tea please"
    fitted_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

    # When
    fitted_engine.persist(self.tmp_file_path)
    reloaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    outcome = reloaded_engine.parse(sentence)

    # Then
    number_slot = resolved_slot(
        {START: 8, END: 9},
        "3",
        {"kind": "Number", "value": 3.0},
        "snips/number",
        "number_of_cups")
    temperature_slot = custom_slot(
        unresolved_slot(
            {START: 18, END: 21},
            "hot",
            "Temperature",
            "beverage_temperature"))

    self.assertEqual(outcome[RES_INPUT], sentence)
    self.assertEqual(outcome[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(outcome[RES_SLOTS], [number_slot, temperature_slot])
def test_should_parse_after_deserialization(self):
    """Check parsing after a ``to_dict`` / ``from_dict`` round trip.

    An engine fitted on ``BEVERAGE_DATASET`` is exported to a dict and
    rebuilt from it. The test asserts that the dict can be JSON-dumped
    and UTF-8 encoded without raising, and that the rebuilt engine
    returns the expected input, intent name and slots for a sample
    sentence.
    """
    # Given
    query = "Give me 3 cups of hot tea please"
    nlu_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

    # When
    as_dict = nlu_engine.to_dict()
    rebuilt_engine = SnipsNLUEngine.from_dict(as_dict)
    parsed = rebuilt_engine.parse(query)

    # Then
    with self.fail_if_exception(
            "SnipsNLUEngine dict should be json serializable to utf-8"):
        json.dumps(as_dict).encode("utf-8")

    number_range = {START: 8, END: 9}
    temperature_range = {START: 18, END: 21}
    wanted_slots = [
        resolved_slot(number_range, "3",
                      {"kind": "Number", "value": 3.0},
                      "snips/number", "number_of_cups"),
        custom_slot(
            unresolved_slot(temperature_range, "hot", "Temperature",
                            "beverage_temperature")),
    ]

    self.assertEqual(parsed[RES_INPUT], query)
    self.assertEqual(parsed[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(parsed[RES_SLOTS], wanted_slots)
def test_engine_with_keyword_slot_filler_should_be_serializable(self):
    """Persist and reload an engine that uses the keyword slot filler.

    Builds a one-intent dataset from inline YAML, fits an engine whose
    probabilistic parser is configured with a lowercasing
    ``keyword_slot_filler``, persists it to ``self.tmp_file_path``,
    reloads it and checks the slots parsed from a sample sentence.
    """
    # Given
    # The YAML fixture is deliberately flush-left: "---" is only a
    # document marker at column 0, and every mapping key needs its own
    # line.
    dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    intent = "SetLightColor"
    slot_filler_config = {
        "unit_name": "keyword_slot_filler",
        "lowercase": True
    }
    parser_config = ProbabilisticIntentParserConfig(
        slot_filler_config=slot_filler_config)
    engine_config = NLUEngineConfig([parser_config])
    engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
    engine.persist(self.tmp_file_path)
    text = "I want Red lights in the kitchen now"

    # When
    loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    res = loaded_engine.parse(text)

    # Then
    expected_slots = [
        # "Red" is expected to resolve to the lowercase value "red".
        custom_slot(
            unresolved_slot(match_range={START: 7, END: 10},
                            value="Red",
                            entity="color",
                            slot_name="color"),
            "red"),
        custom_slot(
            unresolved_slot(match_range={START: 25, END: 32},
                            value="kitchen",
                            entity="room",
                            slot_name="room"))
    ]
    self.assertListEqual(expected_slots, res["slots"])
def test_should_parse_after_deserialization_from_dir(self):
    """Parse with an engine reloaded from ``self.tmp_file_path``.

    Builds a two-intent dataset (``MakeTea`` / ``MakeCoffee``) from
    inline YAML, fits an engine constructed with the keyword arguments
    returned by ``self.get_shared_data(dataset)``, persists it, loads it
    back with ``SnipsNLUEngine.from_path`` and compares the parse of a
    sample sentence (input, intent name and slots) against hard-coded
    expectations.
    """
    # Given
    # The YAML fixture is deliberately flush-left: "---" is only a
    # document marker at column 0, and every mapping key needs its own
    # line.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    engine = SnipsNLUEngine(**shared).fit(dataset)
    text = "Give me 3 cups of hot tea please"

    # When
    engine.persist(self.tmp_file_path)
    deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    result = deserialized_engine.parse(text)

    # Then
    expected_slots = [
        resolved_slot({START: 8, END: 9}, "3",
                      {"kind": "Number", "value": 3.0},
                      "snips/number", "number_of_cups"),
        custom_slot(
            unresolved_slot({START: 18, END: 21}, "hot", "Temperature",
                            "beverage_temperature"))
    ]
    self.assertEqual(result[RES_INPUT], text)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def resolve_slots(input, slots, dataset_entities, language, scope):
    """Turn raw slots into resolved builtin / custom slots.

    Builtin slots are matched against the builtin entities extracted from
    the whole ``input`` (same entity kind and same match range). When no
    such entity exists, extraction is retried on the slot's raw value
    alone, restricted to the slot's entity. Custom slots are resolved
    through the ``UTTERANCES`` mapping of their dataset entity, trying
    the raw value first and its normalized form second, then falling back
    to the raw value when the entity is flagged
    ``AUTOMATICALLY_EXTENSIBLE``. Slots that cannot be resolved are left
    out of the result.

    Args:
        input: text handed to ``get_builtin_entities``.
        slots: iterable of slot mappings, read through ``RES_ENTITY``,
            ``RES_VALUE`` and ``RES_MATCH_RANGE``.
        dataset_entities: mapping from custom entity name to its entity
            data.
        language: forwarded to ``get_builtin_entities``.
        scope: forwarded to the sentence-level ``get_builtin_entities``
            call.

    Returns:
        list: the resolved slots, in the order of ``slots``.
    """
    # Do not use cached entities here as datetimes must be computed using
    # current context
    sentence_entities = get_builtin_entities(
        input, language, scope, use_cache=False)
    collected = []

    for slot in slots:
        name = slot[RES_ENTITY]
        raw = slot[RES_VALUE]

        if is_builtin_entity(name):
            for candidate in sentence_entities:
                if candidate[ENTITY_KIND] != name:
                    continue
                if candidate[RES_MATCH_RANGE] != slot[RES_MATCH_RANGE]:
                    continue
                collected.append(builtin_slot(slot, candidate[ENTITY]))
                break
            else:
                # No entity lined up with the slot: retry on the raw
                # value only, still bypassing the cache.
                retried = get_builtin_entities(
                    raw, language, scope=[name], use_cache=False)
                if retried:
                    collected.append(builtin_slot(slot, retried[0][VALUE]))
            continue

        # Custom slot
        entity = dataset_entities[name]
        normalized = normalize(raw)
        for key in (raw, normalized):
            if key in entity[UTTERANCES]:
                value = entity[UTTERANCES][key]
                break
        else:
            # Unknown value: kept as-is for extensible entities,
            # otherwise the slot is skipped.
            value = raw if entity[AUTOMATICALLY_EXTENSIBLE] else None

        if value is not None:
            collected.append(custom_slot(slot, value))

    return collected
def test_should_use_parsers_sequentially(self):
    """Check that the engine returns the second parser's result.

    Two mock parsers are registered. ``FirstIntentParser`` adds nothing
    to ``MockIntentParser``; ``SecondIntentParser`` returns a canned
    intent and slots for the test sentence. The engine's parse is
    expected to equal that canned result with every slot wrapped by
    ``custom_slot``.
    """
    # Given
    # The YAML fixture is deliberately flush-left: "---" is only a
    # document marker at column 0, and every mapping key needs its own
    # line.
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    intent = intent_classification_result(intent_name='greeting1',
                                          probability=0.7)
    slots = [
        unresolved_slot(match_range=(6, 11),
                        value='snips',
                        entity='name',
                        slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            if text == input_text:
                return parsing_result(text, intent, slots)
            return empty_result(text, 1.0)

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(s) for s in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_get_slots(self):
    """Check that ``engine.get_slots`` returns the second parser's slots.

    Two mock parsers are registered. ``FirstIntentParser`` adds nothing
    to ``MockIntentParser``; ``SecondIntentParser.get_slots`` returns
    canned slots for the test sentence and intent. The engine's result
    is expected to equal those slots wrapped by ``custom_slot``.
    """
    # Given
    # The YAML fixture is deliberately flush-left: "---" is only a
    # document marker at column 0, and every mapping key needs its own
    # line.
    dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    input_text = "hello snips"
    greeting_intent = "greeting"
    expected_slots = [
        unresolved_slot(match_range=(6, 11),
                        value="snips",
                        entity="name",
                        slot_name="greeted")
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_slots(self, text, intent):
            if text == input_text and intent == greeting_intent:
                return expected_slots
            return []

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    res_slots = engine.get_slots(input_text, greeting_intent)

    # Then
    expected_slots = [custom_slot(s) for s in expected_slots]
    self.assertListEqual(expected_slots, res_slots)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check which mocked slots come back from ``engine.parse``.

    The dataset declares ``dummy_entity_1`` (not automatically
    extensible) and ``dummy_entity_2`` (automatically extensible). The
    mocked CRF parse yields one slot per entity, with values that are not
    listed in the dataset. Only the ``dummy_entity_2`` slot is expected
    in the result.

    Args:
        mocked_regex_parse: mock whose ``return_value`` is set to an
            empty result (presumably injected by a patch decorator
            outside this block).
        mocked_crf_parse: mock whose ``return_value`` is set to the
            canned intent and slots (presumably injected the same way).
    """
    # Given
    closed_entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [
            {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
            {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]},
        ],
    }
    open_entity = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "data": [
            {"value": "dummy2", "synonyms": ["dummy2"]},
        ],
    }
    utterance = {
        "data": [
            {
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name",
            },
            {
                "text": " dummy_2",
                "entity": "dummy_entity_2",
                "slot_name": "other_dummy_slot_name",
            },
        ]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {"dummy_intent_1": {"utterances": [utterance]}},
        "entities": {
            "dummy_entity_1": closed_entity,
            "dummy_entity_2": open_entity,
        },
        "language": "en",
    }

    text = "dummy_3 dummy_4"
    crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    first_slot = unresolved_slot(match_range=(0, 7),
                                 value="dummy_3",
                                 entity="dummy_entity_1",
                                 slot_name="dummy_slot_name")
    second_slot = unresolved_slot(match_range=(8, 15),
                                  value="dummy_4",
                                  entity="dummy_entity_2",
                                  slot_name="other_dummy_slot_name")

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, crf_intent, [first_slot, second_slot])

    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    kept_slot = custom_slot(
        unresolved_slot(match_range=(8, 15),
                        value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(text, intent=crf_intent,
                                     slots=[kept_slot])
    self.assertEqual(expected_result, result)
def test_should_use_parsers_sequentially(self):
    """Check that the engine returns the second parser's result.

    Two throw-away processing units are declared and registered:
    ``TestIntentParser1`` always returns an empty result, while
    ``TestIntentParser2`` returns a canned intent and slots for the test
    sentence. After fitting on ``SAMPLE_DATASET`` and swapping in mocked
    dataset metadata, the engine's parse is expected to equal the canned
    result with every slot wrapped by ``custom_slot``.
    """
    # Given
    input_text = "hello world"
    canned_intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    canned_slots = [
        unresolved_slot(match_range=(6, 11),
                        value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            # Never matches, so the engine has to move on.
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser1(cls.config_type())

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            if text != input_text:
                return empty_result(text)
            return parsing_result(text, canned_intent, canned_slots)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser2(cls.config_type())

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    mocked_entity = {
        "automatically_extensible": True,
        "utterances": dict(),
    }
    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {"mocked_entity": mocked_entity},
        "slot_name_mappings": {
            "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
        },
    }

    parser_configs = [TestIntentParser1Config(), TestIntentParser2Config()]
    engine = SnipsNLUEngine(NLUEngineConfig(parser_configs)).fit(
        SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    wrapped_slots = [custom_slot(slot) for slot in canned_slots]
    expected_parse = parsing_result(input_text, canned_intent, wrapped_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_parse_top_intents(self):
    """Check ``engine.parse(..., top_n=3)`` with and without a filter.

    Builds a three-intent dataset from inline YAML and registers two mock
    parsers that rank the intents differently and return different slots.
    The top-3 results of the engine, both unfiltered and restricted to
    ``intent1`` / ``intent3``, are compared against hard-coded
    expectations.
    """
    # Given
    text = "foo bar ban"
    # The YAML fixture is deliberately flush-left: "---" is only a
    # document marker at column 0, and every mapping key needs its own
    # line.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
- foo [slot1:entity1](bak)
---
type: intent
name: intent2
utterances:
- '[slot2:entity2](foo) baz'
---
type: intent
name: intent3
utterances:
- foo bap""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent1", 0.5),
                intent_classification_result("intent2", 0.3),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return []
            if intent == "intent2":
                return [
                    unresolved_slot((0, 3), "foo", "entity2", "slot2")
                ]
            return []

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent2", 0.6),
                intent_classification_result("intent1", 0.2),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return [
                    unresolved_slot((0, 3), "foo", "entity1", "slot1")
                ]
            if intent == "intent2":
                return [
                    unresolved_slot((8, 11), "ban", "entity2", "slot2")
                ]
            return []

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    nlu_engine = SnipsNLUEngine(config).fit(dataset)

    # When
    results = nlu_engine.parse(text, top_n=3)
    results_with_filter = nlu_engine.parse(
        text, intents=["intent1", "intent3"], top_n=3)

    # Then
    expected_results = [
        extraction_result(
            intent_classification_result("intent2", 0.6),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
        ),
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
    ]
    expected_results_with_filter = [
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
        extraction_result(
            intent_classification_result("intent3", 0.05),
            []
        ),
    ]
    self.assertListEqual(expected_results, results)
    self.assertListEqual(expected_results_with_filter,
                         results_with_filter)
def test_should_use_parsers_sequentially(self):
    """Check that the engine returns the second parser's result.

    Two throw-away processing units are declared and registered:
    ``FirstIntentParser`` always returns an empty result, while
    ``SecondIntentParser`` returns a canned intent and slots for the test
    sentence. After fitting on ``SAMPLE_DATASET`` and swapping in mocked
    dataset metadata, the engine's parse is expected to equal the canned
    result with every slot wrapped by ``custom_slot``.
    """
    # Given
    input_text = "hello world"
    canned_intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    canned_slots = [
        unresolved_slot(match_range=(6, 11),
                        value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name')
    ]

    class FirstIntentParserConfig(ProcessingUnitConfig):
        unit_name = "first_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return FirstIntentParserConfig()

        def get_required_resources(self):
            return None

    class FirstIntentParser(IntentParser):
        unit_name = "first_intent_parser"
        config_type = FirstIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            # Never matches, so the engine has to move on.
            return empty_result(text)

        def persist(self, path):
            # Creates the directory and writes a metadata file holding
            # only the unit name.
            target_dir = Path(path)
            target_dir.mkdir()
            metadata_path = target_dir / "metadata.json"
            with metadata_path.open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            return cls(cls.config_type())

    class SecondIntentParserConfig(ProcessingUnitConfig):
        unit_name = "second_intent_parser"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return SecondIntentParserConfig()

        def get_required_resources(self):
            return None

    class SecondIntentParser(IntentParser):
        unit_name = "second_intent_parser"
        config_type = SecondIntentParserConfig

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if not hasattr(self, '_fitted'):
                return False
            return self._fitted

        def parse(self, text, intents):
            if text != input_text:
                return empty_result(text)
            return parsing_result(text, canned_intent, canned_slots)

        def persist(self, path):
            # Creates the directory and writes a metadata file holding
            # only the unit name.
            target_dir = Path(path)
            target_dir.mkdir()
            metadata_path = target_dir / "metadata.json"
            with metadata_path.open(mode="w") as f:
                f.write(json_string({"unit_name": self.unit_name}))

        @classmethod
        def from_path(cls, path):
            return cls(cls.config_type())

    register_processing_unit(FirstIntentParser)
    register_processing_unit(SecondIntentParser)

    mocked_entity = {
        "automatically_extensible": True,
        "utterances": dict(),
    }
    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {"mocked_entity": mocked_entity},
        "slot_name_mappings": {
            "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
        },
    }

    parser_configs = [FirstIntentParserConfig(), SecondIntentParserConfig()]
    engine = SnipsNLUEngine(NLUEngineConfig(parser_configs)).fit(
        SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    parse = engine.parse(input_text)

    # Then
    wrapped_slots = [custom_slot(slot) for slot in canned_slots]
    expected_parse = parsing_result(input_text, canned_intent, wrapped_slots)
    self.assertDictEqual(expected_parse, parse)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check which mocked slots come back from ``engine.parse``.

    The mocked CRF parse yields two slots: one for ``dummy_entity_1``
    (declared with ``automatically_extensible`` False) and one for
    ``dummy_entity_2`` (declared with ``automatically_extensible`` True).
    Neither slot value is listed in the dataset. Only the
    ``dummy_entity_2`` slot is expected in the result.

    Args:
        mocked_regex_parse: mock whose ``return_value`` is set to an
            empty result (presumably injected by a patch decorator
            outside this block).
        mocked_crf_parse: mock whose ``return_value`` is set to the
            canned intent and slots (presumably injected the same way).
    """
    # Given
    intents = {
        "dummy_intent_1": {
            "utterances": [{
                "data": [
                    {"text": "dummy_1",
                     "entity": "dummy_entity_1",
                     "slot_name": "dummy_slot_name"},
                    {"text": " dummy_2",
                     "entity": "dummy_entity_2",
                     "slot_name": "other_dummy_slot_name"},
                ]
            }]
        }
    }
    entities = {
        "dummy_entity_1": {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [
                {"value": "dummy1",
                 "synonyms": ["dummy1", "dummy1_bis"]},
                {"value": "dummy2",
                 "synonyms": ["dummy2", "dummy2_bis"]},
            ]
        },
        "dummy_entity_2": {
            "use_synonyms": False,
            "automatically_extensible": True,
            "data": [
                {"value": "dummy2", "synonyms": ["dummy2"]},
            ]
        },
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": intents,
        "entities": entities,
        "language": "en",
    }

    text = "dummy_3 dummy_4"
    mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
    mocked_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"),
    ]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_intent, mocked_slots)

    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    surviving_slot = custom_slot(
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(
        text, intent=mocked_intent, slots=[surviving_slot])
    self.assertEqual(expected_result, result)
def test_should_use_parsers_sequentially(self):
    """Check that a later parser is used when an earlier one finds nothing.

    ``TestIntentParser1`` always returns an empty result and
    ``TestIntentParser2`` returns a canned intent and slots for the test
    sentence. Both are registered and configured in that order. After
    fitting on ``SAMPLE_DATASET`` and overriding the engine's dataset
    metadata, the parse is expected to equal the canned result with each
    slot wrapped by ``custom_slot``.
    """
    # Given
    sentence = "hello world"
    expected_intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    raw_slots = [
        unresolved_slot(match_range=(6, 11), value='world',
                        entity='mocked_entity',
                        slot_name='mocked_slot_name'),
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if hasattr(self, '_fitted'):
                return self._fitted
            return False

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            unit_dict = {"unit_name": self.unit_name}
            return unit_dict

        @classmethod
        def from_dict(cls, unit_dict):
            config = cls.config_type()
            return TestIntentParser1(config)

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            if hasattr(self, '_fitted'):
                return self._fitted
            return False

        def parse(self, text, intents):
            # Only the test sentence gets the canned answer.
            if text == sentence:
                return parsing_result(text, expected_intent, raw_slots)
            return empty_result(text)

        def to_dict(self):
            unit_dict = {"unit_name": self.unit_name}
            return unit_dict

        @classmethod
        def from_dict(cls, unit_dict):
            config = cls.config_type()
            return TestIntentParser2(config)

    for unit in (TestIntentParser1, TestIntentParser2):
        register_processing_unit(unit)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": dict(),
            },
        },
        "slot_name_mappings": {
            "dummy_intent_1": {
                "mocked_slot_name": "mocked_entity",
            },
        },
    }

    engine_config = NLUEngineConfig(
        [TestIntentParser1Config(), TestIntentParser2Config()])
    engine = SnipsNLUEngine(engine_config).fit(SAMPLE_DATASET)
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    actual_parse = engine.parse(sentence)

    # Then
    expected_parse = parsing_result(
        sentence, expected_intent, [custom_slot(s) for s in raw_slots])
    self.assertDictEqual(expected_parse, actual_parse)