def test_should_get_builtin_slots(self):
    """Check slot extraction on a ``GetWeather`` intent.

    The slot filler is fitted on utterances tagging a ``snips/datetime``
    slot and a ``weather_location`` slot, and must return both slots with
    their exact character ranges for an unseen sentence.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: GetWeather
utterances:
- what is the weather [datetime:snips/datetime](at 9pm)
- what's the weather in [location:weather_location](berlin)
- What's the weather in [location](tokyo) [datetime](this weekend)?
- Can you tell me the weather [datetime] please ?
- what is the weather forecast [datetime] in [location](paris)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    filler_config = CRFSlotFillerConfig(random_seed=42)
    weather_intent = "GetWeather"
    shared_data = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(filler_config, **shared_data)
    slot_filler.fit(dataset, weather_intent)

    # When
    slots = slot_filler.get_slots("Give me the weather at 9pm in Paris")

    # Then
    datetime_slot = unresolved_slot(
        match_range={START: 20, END: 26},
        value='at 9pm',
        entity='snips/datetime',
        slot_name='datetime')
    location_slot = unresolved_slot(
        match_range={START: 30, END: 35},
        value='Paris',
        entity='weather_location',
        slot_name='location')
    expected_slots = [datetime_slot, location_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_get_builtin_slots(self):
    """Check slot extraction on the ``SearchWeatherForecast`` intent.

    The slot filler is fitted on ``WEATHER_DATASET`` and must return the
    ``snips/datetime`` and ``weather_location`` slots, with their exact
    character ranges, for an unseen sentence.
    """
    # Given
    dataset = validate_and_format_dataset(WEATHER_DATASET)
    filler_config = CRFSlotFillerConfig(random_seed=42)
    slot_filler = CRFSlotFiller(filler_config)
    slot_filler.fit(dataset, "SearchWeatherForecast")

    # When
    slots = slot_filler.get_slots("Give me the weather at 9p.m. in Paris")

    # Then
    datetime_slot = unresolved_slot(
        match_range={START: 20, END: 28},
        value='at 9p.m.',
        entity='snips/datetime',
        slot_name='datetime')
    location_slot = unresolved_slot(
        match_range={START: 32, END: 37},
        value='Paris',
        entity='weather_location',
        slot_name='location')
    expected_slots = [datetime_slot, location_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_get_sub_builtin_slots(self):
    """Check that two slots sharing the ``snips/datetime`` entity are told apart.

    The ``PlanBreak`` intent uses ``start`` and ``end`` slots, both typed
    ``snips/datetime``; each must be returned with its own slot name.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: PlanBreak
utterances:
- 'I want to leave from [start:snips/datetime](tomorrow) until [end:snips/datetime](next thursday)'
- find me something from [start](9am) to [end](12pm)
- I need a break from [start](2pm) until [end](4pm)
- Can you suggest something from [start](april 4th) until [end](april 6th) ?
- Book me a trip from [start](this friday) to [end](next tuesday)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    filler_config = CRFSlotFillerConfig(random_seed=42)
    break_intent = "PlanBreak"
    shared_data = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(filler_config, **shared_data)
    slot_filler.fit(dataset, break_intent)

    # When
    slots = slot_filler.get_slots("Find me a plan from 5pm to 6pm")

    # Then
    start_slot = unresolved_slot(
        match_range={START: 20, END: 23},
        value="5pm",
        entity="snips/datetime",
        slot_name="start")
    end_slot = unresolved_slot(
        match_range={START: 27, END: 30},
        value="6pm",
        entity="snips/datetime",
        slot_name="end")
    expected_slots = [start_slot, end_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_get_slots_with_keywords_slot_filler(self):
    """Check that ``KeywordSlotFiller`` finds the trained keywords.

    After fitting on ``SetLightColor`` utterances, the words ``red`` and
    ``kitchen`` must be returned as ``color`` and ``room`` slots.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    light_intent = "SetLightColor"
    keyword_filler = KeywordSlotFiller().fit(dataset, light_intent)

    # When
    slots = keyword_filler.get_slots("I want red lights in the kitchen now")

    # Then
    color_slot = unresolved_slot(
        match_range={START: 7, END: 10},
        value="red",
        entity="color",
        slot_name="color")
    room_slot = unresolved_slot(
        match_range={START: 25, END: 32},
        value="kitchen",
        entity="room",
        slot_name="room")
    expected_slots = [color_slot, room_slot]
    self.assertListEqual(slots, expected_slots)
def get_slots(self, text, intent):
    """Return a canned slot list that depends only on *intent*.

    Args:
        text (str): Ignored.
        intent (str): ``"intent1"`` and ``"intent2"`` each yield one
            hard-coded slot; any other value yields no slot.

    Returns:
        list: The canned slots for *intent*, possibly empty.
    """
    if intent not in ("intent1", "intent2"):
        return []
    if intent == "intent1":
        canned_slot = unresolved_slot((0, 3), "foo", "entity1", "slot1")
    else:
        canned_slot = unresolved_slot((8, 11), "ban", "entity2", "slot2")
    return [canned_slot]
def test_should_parse_stop_words_slots(self):
    """Check that slot values made only of stop words are still parsed.

    The parser is configured with ``ignore_stop_words=True`` and the stop
    words ``a``, ``this`` and ``that``; "search this" and "search that"
    must both match the ``search`` intent with a ``search_object`` slot.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: search
utterances:
- search
- search [search_object](this)
- search [search_object](a cat)

---
type: entity
name: search_object
values:
- [this thing, that]
""")
    resources = self.get_resources("en")
    resources[STOP_WORDS] = {"a", "this", "that"}
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    config = DeterministicIntentParserConfig(ignore_stop_words=True)
    parser = DeterministicIntentParser(config=config, resources=resources)
    parser.fit(dataset)

    # When
    result_this = parser.parse("search this")
    result_that = parser.parse("search that")

    # Then
    expected_intent = intent_classification_result(
        intent_name="search", probability=1.0)
    expected_slots_this = [
        unresolved_slot(
            match_range=(7, 11),
            value="this",
            entity="search_object",
            slot_name="search_object")
    ]
    expected_slots_that = [
        unresolved_slot(
            match_range=(7, 11),
            value="that",
            entity="search_object",
            slot_name="search_object")
    ]
    self.assertEqual(expected_intent, result_this[RES_INTENT])
    self.assertEqual(expected_intent, result_that[RES_INTENT])
    self.assertListEqual(expected_slots_this, result_this[RES_SLOTS])
    self.assertListEqual(expected_slots_that, result_that[RES_SLOTS])
def test_should_parse_after_deserialization(self):
    """Check an engine rebuilt from its dict form parses like the original.

    Also verifies that the dict form can be dumped to JSON and encoded as
    UTF-8.
    """
    # Given
    trained_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)
    input_ = "Give me 3 cups of hot tea please"

    # When
    engine_dict = trained_engine.to_dict()
    restored_engine = SnipsNLUEngine.from_dict(engine_dict)
    result = restored_engine.parse(input_)

    # Then
    msg = "SnipsNLUEngine dict should be json serializable to utf-8"
    with self.fail_if_exception(msg):
        json.dumps(engine_dict).encode("utf-8")

    number_slot = resolved_slot(
        {START: 8, END: 9},
        '3',
        {'kind': 'Number', 'value': 3.0},
        'snips/number',
        'number_of_cups')
    temperature_slot = custom_slot(
        unresolved_slot(
            {START: 18, END: 21},
            'hot',
            'Temperature',
            'beverage_temperature'))
    expected_slots = [number_slot, temperature_slot]
    self.assertEqual(result[RES_INPUT], input_)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def test_should_parse_after_deserialization(self):
    """Round-trip an engine through ``to_dict``/``from_dict`` and parse.

    The dict form must be JSON-dumpable and UTF-8 encodable, and the
    rebuilt engine must return the ``MakeTea`` intent with its two slots.
    """
    # Given
    sentence = "Give me 3 cups of hot tea please"
    fitted_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)

    # When
    serialized = fitted_engine.to_dict()
    reloaded_engine = SnipsNLUEngine.from_dict(serialized)
    parsing = reloaded_engine.parse(sentence)

    # Then
    msg = "SnipsNLUEngine dict should be json serializable to utf-8"
    with self.fail_if_exception(msg):
        json.dumps(serialized).encode("utf-8")

    cups_range = {START: 8, END: 9}
    cups_value = {'kind': 'Number', 'value': 3.0}
    temperature_range = {START: 18, END: 21}
    expected_slots = [
        resolved_slot(cups_range, '3', cups_value, 'snips/number',
                      'number_of_cups'),
        custom_slot(
            unresolved_slot(temperature_range, 'hot', 'Temperature',
                            'beverage_temperature')),
    ]
    self.assertEqual(parsing[RES_INPUT], sentence)
    self.assertEqual(parsing[RES_INTENT][RES_INTENT_NAME], 'MakeTea')
    self.assertListEqual(parsing[RES_SLOTS], expected_slots)
def test_should_serialize_results(self):
    """Check the dict built by ``parsing_result`` and that it dumps to JSON.

    The slot is created with a ``[3, 5]`` list as match range and must
    appear in the result as a ``{"start": 3, "end": 5}`` mapping.
    """
    # Given
    input_ = "hello world"
    intent = intent_classification_result("world", 0.5)
    slot = unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")

    # When
    result = parsing_result(input=input_, intent=intent, slots=[slot])

    # Then
    msg = "Result dict should be json serializable"
    with self.fail_if_exception(msg):
        json.dumps(result)

    expected_intent = {RES_INTENT_NAME: 'world', RES_PROBABILITY: 0.5}
    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value',
    }
    expected_result = {
        RES_INTENT: expected_intent,
        RES_SLOTS: [expected_slot],
        RES_INPUT: input_,
    }
    self.assertDictEqual(expected_result, result)
def _get_matching_result(self, text, processed_text, regex, intent,
                         builtin_entities_ranges_mapping=None):
    """Build a parsing result from one regex applied to *processed_text*.

    Args:
        text (str): Original input; slot values are sliced from it.
        processed_text (str): Text the regex is matched against.
        regex: Compiled pattern whose named groups map to slot names
            through ``self.group_names_to_slot_names``.
        intent (str): Intent name reported in the result, also used to
            look up entities in ``self.slot_names_to_entities``.
        builtin_entities_ranges_mapping (dict, optional): Maps
            ``(start, end)`` spans of *processed_text* to match ranges in
            *text*. Spans missing from it are moved by the offset
            returned by ``_get_range_shift``. When ``None``, spans are
            used unchanged.

    Returns:
        dict or None: ``None`` when the regex does not match, otherwise
        the parsing result with probability 1.0 and the deduplicated
        slots sorted by start position.
    """
    match = regex.match(processed_text)
    if match is None:
        return None

    classification = intent_classification_result(
        intent_name=intent, probability=1.0)
    has_mapping = builtin_entities_ranges_mapping is not None

    collected_slots = []
    for group in match.groupdict():
        slot_name = self.group_names_to_slot_names[group]
        entity = self.slot_names_to_entities[intent][slot_name]
        span = (match.start(group), match.end(group))

        if not has_mapping:
            match_range = {START: span[0], END: span[1]}
        elif span in builtin_entities_ranges_mapping:
            match_range = builtin_entities_ranges_mapping[span]
        else:
            shift = _get_range_shift(span, builtin_entities_ranges_mapping)
            match_range = {START: span[0] + shift, END: span[1] + shift}

        # The value always comes from the original text, not the
        # processed one.
        slot_value = text[match_range[START]:match_range[END]]
        collected_slots.append(
            unresolved_slot(
                match_range=match_range,
                value=slot_value,
                entity=entity,
                slot_name=slot_name))

    deduplicated = _deduplicate_overlapping_slots(
        collected_slots, self.language)
    ordered_slots = sorted(
        deduplicated, key=lambda s: s[RES_MATCH_RANGE][START])
    return parsing_result(text, classification, ordered_slots)
def test_should_get_slots(self):
    """Check that a fitted CRF slot filler extracts ``number_of_cups``.

    The filler is built from the shared data with a fixed random state
    and must tag ``two`` in an unseen ``MakeTea`` sentence.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](five) cups of tea
- please I want [number_of_cups](two) cups of tea""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    shared_data = self.get_shared_data(dataset)
    shared_data[RANDOM_STATE] = 42
    slot_filler = CRFSlotFiller(**shared_data)
    slot_filler.fit(dataset, "MakeTea")

    # When
    slots = slot_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(slots, expected_slots)
def test_should_parse_with_filter(self):
    """Check that the ``intents`` filter restricts the parser's output.

    With three intents trained, parsing "foo bar baz" restricted to
    ``intent1`` and ``intent3`` must return ``intent1`` and its slot.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: intent1
utterances:
- "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
- foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
- foz for [slot3:entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    shared_data = self.get_shared_data(dataset)
    shared_data[RANDOM_STATE] = 42
    parser = ProbabilisticIntentParser(**shared_data)
    parser.fit(dataset)
    text = "foo bar baz"
    allowed_intents = ["intent1", "intent3"]

    # When
    result = parser.parse(text, intents=allowed_intents)

    # Then
    expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]
    self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(expected_slots, result[RES_SLOTS])
def test_should_parse_after_deserialization_from_dir(self):
    """Check an engine persisted to disk and reloaded parses as expected.

    The engine is written to ``self.tmp_file_path``, loaded back with
    ``from_path`` and must return the ``MakeTea`` intent with its slots.
    """
    # Given
    trained_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)
    input_ = "Give me 3 cups of hot tea please"

    # When
    trained_engine.persist(self.tmp_file_path)
    reloaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    result = reloaded_engine.parse(input_)

    # Then
    number_slot = resolved_slot(
        {START: 8, END: 9},
        "3",
        {"kind": "Number", "value": 3.0},
        "snips/number",
        "number_of_cups")
    temperature_slot = custom_slot(
        unresolved_slot(
            {START: 18, END: 21},
            "hot",
            "Temperature",
            "beverage_temperature"))
    expected_slots = [number_slot, temperature_slot]
    self.assertEqual(result[RES_INPUT], input_)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def test_should_be_serializable_into_bytearray(self):
    """Round-trip a fitted CRF slot filler through a byte array.

    The filler rebuilt with ``from_byte_array`` (given the same shared
    data) must still extract ``number_of_cups`` from a sentence.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    shared_data = self.get_shared_data(dataset)
    fitted_filler = CRFSlotFiller(**shared_data).fit(dataset, "MakeTea")

    # When
    serialized = fitted_filler.to_byte_array()
    reloaded_filler = CRFSlotFiller.from_byte_array(
        serialized, **shared_data)
    slots = reloaded_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_get_slots_after_deserialization(self):
    """Check a CRF slot filler persisted to disk still extracts slots.

    The filler is fitted with a fixed random state, written to
    ``self.tmp_file_path`` and reloaded with the same shared data.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    tea_intent = "MakeTea"
    shared_data = self.get_shared_data(dataset)
    shared_data[RANDOM_STATE] = 42
    original_filler = CRFSlotFiller(**shared_data)
    original_filler.fit(dataset, tea_intent)
    original_filler.persist(self.tmp_file_path)
    reloaded_filler = CRFSlotFiller.from_path(
        self.tmp_file_path, **shared_data)

    # When
    slots = reloaded_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_serialize_results(self):
    """Check the dict built by ``parsing_result`` and that it dumps to JSON.

    The slot is created with a ``[3, 5]`` list as match range and must
    appear in the result as a ``{"start": 3, "end": 5}`` mapping.
    """
    # Given
    input_ = "hello world"
    intent = intent_classification_result("world", 0.5)
    slot = unresolved_slot([3, 5], "slot_value", "slot_entity", "slot_name")

    # When
    result = parsing_result(input=input_, intent=intent, slots=[slot])

    # Then
    msg = "Result dict should be json serializable"
    with self.fail_if_exception(msg):
        json.dumps(result)

    expected_intent = {RES_INTENT_NAME: 'world', RES_PROBA: 0.5}
    expected_slot = {
        RES_MATCH_RANGE: {"start": 3, "end": 5},
        RES_ENTITY: 'slot_entity',
        RES_SLOT_NAME: 'slot_name',
        RES_VALUE: 'slot_value',
    }
    expected_result = {
        RES_INTENT: expected_intent,
        RES_SLOTS: [expected_slot],
        RES_INPUT: input_,
    }
    self.assertDictEqual(expected_result, result)
def test_should_be_serializable_into_bytearray(self):
    """Round-trip a fitted CRF slot filler through a byte array.

    The entity parsers of the fitted filler are handed to
    ``from_byte_array``; the rebuilt filler must still extract
    ``number_of_cups``.
    """
    # Given
    fitted_filler = CRFSlotFiller().fit(BEVERAGE_DATASET, "MakeTea")
    entity_parsers = {
        "builtin_entity_parser": fitted_filler.builtin_entity_parser,
        "custom_entity_parser": fitted_filler.custom_entity_parser,
    }

    # When
    serialized = fitted_filler.to_byte_array()
    reloaded_filler = CRFSlotFiller.from_byte_array(
        serialized, **entity_parsers)
    slots = reloaded_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(expected_slots, slots)
def test_should_get_slots_after_deserialization(self):
    """Check a CRF slot filler persisted to disk still extracts slots.

    The filler is written to ``self.tmp_file_path`` and reloaded with the
    entity parsers taken from the fitted instance.
    """
    # Given
    filler_config = CRFSlotFillerConfig(random_seed=42)
    original_filler = CRFSlotFiller(filler_config)
    original_filler.fit(BEVERAGE_DATASET, "MakeTea")
    original_filler.persist(self.tmp_file_path)
    entity_parsers = {
        "custom_entity_parser": original_filler.custom_entity_parser,
        "builtin_entity_parser": original_filler.builtin_entity_parser,
    }
    reloaded_filler = CRFSlotFiller.from_path(
        self.tmp_file_path, **entity_parsers)

    # When
    slots = reloaded_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(expected_slots, slots)
def mock_proba_parse(text, intents):
    """Return a fake parsing result tagging the whole *text* as one slot.

    Args:
        text (str): Input; becomes the value of the single ``slot1`` slot,
            whose range spans the full string.
        intents: Ignored.

    Returns:
        dict: ``parsing_result`` built with ``mocked_proba_parser_intent``
        from the enclosing scope and the single slot.
    """
    whole_text_slot = unresolved_slot(
        match_range=(0, len(text)),
        value=text,
        entity="entity1",
        slot_name="slot1")
    return parsing_result(
        text, mocked_proba_parser_intent, [whole_text_slot])
def test_engine_with_keyword_slot_filler_should_be_serializable(self):
    """Check an engine using the keyword slot filler survives persistence.

    The probabilistic parser is configured with the
    ``keyword_slot_filler`` unit in lowercase mode, so ``Red`` in the
    input must be matched and resolved to ``red`` after the engine has
    been persisted and reloaded.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    slot_filler_config = {
        "unit_name": "keyword_slot_filler",
        "lowercase": True
    }
    parser_config = ProbabilisticIntentParserConfig(
        slot_filler_config=slot_filler_config)
    engine_config = NLUEngineConfig([parser_config])
    # The engine is fitted on the whole dataset. No intent name is passed:
    # the second positional parameter of ``fit`` is not an intent.
    engine = SnipsNLUEngine(engine_config).fit(dataset)
    engine.persist(self.tmp_file_path)
    text = "I want Red lights in the kitchen now"

    # When
    loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    res = loaded_engine.parse(text)

    # Then
    expected_slots = [
        custom_slot(
            unresolved_slot(
                match_range={START: 7, END: 10},
                value="Red",
                entity="color",
                slot_name="color"),
            "red"),
        custom_slot(
            unresolved_slot(
                match_range={START: 25, END: 32},
                value="kitchen",
                entity="room",
                slot_name="room"))
    ]
    self.assertListEqual(expected_slots, res["slots"])
def parse(self, text, intents=None, top_n=None):
    """Return a fake parsing result tagging the whole *text* as one slot.

    Args:
        text (str): Input; becomes the value of the single ``slot1`` slot,
            whose range spans the full string.
        intents: Ignored.
        top_n: Ignored.

    Returns:
        dict: ``parsing_result`` built with ``mocked_intent`` from the
        enclosing scope and the single slot.
    """
    whole_text_slot = unresolved_slot(
        match_range=(0, len(text)),
        value=text,
        entity="entity1",
        slot_name="slot1")
    return parsing_result(text, mocked_intent, [whole_text_slot])
def tags_to_slots(text, tokens, tags, tagging_scheme, intent_slots_mapping):
    """Convert token-level tags into unresolved slots.

    Args:
        text (str): Original text; slot values are sliced from it.
        tokens: Tokens of *text*, forwarded to ``tags_to_preslots``.
        tags: One tag per token, forwarded to ``tags_to_preslots``.
        tagging_scheme: Tagging scheme, forwarded to ``tags_to_preslots``.
        intent_slots_mapping (dict): Maps each slot name to its entity.

    Returns:
        list: One unresolved slot per pre-slot, in the order returned by
        ``tags_to_preslots``.
    """
    unresolved = []
    for preslot in tags_to_preslots(tokens, tags, tagging_scheme):
        slot_range = preslot[RANGE]
        slot_value = text[slot_range[START]:slot_range[END]]
        slot_name = preslot[SLOT_NAME]
        unresolved.append(
            unresolved_slot(
                match_range=slot_range,
                value=slot_value,
                entity=intent_slots_mapping[slot_name],
                slot_name=slot_name))
    return unresolved
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    The intent and its slots are extracted together: the first regex that
    matches the text, once its builtin entities have been replaced,
    decides the result.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    if isinstance(intents, str):
        intents = [intents]

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    for intent_name, intent_regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent_name not in intents:
            continue

        for pattern in intent_regexes:
            found = pattern.match(processed_text)
            if found is None:
                continue

            classification = intent_classification_result(
                intent_name=intent_name, probability=1.0)
            matched_slots = []
            for group in found.groupdict():
                slot_name = self.group_names_to_slot_names[group]
                entity = self.slot_names_to_entities[slot_name]
                span = (found.start(group), found.end(group))
                if span in ranges_mapping:
                    # Span of a replaced builtin entity: use the mapped
                    # range and read the value from the original text.
                    match_range = ranges_mapping[span]
                    value = text[match_range[START]:match_range[END]]
                else:
                    match_range = {START: span[0], END: span[1]}
                    value = found.group(group)
                matched_slots.append(
                    unresolved_slot(
                        match_range=match_range,
                        value=value,
                        entity=entity,
                        slot_name=slot_name))

            deduplicated = _deduplicate_overlapping_slots(
                matched_slots, self.language)
            ordered_slots = sorted(
                deduplicated, key=lambda s: s[RES_MATCH_RANGE][START])
            return parsing_result(text, classification, ordered_slots)

    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Regexes are tried intent by intent against the text with its builtin
    entities replaced; the first match yields both the intent and the
    slots.

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
        See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")

    # A single intent name is accepted and treated as a one-item list.
    if isinstance(intents, str):
        intents = [intents]

    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)

    def build_slot(regex_match, group):
        # Turn one named group of the match into an unresolved slot.
        name = self.group_names_to_slot_names[group]
        entity = self.slot_names_to_entities[name]
        span = (regex_match.start(group), regex_match.end(group))
        if span in ranges_mapping:
            match_range = ranges_mapping[span]
            value = text[match_range[START]:match_range[END]]
        else:
            match_range = {START: span[0], END: span[1]}
            value = regex_match.group(group)
        return unresolved_slot(
            match_range=match_range,
            value=value,
            entity=entity,
            slot_name=name)

    for candidate, candidate_regexes in iteritems(self.regexes_per_intent):
        if intents is not None and candidate not in intents:
            continue
        for candidate_regex in candidate_regexes:
            regex_match = candidate_regex.match(processed_text)
            if regex_match is None:
                continue
            intent_result = intent_classification_result(
                intent_name=candidate, probability=1.0)
            raw_slots = [
                build_slot(regex_match, group)
                for group in regex_match.groupdict()
            ]
            unique_slots = _deduplicate_overlapping_slots(
                raw_slots, self.language)
            sorted_slots = sorted(
                unique_slots, key=lambda s: s[RES_MATCH_RANGE][START])
            return parsing_result(text, intent_result, sorted_slots)

    return empty_result(text)
def get_slots(self, text):
    """Extract slots by looking each token of *text* up in a keyword table.

    Args:
        text (str): Input, tokenized with ``tokenize`` for
            ``self.language``.

    Returns:
        list: One unresolved slot per token found in
        ``self.slots_keywords``, in token order. The slot keeps the
        token's original casing as its value, even when the lookup is
        done in lowercase.
    """
    # The option does not change between tokens: read it once.
    lowercase = self.config.get("lowercase", False)

    slots = []
    for token in tokenize(text, self.language):
        keyword = token.value.lower() if lowercase else token.value
        if keyword not in self.slots_keywords:
            continue
        # Entries are indexed as (entity, slot name).
        keyword_entry = self.slots_keywords[keyword]
        slots.append(
            unresolved_slot(
                (token.start, token.end),
                token.value,
                keyword_entry[0],
                keyword_entry[1]))
    return slots
def test_should_parse_after_deserialization_from_dir(self):
    """Check an engine persisted to disk and reloaded parses as expected.

    The engine is trained on ``MakeTea`` and ``MakeCoffee`` intents,
    written to ``self.tmp_file_path`` and loaded back with ``from_path``.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    shared_data = self.get_shared_data(dataset)
    trained_engine = SnipsNLUEngine(**shared_data).fit(dataset)
    text = "Give me 3 cups of hot tea please"

    # When
    trained_engine.persist(self.tmp_file_path)
    reloaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    result = reloaded_engine.parse(text)

    # Then
    number_slot = resolved_slot(
        {START: 8, END: 9},
        "3",
        {"kind": "Number", "value": 3.0},
        "snips/number",
        "number_of_cups")
    temperature_slot = custom_slot(
        unresolved_slot(
            {START: 18, END: 21},
            "hot",
            "Temperature",
            "beverage_temperature"))
    expected_slots = [number_slot, temperature_slot]
    self.assertEqual(result[RES_INPUT], text)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def test_should_get_slots(self):
    """Check that a fitted CRF slot filler extracts ``number_of_cups``.

    The filler is fitted on the validated ``BEVERAGE_DATASET`` with a
    fixed random seed and must tag ``two`` in a ``MakeTea`` sentence.
    """
    # Given
    dataset = validate_and_format_dataset(BEVERAGE_DATASET)
    filler_config = CRFSlotFillerConfig(random_seed=42)
    slot_filler = CRFSlotFiller(filler_config)
    slot_filler.fit(dataset, "MakeTea")

    # When
    slots = slot_filler.get_slots("make me two cups of tea")

    # Then
    cups_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    expected_slots = [cups_slot]
    self.assertListEqual(slots, expected_slots)
def test_should_use_parsers_sequentially(self):
    """Check the engine returns the second parser's result.

    Two mock parsers are registered. Only the second one overrides
    ``parse``, returning a result for the test input. The engine output
    must be that result with its slots wrapped by ``custom_slot``.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: greeting1
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    input_text = "hello snips"
    greeting = intent_classification_result(
        intent_name='greeting1', probability=0.7)
    raw_slots = [
        unresolved_slot(
            match_range=(6, 11),
            value='snips',
            entity='name',
            slot_name='greeted')
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def parse(self, text, intents=None, top_n=None):
            # Only the exact test sentence produces a result.
            if text != input_text:
                return empty_result(text, 1.0)
            return parsing_result(text, greeting, raw_slots)

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    actual_parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(slot) for slot in raw_slots]
    expected_parse = parsing_result(input_text, greeting, expected_slots)
    self.assertDictEqual(expected_parse, actual_parse)
def test_should_deduplicate_overlapping_slots(self):
    """Check which slots survive deduplication of a chain of overlaps.

    Three slots over ranges 9-16, 10-18 and 17-23 overlap one another;
    only the 17-23 one is expected to remain, alongside the two
    non-overlapping slots.
    """
    # Given
    language = LANGUAGE_EN

    def make_slot(start, end, value, entity, name):
        # Ranges are passed as two-item lists.
        return unresolved_slot([start, end], value, entity, name)

    slots = [
        make_slot(3, 7, "non_overlapping1", "e", "s1"),
        make_slot(9, 16, "aaaaaaa", "e1", "s2"),
        make_slot(10, 18, "bbbbbbbb", "e1", "s3"),
        make_slot(17, 23, "b cccc", "e1", "s4"),
        make_slot(50, 60, "non_overlapping2", "e", "s5"),
    ]

    # When
    deduplicated_slots = _deduplicate_overlapping_slots(slots, language)

    # Then
    expected_slots = [
        make_slot(3, 7, "non_overlapping1", "e", "s1"),
        make_slot(17, 23, "b cccc", "e1", "s4"),
        make_slot(50, 60, "non_overlapping2", "e", "s5"),
    ]
    self.assertSequenceEqual(deduplicated_slots, expected_slots)
def test_should_get_slots(self):
    """Check ``engine.get_slots`` returns the second parser's slots.

    Two mock parsers are registered. Only the second one overrides
    ``get_slots``, returning slots for the test input and intent. The
    engine output must be those slots wrapped by ``custom_slot``.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: greeting
utterances:
- hello [greeted:name](john)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    input_text = "hello snips"
    greeting_intent = "greeting"
    raw_slots = [
        unresolved_slot(
            match_range=(6, 11),
            value="snips",
            entity="name",
            slot_name="greeted")
    ]

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        pass

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_slots(self, text, intent):
            # Only the exact test sentence and intent produce slots.
            if text == input_text and intent == greeting_intent:
                return raw_slots
            return []

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    res_slots = engine.get_slots(input_text, greeting_intent)

    # Then
    expected_slots = [custom_slot(slot) for slot in raw_slots]
    self.assertListEqual(expected_slots, res_slots)
def _parse_map_output(self, text, output, entities, intents):
    """Parse the map output to the parser's result format

    Args:
        text (str): Original input; slot values are sliced from it.
        output (tuple): Pair ``(intent_id, slot_ids)`` indexing
            ``self._intents_names`` and ``self._slots_names``.
        entities (list): Matched entities, one per slot id, each holding
            a match range and an entity kind.
        intents (list of str): If not ``None``, intent names the result
            is restricted to.

    Returns:
        The extraction result built from the intent (probability 1.0) and
        its slots, or ``None`` when the intent is filtered out by
        *intents*.
    """
    intent_id, slot_ids = output
    intent_name = self._intents_names[intent_id]

    filtered_out = intents is not None and intent_name not in intents
    if filtered_out:
        return None

    parsed_intent = intent_classification_result(
        intent_name=intent_name, probability=1.0)

    # assert invariant
    assert len(slot_ids) == len(entities)

    slots = []
    for slot_id, entity in zip(slot_ids, entities):
        slot_name = self._slots_names[slot_id]
        start = entity[RES_MATCH_RANGE][START]
        end = entity[RES_MATCH_RANGE][END]
        slots.append(
            unresolved_slot(
                [start, end], text[start:end], entity[ENTITY_KIND],
                slot_name))
    return extraction_result(parsed_intent, slots)
def test_should_parse_with_filter(self):
    """Check that the ``intents`` filter restricts the parser's output.

    With three intents trained, parsing "foo bar baz" restricted to
    ``intent1`` and ``intent3`` must return ``intent1`` and its slot.
    """
    # Given
    yaml_dataset = io.StringIO("""
---
type: intent
name: intent1
utterances:
- "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
- foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
- foz for [slot3:entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_dataset]).json
    classifier_config = LogRegIntentClassifierConfig(random_seed=42)
    slot_filler_config = CRFSlotFillerConfig(random_seed=42)
    parser_config = ProbabilisticIntentParserConfig(
        classifier_config, slot_filler_config)
    parser = ProbabilisticIntentParser(parser_config)
    parser.fit(dataset)
    text = "foo bar baz"
    allowed_intents = ["intent1", "intent3"]

    # When
    result = parser.parse(text, intents=allowed_intents)

    # Then
    expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]
    self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(expected_slots, result[RES_SLOTS])
def test_should_deduplicate_overlapping_slots(self):
    """Check that a slot covering two shorter ones replaces them.

    The 0-8 slot "kid loco" overlaps the 0-3 and 4-8 slots; only it and
    the separate 9-13 slot are expected after deduplication.
    """
    # Given
    language = LANGUAGE_EN

    def make_slot(start, end, value, entity, name):
        # Ranges are passed as two-item lists.
        return unresolved_slot([start, end], value, entity, name)

    slots = [
        make_slot(0, 3, "kid", "e", "s1"),
        make_slot(4, 8, "loco", "e1", "s2"),
        make_slot(0, 8, "kid loco", "e1", "s3"),
        make_slot(9, 13, "song", "e2", "s4"),
    ]

    # When
    deduplicated_slots = _deduplicate_overlapping_slots(slots, language)

    # Then
    expected_slots = [
        make_slot(0, 8, "kid loco", "e1", "s3"),
        make_slot(9, 13, "song", "e2", "s4"),
    ]
    self.assertSequenceEqual(deduplicated_slots, expected_slots)
def test_bilou_tags_to_slots(self):
    """Check ``tags_to_slots`` with the BILOU scheme on a table of cases.

    Each case gives a text, one tag per token and the slots expected
    back. The last two cases use tag sequences that do not end with a
    ``LAST`` tag or start with a ``LAST`` tag.
    """
    # Given
    language = LANGUAGE_EN
    slot_name = "animal"
    intent_slots_mapping = {"animal": "animal"}

    begin = BEGINNING_PREFIX + slot_name
    inside = INSIDE_PREFIX + slot_name
    last = LAST_PREFIX + slot_name
    unit = UNIT_PREFIX + slot_name

    def animal(start, end, value):
        # Expected slot: entity and slot name are both ``slot_name``.
        return unresolved_slot(
            match_range=(start, end),
            value=value,
            entity=slot_name,
            slot_name=slot_name)

    # (text, tags, expected slots)
    cases = [
        ("", [], []),
        ("nothing here", [OUTSIDE, OUTSIDE], []),
        (
            "i am a blue bird",
            [OUTSIDE, OUTSIDE, OUTSIDE, begin, last],
            [animal(7, 16, "blue bird")],
        ),
        (
            "i am a bird",
            [OUTSIDE, OUTSIDE, OUTSIDE, unit],
            [animal(7, 11, "bird")],
        ),
        ("bird", [unit], [animal(0, 4, "bird")]),
        ("blue bird", [begin, last], [animal(0, 9, "blue bird")]),
        (
            "light blue bird blue bird",
            [begin, inside, last, begin, last],
            [
                animal(0, 15, "light blue bird"),
                animal(16, 25, "blue bird"),
            ],
        ),
        (
            "bird birdy",
            [unit, unit],
            [animal(0, 4, "bird"), animal(5, 10, "birdy")],
        ),
        (
            "light bird bird blue bird",
            [begin, inside, unit, begin, inside],
            [
                animal(0, 10, "light bird"),
                animal(11, 15, "bird"),
                animal(16, 25, "blue bird"),
            ],
        ),
        (
            "bird bird bird",
            [last, begin, unit],
            [
                animal(0, 4, "bird"),
                animal(5, 9, "bird"),
                animal(10, 14, "bird"),
            ],
        ),
    ]

    for text, case_tags, expected_slots in cases:
        # When
        slots = tags_to_slots(
            text, tokenize(text, language), case_tags,
            TaggingScheme.BILOU, intent_slots_mapping)

        # Then
        self.assertEqual(slots, expected_slots)
def test_should_use_parsers_sequentially(self):
    """Check the engine returns the second parser's result.

    Two processing units are defined and registered. The first always
    returns an empty result; the second returns a result for the test
    input. The engine output must be that result with its slots wrapped
    by ``custom_slot``.
    """
    # Given
    input_text = "hello world"
    intent = intent_classification_result(
        intent_name='dummy_intent_1', probability=0.7)
    slots = [
        unresolved_slot(
            match_range=(6, 11),
            value='world',
            entity='mocked_entity',
            slot_name='mocked_slot_name')
    ]

    class TestIntentParser1Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser1"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser1Config()

    class TestIntentParser1(IntentParser):
        unit_name = "test_intent_parser1"
        config_type = TestIntentParser1Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            # False until ``fit`` has set the flag.
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            return empty_result(text)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser1(cls.config_type())

    class TestIntentParser2Config(ProcessingUnitConfig):
        unit_name = "test_intent_parser2"

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentParser2Config()

    class TestIntentParser2(IntentParser):
        unit_name = "test_intent_parser2"
        config_type = TestIntentParser2Config

        def fit(self, dataset, force_retrain):
            self._fitted = True
            return self

        @property
        def fitted(self):
            # False until ``fit`` has set the flag.
            return getattr(self, '_fitted', False)

        def parse(self, text, intents):
            # Only the exact test sentence produces a result.
            if text != input_text:
                return empty_result(text)
            return parsing_result(text, intent, slots)

        def to_dict(self):
            return {"unit_name": self.unit_name}

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentParser2(cls.config_type())

    register_processing_unit(TestIntentParser1)
    register_processing_unit(TestIntentParser2)

    mocked_dataset_metadata = {
        "language_code": "en",
        "entities": {
            "mocked_entity": {
                "automatically_extensible": True,
                "utterances": {}
            }
        },
        "slot_name_mappings": {
            "dummy_intent_1": {"mocked_slot_name": "mocked_entity"}
        }
    }

    parser_configs = [TestIntentParser1Config(), TestIntentParser2Config()]
    config = NLUEngineConfig(parser_configs)
    engine = SnipsNLUEngine(config).fit(SAMPLE_DATASET)

    # The metadata computed by ``fit`` is replaced by the mocked one.
    # pylint:disable=protected-access
    engine._dataset_metadata = mocked_dataset_metadata
    # pylint:enable=protected-access

    # When
    actual_parse = engine.parse(input_text)

    # Then
    expected_slots = [custom_slot(slot) for slot in slots]
    expected_parse = parsing_result(input_text, intent, expected_slots)
    self.assertDictEqual(expected_parse, actual_parse)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    """Check that a matched synonym is resolved to its reference value.

    The probabilistic parser mock returns a slot whose value is the
    synonym ``dummy1_bis``; the engine result must keep it as raw value
    and expose ``dummy1`` as the resolved custom value.
    """
    # Given
    intent_definition = {
        "utterances": [{
            "data": [{
                "text": "dummy_1",
                "entity": "dummy_entity_1",
                "slot_name": "dummy_slot_name"
            }]
        }]
    }
    entity_definition = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [{
            "value": "dummy1",
            "synonyms": ["dummy1", "dummy1_bis"]
        }]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {"dummy_intent_1": intent_definition},
        "entities": {"dummy_entity_1": entity_definition},
        "language": "en"
    }

    text = "dummy1_bis"
    mocked_proba_parser_intent = intent_classification_result(
        "dummy_intent_1", 1.0)
    mocked_proba_parser_slots = [
        unresolved_slot(
            match_range=(0, 10),
            value="dummy1_bis",
            entity="dummy_entity_1",
            slot_name="dummy_slot_name")
    ]

    # The deterministic parser finds nothing; the probabilistic one
    # returns the synonym slot.
    mocked_deter_parse.return_value = empty_result(text)
    mocked_proba_parse.return_value = parsing_result(
        text, mocked_proba_parser_intent, mocked_proba_parser_slots)

    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(
        text, intent=mocked_proba_parser_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    """Check which mocked slots the engine keeps in its result.

    The CRF parser mock returns one slot per entity. Only the slot of
    ``dummy_entity_2`` (declared ``automatically_extensible``) is
    expected in the result; the slot of ``dummy_entity_1`` (not
    extensible, value absent from its data) is not.
    """
    # Given
    intent_definition = {
        "utterances": [{
            "data": [
                {
                    "text": "dummy_1",
                    "entity": "dummy_entity_1",
                    "slot_name": "dummy_slot_name"
                },
                {
                    "text": " dummy_2",
                    "entity": "dummy_entity_2",
                    "slot_name": "other_dummy_slot_name"
                }
            ]
        }]
    }
    closed_entity = {
        "use_synonyms": True,
        "automatically_extensible": False,
        "data": [
            {"value": "dummy1", "synonyms": ["dummy1", "dummy1_bis"]},
            {"value": "dummy2", "synonyms": ["dummy2", "dummy2_bis"]}
        ]
    }
    extensible_entity = {
        "use_synonyms": False,
        "automatically_extensible": True,
        "data": [
            {"value": "dummy2", "synonyms": ["dummy2"]}
        ]
    }
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {"dummy_intent_1": intent_definition},
        "entities": {
            "dummy_entity_1": closed_entity,
            "dummy_entity_2": extensible_entity
        },
        "language": "en"
    }

    text = "dummy_3 dummy_4"
    mocked_crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    first_slot = unresolved_slot(
        match_range=(0, 7),
        value="dummy_3",
        entity="dummy_entity_1",
        slot_name="dummy_slot_name")
    second_slot = unresolved_slot(
        match_range=(8, 15),
        value="dummy_4",
        entity="dummy_entity_2",
        slot_name="other_dummy_slot_name")
    mocked_crf_slots = [first_slot, second_slot]

    mocked_regex_parse.return_value = empty_result(text)
    mocked_crf_parse.return_value = parsing_result(
        text, mocked_crf_intent, mocked_crf_slots)

    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(text)

    # Then
    expected_slot = custom_slot(
        unresolved_slot(
            match_range=(8, 15),
            value="dummy_4",
            entity="dummy_entity_2",
            slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(
        text, intent=mocked_crf_intent, slots=[expected_slot])
    self.assertEqual(expected_result, result)