def _get_matching_result(self, text, processed_text, regex, intent,
                         entities_ranges_mapping=None):
    """Match *processed_text* against *regex* and build the parsing output.

    Args:
        text: Text from which the slot values are sliced.
        processed_text: Text the regex is matched against.
        regex: Compiled pattern whose named groups identify the slots.
        intent: Name of the intent associated with *regex*.
        entities_ranges_mapping (optional): Mapping keyed by
            ``(start, end)`` group spans of *processed_text*; its values are
            the ranges used to slice *text*.

    Returns:
        ``None`` when the regex does not match, otherwise the
        ``extraction_result`` made of the intent (probability 1.0) and the
        de-duplicated slots ordered by start position.
    """
    match = regex.match(processed_text)
    if match is None:
        return None

    intent_result = intent_classification_result(
        intent_name=intent, probability=1.0)

    collected = []
    for group in match.groupdict():
        # Group names may carry a "_<suffix>"; only the part before the
        # first underscore identifies the slot.
        base_group = group.partition("_")[0]
        slot_name = self.group_names_to_slot_names[base_group]
        entity = self.slot_names_to_entities[intent][slot_name]

        span = (match.start(group), match.end(group))
        if entities_ranges_mapping is None:
            match_range = {START: span[0], END: span[1]}
        elif span in entities_ranges_mapping:
            match_range = entities_ranges_mapping[span]
        else:
            # Span not registered in the mapping: translate it by the shift
            # computed from the mapping.
            offset = _get_range_shift(span, entities_ranges_mapping)
            match_range = {START: span[0] + offset, END: span[1] + offset}

        slot_value = text[match_range[START]:match_range[END]]
        collected.append(unresolved_slot(
            match_range=match_range, value=slot_value, entity=entity,
            slot_name=slot_name))

    deduplicated = _deduplicate_overlapping_slots(collected, self.language)
    ordered_slots = sorted(
        deduplicated, key=lambda slot: slot[RES_MATCH_RANGE][START])
    return extraction_result(intent_result, ordered_slots)
def test_should_parse_top_intents(self):
    """Top-n parsing returns only the intent whose utterance matches."""
    # Given
    # The fixture is a multi-document YAML stream: every ``---`` opens a new
    # document and each key must sit on its own line.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - hello world

---
type: intent
name: intent2
utterances:
  - foo bar""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "hello world"

    # When
    results = parser.parse(text, top_n=3)

    # Then
    expected_intent = intent_classification_result(
        intent_name="intent1", probability=1.0)
    expected_results = [extraction_result(expected_intent, [])]
    self.assertEqual(expected_results, results)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text* by calling its intent
    parsers successively

    Args:
        text (str): Input
        intents (str or list of str, optional): If provided, reduces the
            scope of intent parsing to the provided list of intents. The
            null intent (intent name ``None``) is never filtered out.
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result.
            Note that the returned list can contain less than ``top_n``
            elements, for instance when the parameter ``intents`` is not
            None, or when ``top_n`` is greater than the total number of
            intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
        slots. See :func:`.parsing_result` and :func:`.extraction_result`
        for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        InvalidInputError: When input type is not unicode
    """
    if not isinstance(text, str):
        raise InvalidInputError("Expected unicode but received: %s"
                                % type(text))

    # Normalise the filter to a set for cheap membership tests.
    if isinstance(intents, str):
        intents = {intents}
    elif isinstance(intents, list):
        intents = set(intents)

    if top_n is None:
        # Single-result mode: the first parser producing a non-empty result
        # wins. The probability of the last empty result seen is reused
        # for the final empty result.
        none_proba = 0.0
        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                none_proba = res[RES_INTENT][RES_PROBA]
                continue
            resolved_slots = self._resolve_slots(text, res[RES_SLOTS])
            return parsing_result(text, intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text, none_proba)

    intents_results = self.get_intents(text)
    if intents is not None:
        # Keep the null intent: it cannot be named in the filter, yet it
        # remains a legitimate outcome of a scoped parsing.
        intents_results = [
            res for res in intents_results
            if res[RES_INTENT_NAME] is None
            or res[RES_INTENT_NAME] in intents
        ]
    intents_results = intents_results[:top_n]

    results = []
    for intent_res in intents_results:
        slots = self.get_slots(text, intent_res[RES_INTENT_NAME])
        results.append(extraction_result(intent_res, slots))
    return results
def test_should_parse_top_intents(self):
    """Two intents matching the same text share the probability evenly."""
    # Given
    # The fixture is a multi-document YAML stream: every ``---`` opens a new
    # document and each key must sit on its own line.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting tomorrow

---
type: intent
name: intent2
utterances:
  - meeting [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "meeting tomorrow"

    # When
    results = parser.parse(text, top_n=3)

    # Then
    slot = {
        "entity": "snips/datetime",
        "range": {
            "end": 16,
            "start": 8
        },
        "slotName": "time",
        "value": "tomorrow"
    }
    expected_results = [
        extraction_result(
            intent_classification_result(
                intent_name="intent1", probability=0.5),
            []),
        extraction_result(
            intent_classification_result(
                intent_name="intent2", probability=0.5),
            [slot])
    ]
    # Both intents carry the same probability, so the results are sorted by
    # intent name before comparing.
    results = sorted(results, key=lambda r: r[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(expected_results, results)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text* by first classifying
    the intent and then using the corresponding slot filler to extract slots

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents. The null intent
            (intent name ``None``) is never filtered out.
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result.
            Note that the returned list can contain less than ``top_n``
            elements, for instance when the parameter ``intents`` is not
            None, or when ``top_n`` is greater than the total number of
            intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
        slots. See :func:`.parsing_result` and :func:`.extraction_result`
        for the output format.

    Raises:
        NotTrained: when the intent parser is not fitted
    """
    if isinstance(intents, str):
        intents = {intents}
    elif isinstance(intents, list):
        intents = list(intents)

    if top_n is None:
        intent_result = self.intent_classifier.get_intent(text, intents)
        intent_name = intent_result[RES_INTENT_NAME]
        if intent_name is not None:
            slots = self.slot_fillers[intent_name].get_slots(text)
        else:
            # The null intent has no slot filler.
            slots = []
        return parsing_result(text, intent_result, slots)

    intents_results = self.intent_classifier.get_intents(text)
    if intents is not None:
        # Honour the documented scope reduction in top-n mode as well; the
        # filter is applied before truncating to ``top_n``.
        allowed_intents = set(intents)
        intents_results = [
            res for res in intents_results
            if res[RES_INTENT_NAME] is None
            or res[RES_INTENT_NAME] in allowed_intents
        ]

    results = []
    for intent_result in intents_results[:top_n]:
        intent_name = intent_result[RES_INTENT_NAME]
        if intent_name is not None:
            slots = self.slot_fillers[intent_name].get_slots(text)
        else:
            slots = []
        results.append(extraction_result(intent_result, slots))
    return results
def _parse_map_output(self, text, output, entities, intents):
    """Turn a raw map *output* into the parser's result format.

    Args:
        text: Text the slot values are sliced from.
        output: Pair ``(intent_id, slot_ids)``.
        entities: Matched entities, one per entry of ``slot_ids``.
        intents: Optional container of accepted intent names.

    Returns:
        ``None`` when *intents* is given and does not contain the resolved
        intent name, otherwise the ``extraction_result`` holding the intent
        (probability 1.0) and one slot per entity.
    """
    intent_id, slot_ids = output
    intent_name = self._intents_names[intent_id]
    if not (intents is None or intent_name in intents):
        return None

    intent_result = intent_classification_result(
        intent_name=intent_name, probability=1.0)

    # assert invariant: slot ids and entities are paired one to one
    assert len(slot_ids) == len(entities)

    parsed_slots = []
    for slot_id, matched_entity in zip(slot_ids, entities):
        slot_name = self._slots_names[slot_id]
        entity_range = matched_entity[RES_MATCH_RANGE]
        start, end = entity_range[START], entity_range[END]
        parsed_slots.append(unresolved_slot(
            [start, end], text[start:end], matched_entity[ENTITY_KIND],
            slot_name))
    return extraction_result(intent_result, parsed_slots)
def test_should_parse_top_intents(self):
    """The engine returns the top intents, with and without a filter."""
    # Given
    text = "foo bar ban"
    # The fixture is a multi-document YAML stream: every ``---`` opens a new
    # document and each key must sit on its own line.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo [slot1:entity1](bak)

---
type: intent
name: intent2
utterances:
  - '[slot2:entity2](foo) baz'

---
type: intent
name: intent3
utterances:
  - foo bap""")

    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("first_intent_parser", True)
    class FirstIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent1", 0.5),
                intent_classification_result("intent2", 0.3),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return []
            if intent == "intent2":
                return [
                    unresolved_slot((0, 3), "foo", "entity2", "slot2")
                ]
            return []

    @IntentParser.register("second_intent_parser", True)
    class SecondIntentParser(MockIntentParser):
        def get_intents(self, text):
            return [
                intent_classification_result("intent2", 0.6),
                intent_classification_result("intent1", 0.2),
                intent_classification_result(None, 0.15),
                intent_classification_result("intent3", 0.05)
            ]

        def get_slots(self, text, intent):
            if intent == "intent1":
                return [
                    unresolved_slot((0, 3), "foo", "entity1", "slot1")
                ]
            if intent == "intent2":
                return [
                    unresolved_slot((8, 11), "ban", "entity2", "slot2")
                ]
            return []

    # pylint:enable=unused-variable

    config = NLUEngineConfig(
        ["first_intent_parser", "second_intent_parser"])
    nlu_engine = SnipsNLUEngine(config).fit(dataset)

    # When
    results = nlu_engine.parse(text, top_n=3)
    results_with_filter = nlu_engine.parse(
        text, intents=["intent1", "intent3"], top_n=3)

    # Then
    expected_results = [
        extraction_result(
            intent_classification_result("intent2", 0.6),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity2", "slot2"))]
        ),
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
    ]
    # The null intent is expected to survive the intents filter.
    expected_results_with_filter = [
        extraction_result(
            intent_classification_result("intent1", 0.5),
            [custom_slot(
                unresolved_slot((0, 3), "foo", "entity1", "slot1"))]
        ),
        extraction_result(
            intent_classification_result(None, 0.15),
            []
        ),
        extraction_result(
            intent_classification_result("intent3", 0.05),
            []
        ),
    ]
    self.assertListEqual(expected_results, results)
    self.assertListEqual(expected_results_with_filter,
                         results_with_filter)
def test_should_parse_top_intents(self):
    """Top-n parsing ranks three matching intents by normalised weight."""
    # Given
    # The fixture is a multi-document YAML stream: every ``---`` opens a new
    # document and each key must sit on its own line.
    dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting [time:snips/datetime](today)

---
type: intent
name: intent2
utterances:
  - meeting tomorrow

---
type: intent
name: intent3
utterances:
  - "[event_type](call) [time:snips/datetime](at 9pm)"

---
type: entity
name: event_type
values:
  - meeting
  - feedback session""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    text = "meeting tomorrow"

    # When
    results = parser.parse(text, top_n=3)

    # Then
    time_slot = {
        "entity": "snips/datetime",
        "range": {
            "end": 16,
            "start": 8
        },
        "slotName": "time",
        "value": "tomorrow"
    }
    event_slot = {
        "entity": "event_type",
        "range": {
            "end": 7,
            "start": 0
        },
        "slotName": "event_type",
        "value": "meeting"
    }
    # Expected probabilities are these weights normalised to sum to one.
    weight_intent_1 = 1. / 2.
    weight_intent_2 = 1.
    weight_intent_3 = 1. / 3.
    total_weight = weight_intent_1 + weight_intent_2 + weight_intent_3
    proba_intent2 = weight_intent_2 / total_weight
    proba_intent1 = weight_intent_1 / total_weight
    proba_intent3 = weight_intent_3 / total_weight
    expected_results = [
        extraction_result(
            intent_classification_result(
                intent_name="intent2", probability=proba_intent2),
            slots=[]),
        extraction_result(
            intent_classification_result(
                intent_name="intent1", probability=proba_intent1),
            slots=[time_slot]),
        extraction_result(
            intent_classification_result(
                intent_name="intent3", probability=proba_intent3),
            slots=[event_slot, time_slot])
    ]
    self.assertEqual(expected_results, results)