def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by first
    classifying the intent and then using the corresponding slot filler
    to extract slots

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("ProbabilisticIntentParser must be fitted")
    logger.debug("Probabilistic intent parser parsing '%s'...", text)
    if isinstance(intents, str):
        intents = [intents]
    intent_result = self.intent_classifier.get_intent(text, intents)
    if intent_result is None:
        # No intent could be classified -> nothing to slot-fill
        return empty_result(text)
    intent_name = intent_result[RES_INTENT_NAME]
    # Each intent has its own dedicated slot filler
    slots = self.slot_fillers[intent_name].get_slots(text)
    return parsing_result(text, intent_result, slots)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): input
        intents (str or list of str): if provided, reduces the scope of
            intent parsing to the provided list of intents
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result. Note that the returned list can contain less
            than ``top_n`` elements, for instance when the parameter
            ``intents`` is not None, or when ``top_n`` is greater than the
            total number of intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and
            :func:`.extraction_result` for the output format.

    Raises:
        NotTrained: when the intent parser is not fitted
    """
    if top_n is not None:
        # Caller explicitly asked for a ranked list of candidates
        return self._parse_top_intents(text, top_n=top_n, intents=intents)
    candidates = self._parse_top_intents(text, top_n=1, intents=intents)
    if not candidates:
        return empty_result(text, probability=1.0)
    best = candidates[0]
    return parsing_result(text, best[RES_INTENT], best[RES_SLOTS])
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by calling its intent
    parsers successively

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    logging.info("NLU engine parsing: '%s'...", text)
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if isinstance(intents, str):
        intents = [intents]
    # First parser producing a non-empty result wins
    for parser in self.intent_parsers:
        parsed = parser.parse(text, intents)
        if is_empty(parsed):
            continue
        slots = self.resolve_slots(text, parsed[RES_SLOTS])
        return parsing_result(text, intent=parsed[RES_INTENT], slots=slots)
    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by first
    classifying the intent and then using the corresponding slot filler
    to extract slots

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("ProbabilisticIntentParser must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    intent_result = self.intent_classifier.get_intent(text, intents)
    if intent_result is None:
        # No intent classified -> no slot filling to perform
        return empty_result(text)
    intent_name = intent_result[RES_INTENT_NAME]
    # Each intent has its own dedicated slot filler
    slots = self.slot_fillers[intent_name].get_slots(text)
    return parsing_result(text, intent_result, slots)
def test_should_ignore_very_ambiguous_utterances(self):
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
- "[event_type](meeting) tomorrow"

---
type: intent
name: intent_2
utterances:
- call [time:snips/datetime](today)

---
type: entity
name: event_type
values:
- call
- diner""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    input_text = "call tomorrow"

    # When
    result = parser.parse(input_text)

    # Then
    self.assertEqual(empty_result(input_text, 1.0), result)
def parse(self, text, intents=None, top_n=None):
    """Performs intent parsing on the provided *text* by calling its intent
    parsers successively

    Args:
        text (str): Input
        intents (str or list of str, optional): If provided, reduces the
            scope of intent parsing to the provided list of intents
        top_n (int, optional): when provided, this method will return a
            list of at most top_n most likely intents, instead of a single
            parsing result. Note that the returned list can contain less
            than ``top_n`` elements, for instance when the parameter
            ``intents`` is not None, or when ``top_n`` is greater than the
            total number of intents.

    Returns:
        dict or list: the most likely intent(s) along with the extracted
            slots. See :func:`.parsing_result` and
            :func:`.extraction_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        InvalidInputError: When input type is not unicode
    """
    if not isinstance(text, str):
        raise InvalidInputError("Expected unicode but received: %s"
                                % type(text))
    # Normalize the intents filter to a set for fast membership tests
    if isinstance(intents, str):
        intents = {intents}
    elif isinstance(intents, list):
        intents = set(intents)
    if top_n is None:
        # Single-result mode: first parser with a non-empty result wins.
        # Keep track of the None-intent probability reported by the last
        # parser that failed, so the empty result carries it.
        none_proba = 0.0
        for parser in self.intent_parsers:
            res = parser.parse(text, intents)
            if is_empty(res):
                none_proba = res[RES_INTENT][RES_PROBA]
                continue
            resolved_slots = self._resolve_slots(text, res[RES_SLOTS])
            return parsing_result(text, intent=res[RES_INTENT],
                                  slots=resolved_slots)
        return empty_result(text, none_proba)
    # top_n mode: rank all intents, filter by scope, then slot-fill each
    intents_results = self.get_intents(text)
    if intents is not None:
        intents_results = [
            res for res in intents_results
            if res[RES_INTENT_NAME] in intents
        ]
    intents_results = intents_results[:top_n]
    results = []
    for intent_res in intents_results:
        slots = self.get_slots(text, intent_res[RES_INTENT_NAME])
        results.append(extraction_result(intent_res, slots))
    return results
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    # Builtin entities are replaced by placeholders, since the regexes
    # were built against placeholder patterns; ranges_mapping lets us map
    # placeholder match ranges back onto the original text
    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)
    for intent, regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for regex in regexes:
            match = regex.match(processed_text)
            if match is None:
                continue
            # Pattern matching is deterministic -> probability 1.0
            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            slots = []
            for group_name in match.groupdict():
                slot_name = self.group_names_to_slot_names[group_name]
                entity = self.slot_names_to_entities[slot_name]
                rng = (match.start(group_name), match.end(group_name))
                value = match.group(group_name)
                if rng in ranges_mapping:
                    # Matched a placeholder: recover the original range
                    # and raw value from the input text
                    rng = ranges_mapping[rng]
                    value = text[rng[START]:rng[END]]
                else:
                    rng = {START: rng[0], END: rng[1]}
                parsed_slot = unresolved_slot(
                    match_range=rng, value=value, entity=entity,
                    slot_name=slot_name)
                slots.append(parsed_slot)
            parsed_slots = _deduplicate_overlapping_slots(
                slots, self.language)
            parsed_slots = sorted(
                parsed_slots, key=lambda s: s[RES_MATCH_RANGE][START])
            # First matching regex wins
            return parsing_result(text, parsed_intent, parsed_slots)
    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    if not self.fitted:
        raise NotTrained("DeterministicIntentParser must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    # Regexes were built against text where builtin entities are replaced
    # by placeholders; ranges_mapping maps placeholder ranges back to the
    # original text
    ranges_mapping, processed_text = _replace_builtin_entities(
        text, self.language)
    for intent, regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for regex in regexes:
            match = regex.match(processed_text)
            if match is None:
                continue
            # Deterministic pattern match -> probability 1.0
            parsed_intent = intent_classification_result(
                intent_name=intent, probability=1.0)
            slots = []
            for group_name in match.groupdict():
                slot_name = self.group_names_to_slot_names[group_name]
                entity = self.slot_names_to_entities[slot_name]
                rng = (match.start(group_name), match.end(group_name))
                value = match.group(group_name)
                if rng in ranges_mapping:
                    # Placeholder match: recover original range and value
                    rng = ranges_mapping[rng]
                    value = text[rng[START]:rng[END]]
                else:
                    rng = {START: rng[0], END: rng[1]}
                parsed_slot = unresolved_slot(
                    match_range=rng, value=value, entity=entity,
                    slot_name=slot_name)
                slots.append(parsed_slot)
            parsed_slots = _deduplicate_overlapping_slots(
                slots, self.language)
            parsed_slots = sorted(
                parsed_slots, key=lambda s: s[RES_MATCH_RANGE][START])
            # First matching regex wins
            return parsing_result(text, parsed_intent, parsed_slots)
    return empty_result(text)
def test_should_handle_empty_dataset(self):
    # Given
    empty_dataset = validate_and_format_dataset(
        get_empty_dataset(LANGUAGE_EN))
    engine = SnipsNLUEngine().fit(empty_dataset)

    # When
    result = engine.parse("hello world")

    # Then
    self.assertEqual(empty_result("hello world"), result)
def test_should_handle_empty_dataset(self):
    # Given
    engine = SnipsNLUEngine().fit(
        validate_and_format_dataset(get_empty_dataset(LANGUAGE_EN)))

    # When
    parsing = engine.parse("hello world")

    # Then
    self.assertEqual(empty_result("hello world"), parsing)
def test_should_handle_empty_dataset(self):
    # Given
    dataset = get_empty_dataset(LANGUAGE_EN)
    engine = SnipsNLUEngine(**self.get_shared_data(dataset)).fit(dataset)

    # When
    parsing = engine.parse("hello world")

    # Then
    self.assertEqual(empty_result("hello world", 1.0), parsing)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text*

    Intent and slots are extracted simultaneously through pattern matching

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The matched intent, if any, along with the extracted slots.
            See :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the intent parser is not fitted
    """
    logger.debug("DeterministicIntentParser parsing '%s'...", text)
    if isinstance(intents, str):
        intents = [intents]
    # Parse both builtin and custom entities so their spans can be
    # replaced with placeholders the regexes were trained on
    builtin_entities = self.builtin_entity_parser.parse(
        text, use_cache=True)
    custom_entities = self.custom_entity_parser.parse(
        text, use_cache=True)
    all_entities = builtin_entities + custom_entities
    ranges_mapping, processed_text = _replace_entities_with_placeholders(
        text, self.language, all_entities)
    # We try to match both the input text and the preprocessed text to
    # cover inconsistencies between labeled data and builtin entity parsing
    cleaned_text = _replace_tokenized_out_characters(text, self.language)
    cleaned_processed_text = _replace_tokenized_out_characters(
        processed_text, self.language)
    for intent, regexes in iteritems(self.regexes_per_intent):
        if intents is not None and intent not in intents:
            continue
        for regex in regexes:
            # Placeholder-based match first, then raw-text fallback
            res = self._get_matching_result(
                text, cleaned_processed_text, regex, intent,
                ranges_mapping)
            if res is None:
                res = self._get_matching_result(
                    text, cleaned_text, regex, intent)
            if res is not None:
                return res
    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by calling its intent
    parsers successively

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    logging.info("NLU engine parsing: '%s'...", text)
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if not self.fitted:
        raise NotTrained("SnipsNLUEngine must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    language = self._dataset_metadata["language_code"]
    entities = self._dataset_metadata["entities"]
    # Parsers are tried in order; first non-empty result wins
    for parser in self.intent_parsers:
        res = parser.parse(text, intents)
        if is_empty(res):
            continue
        slots = res[RES_SLOTS]
        # Restrict builtin-entity resolution to the entity kinds actually
        # present in the extracted slots
        scope = [
            s[RES_ENTITY] for s in slots
            if is_builtin_entity(s[RES_ENTITY])
        ]
        resolved_slots = resolve_slots(text, slots, entities, language,
                                       scope)
        return parsing_result(text, intent=res[RES_INTENT],
                              slots=resolved_slots)
    return empty_result(text)
def parse(self, text, intents=None):
    """Performs intent parsing on the provided *text* by calling its intent
    parsers successively

    Args:
        text (str): Input
        intents (str or list of str): If provided, reduces the scope of
            intent parsing to the provided list of intents

    Returns:
        dict: The most likely intent along with the extracted slots. See
            :func:`.parsing_result` for the output format.

    Raises:
        NotTrained: When the nlu engine is not fitted
        TypeError: When input type is not unicode
    """
    if not isinstance(text, str):
        raise TypeError("Expected unicode but received: %s" % type(text))
    if not self.fitted:
        raise NotTrained("SnipsNLUEngine must be fitted")
    if isinstance(intents, str):
        intents = [intents]
    language = self._dataset_metadata["language_code"]
    entities = self._dataset_metadata["entities"]
    # First parser producing a non-empty result wins
    for parser in self.intent_parsers:
        res = parser.parse(text, intents)
        if is_empty(res):
            continue
        slots = res[RES_SLOTS]
        # Only resolve the builtin entity kinds that actually appear
        scope = [s[RES_ENTITY]
                 for s in slots if is_builtin_entity(s[RES_ENTITY])]
        resolved_slots = resolve_slots(text, slots, entities, language,
                                       scope)
        return parsing_result(text, intent=res[RES_INTENT],
                              slots=resolved_slots)
    return empty_result(text)
def test_should_ignore_completely_ambiguous_utterances(self):
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: dummy_intent_1
utterances:
- Hello world

---
type: intent
name: dummy_intent_2
utterances:
- Hello world""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    input_text = "Hello world"

    # When
    result = parser.parse(input_text)

    # Then
    self.assertEqual(empty_result(input_text, 1.0), result)
def test_should_ignore_subtly_ambiguous_utterances(self):
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
- meeting tomorrow

---
type: intent
name: intent_2
utterances:
- meeting [time:snips/datetime](today)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    input_text = "meeting tomorrow"

    # When
    result = parser.parse(input_text)

    # Then
    self.assertEqual(empty_result(input_text, 1.0), result)
def test_should_parse_intent_with_filter(self):
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
- foo bar baz

---
type: intent
name: intent2
utterances:
- foo bar ban""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser = DeterministicIntentParser().fit(dataset)
    input_text = "foo bar ban"

    # When
    result = parser.parse(input_text, intents=["intent1"])

    # Then
    self.assertEqual(empty_result(input_text, 1.0), result)
def parse(self, text, intents):
    """Test double: never matches any intent."""
    return empty_result(text)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }]
            }
        },
        "language": "en"
    }
    input_text = "dummy1_bis"
    proba_intent = intent_classification_result("dummy_intent_1", 1.0)
    proba_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]
    mocked_deter_parse.return_value = empty_result(input_text)
    mocked_proba_parse.return_value = parsing_result(
        input_text, proba_intent, proba_slots)
    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(input_text)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(input_text, intent=proba_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [
                        {
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        },
                        {
                            "text": " dummy_2",
                            "entity": "dummy_entity_2",
                            "slot_name": "other_dummy_slot_name"
                        }
                    ]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    },
                    {
                        "value": "dummy2",
                        "synonyms": ["dummy2", "dummy2_bis"]
                    }
                ]
            },
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [{
                    "value": "dummy2",
                    "synonyms": ["dummy2"]
                }]
            }
        },
        "language": "en"
    }
    input_text = "dummy_3 dummy_4"
    crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    crf_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name")
    ]
    mocked_regex_parse.return_value = empty_result(input_text)
    mocked_crf_parse.return_value = parsing_result(
        input_text, crf_intent, crf_slots)
    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(input_text)

    # Then
    expected_slot = custom_slot(
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(input_text, intent=crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def parse(self, text, intents):
    """Test double: matches only the fixture's ``input_text`` (closure
    variable), returning the fixture's intent and slots."""
    if text == input_text:
        return parsing_result(text, intent, slots)
    return empty_result(text)
def parse(self, text, intents):
    """Test double: succeeds only on the fixture's ``input_text``
    (closure variable); empty result otherwise."""
    if text == input_text:
        return parsing_result(text, intent, slots)
    return empty_result(text)
def parse(self, text, intents=None, top_n=None):
    """Test double: never matches; reports the None intent with
    probability 1.0."""
    return empty_result(text, 1.0)
def test_should_handle_keyword_entities(self, mocked_regex_parse,
                                        mocked_crf_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [
                        {
                            "text": "dummy_1",
                            "entity": "dummy_entity_1",
                            "slot_name": "dummy_slot_name"
                        },
                        {
                            "text": " dummy_2",
                            "entity": "dummy_entity_2",
                            "slot_name": "other_dummy_slot_name"
                        }
                    ]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [
                    {
                        "value": "dummy1",
                        "synonyms": ["dummy1", "dummy1_bis"]
                    },
                    {
                        "value": "dummy2",
                        "synonyms": ["dummy2", "dummy2_bis"]
                    }
                ]
            },
            "dummy_entity_2": {
                "use_synonyms": False,
                "automatically_extensible": True,
                "data": [{
                    "value": "dummy2",
                    "synonyms": ["dummy2"]
                }]
            }
        },
        "language": "en"
    }
    query = "dummy_3 dummy_4"
    crf_intent = intent_classification_result("dummy_intent_1", 1.0)
    crf_slots = [
        unresolved_slot(match_range=(0, 7), value="dummy_3",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name"),
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name")
    ]
    mocked_regex_parse.return_value = empty_result(query)
    mocked_crf_parse.return_value = parsing_result(
        query, crf_intent, crf_slots)
    engine = SnipsNLUEngine()

    # When
    engine = engine.fit(dataset)
    result = engine.parse(query)

    # Then
    expected_slot = custom_slot(
        unresolved_slot(match_range=(8, 15), value="dummy_4",
                        entity="dummy_entity_2",
                        slot_name="other_dummy_slot_name"))
    expected_result = parsing_result(query, intent=crf_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def test_synonyms_should_point_to_base_value(self, mocked_deter_parse,
                                             mocked_proba_parse):
    # Given
    dataset = {
        "snips_nlu_version": "1.1.1",
        "intents": {
            "dummy_intent_1": {
                "utterances": [{
                    "data": [{
                        "text": "dummy_1",
                        "entity": "dummy_entity_1",
                        "slot_name": "dummy_slot_name"
                    }]
                }]
            }
        },
        "entities": {
            "dummy_entity_1": {
                "use_synonyms": True,
                "automatically_extensible": False,
                "data": [{
                    "value": "dummy1",
                    "synonyms": ["dummy1", "dummy1_bis"]
                }]
            }
        },
        "language": "en"
    }
    query = "dummy1_bis"
    proba_intent = intent_classification_result("dummy_intent_1", 1.0)
    proba_slots = [
        unresolved_slot(match_range=(0, 10), value="dummy1_bis",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name")
    ]
    mocked_deter_parse.return_value = empty_result(query)
    mocked_proba_parse.return_value = parsing_result(
        query, proba_intent, proba_slots)
    engine = SnipsNLUEngine().fit(dataset)

    # When
    result = engine.parse(query)

    # Then
    expected_slot = {
        RES_MATCH_RANGE: {"start": 0, "end": 10},
        RES_RAW_VALUE: "dummy1_bis",
        RES_VALUE: {"kind": "Custom", "value": "dummy1"},
        RES_ENTITY: "dummy_entity_1",
        RES_SLOT_NAME: "dummy_slot_name"
    }
    expected_result = parsing_result(query, intent=proba_intent,
                                     slots=[expected_slot])
    self.assertEqual(expected_result, result)
def parse(self, text, intents=None, top_n=None):
    """Test double: matches only the fixture's ``input_text`` (closure
    variable); otherwise reports the None intent with probability 1.0."""
    if text == input_text:
        return parsing_result(text, intent, slots)
    return empty_result(text, 1.0)
def parse(self, text, intents):
    """Test double: always returns an empty parsing result."""
    return empty_result(text)