def test_training_should_be_reproducible(self):
        # Given
        random_state = 42
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        parser1 = DeterministicIntentParser(random_state=random_state)
        parser1.fit(dataset)

        parser2 = DeterministicIntentParser(random_state=random_state)
        parser2.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            dir_parser1 = tmp_dir / "parser1"
            dir_parser2 = tmp_dir / "parser2"
            parser1.persist(dir_parser1)
            parser2.persist(dir_parser2)
            hash1 = dirhash(str(dir_parser1), 'sha256')
            hash2 = dirhash(str(dir_parser2), 'sha256')
            self.assertEqual(hash1, hash2)
    def test_should_be_deserializable_before_fitting_with_whitelist(self):
        # Given
        parser_dict = {
            "config": {
                "max_queries": 42,
                "max_pattern_length": 43
            },
            "language_code": None,
            "group_names_to_slot_names": None,
            "patterns": None,
            "slot_names_to_entities": None,
            "stop_words_whitelist": None
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "deterministic_intent_parser"}
        self.writeJsonContent(self.tmp_file_path / "intent_parser.json",
                              parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)

        # When
        parser = DeterministicIntentParser.from_path(self.tmp_file_path)

        # Then
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_pattern_length=43)
        expected_parser = DeterministicIntentParser(config=config)
        self.assertEqual(parser.to_dict(), expected_parser.to_dict())
    def test_should_fit_with_naughty_strings_no_tags(self):
        # Given
        naughty_strings_path = os.path.join(TEST_PATH, "resources",
                                            "naughty_strings.txt")
        with io.open(naughty_strings_path, encoding='utf8') as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        utterances = [{
            DATA: [{
                TEXT: naughty_string
            }]
        } for naughty_string in naughty_strings]

        # When
        naughty_dataset = {
            "intents": {
                "naughty_intent": {
                    "utterances": utterances
                }
            },
            "entities": dict(),
            "language": "en",
            "snips_nlu_version": "0.0.1"
        }

        # Then
        with self.fail_if_exception("Exception raised"):
            DeterministicIntentParser().fit(naughty_dataset)
    def test_should_ignore_very_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - "[event_type](meeting) tomorrow"

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)

---
type: entity
name: event_type
values:
  - call
  - diner""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
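        # Both intents can only match "call tomorrow" through their entity
        # placeholders ("call" is a valid event_type value, "tomorrow" a
        # datetime), with no exact literal match to break the tie, so the
        # parser treats the utterance as too ambiguous and returns the empty
        # result.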
        self.assertEqual(empty_result(text, 1.0), res)
    def test_should_parse_intent(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(intent_name="intent2",
                                                       probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
    def test_should_parse_slightly_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
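        # Both intents match, but intent_1 has an exact literal match and is
        # ranked first; its probability is the rank weight normalized over
        # both matches: 1 / (1 + 1/2) = 2/3.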
        expected_intent = intent_classification_result(intent_name="intent_1",
                                                       probability=2. / 3.)
        expected_result = parsing_result(text, expected_intent, [])
        self.assertEqual(expected_result, res)
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - hello world
  
---
type: intent
name: intent2
utterances:
  - foo bar""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)
        text = "hello world"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        expected_intent = intent_classification_result(intent_name="intent1",
                                                       probability=1.0)
        expected_results = [extraction_result(expected_intent, [])]
        self.assertEqual(expected_results, results)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cup of tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](two) cups of coffee
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        intent_parser = DeterministicIntentParser(**shared).fit(dataset)

        # When
        intent_parser_bytes = intent_parser.to_byte_array()
        loaded_intent_parser = DeterministicIntentParser.from_byte_array(
            intent_parser_bytes, **shared)
        result = loaded_intent_parser.parse("make me two cups of coffee")

        # Then
        self.assertEqual("MakeCoffee", result[RES_INTENT][RES_INTENT_NAME])
    # NOTE: the @patch decorator was missing from this snippet; the target
    # below is an assumption and may need adjusting to the regex-generation
    # helper used by this version of the parser.
    @patch("snips_nlu.intent_parser.deterministic_intent_parser"
           "._generate_regexes")
    def test_should_be_serializable(self, mocked_generate_regexes):
        # Given

        # pylint: disable=unused-argument
        def mock_generate_patterns(utterances, joined_entity_utterances,
                                   group_names_to_slot_names, language):
            patterns = ["mocked_regex_%s" % i for i in range(len(utterances))]
            group_to_slot = {"group_0": "dummy slot name"}
            return patterns, group_to_slot

        # pylint: enable=unused-argument

        mocked_generate_regexes.side_effect = mock_generate_patterns
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_pattern_length=100)
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_dict = {
            "unit_name": "deterministic_intent_parser",
            "config": {
                "unit_name": "deterministic_intent_parser",
                "max_queries": 42,
                "max_pattern_length": 100
            },
            "language_code": "en",
            "group_names_to_slot_names": {
                "group_0": "dummy slot name"
            },
            "patterns": {
                "dummy_intent_1": [
                    "mocked_regex_0",
                    "mocked_regex_1",
                    "mocked_regex_2",
                    "mocked_regex_3"
                ],
                "dummy_intent_2": [
                    "mocked_regex_0"
                ]
            },
            "slot_names_to_entities": {
                "dummy_intent_1": {
                    "dummy_slot_name": "dummy_entity_1",
                    "dummy_slot_name3": "dummy_entity_2",
                    "dummy_slot_name2": "dummy_entity_2"
                },
                "dummy_intent_2": {
                    "dummy slot nàme": "dummy_entity_1"
                }
            }
        }
        metadata = {"unit_name": "deterministic_intent_parser"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json",
                               metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_parser.json",
                               expected_dict)
    def test_should_fit_with_naughty_strings_no_tags(self):
        # Given
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding="utf8") as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        utterances = [{
            DATA: [{
                TEXT: naughty_string
            }]
        } for naughty_string in naughty_strings]

        # When
        naughty_dataset = {
            "intents": {
                "naughty_intent": {
                    "utterances": utterances
                }
            },
            "entities": dict(),
            "language": "en",
        }

        # Then
        with self.fail_if_exception("Exception raised"):
            DeterministicIntentParser().fit(naughty_dataset)
    def test_should_be_serializable_before_fitting(self):
        # Given
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_pattern_length=43,
                                                 ignore_stop_words=True)
        parser = DeterministicIntentParser(config=config)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_dict = {
            "config": {
                "unit_name": "deterministic_intent_parser",
                "max_queries": 42,
                "max_pattern_length": 43,
                "ignore_stop_words": True
            },
            "language_code": None,
            "group_names_to_slot_names": None,
            "patterns": None,
            "slot_names_to_entities": None,
            "stop_words_whitelist": None
        }

        metadata = {"unit_name": "deterministic_intent_parser"}
        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_parser.json",
                               expected_dict)
    def test_should_limit_patterns_length(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- how are you
- hello how are you?
- what's up

---
type: intent
name: my_second_intent
utterances:
- what is the weather today ?
- does it rain
- will it rain tomorrow""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = DeterministicIntentParserConfig(max_queries=1000,
                                                 max_pattern_length=25,
                                                 ignore_stop_words=False)

        # When
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # Then
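        # With max_pattern_length=25, the regexes built from the longer
        # utterances exceed the limit and are dropped: 2 of the 3 patterns
        # survive for my_first_intent and only 1 of 3 for my_second_intent.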
        self.assertEqual(2, len(parser.regexes_per_intent["my_first_intent"]))
        self.assertEqual(1, len(parser.regexes_per_intent["my_second_intent"]))
    def test_should_limit_nb_queries(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_first_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](my second entity)
- this is [slot3:entity3](my third entity)

---
type: intent
name: my_second_intent
utterances:
- this is [slot4:entity4](my fourth entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        config = DeterministicIntentParserConfig(max_queries=2,
                                                 max_pattern_length=1000)

        # When
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # Then
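        # max_queries=2 caps the number of utterances turned into regexes per
        # intent: my_first_intent keeps 2 of its 3 utterances, while
        # my_second_intent keeps its single one.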
        self.assertEqual(len(parser.regexes_per_intent["my_first_intent"]), 2)
        self.assertEqual(len(parser.regexes_per_intent["my_second_intent"]), 1)
    def test_should_be_deserializable_without_stop_words(self):
        # Given
        parser_dict = {
            "config": {
                "max_queries": 42,
                "max_pattern_length": 43
            },
            "language_code": "en",
            "group_names_to_slot_names": {
                "hello_group": "hello_slot",
                "world_group": "world_slot"
            },
            "patterns": {
                "my_intent":
                ["(?P<hello_group>hello?)", "(?P<world_group>world$)"]
            },
            "slot_names_to_entities": {
                "my_intent": {
                    "hello_slot": "hello_entity",
                    "world_slot": "world_entity"
                }
            }
        }
        self.tmp_file_path.mkdir()
        metadata = {"unit_name": "deterministic_intent_parser"}
        self.writeJsonContent(self.tmp_file_path / "intent_parser.json",
                              parser_dict)
        self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)

        # When
        parser = DeterministicIntentParser.from_path(self.tmp_file_path)

        # Then
        patterns = {
            "my_intent":
            ["(?P<hello_group>hello?)", "(?P<world_group>world$)"]
        }
        group_names_to_slot_names = {
            "hello_group": "hello_slot",
            "world_group": "world_slot"
        }
        slot_names_to_entities = {
            "my_intent": {
                "hello_slot": "hello_entity",
                "world_slot": "world_entity"
            }
        }
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_pattern_length=43)
        expected_parser = DeterministicIntentParser(config=config)
        expected_parser.language = LANGUAGE_EN
        expected_parser.group_names_to_slot_names = group_names_to_slot_names
        expected_parser.slot_names_to_entities = slot_names_to_entities
        expected_parser.patterns = patterns
        # pylint:disable=protected-access
        expected_parser._stop_words_whitelist = dict()
        # pylint:enable=protected-access

        self.assertEqual(parser.to_dict(), expected_parser.to_dict())
    def test_should_not_parse_when_not_fitted(self):
        # Given
        parser = DeterministicIntentParser()

        # When / Then
        self.assertFalse(parser.fitted)
        with self.assertRaises(NotTrained):
            parser.parse("foobar")
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - Hello John

---
type: intent
name: greeting2
utterances:
  - Hello [name](John)

---
type: intent
name: greeting3
utterances:
  - "[greeting](Hello) [name](John)"
        """)

        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)

        # When
        top_intents = parser.get_intents("Hello John")

        # Then
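        # All three intents match "Hello John"; their probabilities are the
        # reciprocal-rank weights 1, 1/2 and 1/3 normalized by their sum
        # (1 + 1/2 + 1/3), and the None intent gets probability 0.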
        expected_intents = [
            {
                RES_INTENT_NAME: "greeting1",
                RES_PROBA: 1. / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: "greeting2",
                RES_PROBA: (1. / 2.) / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: "greeting3",
                RES_PROBA: (1. / 3.) / (1. + 1. / 2. + 1. / 3.)
            },
            {
                RES_INTENT_NAME: None,
                RES_PROBA: 0.0
            },
        ]

        def sorting_key(intent_res):
            if intent_res[RES_INTENT_NAME] is None:
                return "null"
            return intent_res[RES_INTENT_NAME]

        sorted_expected_intents = sorted(expected_intents, key=sorting_key)
        sorted_intents = sorted(top_intents, key=sorting_key)
        self.assertEqual(expected_intents[0], top_intents[0])
        self.assertListEqual(sorted_expected_intents, sorted_intents)
    def test_should_limit_patterns_length(self):
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        config = DeterministicIntentParserConfig(max_queries=1000,
                                                 max_pattern_length=300)

        # When
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # Then
        self.assertEqual(4, len(parser.regexes_per_intent["dummy_intent_1"]))
        self.assertEqual(1, len(parser.regexes_per_intent["dummy_intent_2"]))
    # NOTE: the @patch decorator was missing from this snippet; the target
    # below is an assumption and may need adjusting to the regex-generation
    # helper used by this version of the parser.
    @patch("snips_nlu.intent_parser.deterministic_intent_parser"
           "._generate_regexes")
    def test_should_be_serializable(self, mocked_generate_regexes):
        # Given

        # pylint: disable=unused-argument
        def mock_generate_regexes(utterances, joined_entity_utterances,
                                  group_names_to_slot_names, language):
            regexes = [
                re.compile(r"mocked_regex_%s" % i)
                for i in range(len(utterances))
            ]
            group_to_slot = {"group_0": "dummy slot name"}
            return regexes, group_to_slot

        # pylint: enable=unused-argument

        mocked_generate_regexes.side_effect = mock_generate_regexes
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_entities=100)
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # When
        actual_dict = parser.to_dict()

        # Then
        expected_dict = {
            "unit_name": "deterministic_intent_parser",
            "config": {
                "unit_name": "deterministic_intent_parser",
                "max_queries": 42,
                "max_entities": 100
            },
            "language_code": "en",
            "group_names_to_slot_names": {
                "group_0": "dummy slot name"
            },
            "patterns": {
                "dummy_intent_1": [
                    "mocked_regex_0", "mocked_regex_1", "mocked_regex_2",
                    "mocked_regex_3"
                ],
                "dummy_intent_2": ["mocked_regex_0"]
            },
            "slot_names_to_entities": {
                "dummy_slot_name": "dummy_entity_1",
                "dummy slot nàme": "dummy_entity_1",
                "dummy_slot_name3": "dummy_entity_2",
                "dummy_slot_name2": "dummy_entity_2"
            }
        }

        self.assertDictEqual(actual_dict, expected_dict)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset = BEVERAGE_DATASET
        intent_parser = DeterministicIntentParser().fit(dataset)

        # When
        intent_parser_bytes = intent_parser.to_byte_array()
        loaded_intent_parser = DeterministicIntentParser.from_byte_array(
            intent_parser_bytes)
        result = loaded_intent_parser.parse("make me two cups of coffee")

        # Then
        self.assertEqual("MakeCoffee", result[RES_INTENT][RES_INTENT_NAME])
    def test_should_be_deserializable(self):
        # Given
        parser_dict = {
            "config": {
                "max_queries": 42,
                "max_pattern_length": 43
            },
            "language_code": "en",
            "group_names_to_slot_names": {
                "hello_group": "hello_slot",
                "world_group": "world_slot"
            },
            "patterns": {
                "intent_name": [
                    "(?P<hello_group>hello?)",
                    "(?P<world_group>world$)"
                ]
            },
            "slot_names_to_entities": {
                "hello_slot": "hello_entity",
                "world_slot": "world_entity"
            }
        }

        # When
        parser = DeterministicIntentParser.from_dict(parser_dict)

        # Then
        patterns = {
            "intent_name": [
                "(?P<hello_group>hello?)",
                "(?P<world_group>world$)"
            ]
        }
        group_names_to_slot_names = {
            "hello_group": "hello_slot",
            "world_group": "world_slot"
        }
        slot_names_to_entities = {
            "hello_slot": "hello_entity",
            "world_slot": "world_entity"
        }
        config = DeterministicIntentParserConfig(max_queries=42,
                                                 max_pattern_length=43)
        expected_parser = DeterministicIntentParser(config=config)
        expected_parser.language = LANGUAGE_EN
        expected_parser.group_names_to_slot_names = group_names_to_slot_names
        expected_parser.slot_names_to_entities = slot_names_to_entities
        expected_parser.patterns = patterns

        self.assertEqual(parser.to_dict(), expected_parser.to_dict())
    def test_should_not_train_intents_too_big(self):
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        config = DeterministicIntentParserConfig(max_queries=2,
                                                 max_entities=200)

        # When
        parser = DeterministicIntentParser(config=config).fit(dataset)

        # Then
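        # dummy_intent_1 has more training utterances than max_queries=2, so
        # it is not trained at all and ends up with an empty regex list, while
        # the smaller dummy_intent_2 is fitted normally.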
        not_fitted_intent = "dummy_intent_1"
        fitted_intent = "dummy_intent_2"
        self.assertGreater(len(parser.regexes_per_intent[fitted_intent]), 0)
        self.assertListEqual(parser.regexes_per_intent[not_fitted_intent], [])
    def test_should_parse_naughty_strings(self):
        # Given
        dataset = validate_and_format_dataset(SAMPLE_DATASET)
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding='utf8') as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        # When
        parser = DeterministicIntentParser().fit(dataset)

        # Then
        for s in naughty_strings:
            with self.fail_if_exception("Exception raised"):
                parser.parse(s)
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)
  
---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = self.get_resources("en")
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = DeterministicIntentParserConfig(ignore_stop_words=True)
        parser = DeterministicIntentParser(config=parser_config,
                                           resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
        expected_intent = intent_classification_result(intent_name="search",
                                                       probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11),
                            value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11),
                            value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
    def test_should_fit_and_parse_with_non_ascii_tags(self):
        # Given
        inputs = ("string%s" % i for i in range(10))
        utterances = [{
            DATA: [{
                TEXT: string,
                ENTITY: "non_ascìi_entïty",
                SLOT_NAME: "non_ascìi_slöt"
            }]
        } for string in inputs]

        # When
        naughty_dataset = {
            "intents": {
                "naughty_intent": {
                    "utterances": utterances
                }
            },
            "entities": {
                "non_ascìi_entïty": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "matching_strictness": 1.0,
                    "data": []
                }
            },
            "language": "en",
        }

        naughty_dataset = validate_and_format_dataset(naughty_dataset)

        # Then
        with self.fail_if_exception("Exception raised"):
            parser = DeterministicIntentParser()
            parser.fit(naughty_dataset)
            parsing = parser.parse("string0")

            expected_slot = {
                'entity': 'non_ascìi_entïty',
                'range': {
                    "start": 0,
                    "end": 7
                },
                'slotName': u'non_ascìi_slöt',
                'value': u'string0'
            }
            intent_name = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("naughty_intent", intent_name)
            self.assertListEqual([expected_slot], parsing[RES_SLOTS])
    def test_should_get_no_slots_with_none_intent(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: greeting
utterances:
  - Hello [name](John)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = DeterministicIntentParser().fit(dataset)

        # When
        slots = parser.get_slots("Hello John", None)

        # Then
        self.assertListEqual([], slots)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset = BEVERAGE_DATASET
        intent_parser = DeterministicIntentParser().fit(dataset)
        custom_entity_parser = intent_parser.custom_entity_parser

        # When
        intent_parser_bytes = intent_parser.to_byte_array()
        loaded_intent_parser = DeterministicIntentParser.from_byte_array(
            intent_parser_bytes,
            builtin_entity_parser=BuiltinEntityParser.build(language="en"),
            custom_entity_parser=custom_entity_parser)
        result = loaded_intent_parser.parse("make me two cups of coffee")

        # Then
        self.assertEqual("MakeCoffee", result[RES_INTENT][RES_INTENT_NAME])
    def test_should_get_intent(self):
        # Given
        dataset = validate_and_format_dataset(self.slots_dataset)

        parser = DeterministicIntentParser().fit(dataset)
        text = "this is a dummy_a query with another dummy_c at 10p.m. or " \
               "at 12p.m."

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
    def test_should_get_intent_when_filter(self):
        # Given
        dataset = validate_and_format_dataset(
            self.duplicated_utterances_dataset)

        parser = DeterministicIntentParser().fit(dataset)
        text = "Hello world"
        intent_name_1 = "dummy_intent_1"
        intent_name_2 = "dummy_intent_2"

        # When
        res_1 = parser.parse(text, intent_name_1)
        res_2 = parser.parse(text, [intent_name_2])

        # Then
        self.assertEqual(intent_name_1, res_1[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(intent_name_2, res_2[RES_INTENT][RES_INTENT_NAME])
    # NOTE: the @patch decorator was missing from this snippet; the target
    # below is an assumption about where get_stop_words is looked up.
    @patch("snips_nlu.intent_parser.deterministic_intent_parser"
           ".get_stop_words")
    def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
        # Given
        mock_get_stop_words.return_value = {"a", "hey"}
        dataset = self.slots_dataset
        config = DeterministicIntentParserConfig(ignore_stop_words=True)
        parser = DeterministicIntentParser(config).fit(dataset)
        text = "Hey this is dummy_a query with another dummy_c at 10p.m. or " \
               "at 12p.m."

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
    def test_should_parse_intent_after_deserialization(self):
        # Given
        dataset = self.slots_dataset
        shared = self.get_shared_data(dataset)
        parser = DeterministicIntentParser(**shared).fit(dataset)
        parser.persist(self.tmp_file_path)
        deserialized_parser = DeterministicIntentParser.from_path(
            self.tmp_file_path, **shared)
        text = "this is a dummy_a query with another dummy_c at 10p.m. or " \
               "at 12p.m."

        # When
        parsing = deserialized_parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)
        self.assertEqual(expected_intent, parsing[RES_INTENT])