Пример #1
0
    def test_should_not_parse_when_not_fitted(self):
        # Given
        parser = LookupIntentParser()

        # When / Then
        self.assertFalse(parser.fitted)
        with self.assertRaises(NotTrained):
            parser.parse("foobar")
Пример #2
0
    def test_should_parse_intent(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="intent2", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
Пример #3
0
    def test_should_ignore_very_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - "[event_type](meeting) tomorrow"

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)

---
type: entity
name: event_type
values:
  - call
  - diner""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        self.assertEqual(empty_result(text, 1.0), res)
Пример #4
0
    def test_should_parse_slightly_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent_1
utterances:
  - call tomorrow

---
type: intent
name: intent_2
utterances:
  - call [time:snips/datetime](today)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "call tomorrow"

        # When
        res = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(
            intent_name="intent_1", probability=2. / 3.)
        expected_result = parsing_result(text, expected_intent, [])
        self.assertEqual(expected_result, res)
Пример #5
0
    def test_should_parse_intent_with_ambivalent_words(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: give_flower
utterances:
  - give a rose to [name](emily)
  - give a daisy to [name](tom)
  - give a tulip to [name](daisy)
  """)
        dataset = Dataset.from_yaml_files("en",
                                          [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "give a daisy to emily"

        # When
        parsing = parser.parse(text)

        # Then
        expected_intent = intent_classification_result(
            intent_name="give_flower", probability=1.0)
        expected_slots = [
            {
                "entity": "name",
                "range": {"end": 21, "start": 16},
                "slotName": "name",
                "value": "emily"
            }
        ]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Пример #6
0
    def test_should_parse_naughty_strings(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        naughty_strings_path = TEST_PATH / "resources" / "naughty_strings.txt"
        with naughty_strings_path.open(encoding="utf8") as f:
            naughty_strings = [line.strip("\n") for line in f.readlines()]

        # When
        parser = LookupIntentParser().fit(dataset)

        # Then
        for s in naughty_strings:
            with self.fail_if_exception("Exception raised"):
                parser.parse(s)
Пример #7
0
    def test_should_parse_stop_words_slots(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: search
utterances:
  - search
  - search [search_object](this)
  - search [search_object](a cat)

---
type: entity
name: search_object
values:
  - [this thing, that]
  """)

        resources = deepcopy(self.get_resources("en"))
        resources[STOP_WORDS] = {"a", "this", "that"}
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser_config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config=parser_config, resources=resources)
        parser.fit(dataset)

        # When
        res_1 = parser.parse("search this")
        res_2 = parser.parse("search that")

        # Then
        expected_intent = intent_classification_result(
            intent_name="search", probability=1.0)
        expected_slots_1 = [
            unresolved_slot(match_range=(7, 11), value="this",
                            entity="search_object",
                            slot_name="search_object")
        ]
        expected_slots_2 = [
            unresolved_slot(match_range=(7, 11), value="that",
                            entity="search_object",
                            slot_name="search_object")
        ]
        self.assertEqual(expected_intent, res_1[RES_INTENT])
        self.assertEqual(expected_intent, res_2[RES_INTENT])
        self.assertListEqual(expected_slots_1, res_1[RES_SLOTS])
        self.assertListEqual(expected_slots_2, res_2[RES_SLOTS])
Пример #8
0
    def test_should_parse_intent_with_stop_words(self, mock_get_stop_words):
        # Given
        mock_get_stop_words.return_value = {"a", "hey"}
        dataset = self.slots_dataset
        config = LookupIntentParserConfig(ignore_stop_words=True)
        parser = LookupIntentParser(config).fit(dataset)
        text = "Hey this is dummy_a query with another dummy_c at 10p.m. " \
               "or at 12p.m."

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="dummy_intent_1", probability=probability)

        self.assertEqual(expected_intent, parsing[RES_INTENT])
Пример #9
0
    def test_should_fit_and_parse_with_non_ascii_tags(self):
        # Given
        inputs = ["string%s" % i for i in range(10)]
        utterances = [
            {
                DATA: [
                    {
                        TEXT: string,
                        ENTITY: "non_ascìi_entïty",
                        SLOT_NAME: "non_ascìi_slöt",
                    }
                ]
            }
            for string in inputs
        ]

        # When
        naughty_dataset = {
            "intents": {"naughty_intent": {"utterances": utterances}},
            "entities": {
                "non_ascìi_entïty": {
                    "use_synonyms": False,
                    "automatically_extensible": True,
                    "matching_strictness": 1.0,
                    "data": [],
                }
            },
            "language": "en",
        }

        # Then
        with self.fail_if_exception("Exception raised"):
            parser = LookupIntentParser().fit(naughty_dataset)
            parsing = parser.parse("string0")

            expected_slot = {
                "entity": "non_ascìi_entïty",
                "range": {"start": 0, "end": 7},
                "slotName": "non_ascìi_slöt",
                "value": "string0",
            }
            intent_name = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("naughty_intent", intent_name)
            self.assertListEqual([expected_slot], parsing[RES_SLOTS])
Пример #10
0
    def test_should_parse_intent_with_duplicated_slot_names(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: math_operation
slots:
  - name: number
    entity: snips/number
utterances:
  - what is [number](one) plus [number](one)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "what is one plus one"

        # When
        parsing = parser.parse(text)

        # Then
        probability = 1.0
        expected_intent = intent_classification_result(
            intent_name="math_operation", probability=probability)
        expected_slots = [
            {
                "entity": "snips/number",
                "range": {"end": 11, "start": 8},
                "slotName": "number",
                "value": "one"
            },
            {
                "entity": "snips/number",
                "range": {"end": 20, "start": 17},
                "slotName": "number",
                "value": "one"
            }
        ]

        self.assertDictEqual(expected_intent, parsing[RES_INTENT])
        self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Пример #11
0
    def test_should_parse_intent_with_filter(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: intent1
utterances:
  - foo bar baz

---
type: intent
name: intent2
utterances:
  - foo bar ban""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "foo bar ban"

        # When
        parsing = parser.parse(text, intents=["intent1"])

        # Then
        self.assertEqual(empty_result(text, 1.0), parsing)
Пример #12
0
    def test_should_ignore_completely_ambiguous_utterances(self):
        # Given
        dataset_stream = io.StringIO(
            """
---
type: intent
name: dummy_intent_1
utterances:
  - Hello world

---
type: intent
name: dummy_intent_2
utterances:
  - Hello world""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "Hello world"

        # When
        res = parser.parse(text)

        # Then
        self.assertEqual(empty_result(text, 1.0), res)
Пример #13
0
    def test_should_parse_slots(self):
        # Given
        dataset = self.slots_dataset
        parser = LookupIntentParser().fit(dataset)
        texts = [
            (
                "this is a dummy a query with another dummy_c at 10p.m. or at"
                " 12p.m.",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(37, 44),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(45, 54),
                        value="at 10p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(58, 67),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this, is,, a, dummy a query with another dummy_c at 10pm or "
                "at 12p.m.",
                [
                    unresolved_slot(
                        match_range=(14, 21),
                        value="dummy a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                    unresolved_slot(
                        match_range=(41, 48),
                        value="dummy_c",
                        entity="dummy_entity_2",
                        slot_name="dummy_slot_name2",
                    ),
                    unresolved_slot(
                        match_range=(49, 56),
                        value="at 10pm",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(60, 69),
                        value="at 12p.m.",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                ],
            ),
            (
                "this is a dummy b",
                [
                    unresolved_slot(
                        match_range=(10, 17),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
            (
                " this is a dummy b ",
                [
                    unresolved_slot(
                        match_range=(11, 18),
                        value="dummy b",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    )
                ],
            ),
            (
                " at 8am ’ there is a dummy  a",
                [
                    unresolved_slot(
                        match_range=(1, 7),
                        value="at 8am",
                        entity="snips/datetime",
                        slot_name="startTime",
                    ),
                    unresolved_slot(
                        match_range=(21, 29),
                        value="dummy  a",
                        entity="dummy_entity_1",
                        slot_name="dummy_slot_name",
                    ),
                ],
            ),
        ]

        for text, expected_slots in texts:
            # When
            parsing = parser.parse(text)

            # Then
            self.assertListEqual(expected_slots, parsing[RES_SLOTS])
Пример #14
0
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - meeting [time:snips/datetime](today)

---
type: intent
name: intent2
utterances:
  - meeting tomorrow

---
type: intent
name: intent3
utterances:
  - "[event_type](call) [time:snips/datetime](at 9pm)"

---
type: entity
name: event_type
values:
  - meeting
  - feedback session""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        parser = LookupIntentParser().fit(dataset)
        text = "meeting tomorrow"

        # When
        results = parser.parse(text, top_n=3)

        # Then
        time_slot = {
            "entity": "snips/datetime",
            "range": {"end": 16, "start": 8},
            "slotName": "time",
            "value": "tomorrow"
        }
        event_slot = {
            "entity": "event_type",
            "range": {"end": 7, "start": 0},
            "slotName": "event_type",
            "value": "meeting"
        }
        weight_intent_1 = 1. / 2.
        weight_intent_2 = 1.
        weight_intent_3 = 1. / 3.
        total_weight = weight_intent_1 + weight_intent_2 + weight_intent_3
        proba_intent2 = weight_intent_2 / total_weight
        proba_intent1 = weight_intent_1 / total_weight
        proba_intent3 = weight_intent_3 / total_weight
        expected_results = [
            extraction_result(
                intent_classification_result(
                    intent_name="intent2", probability=proba_intent2),
                slots=[]),
            extraction_result(
                intent_classification_result(
                    intent_name="intent1", probability=proba_intent1),
                slots=[time_slot]),
            extraction_result(
                intent_classification_result(
                    intent_name="intent3", probability=proba_intent3),
                slots=[event_slot, time_slot])
        ]
        self.assertEqual(expected_results, results)