Exemplo n.º 1
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Two parsers rebuilt from the same serialized dict, with fixed random
        # seeds, are expected to end up with equal CRF state features.

        # Given
        validated_dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        classifier_seed, slot_filler_seed = 666, 42
        classifier_config = LogRegIntentClassifierConfig(
            random_seed=classifier_seed)
        slot_filler_config = CRFSlotFillerConfig(random_seed=slot_filler_seed)
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=classifier_config,
            slot_filler_config=slot_filler_config)
        serialized_parser = ProbabilisticIntentParser(config).to_dict()

        # When
        fitted_parsers = [
            ProbabilisticIntentParser.from_dict(serialized_parser).fit(
                validated_dataset)
            for _ in range(2)
        ]

        # Then
        weights_1, weights_2 = [
            fitted.slot_fillers["MakeTea"].crf_model.state_features_
            for fitted in fitted_parsers
        ]
        self.assertEqual(weights_1, weights_2)
Exemplo n.º 2
0
    def test_should_get_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - yala yili

---
type: intent
name: intent2
utterances:
  - yala yili yulu

---
type: intent
name: intent3
utterances:
  - yili yulu yele""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(classifier_config)
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)
        text = "yala yili yulu"

        # When
        results = parser.get_intents(text)
        intents = [res[RES_INTENT_NAME] for res in results]

        # Then
        expected_intents = ["intent2", "intent1", "intent3", None]

        self.assertEqual(expected_intents, intents)
    def test_get_slots_should_raise_with_unknown_intent(self):
        # Given
        slots_dataset_stream = io.StringIO("""
---
type: intent
name: greeting1
utterances:
  - Hello [name1](John)

---
type: intent
name: goodbye
utterances:
  - Goodbye [name](Eric)""")
        dataset = Dataset.from_yaml_files("en", [slots_dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(config).fit(dataset)

        # When / Then
        with self.assertRaises(IntentNotFoundError):
            parser.get_slots("Hello John", "greeting3")
Exemplo n.º 4
0
    def test_should_be_deserializable(self):
        # Lays out a persisted parser directory by hand, then checks that
        # from_path restores the config, a classifier and both slot fillers.

        # Given
        for unit_class in (TestIntentClassifier, TestSlotFiller):
            register_processing_unit(unit_class)

        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig()
        )
        slot_filler_dirs = [
            ("MakeCoffee", "slot_filler_MakeCoffee"),
            ("MakeTea", "slot_filler_MakeTea"),
        ]
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "slot_fillers": [
                {"intent": intent, "slot_filler_name": dir_name}
                for intent, dir_name in slot_filler_dirs
            ],
            "config": config.to_dict(),
        }
        # (sub-directory, unit name written in its metadata.json)
        unit_names_by_dir = [
            ("intent_classifier", "test_intent_classifier"),
            ("slot_filler_MakeCoffee", "test_slot_filler"),
            ("slot_filler_MakeTea", "test_slot_filler"),
        ]
        root = self.tmp_file_path
        root.mkdir()
        for dir_name, _ in unit_names_by_dir:
            (root / dir_name).mkdir()
        self.writeJsonContent(root / "intent_parser.json", parser_dict)
        for dir_name, unit_name in unit_names_by_dir:
            self.writeJsonContent(root / dir_name / "metadata.json",
                                  {"unit_name": unit_name})

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertDictEqual(parser.config.to_dict(), config.to_dict())
        self.assertIsNotNone(parser.intent_classifier)
        restored_intents = sorted(parser.slot_fillers)
        self.assertListEqual(restored_intents, ["MakeCoffee", "MakeTea"])
Exemplo n.º 5
0
    def test_should_be_serializable(self):
        # Persists a fitted parser and checks the JSON files written for the
        # parser itself, its intent classifier and its two slot fillers.

        # Given
        for unit_class in (TestIntentClassifier, TestSlotFiller):
            register_processing_unit(unit_class)

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        parser = ProbabilisticIntentParser(parser_config)
        training_dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        parser.fit(training_dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {"unit_name": "test_slot_filler"},
            "intent_classifier_config": {
                "unit_name": "test_intent_classifier"
            },
        }
        expected_parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "config": expected_parser_config,
            "slot_fillers": [
                {"intent": "MakeCoffee", "slot_filler_name": "slot_filler_0"},
                {"intent": "MakeTea", "slot_filler_name": "slot_filler_1"},
            ],
        }
        metadata = {"unit_name": "probabilistic_intent_parser"}
        metadata_slot_filler = {"unit_name": "test_slot_filler"}
        metadata_intent_classifier = {"unit_name": "test_intent_classifier"}

        root = self.tmp_file_path
        self.assertJsonContent(root / "metadata.json", metadata)
        self.assertJsonContent(root / "intent_parser.json",
                               expected_parser_dict)
        self.assertJsonContent(root / "intent_classifier" / "metadata.json",
                               metadata_intent_classifier)
        for slot_filler_dir in ("slot_filler_0", "slot_filler_1"):
            self.assertJsonContent(root / slot_filler_dir / "metadata.json",
                                   metadata_slot_filler)
Exemplo n.º 6
0
    def __init__(self, intent_parsers_configs=None):
        # When no configs are given, fall back to a deterministic parser
        # followed by a probabilistic one. Each entry is then passed through
        # get_processing_unit_config before being stored.
        if intent_parsers_configs is None:
            # Imported inside the constructor, as in the original code.
            from snips_nlu.pipeline.configs import (
                DeterministicIntentParserConfig,
                ProbabilisticIntentParserConfig)
            intent_parsers_configs = [
                DeterministicIntentParserConfig(),
                ProbabilisticIntentParserConfig()
            ]
        self.intent_parsers_configs = [
            get_processing_unit_config(parser_config)
            for parser_config in intent_parsers_configs
        ]
Exemplo n.º 7
0
    def __init__(self, intent_parsers_configs=None):
        # When no configs are given, fall back to a deterministic parser
        # followed by a probabilistic one. Each entry is then passed through
        # IntentParser.get_config before being stored.
        from snips_nlu.intent_parser import IntentParser

        if intent_parsers_configs is None:
            from snips_nlu.pipeline.configs import (
                DeterministicIntentParserConfig,
                ProbabilisticIntentParserConfig)
            intent_parsers_configs = [
                DeterministicIntentParserConfig(),
                ProbabilisticIntentParserConfig()
            ]

        resolved_configs = []
        for parser_config in intent_parsers_configs:
            resolved_configs.append(IntentParser.get_config(parser_config))
        self.intent_parsers_configs = resolved_configs
Exemplo n.º 8
0
    def test_probabilistic_intent_parser_config(self):
        # A config dict is expected to survive a from_dict / to_dict round
        # trip unchanged.

        # Given
        classifier_dict = LogRegIntentClassifierConfig().to_dict()
        slot_filler_dict = CRFSlotFillerConfig().to_dict()
        config_dict = {
            "unit_name": "probabilistic_intent_parser",
            "intent_classifier_config": classifier_dict,
            "slot_filler_config": slot_filler_dict,
        }

        # When
        round_tripped = ProbabilisticIntentParserConfig.from_dict(
            config_dict).to_dict()

        # Then
        self.assertDictEqual(config_dict, round_tripped)
Exemplo n.º 9
0
    def test_nlu_config_from_dict(self):
        # An engine config dict is expected to survive a from_dict / to_dict
        # round trip unchanged.

        # Given
        deterministic_dict = DeterministicIntentParserConfig().to_dict()
        probabilistic_dict = ProbabilisticIntentParserConfig().to_dict()
        config_dict = {
            "unit_name": "nlu_engine",
            "intent_parsers_configs": [deterministic_dict, probabilistic_dict],
        }

        # When
        round_tripped = NLUEngineConfig.from_dict(config_dict).to_dict()

        # Then
        self.assertDictEqual(config_dict, round_tripped)
    def test_should_be_serializable_into_bytearray(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            def get_intent(self, text, intents_filter):
                if "tea" in text:
                    return intent_classification_result("MakeTea", 1.0)
                elif "coffee" in text:
                    return intent_classification_result("MakeCoffee", 1.0)
                return intent_classification_result(None, 1.0)

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)

        # When
        intent_parser_bytes = parser.to_byte_array()
        loaded_intent_parser = ProbabilisticIntentParser.from_byte_array(
            intent_parser_bytes)
        result = loaded_intent_parser.parse("make me two cups of tea")

        # Then
        self.assertEqual("MakeTea", result[RES_INTENT][RES_INTENT_NAME])
    def test_engine_with_keyword_slot_filler_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: SetLightColor
utterances:
- set the light to [color](blue) in the [room](kitchen)
- please make the lights [color](red) in the [room](bathroom)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "SetLightColor"
        slot_filler_config = {
            "unit_name": "keyword_slot_filler",
            "lowercase": True
        }
        parser_config = ProbabilisticIntentParserConfig(
            slot_filler_config=slot_filler_config)
        engine_config = NLUEngineConfig([parser_config])
        engine = SnipsNLUEngine(engine_config).fit(dataset, intent)
        engine.persist(self.tmp_file_path)
        text = "I want Red lights in the kitchen now"

        # When
        loaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        res = loaded_engine.parse(text)

        # Then
        expected_slots = [
            custom_slot(
                unresolved_slot(match_range={
                    START: 7,
                    END: 10
                },
                                value="Red",
                                entity="color",
                                slot_name="color"), "red"),
            custom_slot(
                unresolved_slot(match_range={
                    START: 25,
                    END: 32
                },
                                value="kitchen",
                                entity="room",
                                slot_name="room"))
        ]
        self.assertListEqual(expected_slots, res["slots"])
Exemplo n.º 12
0
    def test_should_be_deserializable_before_fitting(self):
        # A parser dict with no classifier and no slot fillers is expected to
        # load into a parser with a None classifier and empty slot fillers.

        # Given
        config = ProbabilisticIntentParserConfig().to_dict()
        parser_dict = dict(
            unit_name="probabilistic_intent_parser",
            config=config,
            intent_classifier=None,
            slot_fillers={},
        )

        # When
        parser = ProbabilisticIntentParser.from_dict(parser_dict)

        # Then
        self.assertEqual(parser.config.to_dict(), config)
        self.assertIsNone(parser.intent_classifier)
        self.assertDictEqual({}, parser.slot_fillers)
    def test_should_not_retrain_slot_filler_when_no_force_retrain(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            fit_call_count = 0

            def fit(self, dataset, intent):
                MySlotFiller.fit_call_count += 1
                return super(MySlotFiller, self).fit(dataset, intent)

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config)
        slot_filler = MySlotFiller(None)
        slot_filler.fit(dataset, "MakeCoffee")
        parser.slot_fillers["MakeCoffee"] = slot_filler

        # When / Then
        parser.fit(dataset, force_retrain=False)
        self.assertEqual(2, MySlotFiller.fit_call_count)
Exemplo n.º 14
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        seed1 = 666
        seed2 = 42
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=LogRegIntentClassifierConfig(
                random_seed=seed1),
            slot_filler_config=CRFSlotFillerConfig(random_seed=seed2))
        shared = self.get_shared_data(dataset)
        parser = ProbabilisticIntentParser(config, **shared)
        parser.persist(self.tmp_file_path)

        # When
        fitted_parser_1 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        fitted_parser_2 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        # Then
        feature_weights_1 = fitted_parser_1.slot_fillers[
            "MakeTea"].crf_model.state_features_
        feature_weights_2 = fitted_parser_2.slot_fillers[
            "MakeTea"].crf_model.state_features_
        self.assertEqual(feature_weights_1, feature_weights_2)
Exemplo n.º 15
0
    def test_should_parse_top_intents(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            classifier_config, slot_filler_config)
        parser = ProbabilisticIntentParser(parser_config)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        results = parser.parse(text, top_n=2)
        intents = [res[RES_INTENT][RES_INTENT_NAME] for res in results]
        entities = [[s[RES_VALUE] for s in res[RES_SLOTS]] for res in results]

        # Then
        expected_intents = ["intent2", "intent1"]
        expected_entities = [["baz"], ["foo"]]

        self.assertListEqual(expected_intents, intents)
        self.assertListEqual(expected_entities, entities)
Exemplo n.º 16
0
    def test_should_be_deserializable_before_fitting(self):
        # A hand-written directory describing an unfitted parser is expected
        # to load into a parser with a None classifier and no slot fillers.

        # Given
        config = ProbabilisticIntentParserConfig().to_dict()
        parser_dict = dict(
            unit_name="probabilistic_intent_parser",
            config=config,
            intent_classifier=None,
            slot_fillers={},
        )
        root = self.tmp_file_path
        root.mkdir()
        self.writeJsonContent(root / "metadata.json",
                              {"unit_name": "probabilistic_intent_parser"})
        self.writeJsonContent(root / "intent_parser.json", parser_dict)

        # When
        parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

        # Then
        self.assertEqual(parser.config.to_dict(), config)
        self.assertIsNone(parser.intent_classifier)
        self.assertDictEqual({}, parser.slot_fillers)
Exemplo n.º 17
0
    def test_should_parse_with_filter(self):
        dataset_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"

---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)

---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        classifier_config = LogRegIntentClassifierConfig(random_seed=42)
        slot_filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            classifier_config, slot_filler_config)
        parser = ProbabilisticIntentParser(parser_config)
        parser.fit(dataset)
        text = "foo bar baz"

        # When
        result = parser.parse(text, intents=["intent1", "intent3"])

        # Then
        expected_slots = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]

        self.assertEqual("intent1", result[RES_INTENT][RES_INTENT_NAME])
        self.assertEqual(expected_slots, result[RES_SLOTS])
Exemplo n.º 18
0
    def test_synonyms_should_point_to_base_value(self, mocked_proba_parse):
        # The mocked parser returns the synonym "dummy1_bis" as slot value;
        # the engine is expected to resolve it to the base value "dummy1".

        # Given
        utterance_chunk = {
            "text": "dummy_1",
            "entity": "dummy_entity_1",
            "slot_name": "dummy_slot_name"
        }
        entity = {
            "use_synonyms": True,
            "automatically_extensible": False,
            "data": [{
                "value": "dummy1",
                "synonyms": ["dummy1", "dummy1_bis"]
            }]
        }
        dataset = {
            "intents": {
                "dummy_intent_1": {
                    "utterances": [{"data": [utterance_chunk]}]
                }
            },
            "entities": {"dummy_entity_1": entity},
            "language": "en"
        }

        text = "dummy1_bis"
        mocked_intent = intent_classification_result("dummy_intent_1", 1.0)
        mocked_slots = [
            unresolved_slot(match_range=(0, 10),
                            value="dummy1_bis",
                            entity="dummy_entity_1",
                            slot_name="dummy_slot_name")
        ]
        mocked_proba_parse.return_value = parsing_result(
            text, mocked_intent, mocked_slots)

        engine_config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
        engine = SnipsNLUEngine(engine_config).fit(dataset)

        # When
        result = engine.parse(text)

        # Then
        expected_slot = {
            RES_MATCH_RANGE: {"start": 0, "end": 10},
            RES_RAW_VALUE: "dummy1_bis",
            RES_VALUE: {"kind": "Custom", "value": "dummy1"},
            RES_ENTITY: "dummy_entity_1",
            RES_SLOT_NAME: "dummy_slot_name"
        }
        expected_result = parsing_result(
            text, intent=mocked_intent, slots=[expected_slot])
        self.assertEqual(expected_result, result)
Exemplo n.º 19
0
    def test_should_be_deserializable(self):
        # Registers minimal test units, then checks that from_dict rebuilds a
        # parser with the expected config, a classifier and both slot fillers.

        # Given
        class TestIntentClassifierConfig(ProcessingUnitConfig):
            unit_name = "test_intent_classifier"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentClassifierConfig()

        class TestIntentClassifier(IntentClassifier):
            unit_name = "test_intent_classifier"
            config_type = TestIntentClassifierConfig

            def get_intent(self, text, intents_filter):
                return None

            def fit(self, dataset):
                return self

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestIntentClassifier(cls.config_type())

        class TestSlotFillerConfig(ProcessingUnitConfig):
            unit_name = "test_slot_filler"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestSlotFillerConfig()

        class TestSlotFiller(SlotFiller):
            unit_name = "test_slot_filler"
            config_type = TestSlotFillerConfig

            def get_slots(self, text):
                return []

            def fit(self, dataset, intent):
                return self

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestSlotFiller(cls.config_type())

        for unit_class in (TestIntentClassifier, TestSlotFiller):
            register_processing_unit(unit_class)

        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "intent_classifier": {"unit_name": "test_intent_classifier"},
            "slot_fillers": {
                "MakeCoffee": {"unit_name": "test_slot_filler"},
                "MakeTea": {"unit_name": "test_slot_filler"},
            },
            "config": config.to_dict(),
        }

        # When
        parser = ProbabilisticIntentParser.from_dict(parser_dict)

        # Then
        self.assertDictEqual(parser.config.to_dict(), config.to_dict())
        self.assertIsNotNone(parser.intent_classifier)
        restored_intents = sorted(parser.slot_fillers)
        self.assertListEqual(restored_intents, ["MakeCoffee", "MakeTea"])
Exemplo n.º 20
0
    def test_should_be_serializable(self):
        # Registers minimal test units, fits a parser on the beverage dataset
        # and checks the dict produced by to_dict.

        # Given
        class TestIntentClassifierConfig(ProcessingUnitConfig):
            unit_name = "test_intent_classifier"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestIntentClassifierConfig()

        class TestIntentClassifier(IntentClassifier):
            unit_name = "test_intent_classifier"
            config_type = TestIntentClassifierConfig

            def get_intent(self, text, intents_filter):
                return None

            def fit(self, dataset):
                return self

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestIntentClassifier(cls.config_type())

        class TestSlotFillerConfig(ProcessingUnitConfig):
            unit_name = "test_slot_filler"

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, obj_dict):
                return TestSlotFillerConfig()

        class TestSlotFiller(SlotFiller):
            unit_name = "test_slot_filler"
            config_type = TestSlotFillerConfig

            def get_slots(self, text):
                return []

            def fit(self, dataset, intent):
                return self

            def to_dict(self):
                return dict(unit_name=self.unit_name)

            @classmethod
            def from_dict(cls, unit_dict):
                return TestSlotFiller(cls.config_type())

        for unit_class in (TestIntentClassifier, TestSlotFiller):
            register_processing_unit(unit_class)

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=TestIntentClassifierConfig(),
            slot_filler_config=TestSlotFillerConfig())
        parser = ProbabilisticIntentParser(parser_config)
        training_dataset = validate_and_format_dataset(BEVERAGE_DATASET)
        parser.fit(training_dataset)

        # When
        actual_parser_dict = parser.to_dict()

        # Then
        expected_parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {"unit_name": "test_slot_filler"},
            "intent_classifier_config": {
                "unit_name": "test_intent_classifier"
            },
        }
        expected_parser_dict = {
            "unit_name": "probabilistic_intent_parser",
            "config": expected_parser_config,
            "intent_classifier": {"unit_name": "test_intent_classifier"},
            "slot_fillers": {
                "MakeCoffee": {"unit_name": "test_slot_filler"},
                "MakeTea": {"unit_name": "test_slot_filler"},
            },
        }
        self.assertDictEqual(actual_parser_dict, expected_parser_dict)
Exemplo n.º 21
0
    def test_synonyms_should_not_collide_when_remapped_to_base_value(
            self, mocked_proba_parse):
        # "favorite" and "favorïte" are synonyms of two different values;
        # each is expected to resolve to its own base value ("b" and "a").

        # Given
        utterance_chunk = {
            "text": "value",
            "entity": "entity1",
            "slot_name": "slot1"
        }
        entity = {
            "data": [
                {"value": "a", "synonyms": ["favorïte"]},
                {"value": "b", "synonyms": ["favorite"]},
            ],
            "use_synonyms": True,
            "automatically_extensible": False
        }
        dataset = {
            "intents": {
                "intent1": {"utterances": [{"data": [utterance_chunk]}]}
            },
            "entities": {"entity1": entity},
            "language": "en",
        }

        mocked_intent = intent_classification_result("intent1", 1.0)

        def mock_proba_parse(text, intents):
            # The whole input text is returned as a single unresolved slot
            whole_text_slot = unresolved_slot(match_range=(0, len(text)),
                                              value=text,
                                              entity="entity1",
                                              slot_name="slot1")
            return parsing_result(text, mocked_intent, [whole_text_slot])

        mocked_proba_parse.side_effect = mock_proba_parse

        engine_config = NLUEngineConfig([ProbabilisticIntentParserConfig()])
        engine = SnipsNLUEngine(engine_config).fit(dataset)

        # When
        result1 = engine.parse("favorite")
        result2 = engine.parse("favorïte")

        # Then
        def resolved_slot(raw_value, base_value):
            return {
                RES_MATCH_RANGE: {"start": 0, "end": 8},
                RES_RAW_VALUE: raw_value,
                RES_VALUE: {"kind": "Custom", "value": base_value},
                RES_ENTITY: "entity1",
                RES_SLOT_NAME: "slot1"
            }

        expected_result1 = parsing_result(
            "favorite",
            intent=mocked_intent,
            slots=[resolved_slot("favorite", "b")])
        expected_result2 = parsing_result(
            "favorïte",
            intent=mocked_intent,
            slots=[resolved_slot("favorïte", "a")])
        self.assertEqual(expected_result1, result1)
        self.assertEqual(expected_result2, result2)
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "my_slot_filler"
            },
            "intent_classifier_config": {
                "unit_name": "my_intent_classifier"
            }
        }
        expected_parser_dict = {
            "config":
            expected_parser_config,
            "slot_fillers": [{
                "intent": "MakeCoffee",
                "slot_filler_name": "slot_filler_0"
            }, {
                "intent": "MakeTea",
                "slot_filler_name": "slot_filler_1"
            }]
        }
        metadata = {
            "unit_name": "probabilistic_intent_parser",
        }
        metadata_slot_filler = {"unit_name": "my_slot_filler", "fitted": True}
        metadata_intent_classifier = {
            "unit_name": "my_intent_classifier",
            "fitted": True
        }

        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_parser.json",
                               expected_parser_dict)
        self.assertJsonContent(
            self.tmp_file_path / "intent_classifier" / "metadata.json",
            metadata_intent_classifier)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_0" / "metadata.json",
            metadata_slot_filler)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_1" / "metadata.json",
            metadata_slot_filler)