Exemplo n.º 1
0
    def test_training_should_be_reproducible(self):
        # Given
        random_state = 42
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me one cup of coffee please
- brew two cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        engine1 = SnipsNLUEngine(random_state=random_state)
        engine1.fit(dataset)

        engine2 = SnipsNLUEngine(random_state=random_state)
        engine2.fit(dataset)

        # Then
        with temp_dir() as tmp_dir:
            dir_engine1 = tmp_dir / "engine1"
            dir_engine2 = tmp_dir / "engine2"
            engine1.persist(dir_engine1)
            engine2.persist(dir_engine2)
            hash1 = dirhash(str(dir_engine1), 'sha256')
            hash2 = dirhash(str(dir_engine2), 'sha256')
            self.assertEqual(hash1, hash2)
Exemplo n.º 2
0
    def test_should_persist_resources_from_memory(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        engine = SnipsNLUEngine(**shared).fit(dataset)
        dir_temp_engine = self.fixture_dir / "temp_engine"
        engine.persist(dir_temp_engine)

        # When
        loaded_engine = SnipsNLUEngine.from_path(dir_temp_engine)
        shutil.rmtree(str(dir_temp_engine))

        # Then
        loaded_engine.to_byte_array()
Exemplo n.º 3
0
    def test_should_parse_after_deserialization_from_dir(self):
        """An engine reloaded from its persisted directory must still parse.

        Fits an engine on ``BEVERAGE_DATASET``, persists it, loads it back
        with ``from_path`` and checks the input, intent name and slots of
        the parsing result.
        """
        # Given
        fitted_engine = SnipsNLUEngine().fit(BEVERAGE_DATASET)
        sentence = "Give me 3 cups of hot tea please"

        # When
        fitted_engine.persist(self.tmp_file_path)
        reloaded = SnipsNLUEngine.from_path(self.tmp_file_path)
        parsing = reloaded.parse(sentence)

        # Then
        number_slot = resolved_slot(
            {START: 8, END: 9},
            "3",
            {"kind": "Number", "value": 3.0},
            "snips/number",
            "number_of_cups")
        temperature_slot = custom_slot(
            unresolved_slot(
                {START: 18, END: 21},
                "hot",
                "Temperature",
                "beverage_temperature"))
        self.assertEqual(parsing[RES_INPUT], sentence)
        self.assertEqual(parsing[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(parsing[RES_SLOTS],
                             [number_slot, temperature_slot])
Exemplo n.º 4
0
    def test_should_serialize_duplicated_intent_parsers(self):
        """Persisting an engine with the same parser twice keeps both copies.

        The engine is configured with two ``TestIntentParser1Config``
        instances. After persisting, "nlu_engine.json" must list the parsers
        as "test_intent_parser1" and "test_intent_parser1_2", and each of
        those directories must hold a "metadata.json" with the original
        unit name.
        """
        # Given
        register_processing_unit(TestIntentParser1)
        config = NLUEngineConfig(
            [TestIntentParser1Config(), TestIntentParser1Config()])
        nlu_engine = SnipsNLUEngine(config).fit(BEVERAGE_DATASET)

        # When
        nlu_engine.persist(self.tmp_file_path)

        # Then
        temperature_utterances = {
            "boiling": "hot",
            "Boiling": "hot",
            "cold": "cold",
            "Cold": "cold",
            "hot": "hot",
            "Hot": "hot",
            "iced": "cold",
            "Iced": "cold"
        }
        slot_name_mappings = {
            "MakeCoffee": {
                "number_of_cups": "snips/number"
            },
            "MakeTea": {
                "beverage_temperature": "Temperature",
                "number_of_cups": "snips/number"
            }
        }
        dataset_metadata = {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                    "utterances": temperature_utterances
                }
            },
            "slot_name_mappings": slot_name_mappings,
        }
        persisted_parser_dirs = [
            "test_intent_parser1",
            "test_intent_parser1_2"
        ]
        expected_engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": dataset_metadata,
            "config": config.to_dict(),
            "intent_parsers": persisted_parser_dirs,
            "model_version": snips_nlu.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                               expected_engine_dict)
        # Both directories must carry the un-suffixed unit name
        for parser_dir in persisted_parser_dirs:
            self.assertJsonContent(
                self.tmp_file_path / parser_dir / "metadata.json",
                {"unit_name": "test_intent_parser1"})
Exemplo n.º 5
0
    def test_should_raise_when_persisting_at_existing_path(self):
        """Persisting to an existing path must raise PersistingError."""
        # Given
        self.tmp_file_path.mkdir()

        # When
        unfitted_engine = SnipsNLUEngine()

        # Then
        self.assertRaises(
            PersistingError, unfitted_engine.persist, self.tmp_file_path)
Exemplo n.º 6
0
    def test_should_be_deserializable_from_dir_when_empty(self):
        """An engine persisted without fitting must load back as unfitted."""
        # Given
        SnipsNLUEngine().persist(self.tmp_file_path)

        # When
        reloaded_engine = SnipsNLUEngine.from_path(self.tmp_file_path)

        # Then
        self.assertFalse(reloaded_engine.fitted)
Exemplo n.º 7
0
    def test_parse_with_intents_filter(self):
        # Given / When
        dataset_stream = io.StringIO(u"""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: Make,Coffee
utterances:
  - brew [number_of_cups:snips/number](one) cup of coffee please
  - make me [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        nlu_engine = SnipsNLUEngine().fit(dataset)
        nlu_engine.persist(self.tmp_file_path)

        # When / Then
        output_target = io.StringIO()
        with self.fail_if_exception("Failed to parse using CLI script"):
            with redirect_stdout(output_target):
                parse(str(self.tmp_file_path), "Make me two cups of coffee",
                      False, 'MakeTea,"Make,Coffee"')
        output = output_target.getvalue()

        # Then
        expected_output = """{
  "input": "Make me two cups of coffee",
  "intent": {
    "intentName": "Make,Coffee",
    "probability": 1.0
  },
  "slots": [
    {
      "entity": "snips/number",
      "range": {
        "end": 11,
        "start": 8
      },
      "rawValue": "two",
      "slotName": "number_of_cups",
      "value": {
        "kind": "Number",
        "value": 2.0
      }
    }
  ]
}
"""
        self.assertEqual(expected_output, output)
Exemplo n.º 8
0
    def test_should_parse_after_deserialization_from_dir(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        shared = self.get_shared_data(dataset)
        engine = SnipsNLUEngine(**shared).fit(dataset)
        text = "Give me 3 cups of hot tea please"

        # When
        engine.persist(self.tmp_file_path)
        deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
        result = deserialized_engine.parse(text)

        # Then
        expected_slots = [
            resolved_slot({
                START: 8,
                END: 9
            }, "3", {
                "kind": "Number",
                "value": 3.0
            }, "snips/number", "number_of_cups"),
            custom_slot(
                unresolved_slot({
                    START: 18,
                    END: 21
                }, "hot", "Temperature", "beverage_temperature"))
        ]
        self.assertEqual(result[RES_INPUT], text)
        self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
        self.assertListEqual(result[RES_SLOTS], expected_slots)
Exemplo n.º 9
0
    def test_should_bypass_model_version_check_when_specified(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: Greeting
utterances:
- hello world""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        with patch("snips_nlu.nlu_engine.nlu_engine.__model_version__",
                   "0.1.0"):
            engine = SnipsNLUEngine().fit(dataset)
            engine.persist(self.tmp_file_path)

        # When / Then
        SnipsNLUEngine.from_path(self.tmp_file_path, bypass_version_check=True)
Exemplo n.º 10
0
    def test_should_be_serializable_into_dir_when_empty(self):
        """An engine persisted without fitting writes an empty description.

        The "nlu_engine.json" file must have ``None`` for the dataset
        metadata and config, an empty parsers list, and the current model
        and package versions.
        """
        # Given
        unfitted_engine = SnipsNLUEngine()

        # When
        unfitted_engine.persist(self.tmp_file_path)

        # Then
        engine_json_path = self.tmp_file_path / "nlu_engine.json"
        self.assertJsonContent(
            engine_json_path,
            {
                "unit_name": "nlu_engine",
                "dataset_metadata": None,
                "config": None,
                "intent_parsers": [],
                "model_version": snips_nlu.__model_version__,
                "training_package_version": snips_nlu.__version__
            })
Exemplo n.º 11
0
    def test_should_serialize_duplicated_intent_parsers(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentParser.register("my_intent_parser", True)
        class MyIntentParser(MockIntentParser):
            pass

        # pylint:enable=unused-variable

        parsers_configs = ["my_intent_parser", "my_intent_parser"]
        config = NLUEngineConfig(parsers_configs)
        engine = SnipsNLUEngine(config).fit(dataset)

        # When
        engine.persist(self.tmp_file_path)

        # Then
        expected_engine_dict = {
            "unit_name": "nlu_engine",
            "dataset_metadata": {
                "language_code": "en",
                "entities": {
                    "Temperature": {
                        "automatically_extensible": True,
                    }
                },
                "slot_name_mappings": {
                    "MakeCoffee": {
                        "number_of_cups": "snips/number"
                    },
                    "MakeTea": {
                        "beverage_temperature": "Temperature",
                        "number_of_cups": "snips/number"
                    }
                },
            },
            "config": {
                "unit_name": "nlu_engine",
                "intent_parsers_configs": [
                    {
                        "unit_name": "my_intent_parser"
                    },
                    {
                        "unit_name": "my_intent_parser"
                    }
                ]
            },
            "intent_parsers": [
                "my_intent_parser",
                "my_intent_parser_2"
            ],
            "builtin_entity_parser": "builtin_entity_parser",
            "custom_entity_parser": "custom_entity_parser",
            "model_version": snips_nlu.__model_version__,
            "training_package_version": snips_nlu.__version__
        }
        self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                               expected_engine_dict)
        self.assertJsonContent(
            self.tmp_file_path / "my_intent_parser" / "metadata.json",
            {"unit_name": "my_intent_parser", "fitted": True})
        self.assertJsonContent(
            self.tmp_file_path / "my_intent_parser_2" / "metadata.json",
            {"unit_name": "my_intent_parser", "fitted": True})