def test_training_should_be_reproducible(self):
    """Fitting two engines with the same random_state on the same dataset
    must produce byte-identical persisted artifacts (verified via a
    directory hash)."""
    # Given
    random_state = 42
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a hot cup of tea
- make me five tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me one cup of coffee please
- brew two cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When: train two engines independently with the same seed
    engine1 = SnipsNLUEngine(random_state=random_state)
    engine1.fit(dataset)

    engine2 = SnipsNLUEngine(random_state=random_state)
    engine2.fit(dataset)

    # Then: their serialized forms hash identically
    with temp_dir() as tmp_dir:
        dir_engine1 = tmp_dir / "engine1"
        dir_engine2 = tmp_dir / "engine2"
        engine1.persist(dir_engine1)
        engine2.persist(dir_engine2)
        hash1 = dirhash(str(dir_engine1), 'sha256')
        hash2 = dirhash(str(dir_engine2), 'sha256')
        self.assertEqual(hash1, hash2)
def test_should_persist_resources_from_memory(self):
    """An engine reloaded from disk must be able to re-serialize itself
    (``to_byte_array``) even after its on-disk resources were deleted,
    i.e. resources must have been fully loaded into memory.

    Fix: the temporary fixture directory is now removed in a ``finally``
    block so it is not leaked when ``from_path`` raises.
    """
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    engine = SnipsNLUEngine(**shared).fit(dataset)
    dir_temp_engine = self.fixture_dir / "temp_engine"
    engine.persist(dir_temp_engine)

    # When
    try:
        loaded_engine = SnipsNLUEngine.from_path(dir_temp_engine)
    finally:
        # Clean up the fixture dir whether or not loading succeeded;
        # the subsequent serialization must not need it anymore.
        shutil.rmtree(str(dir_temp_engine))

    # Then: serializing from memory must not touch the deleted directory
    loaded_engine.to_byte_array()
def test_should_parse_beverage_dataset_after_deserialization_from_dir(self):
    """A persisted + reloaded engine must parse like the original.

    Fix: renamed — this method previously shared the name
    ``test_should_parse_after_deserialization_from_dir`` with a later
    definition in this file; if both live in the same TestCase the later
    one shadows this one and it silently never runs.
    """
    # Given
    dataset = BEVERAGE_DATASET
    engine = SnipsNLUEngine().fit(dataset)
    input_ = "Give me 3 cups of hot tea please"

    # When
    engine.persist(self.tmp_file_path)
    deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    result = deserialized_engine.parse(input_)

    # Then
    expected_slots = [
        resolved_slot({
            START: 8,
            END: 9
        }, "3", {
            "kind": "Number",
            "value": 3.0
        }, "snips/number", "number_of_cups"),
        custom_slot(
            unresolved_slot({
                START: 18,
                END: 21
            }, "hot", "Temperature", "beverage_temperature"))
    ]
    self.assertEqual(result[RES_INPUT], input_)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def test_should_serialize_duplicated_test_intent_parsers(self):
    """Two parsers of the same unit name must be persisted under
    de-duplicated directory names (``test_intent_parser1`` and
    ``test_intent_parser1_2``) while keeping the same ``unit_name``.

    Fix: renamed — this method previously shared the name
    ``test_should_serialize_duplicated_intent_parsers`` with a later
    definition in this file; if both live in the same TestCase the later
    one shadows this one and it silently never runs.
    """
    # Given
    register_processing_unit(TestIntentParser1)
    parser1_config = TestIntentParser1Config()
    parser1bis_config = TestIntentParser1Config()
    parsers_configs = [parser1_config, parser1bis_config]
    config = NLUEngineConfig(parsers_configs)
    engine = SnipsNLUEngine(config).fit(BEVERAGE_DATASET)

    # When
    engine.persist(self.tmp_file_path)

    # Then
    expected_engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                    "utterances": {
                        "boiling": "hot",
                        "Boiling": "hot",
                        "cold": "cold",
                        "Cold": "cold",
                        "hot": "hot",
                        "Hot": "hot",
                        "iced": "cold",
                        "Iced": "cold"
                    }
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        },
        "config": config.to_dict(),
        "intent_parsers": [
            "test_intent_parser1",
            "test_intent_parser1_2"
        ],
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                           expected_engine_dict)
    self.assertJsonContent(
        self.tmp_file_path / "test_intent_parser1" / "metadata.json",
        {"unit_name": "test_intent_parser1"})
    self.assertJsonContent(
        self.tmp_file_path / "test_intent_parser1_2" / "metadata.json",
        {"unit_name": "test_intent_parser1"})
def test_should_raise_when_persisting_at_existing_path(self):
    """Persisting over an already-existing directory must fail loudly
    with a PersistingError rather than overwrite it."""
    # Given: the target path already exists
    self.tmp_file_path.mkdir()

    # When
    engine = SnipsNLUEngine()

    # Then
    with self.assertRaises(PersistingError):
        engine.persist(self.tmp_file_path)
def test_should_be_deserializable_from_dir_when_empty(self):
    """An unfitted engine must survive a persist/load round trip and
    still report itself as unfitted."""
    # Given
    engine = SnipsNLUEngine()
    engine.persist(self.tmp_file_path)

    # When
    loaded = SnipsNLUEngine.from_path(self.tmp_file_path)

    # Then
    self.assertFalse(loaded.fitted)
def test_parse_with_intents_filter(self):
    """The CLI ``parse`` entry point must honor a comma-separated intents
    filter, including quoted intent names that themselves contain commas."""
    # Given / When
    dataset_stream = io.StringIO(u"""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: Make,Coffee
utterances:
- brew [number_of_cups:snips/number](one) cup of coffee please
- make me [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    nlu_engine = SnipsNLUEngine().fit(dataset)
    nlu_engine.persist(self.tmp_file_path)

    # When / Then: run the CLI parse and capture its stdout
    output_target = io.StringIO()
    with self.fail_if_exception("Failed to parse using CLI script"):
        with redirect_stdout(output_target):
            parse(str(self.tmp_file_path), "Make me two cups of coffee",
                  False, 'MakeTea,"Make,Coffee"')
    output = output_target.getvalue()

    # Then
    expected_output = """{
  "input": "Make me two cups of coffee",
  "intent": {
    "intentName": "Make,Coffee",
    "probability": 1.0
  },
  "slots": [
    {
      "entity": "snips/number",
      "range": {
        "end": 11,
        "start": 8
      },
      "rawValue": "two",
      "slotName": "number_of_cups",
      "value": {
        "kind": "Number",
        "value": 2.0
      }
    }
  ]
}
"""
    self.assertEqual(expected_output, output)
def test_should_parse_after_deserialization_from_dir(self):
    """A persisted + reloaded engine must yield the same parse output
    (intent and resolved slots) as the engine that was trained."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    shared = self.get_shared_data(dataset)
    engine = SnipsNLUEngine(**shared).fit(dataset)
    text = "Give me 3 cups of hot tea please"

    # When
    engine.persist(self.tmp_file_path)
    deserialized_engine = SnipsNLUEngine.from_path(self.tmp_file_path)
    result = deserialized_engine.parse(text)

    # Then: builtin number slot resolved, custom Temperature slot matched
    expected_slots = [
        resolved_slot({
            START: 8,
            END: 9
        }, "3", {
            "kind": "Number",
            "value": 3.0
        }, "snips/number", "number_of_cups"),
        custom_slot(
            unresolved_slot({
                START: 18,
                END: 21
            }, "hot", "Temperature", "beverage_temperature"))
    ]
    self.assertEqual(result[RES_INPUT], text)
    self.assertEqual(result[RES_INTENT][RES_INTENT_NAME], "MakeTea")
    self.assertListEqual(result[RES_SLOTS], expected_slots)
def test_should_bypass_model_version_check_when_specified(self):
    """Loading an engine persisted under a different model version must
    succeed when ``bypass_version_check=True`` is passed."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: Greeting
utterances:
- hello world""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    # Train and persist under a fake, outdated model version so the
    # serialized metadata records "0.1.0".
    with patch("snips_nlu.nlu_engine.nlu_engine.__model_version__",
               "0.1.0"):
        engine = SnipsNLUEngine().fit(dataset)
        engine.persist(self.tmp_file_path)

    # When / Then: must not raise despite the version mismatch
    SnipsNLUEngine.from_path(self.tmp_file_path, bypass_version_check=True)
def test_should_be_serializable_into_dir_when_empty(self):
    """Persisting an unfitted engine must write a ``nlu_engine.json``
    with null metadata/config and an empty parser list."""
    # Given
    nlu_engine = SnipsNLUEngine()

    # When
    nlu_engine.persist(self.tmp_file_path)

    # Then
    expected_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": None,
        "config": None,
        "intent_parsers": [],
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                           expected_dict)
def test_should_serialize_duplicated_intent_parsers(self):
    """Two parser configs sharing one unit name must be persisted under
    de-duplicated directories (``my_intent_parser`` and
    ``my_intent_parser_2``) while both keep the original unit name."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # pylint:disable=unused-variable
    @IntentParser.register("my_intent_parser", True)
    class MyIntentParser(MockIntentParser):
        pass
    # pylint:enable=unused-variable

    parsers_configs = ["my_intent_parser", "my_intent_parser"]
    config = NLUEngineConfig(parsers_configs)
    engine = SnipsNLUEngine(config).fit(dataset)

    # When
    engine.persist(self.tmp_file_path)

    # Then
    expected_engine_dict = {
        "unit_name": "nlu_engine",
        "dataset_metadata": {
            "language_code": "en",
            "entities": {
                "Temperature": {
                    "automatically_extensible": True,
                }
            },
            "slot_name_mappings": {
                "MakeCoffee": {
                    "number_of_cups": "snips/number"
                },
                "MakeTea": {
                    "beverage_temperature": "Temperature",
                    "number_of_cups": "snips/number"
                }
            },
        },
        "config": {
            "unit_name": "nlu_engine",
            "intent_parsers_configs": [
                {
                    "unit_name": "my_intent_parser"
                },
                {
                    "unit_name": "my_intent_parser"
                }
            ]
        },
        "intent_parsers": [
            "my_intent_parser",
            "my_intent_parser_2"
        ],
        "builtin_entity_parser": "builtin_entity_parser",
        "custom_entity_parser": "custom_entity_parser",
        "model_version": snips_nlu.__model_version__,
        "training_package_version": snips_nlu.__version__
    }
    self.assertJsonContent(self.tmp_file_path / "nlu_engine.json",
                           expected_engine_dict)
    self.assertJsonContent(
        self.tmp_file_path / "my_intent_parser" / "metadata.json",
        {"unit_name": "my_intent_parser", "fitted": True})
    self.assertJsonContent(
        self.tmp_file_path / "my_intent_parser_2" / "metadata.json",
        {"unit_name": "my_intent_parser", "fitted": True})