def test_should_parse_with_filter(self):
    """Check that ``parse`` honours the ``intents`` filter.

    The parser is fitted on three single-utterance intents and asked to
    parse "foo bar baz" with only ``intent1`` and ``intent3`` allowed. The
    test expects ``intent1`` and a single ``slot1`` slot spanning "foo".
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"
---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)
---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    shared_resources = self.get_shared_data(dataset)
    # Fixed random state so the fitted parser is reproducible.
    shared_resources[RANDOM_STATE] = 42
    parser = ProbabilisticIntentParser(**shared_resources)
    parser.fit(dataset)
    allowed_intents = ["intent1", "intent3"]

    # When
    parsing = parser.parse("foo bar baz", intents=allowed_intents)

    # Then
    slots_for_intent1 = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]
    self.assertEqual("intent1", parsing[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(slots_for_intent1, parsing[RES_SLOTS])
def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
    """Check that an already fitted intent classifier is left untouched.

    A ``LogRegIntentClassifier`` is fitted beforehand and attached to the
    parser. While ``LogRegIntentClassifier.fit`` is patched, fitting the
    parser with ``force_retrain=False`` must not call it.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
  - make me [number_of_cups:snips/number](one) cup of coffee please
  - brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser = ProbabilisticIntentParser()
    fitted_classifier = LogRegIntentClassifier()
    fitted_classifier.fit(dataset)
    parser.intent_classifier = fitted_classifier
    fit_target = ("snips_nlu.intent_classifier.log_reg_classifier"
                  ".LogRegIntentClassifier.fit")

    # When / Then
    with patch(fit_target) as patched_fit:
        parser.fit(dataset, force_retrain=False)
        patched_fit.assert_not_called()
def test_should_not_retrain_slot_filler_when_no_force_retrain(self):
    """Check that an already fitted slot filler is not fitted again.

    A ``CRFSlotFiller`` fitted on "MakeCoffee" is stored in the parser's
    ``slot_fillers`` before ``CRFSlotFiller.fit`` is patched. Fitting the
    parser with ``force_retrain=False`` must then call the patched ``fit``
    exactly once.
    """
    # Given
    parser = ProbabilisticIntentParser()
    coffee_filler = CRFSlotFiller()
    coffee_filler.fit(BEVERAGE_DATASET, "MakeCoffee")
    parser.slot_fillers["MakeCoffee"] = coffee_filler
    fit_target = "snips_nlu.slot_filler.crf_slot_filler.CRFSlotFiller.fit"

    # When / Then
    with patch(fit_target) as patched_fit:
        parser.fit(BEVERAGE_DATASET, force_retrain=False)
        # Presumably the single call is for the dataset's other intent;
        # the pre-fitted "MakeCoffee" filler must not add a second one.
        self.assertEqual(1, patched_fit.call_count)
def test_should_not_retrain_intent_classifier_when_no_force_retrain(self):
    """Check that an already fitted intent classifier is left untouched.

    A ``LogRegIntentClassifier`` fitted on ``BEVERAGE_DATASET`` is attached
    to the parser. While ``LogRegIntentClassifier.fit`` is patched, fitting
    the parser with ``force_retrain=False`` must not call it.
    """
    # Given
    parser = ProbabilisticIntentParser()
    fitted_classifier = LogRegIntentClassifier()
    fitted_classifier.fit(BEVERAGE_DATASET)
    parser.intent_classifier = fitted_classifier
    fit_target = ("snips_nlu.intent_classifier.log_reg_classifier"
                  ".LogRegIntentClassifier.fit")

    # When / Then
    with patch(fit_target) as patched_fit:
        parser.fit(BEVERAGE_DATASET, force_retrain=False)
        patched_fit.assert_not_called()
def test_should_be_serializable(self):
    """Persist a fitted parser and check the JSON files it writes.

    The parser is built from the test intent-classifier and slot-filler
    units, fitted on ``BEVERAGE_DATASET`` and persisted to
    ``self.tmp_file_path``. Each expected JSON file is then checked through
    ``self.assertJsonContent``.
    """
    # Given
    register_processing_unit(TestIntentClassifier)
    register_processing_unit(TestSlotFiller)
    classifier_config = TestIntentClassifierConfig()
    filler_config = TestSlotFillerConfig()
    parser = ProbabilisticIntentParser(ProbabilisticIntentParserConfig(
        intent_classifier_config=classifier_config,
        slot_filler_config=filler_config))
    parser.fit(validate_and_format_dataset(BEVERAGE_DATASET))

    # When
    parser.persist(self.tmp_file_path)

    # Then
    root = self.tmp_file_path
    expected_parser_dict = {
        "unit_name": "probabilistic_intent_parser",
        "config": {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "test_slot_filler"
            },
            "intent_classifier_config": {
                "unit_name": "test_intent_classifier"
            },
        },
        "slot_fillers": [
            {"intent": "MakeCoffee", "slot_filler_name": "slot_filler_0"},
            {"intent": "MakeTea", "slot_filler_name": "slot_filler_1"},
        ],
    }
    parser_metadata = {"unit_name": "probabilistic_intent_parser"}
    slot_filler_metadata = {"unit_name": "test_slot_filler"}
    classifier_metadata = {"unit_name": "test_intent_classifier"}

    # (file, expected content) pairs, checked in this order.
    expected_files = [
        (root / "metadata.json", parser_metadata),
        (root / "intent_parser.json", expected_parser_dict),
        (root / "intent_classifier" / "metadata.json",
         classifier_metadata),
        (root / "slot_filler_0" / "metadata.json", slot_filler_metadata),
        (root / "slot_filler_1" / "metadata.json", slot_filler_metadata),
    ]
    for json_path, expected_content in expected_files:
        self.assertJsonContent(json_path, expected_content)
def test_should_not_retrain_slot_filler_when_no_force_retrain(self):
    """Check that an already fitted slot filler is not fitted again.

    A counting slot filler is registered as "my_slot_filler". One instance
    is fitted on "MakeCoffee" and stored in the parser before the parser is
    fitted with ``force_retrain=False``. The test expects two ``fit`` calls
    in total.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
  - make me a [beverage_temperature:Temperature](hot) cup of tea
  - make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
  - make me [number_of_cups:snips/number](one) cup of coffee please
  - brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json

    # NOTE(review): the second positional argument ``True`` presumably lets
    # these registrations override existing ones; confirm against
    # ``register``.
    # pylint:disable=unused-variable
    @IntentClassifier.register("my_intent_classifier", True)
    class MyIntentClassifier(MockIntentClassifier):
        pass

    @SlotFiller.register("my_slot_filler", True)
    class MySlotFiller(MockSlotFiller):
        # Number of ``fit`` calls across every instance of this class.
        fit_call_count = 0

        def fit(self, dataset, intent):
            MySlotFiller.fit_call_count += 1
            return super(MySlotFiller, self).fit(dataset, intent)

    # pylint:enable=unused-variable
    parser = ProbabilisticIntentParser(ProbabilisticIntentParserConfig(
        intent_classifier_config="my_intent_classifier",
        slot_filler_config="my_slot_filler"))
    coffee_filler = MySlotFiller(None)
    coffee_filler.fit(dataset, "MakeCoffee")
    parser.slot_fillers["MakeCoffee"] = coffee_filler

    # When / Then
    parser.fit(dataset, force_retrain=False)
    # One call comes from the explicit fit above, so the parser itself may
    # only add one more.
    self.assertEqual(2, MySlotFiller.fit_call_count)
def test_should_parse_top_intents(self):
    """Check the intents and slot values returned when ``top_n=2``.

    The parser is built from seeded classifier and slot-filler configs and
    fitted on three single-utterance intents. Parsing "foo bar baz" must
    yield ``intent2`` then ``intent1``, with slot values "baz" and "foo"
    respectively.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[entity1](foo) bar"
---
type: intent
name: intent2
utterances:
  - foo bar [entity2](baz)
---
type: intent
name: intent3
utterances:
  - foz for [entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    # Same seed for both units so the fitted parser is reproducible.
    seeded_classifier_config = LogRegIntentClassifierConfig(random_seed=42)
    seeded_filler_config = CRFSlotFillerConfig(random_seed=42)
    parser = ProbabilisticIntentParser(ProbabilisticIntentParserConfig(
        seeded_classifier_config, seeded_filler_config))
    parser.fit(dataset)

    # When
    top_results = parser.parse("foo bar baz", top_n=2)
    intent_names = [
        parsed[RES_INTENT][RES_INTENT_NAME] for parsed in top_results
    ]
    slot_values = [
        [slot[RES_VALUE] for slot in parsed[RES_SLOTS]]
        for parsed in top_results
    ]

    # Then
    self.assertListEqual(["intent2", "intent1"], intent_names)
    self.assertListEqual([["baz"], ["foo"]], slot_values)
def test_should_parse_top_intents(self):
    """Check the intents and slot values returned when ``top_n=2``.

    The parser is built from ``self.get_shared_data`` with a fixed random
    state and fitted on three single-utterance intents. Parsing
    "foo bar baz" must yield ``intent2`` then ``intent1``, with slot values
    "baz" and "foo" respectively.
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[entity1](foo) bar"
---
type: intent
name: intent2
utterances:
  - foo bar [entity2](baz)
---
type: intent
name: intent3
utterances:
  - foz for [entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    shared_resources = self.get_shared_data(dataset)
    # Fixed random state so the fitted parser is reproducible.
    shared_resources[RANDOM_STATE] = 42
    parser = ProbabilisticIntentParser(**shared_resources)
    parser.fit(dataset)

    # When
    top_results = parser.parse("foo bar baz", top_n=2)
    intent_names = [
        parsed[RES_INTENT][RES_INTENT_NAME] for parsed in top_results
    ]
    slot_values = [
        [slot[RES_VALUE] for slot in parsed[RES_SLOTS]]
        for parsed in top_results
    ]

    # Then
    self.assertListEqual(["intent2", "intent1"], intent_names)
    self.assertListEqual([["baz"], ["foo"]], slot_values)
def test_should_parse_with_filter(self):
    """Check that ``parse`` honours the ``intents`` filter.

    The parser is built from seeded classifier and slot-filler configs,
    fitted on three single-utterance intents and asked to parse
    "foo bar baz" with only ``intent1`` and ``intent3`` allowed. The test
    expects ``intent1`` and a single ``slot1`` slot spanning "foo".
    """
    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: intent1
utterances:
  - "[slot1:entity1](foo) bar"
---
type: intent
name: intent2
utterances:
  - foo bar [slot2:entity2](baz)
---
type: intent
name: intent3
utterances:
  - foz for [slot3:entity3](baz)""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    # Same seed for both units so the fitted parser is reproducible.
    seeded_classifier_config = LogRegIntentClassifierConfig(random_seed=42)
    seeded_filler_config = CRFSlotFillerConfig(random_seed=42)
    parser = ProbabilisticIntentParser(ProbabilisticIntentParserConfig(
        seeded_classifier_config, seeded_filler_config))
    parser.fit(dataset)
    allowed_intents = ["intent1", "intent3"]

    # When
    parsing = parser.parse("foo bar baz", intents=allowed_intents)

    # Then
    slots_for_intent1 = [unresolved_slot((0, 3), "foo", "entity1", "slot1")]
    self.assertEqual("intent1", parsing[RES_INTENT][RES_INTENT_NAME])
    self.assertEqual(slots_for_intent1, parsing[RES_SLOTS])
def test_should_be_serializable(self):
    """Check the dict produced by ``to_dict`` on a fitted parser.

    Minimal intent-classifier and slot-filler units are declared inline,
    registered, and used to build a parser fitted on ``BEVERAGE_DATASET``.
    The serialized dict must contain the parser config, the classifier dict
    and one slot-filler dict per intent ("MakeCoffee" and "MakeTea").
    """
    # Given
    # Stub config whose serialized form is just its unit name.
    class TestIntentClassifierConfig(ProcessingUnitConfig):
        unit_name = "test_intent_classifier"

        def to_dict(self):
            return dict(unit_name=self.unit_name)

        @classmethod
        def from_dict(cls, obj_dict):
            return TestIntentClassifierConfig()

    # Stub classifier: never returns an intent, fitting does nothing.
    class TestIntentClassifier(IntentClassifier):
        unit_name = "test_intent_classifier"
        config_type = TestIntentClassifierConfig

        def get_intent(self, text, intents_filter):
            return None

        def fit(self, dataset):
            return self

        def to_dict(self):
            return dict(unit_name=self.unit_name)

        @classmethod
        def from_dict(cls, unit_dict):
            return TestIntentClassifier(cls.config_type())

    # Stub config whose serialized form is just its unit name.
    class TestSlotFillerConfig(ProcessingUnitConfig):
        unit_name = "test_slot_filler"

        def to_dict(self):
            return dict(unit_name=self.unit_name)

        @classmethod
        def from_dict(cls, obj_dict):
            return TestSlotFillerConfig()

    # Stub slot filler: never returns slots, fitting does nothing.
    class TestSlotFiller(SlotFiller):
        unit_name = "test_slot_filler"
        config_type = TestSlotFillerConfig

        def get_slots(self, text):
            return []

        def fit(self, dataset, intent):
            return self

        def to_dict(self):
            return dict(unit_name=self.unit_name)

        @classmethod
        def from_dict(cls, unit_dict):
            return TestSlotFiller(cls.config_type())

    register_processing_unit(TestIntentClassifier)
    register_processing_unit(TestSlotFiller)

    classifier_config = TestIntentClassifierConfig()
    filler_config = TestSlotFillerConfig()
    parser = ProbabilisticIntentParser(ProbabilisticIntentParserConfig(
        intent_classifier_config=classifier_config,
        slot_filler_config=filler_config))
    parser.fit(validate_and_format_dataset(BEVERAGE_DATASET))

    # When
    actual_parser_dict = parser.to_dict()

    # Then
    expected_parser_dict = {
        "unit_name": "probabilistic_intent_parser",
        "config": {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {"unit_name": "test_slot_filler"},
            "intent_classifier_config": {
                "unit_name": "test_intent_classifier"
            },
        },
        "intent_classifier": {"unit_name": "test_intent_classifier"},
        "slot_fillers": {
            "MakeCoffee": {"unit_name": "test_slot_filler"},
            "MakeTea": {"unit_name": "test_slot_filler"},
        },
    }
    self.assertDictEqual(actual_parser_dict, expected_parser_dict)