def test_fitting_should_be_reproducible_after_serialization(self):
    # Persist an unfitted parser built with fixed random seeds, reload it
    # twice, fit both copies on the same dataset and check that the CRF
    # state features of the "MakeTea" slot filler are equal.

    # Given
    training_data = validate_and_format_dataset(BEVERAGE_DATASET)
    parser_config = ProbabilisticIntentParserConfig(
        intent_classifier_config=LogRegIntentClassifierConfig(
            random_seed=666),
        slot_filler_config=CRFSlotFillerConfig(random_seed=42))
    ProbabilisticIntentParser(parser_config).persist(self.tmp_file_path)

    # When
    fitted_parsers = [
        ProbabilisticIntentParser.from_path(
            self.tmp_file_path).fit(training_data)
        for _ in range(2)
    ]

    # Then
    first_weights, second_weights = [
        fitted.slot_fillers["MakeTea"].crf_model.state_features_
        for fitted in fitted_parsers
    ]
    self.assertEqual(first_weights, second_weights)
def test_fitting_should_be_reproducible_after_serialization(self):
    # Persist an unfitted parser built with fixed random seeds, reload it
    # twice (passing the same shared data each time), fit both copies on
    # the same dataset and check that the CRF state features of the
    # "MakeTea" slot filler are equal.

    # Given
    yaml_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
    dataset = Dataset.from_yaml_files("en", [yaml_stream]).json
    parser_config = ProbabilisticIntentParserConfig(
        intent_classifier_config=LogRegIntentClassifierConfig(
            random_seed=666),
        slot_filler_config=CRFSlotFillerConfig(random_seed=42))
    shared = self.get_shared_data(dataset)
    ProbabilisticIntentParser(parser_config, **shared).persist(
        self.tmp_file_path)

    # When
    fitted_parsers = [
        ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)
        for _ in range(2)
    ]

    # Then
    first_weights, second_weights = [
        fitted.slot_fillers["MakeTea"].crf_model.state_features_
        for fitted in fitted_parsers
    ]
    self.assertEqual(first_weights, second_weights)
def test_should_be_deserializable(self):
    # Write a persisted-parser directory layout by hand (one intent
    # classifier and two slot fillers backed by registered test units)
    # and check that from_path rebuilds the parser from it.

    # Given
    register_processing_unit(TestIntentClassifier)
    register_processing_unit(TestSlotFiller)

    config = ProbabilisticIntentParserConfig(
        intent_classifier_config=TestIntentClassifierConfig(),
        slot_filler_config=TestSlotFillerConfig())
    coffee_entry = {
        "intent": "MakeCoffee",
        "slot_filler_name": "slot_filler_MakeCoffee",
    }
    tea_entry = {
        "intent": "MakeTea",
        "slot_filler_name": "slot_filler_MakeTea",
    }
    parser_dict = {
        "unit_name": "probabilistic_intent_parser",
        "slot_fillers": [coffee_entry, tea_entry],
        "config": config.to_dict(),
    }

    # (sub-directory, unit name stored in its metadata.json)
    sub_units = [
        ("intent_classifier", "test_intent_classifier"),
        ("slot_filler_MakeCoffee", "test_slot_filler"),
        ("slot_filler_MakeTea", "test_slot_filler"),
    ]
    root = self.tmp_file_path
    root.mkdir()
    for dir_name, _ in sub_units:
        (root / dir_name).mkdir()
    self.writeJsonContent(root / "intent_parser.json", parser_dict)
    for dir_name, unit_name in sub_units:
        self.writeJsonContent(
            root / dir_name / "metadata.json", {"unit_name": unit_name})

    # When
    parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

    # Then
    self.assertDictEqual(parser.config.to_dict(), config.to_dict())
    self.assertIsNotNone(parser.intent_classifier)
    expected_intents = ["MakeCoffee", "MakeTea"]
    self.assertListEqual(sorted(parser.slot_fillers), expected_intents)
def test_should_be_deserializable_before_fitting(self):
    # Write the files of a parser that has no intent classifier and no
    # slot fillers, then check that from_path loads it with the same
    # config, a None intent classifier and an empty slot-filler mapping.

    # Given
    config = ProbabilisticIntentParserConfig().to_dict()
    parser_dict = {
        "unit_name": "probabilistic_intent_parser",
        "config": config,
        "intent_classifier": None,
        "slot_fillers": {},
    }
    root = self.tmp_file_path
    root.mkdir()
    self.writeJsonContent(
        root / "metadata.json",
        {"unit_name": "probabilistic_intent_parser"})
    self.writeJsonContent(root / "intent_parser.json", parser_dict)

    # When
    parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

    # Then
    self.assertEqual(parser.config.to_dict(), config)
    self.assertIsNone(parser.intent_classifier)
    self.assertDictEqual({}, parser.slot_fillers)
def test_should_be_deserializable(self):
    # Register two throwaway units, write a persisted-parser directory
    # layout by hand that refers to them, and check that from_path
    # rebuilds the parser with instances of those units.

    # Given
    @IntentClassifier.register("my_intent_classifier", True)
    class MyIntentClassifier(MockIntentClassifier):
        pass

    @SlotFiller.register("my_slot_filler", True)
    class MySlotFiller(MockSlotFiller):
        pass

    parser_config = {
        "unit_name": "probabilistic_intent_parser",
        "intent_classifier_config": {"unit_name": "my_intent_classifier"},
        "slot_filler_config": {"unit_name": "my_slot_filler"},
    }
    coffee_entry = {
        "intent": "MakeCoffee",
        "slot_filler_name": "slot_filler_MakeCoffee",
    }
    tea_entry = {
        "intent": "MakeTea",
        "slot_filler_name": "slot_filler_MakeTea",
    }
    parser_dict = {
        "unit_name": "probabilistic_intent_parser",
        "slot_fillers": [coffee_entry, tea_entry],
        "config": parser_config,
    }

    # (sub-directory, unit name stored in its metadata.json)
    sub_units = [
        ("intent_classifier", "my_intent_classifier"),
        ("slot_filler_MakeCoffee", "my_slot_filler"),
        ("slot_filler_MakeTea", "my_slot_filler"),
    ]
    root = self.tmp_file_path
    root.mkdir()
    for dir_name, _ in sub_units:
        (root / dir_name).mkdir()
    self.writeJsonContent(root / "intent_parser.json", parser_dict)
    for dir_name, unit_name in sub_units:
        self.writeJsonContent(
            root / dir_name / "metadata.json",
            {"unit_name": unit_name, "fitted": True})

    # When
    parser = ProbabilisticIntentParser.from_path(self.tmp_file_path)

    # Then
    self.assertDictEqual(parser.config.to_dict(), parser_config)
    self.assertIsInstance(parser.intent_classifier, MyIntentClassifier)
    expected_intents = ["MakeCoffee", "MakeTea"]
    self.assertListEqual(sorted(parser.slot_fillers), expected_intents)
    for loaded_filler in itervalues(parser.slot_fillers):
        self.assertIsInstance(loaded_filler, MySlotFiller)