Exemplo n.º 1
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        """Fitting twice from one persisted parser gives equal CRF features.

        An unfitted parser with fixed random seeds is persisted, reloaded
        twice and fitted on the same dataset; the "MakeTea" slot fillers of
        both fitted parsers must expose equal ``state_features_``.
        """
        # Given
        training_data = validate_and_format_dataset(BEVERAGE_DATASET)

        classifier_config = LogRegIntentClassifierConfig(random_seed=666)
        filler_config = CRFSlotFillerConfig(random_seed=42)
        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config=classifier_config,
            slot_filler_config=filler_config)
        ProbabilisticIntentParser(parser_config).persist(self.tmp_file_path)

        # When
        # Each parser is reloaded from disk independently before fitting.
        fitted_parsers = [
            ProbabilisticIntentParser.from_path(self.tmp_file_path).fit(
                training_data)
            for _ in range(2)
        ]

        # Then
        first_weights, second_weights = [
            fitted.slot_fillers["MakeTea"].crf_model.state_features_
            for fitted in fitted_parsers
        ]
        self.assertEqual(first_weights, second_weights)
Exemplo n.º 2
0
    def test_should_be_serializable(self):
        """Persisting a fitted parser writes the expected JSON files.

        Checks the parser's own ``metadata.json`` and ``intent_parser.json``
        as well as the ``metadata.json`` of the intent classifier and of each
        slot filler directory.
        """
        # Given
        for unit_cls in (TestIntentClassifier, TestSlotFiller):
            register_processing_unit(unit_cls)

        parser = ProbabilisticIntentParser(
            ProbabilisticIntentParserConfig(
                intent_classifier_config=TestIntentClassifierConfig(),
                slot_filler_config=TestSlotFillerConfig()))
        parser.fit(validate_and_format_dataset(BEVERAGE_DATASET))

        # When
        parser.persist(self.tmp_file_path)

        # Then
        parser_unit = "probabilistic_intent_parser"
        classifier_unit = "test_intent_classifier"
        filler_unit = "test_slot_filler"

        expected_parser_dict = {
            "unit_name": parser_unit,
            "config": {
                "unit_name": parser_unit,
                "slot_filler_config": {"unit_name": filler_unit},
                "intent_classifier_config": {"unit_name": classifier_unit},
            },
            "slot_fillers": [
                {"intent": "MakeCoffee", "slot_filler_name": "slot_filler_0"},
                {"intent": "MakeTea", "slot_filler_name": "slot_filler_1"},
            ],
        }
        expected_filler_metadata = {"unit_name": filler_unit}

        root = self.tmp_file_path
        self.assertJsonContent(
            root / "metadata.json", {"unit_name": parser_unit})
        self.assertJsonContent(
            root / "intent_parser.json", expected_parser_dict)
        self.assertJsonContent(
            root / "intent_classifier" / "metadata.json",
            {"unit_name": classifier_unit})
        # One directory is expected per slot filler listed above.
        for filler_dir in ("slot_filler_0", "slot_filler_1"):
            self.assertJsonContent(
                root / filler_dir / "metadata.json",
                expected_filler_metadata)
Exemplo n.º 3
0
    def test_fitting_should_be_reproducible_after_serialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        seed1 = 666
        seed2 = 42
        config = ProbabilisticIntentParserConfig(
            intent_classifier_config=LogRegIntentClassifierConfig(
                random_seed=seed1),
            slot_filler_config=CRFSlotFillerConfig(random_seed=seed2))
        shared = self.get_shared_data(dataset)
        parser = ProbabilisticIntentParser(config, **shared)
        parser.persist(self.tmp_file_path)

        # When
        fitted_parser_1 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        fitted_parser_2 = ProbabilisticIntentParser.from_path(
            self.tmp_file_path, **shared).fit(dataset)

        # Then
        feature_weights_1 = fitted_parser_1.slot_fillers[
            "MakeTea"].crf_model.state_features_
        feature_weights_2 = fitted_parser_2.slot_fillers[
            "MakeTea"].crf_model.state_features_
        self.assertEqual(feature_weights_1, feature_weights_2)
Exemplo n.º 4
0
    def test_should_be_serializable_before_fitting(self):
        """Persisting a parser that was never fitted writes the expected JSON.

        The parser is constructed with no arguments, so the expected config
        is built from argument-less ``CRFSlotFillerConfig`` and
        ``LogRegIntentClassifierConfig`` instances, and the slot filler list
        is expected to be empty.
        """
        # Given
        parser = ProbabilisticIntentParser()

        # When
        parser.persist(self.tmp_file_path)

        # Then
        unit_name = "probabilistic_intent_parser"
        filler_config_dict = CRFSlotFillerConfig().to_dict()
        classifier_config_dict = LogRegIntentClassifierConfig().to_dict()
        expected_parser_dict = {
            "config": {
                "unit_name": unit_name,
                "slot_filler_config": filler_config_dict,
                "intent_classifier_config": classifier_config_dict,
            },
            "slot_fillers": [],
        }

        output_dir = self.tmp_file_path
        self.assertJsonContent(
            output_dir / "metadata.json", {"unit_name": unit_name})
        self.assertJsonContent(
            output_dir / "intent_parser.json", expected_parser_dict)
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # pylint:disable=unused-variable
        @IntentClassifier.register("my_intent_classifier", True)
        class MyIntentClassifier(MockIntentClassifier):
            pass

        @SlotFiller.register("my_slot_filler", True)
        class MySlotFiller(MockSlotFiller):
            pass

        # pylint:enable=unused-variable

        parser_config = ProbabilisticIntentParserConfig(
            intent_classifier_config="my_intent_classifier",
            slot_filler_config="my_slot_filler")
        parser = ProbabilisticIntentParser(parser_config).fit(dataset)

        # When
        parser.persist(self.tmp_file_path)

        # Then
        expected_parser_config = {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "my_slot_filler"
            },
            "intent_classifier_config": {
                "unit_name": "my_intent_classifier"
            }
        }
        expected_parser_dict = {
            "config":
            expected_parser_config,
            "slot_fillers": [{
                "intent": "MakeCoffee",
                "slot_filler_name": "slot_filler_0"
            }, {
                "intent": "MakeTea",
                "slot_filler_name": "slot_filler_1"
            }]
        }
        metadata = {
            "unit_name": "probabilistic_intent_parser",
        }
        metadata_slot_filler = {"unit_name": "my_slot_filler", "fitted": True}
        metadata_intent_classifier = {
            "unit_name": "my_intent_classifier",
            "fitted": True
        }

        self.assertJsonContent(self.tmp_file_path / "metadata.json", metadata)
        self.assertJsonContent(self.tmp_file_path / "intent_parser.json",
                               expected_parser_dict)
        self.assertJsonContent(
            self.tmp_file_path / "intent_classifier" / "metadata.json",
            metadata_intent_classifier)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_0" / "metadata.json",
            metadata_slot_filler)
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler_1" / "metadata.json",
            metadata_slot_filler)