Exemplo n.º 1
0
    def test_should_be_deserializable_when_fitted_without_slots(self):
        """Check a persist/load round trip for a filler fitted without slots.

        The filler is fitted on a dataset whose single utterance carries no
        slot, persisted to ``self.tmp_file_path`` and loaded back; the loaded
        filler is then expected to return an empty slot list.
        """
        # Given
        utterance_text = "This is an utterance without slots"
        intent_name = "intent1"
        slotless_utterance = {"data": [{"text": utterance_text}]}
        dataset = {
            "language": "en",
            "intents": {intent_name: {"utterances": [slotless_utterance]}},
            "entities": {},
        }

        shared_resources = self.get_shared_data(dataset)
        fitted_filler = CRFSlotFiller(**shared_resources)
        fitted_filler.fit(dataset, intent=intent_name)
        fitted_filler.persist(self.tmp_file_path)
        reloaded_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **shared_resources)

        # When
        extracted_slots = reloaded_filler.get_slots(utterance_text)

        # Then
        self.assertListEqual([], extracted_slots)
Exemplo n.º 2
0
    def test_should_be_serializable_before_fit(self):
        """Check the files written when persisting a filler that was never fitted.

        Expects ``metadata.json`` to name the unit and ``slot_filler.json`` to
        hold the untouched config with every fitted attribute set to ``None``.
        """
        # Given
        shape_ngram_feature = {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        }
        is_digit_feature = {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        }
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=[shape_ngram_feature, is_digit_feature])
        unfitted_filler = CRFSlotFiller(config)

        # When
        unfitted_filler.persist(self.tmp_file_path)

        # Then
        self.assertJsonContent(
            self.tmp_file_path / "metadata.json",
            {"unit_name": "crf_slot_filler"})

        # Nothing was fitted, so only the config carries a value.
        expected_payload = {
            "crf_model_file": None,
            "language_code": None,
            "config": config.to_dict(),
            "intent": None,
            "slot_name_mapping": None,
        }
        self.assertJsonContent(
            self.tmp_file_path / "slot_filler.json", expected_payload)
Exemplo n.º 3
0
    def test_should_get_slots_after_deserialization(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        intent = "MakeTea"
        shared = self.get_shared_data(dataset)
        shared[RANDOM_STATE] = 42
        slot_filler = CRFSlotFiller(**shared)
        slot_filler.fit(dataset, intent)
        slot_filler.persist(self.tmp_file_path)

        deserialized_slot_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **shared)

        # When
        slots = deserialized_slot_filler.get_slots("make me two cups of tea")

        # Then
        expected_slots = [
            unresolved_slot(match_range={
                START: 8,
                END: 11
            },
                            value='two',
                            entity='snips/number',
                            slot_name='number_of_cups')
        ]
        self.assertListEqual(expected_slots, slots)
Exemplo n.º 4
0
    def test_should_be_serializable(self):
        """Check the files written when persisting a fitted filler.

        After fitting on ``SAMPLE_DATASET`` for ``dummy_intent_1``, the
        persisted directory is expected to contain the metadata file, the CRF
        model file and a ``slot_filler.json`` describing the fitted state.
        """
        # Given
        input_shape_ngram = {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        }
        input_is_digit = {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        }
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=[input_shape_ngram, input_is_digit])
        dataset = SAMPLE_DATASET
        intent = "dummy_intent_1"

        filler = CRFSlotFiller(config)
        filler.fit(dataset, intent=intent)

        # When
        filler.persist(self.tmp_file_path)

        # Then
        output_dir = self.tmp_file_path
        self.assertJsonContent(
            output_dir / "metadata.json", {"unit_name": "crf_slot_filler"})

        # The model file is expected under the same base name as the one
        # backing the in-memory CRF model.
        model_file_name = Path(filler.crf_model.modelfile.name).name
        self.assertTrue((output_dir / model_file_name).exists())

        # Built from fresh literals rather than from the input dicts: the
        # expected shape n-gram args additionally carry the language code.
        expected_config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=[
                {
                    "factory_name": ShapeNgramFactory.name,
                    "args": {"n": 1, "language_code": "en"},
                    "offsets": [0],
                },
                {
                    "factory_name": IsDigitFactory.name,
                    "args": {},
                    "offsets": [-1, 0],
                },
            ])
        expected_payload = {
            "crf_model_file": model_file_name,
            "language_code": "en",
            "config": expected_config.to_dict(),
            "intent": intent,
            "slot_name_mapping": {
                "dummy_slot_name": "dummy_entity_1",
                "dummy_slot_name2": "dummy_entity_2",
                "dummy_slot_name3": "dummy_entity_2",
            },
        }
        self.assertJsonContent(
            output_dir / "slot_filler.json", expected_payload)
Exemplo n.º 5
0
    def test_should_get_slots_after_deserialization(self):
        """Check that a persisted then reloaded filler still extracts slots.

        The filler is fitted on ``BEVERAGE_DATASET`` with a fixed random seed,
        persisted, and loaded back while reusing the entity parsers of the
        fitted instance.
        """
        # Given
        original_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
        original_filler.fit(BEVERAGE_DATASET, "MakeTea")
        original_filler.persist(self.tmp_file_path)

        # Hand the fitted instance's parsers over to the loaded one.
        parsers = {
            "custom_entity_parser": original_filler.custom_entity_parser,
            "builtin_entity_parser": original_filler.builtin_entity_parser,
        }
        reloaded_filler = CRFSlotFiller.from_path(
            self.tmp_file_path, **parsers)

        # When
        extracted_slots = reloaded_filler.get_slots("make me two cups of tea")

        # Then
        number_slot = unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
        self.assertListEqual([number_slot], extracted_slots)
Exemplo n.º 6
0
    def test_should_be_serializable_when_fitted_without_slots(self):
        """Check that a filler fitted on slot-less data can be persisted.

        Expects the metadata file to be written and the filler's
        ``crf_model`` attribute to be ``None``.
        """
        # Given
        shape_ngram_feature = {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        }
        is_digit_feature = {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        }
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=[shape_ngram_feature, is_digit_feature])

        intent_name = "intent1"
        slotless_utterance = {
            "data": [{"text": "This is an utterance without slots"}]
        }
        dataset = {
            "language": "en",
            "intents": {intent_name: {"utterances": [slotless_utterance]}},
            "entities": {},
        }

        shared_resources = self.get_shared_data(dataset)
        filler = CRFSlotFiller(config, **shared_resources)
        filler.fit(dataset, intent=intent_name)

        # When
        filler.persist(self.tmp_file_path)

        # Then
        self.assertJsonContent(
            self.tmp_file_path / "metadata.json",
            {"unit_name": "crf_slot_filler"})
        self.assertIsNone(filler.crf_model)
Exemplo n.º 7
0
    def test_crfsuite_files_modes_should_be_644(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

        # When
        slot_filler = CRFSlotFiller().fit(dataset, "MakeTea")
        slot_filler.persist(self.tmp_file_path)

        # Then
        crfmodel_file = str(self.tmp_file_path / CRF_MODEL_FILENAME)
        filemode = oct(os.stat(crfmodel_file).st_mode & 0o0777)
        self.assertEqual(oct(0o644), filemode)
Exemplo n.º 8
0
    def test_should_be_serializable(self):
        # Given
        dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
        dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
        features_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=features_factories)
        shared = self.get_shared_data(dataset)
        slot_filler = CRFSlotFiller(config, **shared)
        intent = "my_intent"
        slot_filler.fit(dataset, intent=intent)

        # When
        slot_filler.persist(self.tmp_file_path)

        # Then
        metadata_path = self.tmp_file_path / "metadata.json"
        self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})

        self.assertTrue((self.tmp_file_path / CRF_MODEL_FILENAME).exists())

        expected_feature_factories = [{
            "factory_name": ShapeNgramFactory.name,
            "args": {
                "n": 1,
                "language_code": "en"
            },
            "offsets": [0]
        }, {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0]
        }]
        expected_config = CRFSlotFillerConfig(
            tagging_scheme=TaggingScheme.BILOU,
            feature_factory_configs=expected_feature_factories)
        expected_slot_filler_dict = {
            "crf_model_file": CRF_MODEL_FILENAME,
            "language_code": "en",
            "config": expected_config.to_dict(),
            "intent": intent,
            "slot_name_mapping": {
                "slot1": "entity1",
                "slot2": "entity2",
            }
        }
        slot_filler_path = self.tmp_file_path / "slot_filler.json"
        self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)