def test_should_be_deserializable_when_fitted_without_slots(self):
    """A filler fitted on slot-less data reloads and extracts no slots."""
    # Given
    text = "This is an utterance without slots"
    slotless_utterance = {"data": [{"text": text}]}
    dataset = {
        "language": "en",
        "intents": {"intent1": {"utterances": [slotless_utterance]}},
        "entities": {},
    }
    shared = self.get_shared_data(dataset)
    fitted_filler = CRFSlotFiller(**shared)
    fitted_filler.fit(dataset, intent="intent1")
    fitted_filler.persist(self.tmp_file_path)
    reloaded_filler = CRFSlotFiller.from_path(self.tmp_file_path, **shared)

    # When
    extracted = reloaded_filler.get_slots(text)

    # Then
    self.assertListEqual([], extracted)
def test_should_be_serializable_before_fit(self):
    """Persisting an unfitted filler writes metadata and an unset state."""
    # Given
    shape_ngram_config = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1},
        "offsets": [0],
    }
    is_digit_config = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[shape_ngram_config, is_digit_config])
    unfitted_filler = CRFSlotFiller(config)

    # When
    unfitted_filler.persist(self.tmp_file_path)

    # Then
    self.assertJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})

    # Everything except the config is expected to be null before fitting.
    expected_content = {
        key: None
        for key in ("crf_model_file", "language_code", "intent",
                    "slot_name_mapping")
    }
    expected_content["config"] = config.to_dict()
    self.assertJsonContent(
        self.tmp_file_path / "slot_filler.json", expected_content)
def test_should_get_slots_after_deserialization(self):
    """A persisted and reloaded slot filler still extracts the number slot.

    The filler is fitted on a small "MakeTea" intent with a fixed random
    state, written to ``self.tmp_file_path``, loaded back with the same
    shared resources, and then queried on a new utterance.
    """
    # Given
    # NOTE: the YAML document needs one key / one utterance per line; the
    # content is flush-left because it sits inside a triple-quoted literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me [number_of_cups:snips/number](one) cup of tea
- i want [number_of_cups] cups of tea please
- can you prepare [number_of_cups] cups of tea ?""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    intent = "MakeTea"
    shared = self.get_shared_data(dataset)
    # Fixed random state passed to both the fitted and the reloaded filler.
    shared[RANDOM_STATE] = 42
    slot_filler = CRFSlotFiller(**shared)
    slot_filler.fit(dataset, intent)
    slot_filler.persist(self.tmp_file_path)
    deserialized_slot_filler = CRFSlotFiller.from_path(
        self.tmp_file_path, **shared)

    # When
    slots = deserialized_slot_filler.get_slots("make me two cups of tea")

    # Then
    expected_slots = [
        unresolved_slot(
            match_range={START: 8, END: 11},
            value='two',
            entity='snips/number',
            slot_name='number_of_cups')
    ]
    self.assertListEqual(expected_slots, slots)
def test_should_be_serializable(self):
    """Persisting a fitted filler writes metadata, CRF file and state."""

    def make_factory_configs(shape_ngram_args):
        # Both the input and the expected configs share this layout and
        # differ only in the arguments of the shape n-gram factory.
        return [
            {
                "factory_name": ShapeNgramFactory.name,
                "args": shape_ngram_args,
                "offsets": [0],
            },
            {
                "factory_name": IsDigitFactory.name,
                "args": {},
                "offsets": [-1, 0],
            },
        ]

    # Given
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=make_factory_configs({"n": 1}))
    intent = "dummy_intent_1"
    fitted_filler = CRFSlotFiller(config)
    fitted_filler.fit(SAMPLE_DATASET, intent=intent)

    # When
    fitted_filler.persist(self.tmp_file_path)

    # Then
    output_dir = self.tmp_file_path
    self.assertJsonContent(
        output_dir / "metadata.json", {"unit_name": "crf_slot_filler"})

    model_file_name = Path(fitted_filler.crf_model.modelfile.name).name
    model_file_path = output_dir / model_file_name
    self.assertTrue(model_file_path.exists())

    expected_config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=make_factory_configs(
            {"n": 1, "language_code": "en"}))
    expected_mapping = {
        "dummy_slot_name": "dummy_entity_1",
        "dummy_slot_name2": "dummy_entity_2",
        "dummy_slot_name3": "dummy_entity_2",
    }
    expected_content = {
        "crf_model_file": model_file_name,
        "language_code": "en",
        "config": expected_config.to_dict(),
        "intent": intent,
        "slot_name_mapping": expected_mapping,
    }
    self.assertJsonContent(
        output_dir / "slot_filler.json", expected_content)
def test_should_get_slots_after_deserialization(self):
    """A filler reloaded with the original parsers still extracts slots."""
    # Given
    intent = "MakeTea"
    trained_filler = CRFSlotFiller(CRFSlotFillerConfig(random_seed=42))
    trained_filler.fit(BEVERAGE_DATASET, intent)
    trained_filler.persist(self.tmp_file_path)

    # Hand the entity parsers of the trained filler over to the reloaded one.
    parsers = {
        "custom_entity_parser": trained_filler.custom_entity_parser,
        "builtin_entity_parser": trained_filler.builtin_entity_parser,
    }
    reloaded_filler = CRFSlotFiller.from_path(self.tmp_file_path, **parsers)

    # When
    extracted = reloaded_filler.get_slots("make me two cups of tea")

    # Then
    number_slot = unresolved_slot(
        match_range={START: 8, END: 11},
        value='two',
        entity='snips/number',
        slot_name='number_of_cups')
    self.assertListEqual([number_slot], extracted)
def test_should_be_serializable_when_fitted_without_slots(self):
    """A filler fitted on slot-less data persists and keeps no CRF model."""
    # Given
    shape_ngram_config = {
        "factory_name": ShapeNgramFactory.name,
        "args": {"n": 1},
        "offsets": [0],
    }
    is_digit_config = {
        "factory_name": IsDigitFactory.name,
        "args": {},
        "offsets": [-1, 0],
    }
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=[shape_ngram_config, is_digit_config])

    slotless_utterance = {
        "data": [{"text": "This is an utterance without slots"}]
    }
    dataset = {
        "language": "en",
        "intents": {"intent1": {"utterances": [slotless_utterance]}},
        "entities": {},
    }
    shared = self.get_shared_data(dataset)
    filler = CRFSlotFiller(config, **shared)
    filler.fit(dataset, intent="intent1")

    # When
    filler.persist(self.tmp_file_path)

    # Then
    self.assertJsonContent(
        self.tmp_file_path / "metadata.json",
        {"unit_name": "crf_slot_filler"})
    self.assertIsNone(filler.crf_model)
def test_crfsuite_files_modes_should_be_644(self):
    """The CRF model file written by ``persist`` has mode ``0o644``.

    A filler is fitted on a two-utterance "MakeTea" intent and persisted to
    ``self.tmp_file_path``; the permission bits of the resulting
    ``CRF_MODEL_FILENAME`` file are then compared against ``0o644``.
    """
    # Given
    # NOTE: the YAML document needs one key / one utterance per line; the
    # content is flush-left because it sits inside a triple-quoted literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    # When
    slot_filler = CRFSlotFiller().fit(dataset, "MakeTea")
    slot_filler.persist(self.tmp_file_path)

    # Then
    crfmodel_file = str(self.tmp_file_path / CRF_MODEL_FILENAME)
    # Mask out the file-type bits so only the permission bits are compared;
    # octal strings keep a failure message readable.
    filemode = oct(os.stat(crfmodel_file).st_mode & 0o0777)
    self.assertEqual(oct(0o644), filemode)
def test_should_be_serializable(self):
    """Persisting a fitted filler writes metadata, CRF file and state.

    After fitting on a two-slot intent, the output directory is expected to
    contain ``metadata.json``, the ``CRF_MODEL_FILENAME`` file, and a
    ``slot_filler.json`` holding the language, intent, slot-name mapping
    and a config whose shape n-gram factory carries the language code.
    """
    # Given
    # NOTE: the YAML document needs one key / one utterance per line; the
    # content is flush-left because it sits inside a triple-quoted literal.
    dataset_stream = io.StringIO("""
---
type: intent
name: my_intent
utterances:
- this is [slot1:entity1](my first entity)
- this is [slot2:entity2](second_entity)""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    features_factories = [
        {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1},
            "offsets": [0],
        },
        {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        },
    ]
    config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=features_factories)
    shared = self.get_shared_data(dataset)
    slot_filler = CRFSlotFiller(config, **shared)
    intent = "my_intent"
    slot_filler.fit(dataset, intent=intent)

    # When
    slot_filler.persist(self.tmp_file_path)

    # Then
    metadata_path = self.tmp_file_path / "metadata.json"
    self.assertJsonContent(metadata_path, {"unit_name": "crf_slot_filler"})
    self.assertTrue((self.tmp_file_path / CRF_MODEL_FILENAME).exists())

    # The persisted config is expected to carry the language code in the
    # shape n-gram factory arguments, unlike the config given at creation.
    expected_feature_factories = [
        {
            "factory_name": ShapeNgramFactory.name,
            "args": {"n": 1, "language_code": "en"},
            "offsets": [0],
        },
        {
            "factory_name": IsDigitFactory.name,
            "args": {},
            "offsets": [-1, 0],
        },
    ]
    expected_config = CRFSlotFillerConfig(
        tagging_scheme=TaggingScheme.BILOU,
        feature_factory_configs=expected_feature_factories)
    expected_slot_filler_dict = {
        "crf_model_file": CRF_MODEL_FILENAME,
        "language_code": "en",
        "config": expected_config.to_dict(),
        "intent": intent,
        "slot_name_mapping": {
            "slot1": "entity1",
            "slot2": "entity2",
        },
    }
    slot_filler_path = self.tmp_file_path / "slot_filler.json"
    self.assertJsonContent(slot_filler_path, expected_slot_filler_dict)