def test_nlu_engine_should_train_and_parse_in_all_languages(self):
    """The engine must fit and parse the same dataset in every supported
    language, resolving the MakeCoffee intent each time."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: MakeTea
utterances:
- make me a [beverage_temperature:Temperature](hot) cup of tea
- make me [number_of_cups:snips/number](five) tea cups
- i want [number_of_cups] cups of [beverage_temperature](boiling hot) tea pls
- can you prepare [number_of_cups] cup of [beverage_temperature](cold) tea ?

---
type: intent
name: MakeCoffee
utterances:
- make me [number_of_cups:snips/number](one) cup of coffee please
- brew [number_of_cups] cups of coffee
- can you prepare [number_of_cups] cup of coffee""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json
    text = "please brew me a cup of coffee"

    for language in get_all_languages():
        # Reuse the same dataset, only switching its language
        dataset[LANGUAGE] = language
        engine = SnipsNLUEngine()

        # When / Then
        fit_msg = "Could not fit engine in '%s'" % language
        with self.fail_if_exception(fit_msg):
            engine = engine.fit(dataset)

        parse_msg = "Could not parse in '%s'" % language
        with self.fail_if_exception(parse_msg):
            parsing = engine.parse(text)
        self.assertEqual("MakeCoffee", parsing[RES_INTENT][RES_INTENT_NAME])
def test_space_should_by_ignored(self):
    """Tokenizing a whitespace-only string must yield no tokens, whatever
    the language."""
    # Given
    text = " "

    for language in get_all_languages():
        # When
        tokens = tokenize(text, language)

        # Then
        self.assertEqual(len(tokens), 0)
def test_should_parse_in_all_languages(self):
    """A builtin-entity parser can be built and run for every language."""
    # Given
    text = "1234"

    # When / Then
    for language in get_all_languages():
        builtin_parser = BuiltinEntityParser.build(language)
        builtin_parser.parse(text)
def test_should_support_all_languages(self):
    """Parsing an empty string must not raise for any supported language."""
    # Given
    text = ""

    for language in get_all_languages():
        builtin_parser = BuiltinEntityParser.build(language=language)
        error_msg = "get_builtin_entities does not support %s." % language
        with self.fail_if_exception(error_msg):
            # When / Then
            builtin_parser.parse(text)
def validate_and_format_dataset(dataset):
    """Checks that the dataset is valid and format it

    The input is never mutated: validation and formatting operate on a deep
    copy, and the formatted copy is returned with the VALIDATED flag set so
    that calling this function again is a no-op.

    Raise:
        DatasetFormatError: When the dataset format is wrong
    """
    from snips_nlu_parsers import get_all_languages

    if isinstance(dataset, Dataset):
        dataset = dataset.json

    # Make this function idempotent
    if dataset.get(VALIDATED, False):
        return dataset
    # The JSON round-trip both deep-copies the input (the caller's dict is
    # left untouched) and normalizes values to plain JSON types, so a
    # separate deepcopy beforehand would be redundant.
    dataset = json.loads(json.dumps(dataset))
    validate_type(dataset, dict, object_label="dataset")
    mandatory_keys = [INTENTS, ENTITIES, LANGUAGE]
    for key in mandatory_keys:
        validate_key(dataset, key, object_label="dataset")
    validate_type(dataset[ENTITIES], dict, object_label="entities")
    validate_type(dataset[INTENTS], dict, object_label="intents")
    language = dataset[LANGUAGE]
    validate_type(language, str, object_label="language")
    if language not in get_all_languages():
        raise DatasetFormatError("Unknown language: '%s'" % language)

    # Sort intents by name so that iteration/serialization is deterministic
    dataset[INTENTS] = {
        intent_name: intent_data
        for intent_name, intent_data in sorted(iteritems(dataset[INTENTS]))}
    for intent in itervalues(dataset[INTENTS]):
        _validate_and_format_intent(intent, dataset[ENTITIES])

    utterance_entities_values = extract_utterance_entities(dataset)
    builtin_entity_parser = BuiltinEntityParser.build(dataset=dataset)

    # Sort entities by name as well, for the same determinism reason
    dataset[ENTITIES] = {
        entity_name: entity_data
        for entity_name, entity_data in sorted(iteritems(dataset[ENTITIES]))}

    for entity_name, entity in iteritems(dataset[ENTITIES]):
        utterance_entities = utterance_entities_values[entity_name]
        if is_builtin_entity(entity_name):
            dataset[ENTITIES][entity_name] = \
                _validate_and_format_builtin_entity(
                    entity, utterance_entities)
        else:
            dataset[ENTITIES][entity_name] = \
                _validate_and_format_custom_entity(
                    entity, utterance_entities, language,
                    builtin_entity_parser)
    dataset[VALIDATED] = True
    return dataset
def test_default_configs_should_work(self):
    """Every supported language must have a default config, and an engine
    fitted with it must resolve the GetWeather intent."""
    # Given
    dataset_stream = io.StringIO("""
---
type: intent
name: TurnLightOn
utterances:
- turn on the lights
- please switch on the light
- switch the light on
- can you turn the light on ?
- I need you to turn on the lights

---
type: intent
name: GetWeather
utterances:
- what is the weather today
- What's the weather in tokyo today?
- Can you tell me the weather please ?
- what is the weather forecast for this weekend""")
    dataset = Dataset.from_yaml_files("en", [dataset_stream]).json

    for language in get_all_languages():
        # When
        config = DEFAULT_CONFIGS.get(language)
        self.assertIsNotNone(config,
                             "Missing default config for '%s'" % language)
        dataset[LANGUAGE] = language
        shared = self.get_shared_data(dataset)
        engine = SnipsNLUEngine(config, **shared).fit(dataset)
        result = engine.parse("Please give me the weather in Paris")

        # Then
        self.assertIsNotNone(result[RES_INTENT])
        self.assertEqual("GetWeather",
                         result[RES_INTENT][RES_INTENT_NAME])
def download_all_languages(*pip_args):
    """Download compatible resources for all supported languages"""
    # Imported lazily so the parsers dependency is only needed when the
    # download command actually runs
    from snips_nlu_parsers import get_all_languages

    languages = get_all_languages()
    for lang in languages:
        download(lang, False, *pip_args)
def download_all_languages(*pip_args):
    """Download compatible resources for all supported languages"""
    # Import locally, consistent with the sibling implementation of this
    # command: it keeps the snips_nlu_parsers dependency off the module
    # import path and makes this function self-contained.
    from snips_nlu_parsers import get_all_languages

    for language in get_all_languages():
        download(language, False, *pip_args)