Exemplo n.º 1
0
def validate_and_format_dataset(dataset):
    """Validate a dataset and return a formatted copy of it.

    A dataset already flagged with ``VALIDATED`` is returned as is, which
    keeps this function idempotent. Otherwise the input is copied, its
    mandatory keys and language are checked, and every intent and entity is
    handed to its dedicated validation helper.

    Raises:
        ValueError: if the dataset language is not one of the languages
            returned by ``get_all_languages()``.
    """
    if dataset.get(VALIDATED, False):
        return dataset

    # Work on a copy that has been round-tripped through JSON
    formatted = json.loads(json.dumps(deepcopy(dataset)))
    validate_type(formatted, dict)
    for mandatory_key in (INTENTS, ENTITIES, LANGUAGE):
        validate_key(formatted, mandatory_key, object_label="dataset")
    validate_type(formatted[ENTITIES], dict)
    validate_type(formatted[INTENTS], dict)

    language = formatted[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)

    # Intents are processed first; entity values are extracted afterwards
    for intent in itervalues(formatted[INTENTS]):
        validate_and_format_intent(intent, formatted[ENTITIES])

    values_per_entity = extract_queries_entities(formatted)

    # Replace each entity by its validated and formatted version
    for name, raw_entity in iteritems(formatted[ENTITIES]):
        values = values_per_entity[name]
        if is_builtin_entity(name):
            formatted_entity = validate_and_format_builtin_entity(
                raw_entity, values)
        else:
            formatted_entity = validate_and_format_custom_entity(
                raw_entity, values, language)
        formatted[ENTITIES][name] = formatted_entity

    formatted[VALIDATED] = True
    return formatted
Exemplo n.º 2
0
def validate_and_format_dataset(dataset):
    """Validate a dataset and return a formatted copy of it.

    A dataset already flagged with ``VALIDATED`` is returned as is, which
    keeps this function idempotent. Otherwise the input is copied, its
    mandatory keys and language are checked, and every intent and entity is
    handed to its dedicated validation helper.

    Raises:
        ValueError: if the dataset language is not one of the languages
            returned by ``get_all_languages()``.
    """
    if dataset.get(VALIDATED, False):
        return dataset

    # Work on a copy that has been round-tripped through JSON
    formatted = json.loads(json.dumps(deepcopy(dataset)))
    validate_type(formatted, dict)
    for mandatory_key in (INTENTS, ENTITIES, LANGUAGE):
        validate_key(formatted, mandatory_key, object_label="dataset")
    validate_type(formatted[ENTITIES], dict)
    validate_type(formatted[INTENTS], dict)

    language = formatted[LANGUAGE]
    validate_type(language, str)
    if language not in get_all_languages():
        raise ValueError("Unknown language: '%s'" % language)

    # Intents are processed first; entity values are extracted afterwards
    for intent in itervalues(formatted[INTENTS]):
        validate_and_format_intent(intent, formatted[ENTITIES])

    values_per_entity = extract_queries_entities(formatted)

    # Replace each entity by its validated and formatted version
    for name, raw_entity in iteritems(formatted[ENTITIES]):
        values = values_per_entity[name]
        if is_builtin_entity(name):
            formatted_entity = validate_and_format_builtin_entity(
                raw_entity, values)
        else:
            formatted_entity = validate_and_format_custom_entity(
                raw_entity, values, language)
        formatted[ENTITIES][name] = formatted_entity

    formatted[VALIDATED] = True
    return formatted
Exemplo n.º 3
0
 def test_space_should_by_ignored(self):
     # Tokenizing a single space must produce no token, in every language
     # Given
     single_space = " "

     # When / Then
     for language in get_all_languages():
         token_count = len(tokenize(single_space, language))
         self.assertEqual(token_count, 0)
Exemplo n.º 4
0
 def test_space_should_by_ignored(self):
     # Tokenizing a single space must produce no token, in every language
     # Given
     single_space = " "

     # When / Then
     for language in get_all_languages():
         token_count = len(tokenize(single_space, language))
         self.assertEqual(token_count, 0)
Exemplo n.º 5
0
    def test_resources_index_should_have_all_languages(self):
        # The set built from RESOURCE_INDEX must equal the set of all
        # supported languages
        # Given / When
        indexed_languages = set(RESOURCE_INDEX)

        # Then
        self.assertSetEqual(indexed_languages, get_all_languages())
Exemplo n.º 6
0
    def test_should_parse_in_all_languages(self):
        # Building a parser and parsing must not raise, whatever the language
        # Given
        digits = "1234"

        # When / Then
        for language in get_all_languages():
            BuiltinEntityParser(language).parse(digits)
Exemplo n.º 7
0
    def test_get_builtin_entities_should_support_all_languages(self):
        # get_builtin_entities must not raise on an empty text, whatever the
        # language
        # Given
        empty_text = ""

        # When / Then
        for language in get_all_languages():
            failure_message = \
                "get_builtin_entities does not support %s." % language
            with self.fail_if_exception(failure_message):
                get_builtin_entities(empty_text, language)
Exemplo n.º 8
0
    def test_get_builtin_entities_should_support_all_languages(self):
        # get_builtin_entities must not raise on an empty text, whatever the
        # language
        # Given
        empty_text = ""

        # When / Then
        for language in get_all_languages():
            failure_message = \
                "get_builtin_entities does not support %s." % language
            with self.fail_if_exception(failure_message):
                get_builtin_entities(empty_text, language)
Exemplo n.º 9
0
 def test_all_languages_should_have_stop_words(self):
     # Fails as soon as get_stop_words raises for one of the languages.
     # The capitalization for the CRF assumes all languages have stop_words
     # Given
     for language in get_all_languages():
         try:
             # When/Then
             get_stop_words(language)
         # Catch Exception instead of using a bare except, so that
         # KeyboardInterrupt and SystemExit propagate instead of being
         # reported as a missing stop words failure
         except Exception:  # pylint: disable=W0703
             self.fail("%s has not stop words" % language)
Exemplo n.º 10
0
 def test_all_languages_should_have_stop_words(self):
     # Fails as soon as get_stop_words raises for one of the languages.
     # The capitalization for the CRF assumes all languages have stop_words
     # Given
     for language in get_all_languages():
         try:
             # When/Then
             get_stop_words(language)
         # Catch Exception instead of using a bare except, so that
         # KeyboardInterrupt and SystemExit propagate instead of being
         # reported as a missing stop words failure
         except Exception:  # pylint: disable=W0703
             self.fail("%s has not stop words" % language)
Exemplo n.º 11
0
    def test_resources_index_should_have_all_languages(self):
        # The set built from RESOURCE_INDEX must equal the set of all
        # supported languages
        # Given / When
        indexed_languages = set(RESOURCE_INDEX)

        # Then
        self.assertSetEqual(indexed_languages, get_all_languages())
Exemplo n.º 12
0
    def test_should_support_all_languages(self):
        # A parser built for any language must parse an empty text without
        # raising
        # Given
        empty_text = ""

        for language in get_all_languages():
            # The parser is built outside of the guarded section, so only
            # errors raised while parsing are reported with the message below
            language_parser = BuiltinEntityParser.build(language=language)
            failure_message = \
                "get_builtin_entities does not support %s." % language

            # When / Then
            with self.fail_if_exception(failure_message):
                language_parser.parse(empty_text)
Exemplo n.º 13
0
    def test_default_configs_should_work(self):
        # For each language, fit an engine with its default config on a copy
        # of the weather dataset and check the intent found for a fixed query
        # Given
        weather_dataset = deepcopy(WEATHER_DATASET)
        query = "Please give me the weather in Paris"

        for language in get_all_languages():
            # When
            default_config = DEFAULT_CONFIGS.get(language)
            self.assertIsNotNone(
                default_config, "Missing default config for '%s'" % language)
            weather_dataset[LANGUAGE] = language
            fitted_engine = SnipsNLUEngine(default_config)
            fitted_engine = fitted_engine.fit(weather_dataset)
            parsing = fitted_engine.parse(query)

            # Then
            parsed_intent = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("SearchWeatherForecast", parsed_intent)
Exemplo n.º 14
0
    def test_nlu_engine_should_train_and_parse_in_all_languages(self):
        # For each language, fitting an engine on a copy of the beverage
        # dataset and then parsing a query must not raise
        # Given
        query = "brew me an espresso"

        for language in get_all_languages():
            beverage_dataset = deepcopy(BEVERAGE_DATASET)
            beverage_dataset[LANGUAGE] = language
            nlu_engine = SnipsNLUEngine()
            fit_failure = "Could not fit engine in '%s'" % language
            parse_failure = "Could not parse in '%s'" % language

            # When / Then
            with self.fail_if_exception(fit_failure):
                nlu_engine = nlu_engine.fit(beverage_dataset)

            with self.fail_if_exception(parse_failure):
                nlu_engine.parse(query)
Exemplo n.º 15
0
    def test_nlu_engine_should_train_and_parse_in_all_languages(self):
        # For each language, fitting an engine on a copy of the beverage
        # dataset and then parsing a query must not raise
        # Given
        query = "brew me an espresso"

        for language in get_all_languages():
            beverage_dataset = deepcopy(BEVERAGE_DATASET)
            beverage_dataset[LANGUAGE] = language
            nlu_engine = SnipsNLUEngine()
            fit_failure = "Could not fit engine in '%s'" % language
            parse_failure = "Could not parse in '%s'" % language

            # When / Then
            with self.fail_if_exception(fit_failure):
                nlu_engine = nlu_engine.fit(beverage_dataset)

            with self.fail_if_exception(parse_failure):
                nlu_engine.parse(query)
Exemplo n.º 16
0
    def test_sample_configs_should_work(self):
        # For each language, load its sample JSON config, fit an engine on
        # the sample dataset and check the intent found for a fixed query
        # Given
        # NOTE: this is the shared self.sample_dataset object, its language
        # is overwritten in place on every iteration
        sample_dataset = self.sample_dataset
        query = "Please give me the weather in Paris"

        for language in get_all_languages():
            # When
            config_path = os.path.join(
                SAMPLES_PATH, "configs", "config_%s.json" % language)
            with io.open(config_path) as config_file:
                sample_config = json.load(config_file)
            sample_dataset[LANGUAGE] = language
            fitted_engine = SnipsNLUEngine(sample_config).fit(sample_dataset)
            parsing = fitted_engine.parse(query)

            # Then
            parsed_intent = parsing[RES_INTENT][RES_INTENT_NAME]
            self.assertEqual("sampleGetWeather", parsed_intent)
Exemplo n.º 17
0
def download_all_languages(*pip_args):
    """Download compatible resources for every supported language.

    Calls ``download`` once per language returned by ``get_all_languages()``.

    Args:
        *pip_args: extra positional arguments forwarded unchanged to each
            ``download`` call.
    """
    supported_languages = get_all_languages()
    for lang in supported_languages:
        # The second positional argument of download is always False here;
        # its meaning is defined by download itself
        download(lang, False, *pip_args)
Exemplo n.º 18
0
 def __init__(self, methodName='runTest'):
     """Initialize the test case, then load the resources of all languages.

     Args:
         methodName: forwarded to the parent class constructor.
     """
     super(SnipsTest, self).__init__(methodName)
     for language in get_all_languages():
         load_resources(language)
Exemplo n.º 19
0
 def __init__(self, methodName='runTest'):
     """Initialize the test case, then load the resources of all languages.

     Args:
         methodName: forwarded to the parent class constructor.
     """
     super(SnipsTest, self).__init__(methodName)
     for language in get_all_languages():
         load_resources(language)
Exemplo n.º 20
0
 def setUp(self):
     """Load the resources of every supported language before the test."""
     for language in get_all_languages():
         load_resources(language)