def test_fit_transform(self, mocked_preprocess):
    """fit(...).transform(...) and fit_transform(...) must yield the
    same co-occurrence matrix for the same data."""
    # Given
    text = "a b c d e f"
    utterance = text_to_utterance(text)
    builtin_ents = [
        {
            "value": "e",
            "resolved_value": "e",
            "range": {"start": 8, "end": 9},
            "entity_kind": "the_snips_e_entity",
        }
    ]
    custom_ents = [
        {
            "value": "c",
            "resolved_value": "c",
            "range": {"start": 4, "end": 5},
            "entity_kind": "the_c_entity",
        }
    ]
    # Preprocessing is mocked so both vectorizers see identical inputs.
    mocked_preprocess.return_value = (
        [utterance], [builtin_ents], [custom_ents])

    config = CooccurrenceVectorizerConfig(
        window_size=3,
        unknown_words_replacement_string="b",
        filter_stop_words=False,
    )
    dataset = get_empty_dataset("en")
    builtin_parser = EntityParserMock({text: builtin_ents})
    custom_parser = EntityParserMock({text: custom_ents})
    resources = {STOP_WORDS: set()}
    vectorizer1 = CooccurrenceVectorizer(
        config,
        builtin_entity_parser=builtin_parser,
        custom_entity_parser=custom_parser,
        resources=resources)
    vectorizer2 = CooccurrenceVectorizer(
        config,
        builtin_entity_parser=builtin_parser,
        custom_entity_parser=custom_parser,
        resources=resources)

    # When
    utterances = [utterance]
    fit_then_transform = vectorizer1.fit(
        utterances, dataset).transform(utterances).todense().tolist()
    fit_transform = vectorizer2.fit_transform(
        utterances, dataset).todense().tolist()

    # Then
    self.assertListEqual(fit_then_transform, fit_transform)
def test_should_create_number_variation(self):
    """String-variation generation must receive kwargs that depend on the
    entity size: bigger entities progressively disable variation kinds.

    Fix: replaced the Python-2 compatibility shim ``iteritems(args)``
    with the idiomatic ``args.items()`` — both iterate (key, value)
    pairs, so behavior is unchanged.
    """
    # Given
    # Expected get_string_variations kwargs, keyed by number of entity
    # values.
    args = {
        1: {
            "numbers": True,
            "and_": True,
            "case": True,
            "punctuation": True,
        },
        1001: {
            "numbers": False,
            "and_": True,
            "case": True,
            "punctuation": True,
        },
        10001: {
            "numbers": False,
            "and_": False,
            "case": False,
            "punctuation": False,
        },
    }

    for num_ents, expected_args in args.items():
        entity = {
            "matching_strictness": 1.0,
            "use_synonyms": False,
            "automatically_extensible": False,
            "data": [
                {"value": str(i), "synonyms": []}
                for i in range(num_ents)
            ],
        }
        builtin_entity_parser = EntityParserMock(dict())
        with patch("snips_nlu.dataset.validation"
                   ".get_string_variations") as mocked_string_variations:
            mocked_string_variations.return_value = []

            # When
            _validate_and_format_custom_entity(
                entity, [], "en", builtin_entity_parser)

            # Then
            # call[2] is the kwargs dict of each recorded mock call.
            for call in mocked_string_variations.mock_calls:
                kwargs = call[2]
                for k in expected_args:
                    self.assertEqual(expected_args[k], kwargs[k])
def test_transform(self):
    """transform must count only the registered word pairs, with stop
    words filtered and unknown words replaced."""
    # Given
    config = CooccurrenceVectorizerConfig(
        filter_stop_words=True,
        window_size=3,
        unknown_words_replacement_string="d")
    text_0 = "yo a b c d e f yo"
    text_1 = "yo a b c d e"
    utterance_0 = text_to_utterance(text_0)
    utterance_1 = text_to_utterance(text_1)
    resources = {STOP_WORDS: {"b"}}
    builtin_ents = [{
        "value": "e",
        "resolved_value": "e",
        "range": {"start": 11, "end": 12},
        "entity_kind": "the_snips_e_entity",
    }]
    custom_ents = [{
        "value": "c",
        "resolved_value": "c",
        "range": {"start": 7, "end": 8},
        "entity_kind": "the_c_entity",
    }]
    builtin_parser = EntityParserMock(
        {text_0: builtin_ents, text_1: builtin_ents})
    custom_parser = EntityParserMock(
        {text_0: custom_ents, text_1: custom_ents})
    vectorizer = CooccurrenceVectorizer(
        config,
        builtin_entity_parser=builtin_parser,
        custom_entity_parser=custom_parser,
        resources=resources)
    # Internal state is set directly to bypass fitting.
    vectorizer._language = "en"
    vectorizer._word_pairs = {
        ("THE_SNIPS_E_ENTITY", "f"): 0,
        ("a", "THE_C_ENTITY"): 1,
        ("a", "THE_SNIPS_E_ENTITY"): 2,
        ("b", "THE_SNIPS_E_ENTITY"): 3,
        ("yo", "yo"): 4,
        ("d", "THE_SNIPS_E_ENTITY"): 5,
    }
    utterances = [utterance_0, utterance_1]

    # When
    transformed = vectorizer.transform(utterances)

    # Then
    expected = [[1, 1, 1, 0, 0, 0], [0, 1, 1, 0, 0, 0]]
    self.assertEqual(expected, transformed.todense().tolist())
def test_should_be_deserializable(self):
    """A LookupIntentParser restored from disk must parse intents and
    slots as described by its serialized map."""
    # Given
    parser_dict = {
        "config": {
            "unit_name": "lookup_intent_parser",
            "ignore_stop_words": True
        },
        "language_code": "en",
        "map": {
            hash_str("make coffee"): [0, []],
            hash_str("prepare % snipsnumber % coffees"): [0, [0]],
            hash_str("% snipsnumber % teas at % snipstemperature %"):
                [1, [0, 1]],
        },
        "slots_names": ["nb_cups", "tea_temperature"],
        "intents_names": ["MakeCoffee", "MakeTea"],
        "entity_scopes": [
            {
                "entity_scope": {
                    "builtin": ["snips/number"],
                    "custom": [],
                },
                "intent_group": ["MakeCoffee"]
            },
            {
                "entity_scope": {
                    "builtin": ["snips/number", "snips/temperature"],
                    "custom": [],
                },
                "intent_group": ["MakeTea"]
            },
        ],
        "stop_words_whitelist": dict()
    }
    self.tmp_file_path.mkdir()
    metadata = {"unit_name": "lookup_intent_parser"}
    self.writeJsonContent(
        self.tmp_file_path / "intent_parser.json", parser_dict)
    self.writeJsonContent(self.tmp_file_path / "metadata.json", metadata)
    resources = self.get_resources("en")
    builtin_entity_parser = BuiltinEntityParser.build(language="en")
    custom_entity_parser = EntityParserMock()

    # When
    parser = LookupIntentParser.from_path(
        self.tmp_file_path,
        custom_entity_parser=custom_entity_parser,
        builtin_entity_parser=builtin_entity_parser,
        resources=resources)
    res_make_coffee = parser.parse("make me a coffee")
    res_make_tea = parser.parse("two teas at 90°C please")

    # Then
    expected_result_coffee = parsing_result(
        input="make me a coffee",
        intent=intent_classification_result("MakeCoffee", 1.0),
        slots=[])
    expected_result_tea = parsing_result(
        input="two teas at 90°C please",
        intent=intent_classification_result("MakeTea", 1.0),
        slots=[
            {
                "entity": "snips/number",
                "range": {"end": 3, "start": 0},
                "slotName": "nb_cups",
                "value": "two"
            },
            {
                "entity": "snips/temperature",
                "range": {"end": 16, "start": 12},
                "slotName": "tea_temperature",
                "value": "90°C"
            }
        ])
    self.assertEqual(expected_result_coffee, res_make_coffee)
    self.assertEqual(expected_result_tea, res_make_tea)