def test_multiword_entities(): data = u""" { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }""" with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f: f.write(data.encode("utf-8")) f.flush() td = load_data(f.name, "en") assert len(td.entity_examples) == 1 example = td.entity_examples[0] entities = example["entities"] assert len(entities) == 1 start, end = MitieEntityExtractor.find_entity(entities[0], example["text"]) assert start == 4 assert end == 7
def test_multiword_entities(): data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "show me flights to New York City", "intent": "unk", "entities": [ { "entity": "destination", "start": 19, "end": 32, "value": "New York City" } ] } ] } }""" with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f: f.write(data.encode("utf-8")) f.flush() td = training_data.load_data(f.name) assert len(td.entity_examples) == 1 example = td.entity_examples[0] entities = example.get("entities") assert len(entities) == 1 tokens = WhitespaceTokenizer().tokenize(example.text) start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens) assert start == 4 assert end == 7
def test_repeated_entities(): data = u""" { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }""" with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f: f.write(data.encode("utf-8")) f.flush() td = load_data(f.name, "en") assert len(td.entity_examples) == 1 example = td.entity_examples[0] entities = example["entities"] assert len(entities) == 1 start, end = MitieEntityExtractor.find_entity(entities[0], example["text"]) assert start == 9 assert end == 10
def test_repeated_entities(): data = """ { "rasa_nlu_data": { "common_examples" : [ { "text": "book a table today from 3 to 6 for 3 people", "intent": "unk", "entities": [ { "entity": "description", "start": 35, "end": 36, "value": "3" } ] } ] } }""" with tempfile.NamedTemporaryFile(suffix="_tmp_training_data.json") as f: f.write(data.encode("utf-8")) f.flush() td = training_data.load_data(f.name) assert len(td.entity_examples) == 1 example = td.entity_examples[0] entities = example.get("entities") assert len(entities) == 1 tokens = WhitespaceTokenizer().tokenize(example.text) start, end = MitieEntityExtractor.find_entity(entities[0], example.text, tokens) assert start == 9 assert end == 10