def test_entity_synonyms_substitute_two_entity():
    example = Message(text="Looking for a chines restaurant in New York tomorrow",
                      data={
                          "entities": [{
                              "entity": "type",
                              "value": "chinese",
                              "start": 14,
                              "end": 20
                          }, {
                              "entity": "city",
                              "value": "New York",
                              "start": 35,
                              "end": 43
                          }]
                      })
    ent_synonyms = {"chines": "chinese", "new york": "NYC"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant in NYC tomorrow"

    e_type = [e for e in example.get("entities") if e["entity"] == "type"][0]
    e_city = [e for e in example.get("entities") if e["entity"] == "city"][0]

    assert e_type["start"] == 14
    assert e_type["end"] == 21
    assert e_city["start"] == 36
    assert e_city["end"] == 39

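# A minimal sketch (not the EntitySynonymBegin implementation) of the offset
# bookkeeping the substitution tests above and below exercise: replacements
# are applied left to right, and a running delta shifts the spans of every
# later entity. The name `substitute_synonyms` is illustrative only.
def substitute_synonyms(text, entities, synonyms):
    delta = 0
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"] + delta, entity["end"] + delta
        literal = text[start:end]
        replacement = synonyms.get(literal.lower())
        if replacement is None:
            # No synonym for this span: only shift it by the accumulated delta.
            entity["start"], entity["end"] = start, end
            continue
        text = text[:start] + replacement + text[end:]
        entity["start"], entity["end"] = start, start + len(replacement)
        delta += len(replacement) - len(literal)
    return text
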
def test_entity_synonyms_substitute_and_replace():
    initial_text = "Looking for a chines restaurant in New York tomorrow for three people"
    initial_entities = [{
        "entity": "type",
        "value": "chines",
        "start": 14,
        "end": 20
    }, {
        "entity": "city",
        "value": "New York",
        "start": 35,
        "end": 43
    }, {
        "entity": "count",
        "value": "three",
        "start": 57,
        "end": 62
    }]
    example = Message(text=initial_text, data={"entities": initial_entities})
    ent_synonyms = {"chines": "chinese", "new york": "NYC", "three": "3"}

    EntitySynonymBegin(synonyms=ent_synonyms).process(example)
    EntitySynonymEnd().process(example)

    # The Begin/End round trip should leave the original text untouched.
    assert example.text == initial_text

def _collect_messages(evts: List[Dict[Text, Any]]) -> List[Message]:
    """Collect the message text and parsed data from the UserUttered
    events into a list of `Message`s."""
    from rasa_nlu.extractors.duckling_http_extractor import \
        DucklingHTTPExtractor
    from rasa_nlu.extractors.mitie_entity_extractor import MitieEntityExtractor
    from rasa_nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor

    excluded_extractors = [
        DucklingHTTPExtractor.__name__,
        SpacyEntityExtractor.__name__,
        MitieEntityExtractor.__name__
    ]
    logger.debug("Excluding entity markings of the following extractors"
                 " when writing nlu data to file: {}"
                 "".format(excluded_extractors))

    msgs = []
    for evt in evts:
        if evt.get("event") == UserUttered.type_name:
            data = evt.get("parse_data")
            # Filter instead of calling `remove` on the list while iterating
            # it, which would silently skip elements.
            data["entities"] = [
                entity for entity in data.get("entities", [])
                if entity.get("extractor") not in excluded_extractors
            ]
            msg = Message.build(data["text"], data["intent"]["name"],
                                data["entities"])
            msgs.append(msg)

    return msgs

def test_multiple_errors():
    instance = _get_instance()
    message = Message(text='Ths i a tst mesae')
    flagged_tokens = [{
        "offset": 0,
        "token": "Ths",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "This",
            "score": 0.825389307284585
        }]
    }, {
        "offset": 4,
        "token": "i",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "is",
            "score": 0.825389307284585
        }]
    }, {
        "offset": 8,
        "token": "tst",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "test",
            "score": 0.825389307284585
        }, {
            "suggestion": "text",
            "score": 0.646529276890009
        }]
    }, {
        "offset": 12,
        "token": "mesae",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "message",
            "score": 0.825389307284585
        }, {
            "suggestion": "mesa",
            "score": 0.761621385590906
        }]
    }]

    tokens = instance._get_replacements(flagged_tokens)
    assert len(tokens) == len(flagged_tokens)

    text = instance._replace(message.text, tokens)
    assert text == 'This is a test message'

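# Hedged sketch of the replacement step test_multiple_errors relies on:
# splice the highest-scoring suggestion of each flagged token into the text.
# `apply_suggestions` is an illustrative name, not the checker's actual API.
def apply_suggestions(text, flagged_tokens):
    # Work right to left so earlier offsets stay valid after each splice.
    for flagged in sorted(flagged_tokens, key=lambda t: t["offset"],
                          reverse=True):
        best = max(flagged["suggestions"], key=lambda s: s["score"])
        start = flagged["offset"]
        end = start + len(flagged["token"])
        text = text[:start] + best["suggestion"] + text[end:]
    return text
# apply_suggestions('Ths i a tst mesae', flagged_tokens) yields
# 'This is a test message', matching the assertion above.
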
def test_entity_synonyms_substitute_one_entity():
    example = Message(text="Looking for a chines restaurant",
                      data={
                          "entities": [{
                              "entity": "type",
                              "value": "chinese",
                              "start": 14,
                              "end": 20
                          }]
                      })
    ent_synonyms = {"chines": "chinese"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant"

    e_type = [e for e in example.get("entities") if e["entity"] == "type"][0]

    assert e_type["start"] == 14
    assert e_type["end"] == 21

def test_entity_sweeper():
    entities = [{
        "entity": "cuisine",
        "value": "chinese",
        "start": 0,
        "end": 6
    }, {
        "entity": "time",
        "value": "whatever",
        "start": 0,
        "end": 6
    }]
    sweeper = Sweeper(component_config={'entity_names': ['time']})
    message = Message("xxx", {'entities': entities})
    sweeper.process(message)

    assert len(message.get('entities')) == 1
    assert message.get('entities')[0]["entity"] == "cuisine"

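# The observable contract of Sweeper, reduced to one line: drop every entity
# whose name appears in the configured `entity_names`. A sketch, not the
# component's actual implementation.
def sweep_entities(entities, entity_names):
    return [e for e in entities if e["entity"] not in entity_names]
# e.g. sweep_entities(entities, ['time']) keeps only the "cuisine" entity.
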
def _collect_messages(evts: List[Dict[Text, Any]]) -> List[Message]:
    """Collect the message text and parsed data from the UserUttered
    events into a list of `Message`s."""
    msgs = []
    for evt in evts:
        if evt.get("event") == UserUttered.type_name:
            data = evt.get("parse_data")
            msg = Message.build(data["text"], data["intent"]["name"],
                                data["entities"])
            msgs.append(msg)
    return msgs

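# Illustrative input for _collect_messages, with made-up field values. This
# assumes UserUttered.type_name == "user"; if the constant differs in your
# Rasa version, adjust the "event" key accordingly.
_SAMPLE_EVENTS = [{
    "event": "user",
    "parse_data": {
        "text": "hello there",
        "intent": {"name": "greet", "confidence": 1.0},
        "entities": []
    }
}]
# _collect_messages(_SAMPLE_EVENTS) returns one Message built from the text,
# intent name and (empty) entity list above.
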
def test_entity_synonyms_substitute():
    example = Message(text="Looking for a chines restaurant in New York",
                      data={
                          "entities": [{
                              "entity": "type",
                              "value": "chinese",
                              "start": 14,
                              "end": 20
                          }, {
                              "entity": "city",
                              "value": "New York",
                              "start": 35,
                              "end": 43
                          }]
                      })
    ent_synonyms = {"chines": "chinese", "new york": "NYC"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant in NYC"

def _setup_example(config=None):
    instance = _get_instance(config=config)
    message = Message(text='This is a tst message')
    flagged_tokens = [{
        "offset": 10,
        "token": "tst",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "test",
            "score": 0.95155325585711
        }, {
            "suggestion": "text",
            "score": 0.805342621979041
        }]
    }]
    return instance, message, flagged_tokens

def _get_example(config=None, gazette=None, primary=None):
    if primary is None:
        primary = {
            "entity": "type",
            "value": "chines",
            "start": 14,
            "end": 20,
            "extractor": "ner_crf",
        }
    message = Message(text="Looking for a chines restaurant in New York",
                      data={
                          "entities": [primary, {
                              "entity": "type",
                              "value": "restaurant",
                              "start": 21,
                              "end": 31,
                              "extractor": "ner_crf",
                          }, {
                              "entity": "city",
                              "value": "New York",
                              "start": 35,
                              "end": 43,
                              "extractor": "ner_crf",
                          }]
                      })
    return _process_example(message, config=config, gazette=gazette)

    '我的车牌是沪A12345': 'intent<inform_carno> slot<carno:沪A12345>',
    '我的车牌是浙F12345': 'intent<inform_carno> slot<carno:浙F12345>',
    '我的车牌是陕AT8834': 'intent<inform_carno> slot<carno:陕AT8834>',
}

if __name__ == '__main__':
    from rasa_nlu.training_data.message import Message
    from termcolor import cprint

    nlp = HcRegexNLP(None)
    for text, expected in TEST_CASES.items():
        message = Message(text)
        nlp.process(message)
        if 'intent' in message.data:
            intent = message.data["intent"]
            entities = message.data["entities"]
            result = "intent<%s>" % intent['name']
            if len(entities) > 0:
                result += ' slot<'
                for entity in entities:
                    result += entity['entity'] + ':' + entity['value'] + ' '
                # Replace the trailing space with the closing bracket.
                result = result[:-1] + '>'
        else:
            result = "intent<>"
        if result != expected:
            cprint("%s => %s failed" % (text, result), 'red')
        else:
            cprint("%s => %s passed" % (text, result), 'green')

def test_entity_synonyms_substitute_and_replace_w_insertions():
    text_initial = "Looking for a chines restaurant in New York tomorrow for three people"
    initial_entities = [{
        "entity": "type",
        "value": "chines",
        "start": 14,
        "end": 20
    }, {
        "entity": "city",
        "value": "New York",
        "start": 35,
        "end": 43
    }, {
        "entity": "count",
        "value": "three",
        "start": 57,
        "end": 62
    }]
    example = Message(text=text_initial, data={"entities": initial_entities})
    ent_synonyms = {"chines": "chinese", "new york": "NYC", "three": "3"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    # Simulate extractors that add entities on the substituted text.
    example.data["entities"].extend([{
        "entity": "action",
        "value": "Looking",
        "start": 0,
        "end": 7,
    }, {
        "entity": "place",
        "value": "restaurant",
        "start": 22,
        "end": 32,
    }, {
        "entity": "species",
        "value": "people",
        "start": 55,
        "end": 61,
    }])

    EntitySynonymEnd().process(example)

    def has_changed(entity):
        return entity["value"] != example.text[entity["start"]:entity["end"]]

    assert example.text == text_initial

    # Materialize the filter once; iterating an already-consumed filter
    # object in the loop below would silently check nothing.
    changed_entities = list(filter(has_changed, example.data["entities"]))

    # Only the three substituted entities should differ from text[start:end].
    assert len(changed_entities) == 3

    # Check the changed entities are reverted properly.
    for initial, entity in zip(initial_entities, changed_entities):
        assert raises(KeyError, lambda: entity["literal"])
        assert entity["start"] == initial["start"]
        assert entity["end"] == initial["end"]

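# Sketch of the revert step the test above implies: judging by the KeyError
# assertion, the Begin step stashes the original surface form under a
# "literal" key, which the End step pops while restoring the span. This
# covers the single-entity case only; a full revert must also re-shift the
# offsets of every entity that follows the replacement.
def revert_entity(text, entity):
    literal = entity.pop("literal")  # KeyError once the entity is reverted
    start, end = entity["start"], entity["end"]
    text = text[:start] + literal + text[end:]
    entity["end"] = start + len(literal)
    return text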