def _extract_entities(self, message: Message) -> List[Dict[Text, Any]]:
    """Extract entities from the text of the given user message.

    Every entry of ``self.patterns`` is searched in the message text and each
    regex match is turned into one entity dictionary.

    Args:
        message: The user message whose ``TEXT`` attribute is searched.

    Returns:
        One dictionary per regex match, holding the pattern's ``"name"`` as the
        entity type, the start and end character offsets of the match, and the
        matched substring as the value. Patterns are processed in the order
        they appear in ``self.patterns``. The list is empty when the message
        carries no text.
    """
    # Read the text once instead of on every pattern and every match.
    text = message.get(TEXT)
    if text is None:
        # ``re.finditer`` raises ``TypeError`` on ``None``; a message without
        # text simply contains no entities.
        return []

    # Matching ignores case unless the extractor is flagged as case sensitive.
    flags = 0 if self.case_sensitive else re.IGNORECASE

    entities = []
    for pattern in self.patterns:
        # Iterate the match iterator directly; no intermediate list is needed.
        for match in re.finditer(pattern["pattern"], text, flags=flags):
            start_index, end_index = match.span()
            entities.append(
                {
                    ENTITY_ATTRIBUTE_TYPE: pattern["name"],
                    ENTITY_ATTRIBUTE_START: start_index,
                    ENTITY_ATTRIBUTE_END: end_index,
                    ENTITY_ATTRIBUTE_VALUE: text[start_index:end_index],
                }
            )

    return entities
def test_entity_synonyms_substitute_two_entity():
    """Check the rewritten text and entity offsets when two entities have synonyms."""

    def entity_named(msg, name):
        # First entity of the message whose "entity" field equals ``name``.
        return [e for e in msg.get("entities") if e["entity"] == name][0]

    annotated = [
        {"entity": "type", "value": "chinese", "start": 14, "end": 20},
        {"entity": "city", "value": "New York", "start": 35, "end": 43},
    ]
    example = Message(
        text="Looking for a chines restaurant in New York tomorrow",
        data={"entities": annotated},
    )
    ent_synonyms = {"chines": "chinese", "new york": "NYC"}

    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant in NYC tomorrow"

    e_type = entity_named(example, "type")
    e_city = entity_named(example, "city")

    # Expected (start, end) offsets in the rewritten text.
    assert e_type["start"] == 14
    assert e_type["end"] == 21
    assert e_city["start"] == 36
    assert e_city["end"] == 39
def test_entity_sweeper():
    """Check that a ``Sweeper`` configured with entity name ``time`` leaves only ``cuisine``."""
    cuisine_entity = {"entity": "cuisine", "value": "chinese", "start": 0, "end": 6}
    time_entity = {"entity": "time", "value": "whatever", "start": 0, "end": 6}

    sweeper = Sweeper(component_config={"entity_names": ["time"]})
    message = Message("xxx", {"entities": [cuisine_entity, time_entity]})

    sweeper.process(message)

    remaining = message.get("entities")
    assert len(remaining) == 1
    assert remaining[0]["entity"] == "cuisine"
def test_entity_synonyms_substitute_three_entity():
    """Check the rewritten text and entity offsets when three entities have synonyms."""

    def entity_named(msg, name):
        # First entity of the message whose "entity" field equals ``name``.
        return [e for e in msg.get("entities") if e["entity"] == name][0]

    annotated = [
        {"entity": "type", "value": "chines", "start": 14, "end": 20},
        {"entity": "city", "value": "New York", "start": 35, "end": 43},
        {"entity": "count", "value": "three", "start": 57, "end": 62},
    ]
    example = Message(
        text="Looking for a chines restaurant in New York tomorrow for three people",
        data={"entities": annotated},
    )
    ent_synonyms = {"chines": "chinese", "new york": "NYC", "three": "3"}

    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    expected_text = "Looking for a chinese restaurant in NYC tomorrow for 3 people"
    assert example.text == expected_text

    e_type = entity_named(example, "type")
    e_city = entity_named(example, "city")
    e_count = entity_named(example, "count")

    # Expected (start, end) offsets in the rewritten text.
    assert e_type["start"] == 14
    assert e_type["end"] == 21
    assert e_city["start"] == 36
    assert e_city["end"] == 39
    assert e_count["start"] == 53
    assert e_count["end"] == 54
def process(self, message: Message, **kwargs: Any) -> None:
    """Add the entities found by this extractor to the message.

    The entities returned by ``_extract_entities`` are passed through
    ``add_extractor_name`` and appended to whatever the message already holds
    under ``ENTITIES``. Nothing happens when ``self.patterns`` is empty.
    """
    if self.patterns:
        found = self.add_extractor_name(self._extract_entities(message))
        already_present = message.get(ENTITIES, [])
        message.set(ENTITIES, already_present + found, add_to_output=True)
def test_entity_synonyms_substitute_one_entity():
    """Check the rewritten text and entity offsets when a single entity has a synonym."""
    annotated = [{"entity": "type", "value": "chinese", "start": 14, "end": 20}]
    example = Message(
        text="Looking for a chines restaurant",
        data={"entities": annotated},
    )
    ent_synonyms = {"chines": "chinese"}

    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant"

    # First entity of the processed message labelled "type".
    e_type = [e for e in example.get("entities") if e["entity"] == "type"][0]
    assert e_type["start"] == 14
    assert e_type["end"] == 21
def test_classification(self, trained_classifier, message, intent):
    """Check that the classifier assigns the expected intent name to the message.

    ``"NOT_CLASSIFIED"`` is compared against ``intent`` when the predicted
    intent has no ``"name"`` entry.
    """
    parsed = Message(message)
    trained_classifier.process(parsed)

    predicted_name = parsed.get("intent").get("name", "NOT_CLASSIFIED")
    assert predicted_name == intent