def _extract_entities(self, message: Message) -> List[Dict[Text, Any]]:
        """Extract entities from the text of the given user message.

        Each entry of ``self.patterns`` is searched in the message text with
        ``re.finditer``. Every match yields one entity dict holding the
        pattern's ``"name"``, the match's start and end offsets, and the
        matched substring. Matching ignores case unless
        ``self.case_sensitive`` is truthy.

        Args:
            message: The message whose ``TEXT`` attribute is searched.

        Returns:
            The entity dicts in pattern order, then match order. The list is
            empty when the message has no text or nothing matches.
        """
        # Read the text once instead of once per pattern and once per match.
        text = message.get(TEXT)
        if text is None:
            # Nothing to search; re.finditer would raise TypeError on None.
            return []

        flags = 0 if self.case_sensitive else re.IGNORECASE

        entities = []
        for pattern in self.patterns:
            # Iterate the matches lazily; no intermediate list is needed.
            for match in re.finditer(pattern["pattern"], text, flags=flags):
                entities.append({
                    ENTITY_ATTRIBUTE_TYPE: pattern["name"],
                    ENTITY_ATTRIBUTE_START: match.start(),
                    ENTITY_ATTRIBUTE_END: match.end(),
                    # group(0) is exactly text[match.start():match.end()].
                    ENTITY_ATTRIBUTE_VALUE: match.group(0),
                })

        return entities
Example #2
0
def test_entity_synonyms_substitute_two_entity():
    """Run EntitySynonymBegin on a message carrying two annotated entities.

    Asserts the resulting message text and the start/end offsets of the
    "type" and "city" entities after processing.
    """
    annotated = [
        {"entity": "type", "value": "chinese", "start": 14, "end": 20},
        {"entity": "city", "value": "New York", "start": 35, "end": 43},
    ]
    example = Message(
        text="Looking for a chines restaurant in New York tomorrow",
        data={"entities": annotated},
    )
    replacements = {"chines": "chinese", "new york": "NYC"}
    EntitySynonymBegin(synonyms=replacements).process(example)

    assert example.text == "Looking for a chinese restaurant in NYC tomorrow"

    def first_entity(name):
        # First entity carrying the given name; IndexError if none exists.
        return [e for e in example.get("entities") if e["entity"] == name][0]

    e_type = first_entity("type")
    e_city = first_entity("city")

    assert (e_type["start"], e_type["end"]) == (14, 21)
    assert (e_city["start"], e_city["end"]) == (36, 39)
Example #3
0
def test_entity_sweeper():
    """Run a Sweeper configured with entity name 'time' over two entities.

    Asserts that exactly one entity remains on the message afterwards and
    that it is the "cuisine" entity.
    """
    cuisine = {"entity": "cuisine", "value": "chinese", "start": 0, "end": 6}
    time_entity = {"entity": "time", "value": "whatever", "start": 0, "end": 6}
    entities = [cuisine, time_entity]

    sweeper = Sweeper(component_config={"entity_names": ["time"]})
    message = Message("xxx", {"entities": entities})
    sweeper.process(message)

    assert len(message.get("entities")) == 1
    remaining_name = message.get("entities")[0]["entity"]
    assert remaining_name == "cuisine"
def test_entity_synonyms_substitute_three_entity():
    """Run EntitySynonymBegin on a message carrying three annotated entities.

    Asserts the resulting message text and the start/end offsets of the
    "type", "city" and "count" entities after processing.
    """
    annotated = [
        {"entity": "type", "value": "chines", "start": 14, "end": 20},
        {"entity": "city", "value": "New York", "start": 35, "end": 43},
        {"entity": "count", "value": "three", "start": 57, "end": 62},
    ]
    example = Message(
        text="Looking for a chines restaurant in New York tomorrow for three people",
        data={"entities": annotated},
    )
    replacements = {"chines": "chinese", "new york": "NYC", "three": "3"}
    EntitySynonymBegin(synonyms=replacements).process(example)

    expected_text = "Looking for a chinese restaurant in NYC tomorrow for 3 people"
    assert example.text == expected_text

    def first_entity(name):
        # First entity carrying the given name; IndexError if none exists.
        return [e for e in example.get("entities") if e["entity"] == name][0]

    e_type = first_entity("type")
    e_city = first_entity("city")
    e_count = first_entity("count")

    assert (e_type["start"], e_type["end"]) == (14, 21)
    assert (e_city["start"], e_city["end"]) == (36, 39)
    assert (e_count["start"], e_count["end"]) == (53, 54)
    def process(self, message: Message, **kwargs: Any) -> None:
        """Append the entities found by this extractor to the message.

        Does nothing when ``self.patterns`` is empty. Otherwise the result of
        ``self._extract_entities`` is passed through
        ``self.add_extractor_name`` and stored under ``ENTITIES`` after any
        entities already present on the message.

        Args:
            message: The message to read from and update in place.
            **kwargs: Accepted but not used here.
        """
        if self.patterns:
            found = self._extract_entities(message)
            labelled = self.add_extractor_name(found)
            combined = message.get(ENTITIES, []) + labelled
            message.set(ENTITIES, combined, add_to_output=True)
Example #6
0
def test_entity_synonyms_substitute_one_entity():
    """Run EntitySynonymBegin on a message carrying one annotated entity.

    Asserts the resulting message text and the start/end offsets of the
    "type" entity after processing.
    """
    annotated = [
        {"entity": "type", "value": "chinese", "start": 14, "end": 20},
    ]
    example = Message(
        text="Looking for a chines restaurant",
        data={"entities": annotated},
    )
    replacements = {"chines": "chinese"}
    EntitySynonymBegin(synonyms=replacements).process(example)

    assert example.text == "Looking for a chinese restaurant"

    # First entity named "type"; IndexError if none exists.
    e_type = [e for e in example.get("entities") if e["entity"] == "type"][0]

    assert (e_type["start"], e_type["end"]) == (14, 21)
Example #7
0
 def test_classification(self, trained_classifier, message, intent):
     """Process ``message`` with the classifier and check the intent name.

     The intent name read from the processed message must equal ``intent``;
     a missing name reads as "NOT_CLASSIFIED".
     """
     parsed = Message(message)
     trained_classifier.process(parsed)
     predicted_name = parsed.get("intent").get("name", "NOT_CLASSIFIED")
     assert predicted_name == intent