def test_entity_synonyms_substitute_two_entity():
    example = Message(text="Looking for a chines restaurant in New York tomorrow", data={
        "entities": [{
            "entity": "type",
            "value": "chinese",
            "start": 14,
            "end": 20
        }, {
            "entity": "city",
            "value": "New York",
            "start": 35,
            "end": 43
        }]
    })
    ent_synonyms = {"chines": "chinese", "new york": "NYC"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant in NYC tomorrow"
    e_type = list(filter(lambda e: e["entity"] == 'type', example.get("entities")))[0]
    e_city = list(filter(lambda e: e["entity"] == 'city', example.get("entities")))[0]

    assert e_type["start"] == 14
    assert e_type["end"] == 21
    assert e_city["start"] == 36
    assert e_city["end"] == 39
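
# The synonym tests above and below all exercise the same substitution step:
# rewrite each entity's surface form to its synonym and shift the offsets of
# every later entity. A minimal sketch of that technique follows;
# substitute_synonyms is an illustrative stand-in written for this write-up,
# not the actual EntitySynonymBegin implementation.
def substitute_synonyms(text, entities, synonyms):
    shift = 0  # how much the text has grown or shrunk so far
    for entity in sorted(entities, key=lambda e: e["start"]):
        start, end = entity["start"] + shift, entity["end"] + shift
        literal = text[start:end]
        replacement = synonyms.get(literal.lower())
        if replacement is not None:
            text = text[:start] + replacement + text[end:]
            end = start + len(replacement)
            shift += len(replacement) - len(literal)
        entity["start"], entity["end"] = start, end
    return text


# On the message above this yields exactly the offsets the test asserts:
# "chines" (14-20) becomes "chinese" (14-21), pushing "New York" to start 36,
# which then shrinks to "NYC" (36-39).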
def test_entity_synonyms_substitute_and_replace():
    initial_text = "Looking for a chines restaurant in New York tomorrow for three people"
    initial_entities = [{
        "entity": "type",
        "value": "chines",
        "start": 14,
        "end": 20
    }, {
        "entity": "city",
        "value": "New York",
        "start": 35,
        "end": 43
    }, {
        "entity": "count",
        "value": "three",
        "start": 57,
        "end": 62
    }]

    example = Message(text=initial_text, data={
        "entities": initial_entities,
    })
    ent_synonyms = {"chines": "chinese", "new york": "NYC", "three": "3"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)
    EntitySynonymEnd().process(example)

    # The original snippet ends without assertions; EntitySynonymEnd is
    # expected to undo the substitution, as the ..._w_insertions test below
    # verifies in more detail.
    assert example.text == initial_text
def _collect_messages(evts: List[Dict[Text, Any]]) -> List[Message]:
    """Collect the message text and parsed data from the UserMessage events
    into a list"""
    from rasa_nlu.extractors.duckling_http_extractor import \
        DucklingHTTPExtractor
    from rasa_nlu.extractors.mitie_entity_extractor import MitieEntityExtractor
    from rasa_nlu.extractors.spacy_entity_extractor import SpacyEntityExtractor

    msgs = []

    for evt in evts:
        if evt.get("event") == UserUttered.type_name:
            data = evt.get("parse_data")

            excluded_extractors = [
                DucklingHTTPExtractor.__name__,
                SpacyEntityExtractor.__name__,
                MitieEntityExtractor.__name__
            ]
            logger.debug("Excluding entity markings of the following "
                         "extractors {} when writing nlu data "
                         "to file.".format(excluded_extractors))

            # Build a filtered list instead of calling remove() while
            # iterating, which would skip entries.
            data["entities"] = [
                entity for entity in data.get("entities", [])
                if entity.get("extractor") not in excluded_extractors
            ]

            msg = Message.build(data["text"], data["intent"]["name"],
                                data["entities"])
            msgs.append(msg)

    return msgs
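# A quick usage sketch for _collect_messages, with a made-up event list. The
# "user" event name matches UserUttered.type_name in rasa_core; the parse_data
# payload here is invented for illustration.
events = [{
    "event": "user",
    "parse_data": {
        "text": "book me a table",
        "intent": {"name": "request_restaurant"},
        "entities": [],
    },
}, {
    "event": "action",  # non-user events are skipped
}]
assert len(_collect_messages(events)) == 1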
def test_multiple_errors():
    instance = _get_instance()
    message = Message(text='Ths i a tst mesae')
    flagged_tokens = [{
        "offset": 0,
        "token": "Ths",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "This",
            "score": 0.825389307284585
        }]
    }, {
        "offset": 4,
        "token": "i",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "is",
            "score": 0.825389307284585
        }]
    }, {
        "offset": 8,
        "token": "tst",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "test",
            "score": 0.825389307284585
        }, {
            "suggestion": "text",
            "score": 0.646529276890009
        }]
    }, {
        "offset": 12,
        "token": "mesae",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "message",
            "score": 0.825389307284585
        }, {
            "suggestion": "mesa",
            "score": 0.761621385590906
        }]
    }]

    tokens = instance._get_replacements(flagged_tokens)
    assert len(tokens) == len(flagged_tokens)

    text = instance._replace(message.text, tokens)
    assert text == 'This is a test message'
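# The test above depends on _replace applying several corrections to one
# string. A common way to do that, sketched below, is to apply them from right
# to left so earlier offsets stay valid. The (offset, token, suggestion)
# tuples are an assumption about this codebase's internal format, not a
# documented contract.
def replace_right_to_left(text, replacements):
    for offset, token, suggestion in sorted(replacements, reverse=True):
        text = text[:offset] + suggestion + text[offset + len(token):]
    return text


assert replace_right_to_left(
    "Ths i a tst mesae",
    [(0, "Ths", "This"), (4, "i", "is"),
     (8, "tst", "test"), (12, "mesae", "message")],
) == "This is a test message"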
def test_entity_synonyms_substitute_one_entity():
    example = Message(text="Looking for a chines restaurant", data={
        "entities": [{
            "entity": "type",
            "value": "chinese",
            "start": 14,
            "end": 20
        }]
    })
    ent_synonyms = {"chines": "chinese"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    assert example.text == "Looking for a chinese restaurant"
    e_type = list(filter(lambda e: e["entity"] == 'type', example.get("entities")))[0]

    assert e_type["start"] == 14
    assert e_type["end"] == 21
def test_entity_sweeper():
    entities = [{
        "entity": "cuisine",
        "value": "chinese",
        "start": 0,
        "end": 6
    }, {
        "entity": "time",
        "value": "whatever",
        "start": 0,
        "end": 6
    }]
    sweeper = Sweeper(component_config={'entity_names': ['time']})
    message = Message("xxx", {'entities': entities})
    sweeper.process(message)
    assert len(message.get('entities')) == 1
    assert message.get('entities')[0]["entity"] == "cuisine"
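# A minimal stand-in showing what a component like Sweeper has to do to make
# this test pass: drop every entity whose name appears in the configured
# entity_names. EntitySweeper is written for this sketch and is not the
# project's actual implementation.
class EntitySweeper:
    def __init__(self, component_config):
        self.entity_names = component_config.get("entity_names", [])

    def process(self, message):
        kept = [e for e in message.get("entities", [])
                if e["entity"] not in self.entity_names]
        message.set("entities", kept)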
def _collect_messages(evts: List[Dict[Text, Any]]) -> List[Message]:
    """Collect the message text and parsed data from the UserMessage events
    into a list"""

    msgs = []

    for evt in evts:
        if evt.get("event") == UserUttered.type_name:
            data = evt.get("parse_data")
            msg = Message.build(data["text"], data["intent"]["name"],
                                data["entities"])
            msgs.append(msg)

    return msgs
def test_entity_synonyms_substitute():
    example = Message(text="Looking for a chines restaurant in New York", data={
        "entities": [{
            "entity": "type",
            "value": "chinese",
            "start": 14,
            "end": 20
        }, {
            "entity": "city",
            "value": "New York",
            "start": 35,
            "end": 43
        }]
    })
    ent_synonyms = {"chines": "chinese", "new york": "NYC"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)
    assert example.text == "Looking for a chinese restaurant in NYC"
def _setup_example(config=None):
    instance = _get_instance(config=config)
    message = Message(text='This is a tst message')
    flagged_tokens = [{
        "offset": 10,
        "token": "tst",
        "type": "UnknownToken",
        "suggestions": [{
            "suggestion": "test",
            "score": 0.95155325585711
        }, {
            "suggestion": "text",
            "score": 0.805342621979041
        }]
    }]

    return instance, message, flagged_tokens
def _get_example(config=None, gazette=None, primary=None):
    if primary is None:
        primary = {
            "entity": "type",
            "value": "chines",
            "start": 14,
            "end": 20,
            "extractor": "ner_crf",
        }
    return _process_example(Message(text="Looking for a chines restaurant in New York", data={
        "entities": [primary, {
            "entity": "type",
            "value": "restaurant",
            "start": 21,
            "end": 31,
            "extractor": "ner_crf",
        }, {
            "entity": "city",
            "value": "New York",
            "start": 35,
            "end": 43,
            "extractor": "ner_crf",
        }]
    }), config=config, gazette=gazette)
# Only the tail of this example's test-case table survives in the snippet.
TEST_CASES = {
    '我的车牌是沪A12345': 'intent<inform_carno> slot<carno:沪A12345>',
    '我的车牌是浙F12345': 'intent<inform_carno> slot<carno:浙F12345>',
    '我的车牌是陕AT8834': 'intent<inform_carno> slot<carno:陕AT8834>',
}

if __name__ == '__main__':
    from rasa_nlu.training_data.message import Message
    from termcolor import cprint

    nlp = HcRegexNLP(None)
    for text, expected in TEST_CASES.items():
        message = Message(text)
        nlp.process(message)
        if 'intent' in message.data:
            intent = message.data["intent"]
            entities = message.data["entities"]
            result = "intent<%s>" % intent['name']
            if len(entities) > 0:
                result += ' slot<'
                for entity in entities:
                    result += entity['entity'] + ':' + entity['value'] + ' '
                result = result[:-1] + '>'
        else:
            result = "intent<>"
        if result != expected:
            cprint("%s => %s failed" % (text, result), 'red')
        else:
            # Reconstructed tail: the snippet is cut off here, and a green
            # "passed" line mirrors the red failure branch above.
            cprint("%s => %s passed" % (text, result), 'green')
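
# The HcRegexNLP class itself is not shown in this snippet. A guess at the
# core of its process method, based on the expected outputs above: match the
# text against a pattern table and set intent/entities on the Message. The
# names and the single pattern below are illustrative only.
import re

CARNO_PATTERN = re.compile('我的车牌是(?P<carno>.+)')

def process_with_regex(message):
    match = CARNO_PATTERN.fullmatch(message.text)
    if match is not None:
        message.data["intent"] = {"name": "inform_carno"}
        message.data["entities"] = [{
            "entity": "carno",
            "value": match.group("carno"),
        }]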
def test_entity_synonyms_substitute_and_replace_w_insertions():
    text_initial = "Looking for a chines restaurant in New York tomorrow for three people"
    initial_entities = [{
        "entity": "type",
        "value": "chines",
        "start": 14,
        "end": 20
    }, {
        "entity": "city",
        "value": "New York",
        "start": 35,
        "end": 43
    }, {
        "entity": "count",
        "value": "three",
        "start": 57,
        "end": 62
    }]

    example = Message(text=text_initial, data={
        "entities": initial_entities,
    })
    ent_synonyms = {"chines": "chinese", "new york": "NYC", "three": "3"}
    EntitySynonymBegin(synonyms=ent_synonyms).process(example)

    # Simulate entities added by later pipeline components, with offsets that
    # refer to the substituted text.
    example.data["entities"].extend([
        {
            "entity": "action",
            "value": "Looking",
            "start": 0,
            "end": 7,
        },
        {
            "entity": "place",
            "value": "restaurant",
            "start": 22,
            "end": 32,
        },
        {
            "entity": "species",
            "value": "people",
            "start": 55,
            "end": 61,
        },
    ])

    EntitySynonymEnd().process(example)

    def has_changed(entity):
        return entity["value"] != example.text[entity["start"]:entity["end"]]

    assert example.text == text_initial

    # Materialize the filter into a list: the original lazy filter object was
    # exhausted by len(list(...)), so the zip below iterated nothing and the
    # per-entity checks were silently skipped.
    changed_entities = list(filter(has_changed, example.data["entities"]))
    # Only the three synonym-substituted entities should differ from the text.
    assert len(changed_entities) == 3
    # Check the changed entities are reverted properly.
    for initial, entity in zip(initial_entities, changed_entities):
        assert raises(KeyError, lambda x: print(x["literal"]), entity)
        assert entity["start"] == initial["start"]
        assert entity["end"] == initial["end"]
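
# A sketch of the revert step this test exercises, assuming (as the KeyError
# check above implies) that EntitySynonymBegin stashes each replaced surface
# form under a "literal" key. The End step puts the literal back, re-shifts
# every later entity, and drops the bookkeeping key. revert_synonyms is an
# illustrative stand-in, not the actual EntitySynonymEnd implementation.
def revert_synonyms(text, entities):
    shift = 0
    for entity in sorted(entities, key=lambda e: e["start"]):
        entity["start"] += shift
        entity["end"] += shift
        literal = entity.pop("literal", None)
        if literal is not None:
            start, end = entity["start"], entity["end"]
            text = text[:start] + literal + text[end:]
            entity["end"] = start + len(literal)
            shift += len(literal) - (end - start)
    return text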