def test_markdown_entity_regex():
    """Verify entity extraction from ``[text](entity)`` markdown annotations.

    The fixture holds four examples under a single intent: plain text, a
    simple entity, an ``entity:value`` annotation whose value differs from
    the marked text, and an annotation built from unusual characters.
    """
    reader = MarkdownReader()

    # One intent header followed by four list-item examples.
    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    parsed = reader.reads(md)
    examples = parsed.training_examples
    assert len(examples) == 4

    # Example without any annotation: only the intent is recorded.
    plain = examples[0]
    assert plain.data == {"intent": "restaurant_search"}
    assert plain.text == "i'm looking for a place to eat"

    # Simple annotation: the marked text is also the entity value.
    north_entity = {
        'start': 31,
        'end': 36,
        'value': 'north',
        'entity': 'loc-direction',
    }
    with_direction = examples[1]
    assert with_direction.data == {
        'intent': 'restaurant_search',
        'entities': [north_entity],
    }
    assert with_direction.text == (
        "i'm looking for a place in the north of town")

    # "entity:value" annotation: the value differs from the marked text.
    cuisine_entity = {
        'start': 8,
        'end': 14,
        'value': 'chinese',
        'entity': 'cuisine',
    }
    with_value = examples[2]
    assert with_value.data == {
        'intent': 'restaurant_search',
        'entities': [cuisine_entity],
    }
    assert with_value.text == "show me chines restaurants"

    # Entity name and value containing digits and punctuation.
    odd_entity = {
        'start': 8,
        'end': 14,
        'value': '43er*+?df',
        'entity': '22_ab-34*3.A',
    }
    with_odd_chars = examples[3]
    assert with_odd_chars.data == {
        'intent': 'restaurant_search',
        'entities': [odd_entity],
    }
    assert with_odd_chars.text == "show me chines restaurants"
def _correct_entities(latest_message: Dict[Text, Any],
                      endpoint: EndpointConfig,
                      sender_id: Text) -> Dict[Text, Any]:
    """Ask the user to re-annotate the entities of the latest message.

    The message's ``parse_data`` is rendered with ``_as_md_message`` and
    offered as the default answer of a text prompt. The answer is parsed
    back with ``MarkdownReader`` and its ``"entities"`` entry is returned
    (an empty list when the parsed example has none).
    """
    # NOTE(review): the returned value is the "entities" entry, defaulting to
    # a list, while the annotation says Dict -- confirm the intended type.
    parse_data = latest_message.get("parse_data", {})
    default_markup = _as_md_message(parse_data)

    prompt = questionary.text(
        "Please mark the entities using [value](type) notation",
        default=default_markup)
    user_markup = _ask_or_abort(prompt, sender_id, endpoint)

    # noinspection PyProtectedMember
    example = MarkdownReader()._parse_training_example(user_markup)
    return example.get("entities", [])
def md_format_message(text, intent, entities):
    """Render a message as markdown via ``MarkdownWriter``.

    ``text`` is parsed with ``MarkdownReader`` to obtain the message text,
    ``entities`` are passed through ``deserialise_entities``, and both are
    handed to the writer together with ``intent``.
    """
    # noinspection PyProtectedMember
    parsed_example = MarkdownReader()._parse_training_example(text)
    plain_entities = deserialise_entities(entities)

    # noinspection PyProtectedMember
    render = MarkdownWriter()._generate_message_md
    return render({
        "text": parsed_example.text,
        "intent": intent,
        "entities": plain_entities,
    })
def test_markdown_entity_regex():
    """Check text and entity data parsed from annotated markdown examples."""
    r = MarkdownReader()

    # One intent header followed by four list-item examples.
    md = """
## intent:restaurant_search
- i'm looking for a place to eat
- i'm looking for a place in the [north](loc-direction) of town
- show me [chines](cuisine:chinese) restaurants
- show me [chines](22_ab-34*3.A:43er*+?df) restaurants
    """

    intent = 'restaurant_search'

    # (expected text, expected data) for each example, in file order.
    expectations = [
        (
            "i'm looking for a place to eat",
            {"intent": intent},
        ),
        (
            "i'm looking for a place in the north of town",
            {'intent': intent,
             'entities': [{'start': 31, 'end': 36,
                           'value': 'north',
                           'entity': 'loc-direction'}]},
        ),
        (
            "show me chines restaurants",
            {'intent': intent,
             'entities': [{'start': 8, 'end': 14,
                           'value': 'chinese',
                           'entity': 'cuisine'}]},
        ),
        (
            "show me chines restaurants",
            {'intent': intent,
             'entities': [{'start': 8, 'end': 14,
                           'value': '43er*+?df',
                           'entity': '22_ab-34*3.A'}]},
        ),
    ]

    result = r.reads(md)
    assert len(result.training_examples) == 4

    for example, (text, data) in zip(result.training_examples, expectations):
        assert example.data == data
        assert example.text == text
def _correct_entities(latest_message, endpoint, sender_id):
    # type: (Dict[Text, Any], EndpointConfig, Text) -> Dict[Text, Any]
    """Ask the user to re-annotate the entities of the latest message.

    A single "input" question is asked, pre-filled with the markdown
    rendering of the message's ``parse_data``. The answer is parsed with
    ``MarkdownReader`` and its ``"entities"`` entry is returned (an empty
    list when the parsed example has none).
    """
    # NOTE(review): the returned value is the "entities" entry, defaulting to
    # a list, while the type comment says Dict -- confirm the intended type.
    prompt_text = "Please mark the entities using [value](type) notation"
    default_markup = _as_md_message(latest_message.get("parse_data", {}))

    annotation_question = {
        "type": "input",
        "name": "annotation",
        "default": default_markup,
        "message": prompt_text,
    }
    replies = _ask_questions([annotation_question], sender_id, endpoint)

    # noinspection PyProtectedMember
    example = MarkdownReader()._parse_training_example(replies["annotation"])
    return example.get("entities", [])
def _reader_factory(fformat):
    """Return a reader instance matching ``fformat``.

    ``None`` is returned when ``fformat`` matches none of the known formats.
    """
    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()

    # No reader is available for this format.
    return None
def _reader_factory(fformat):
    """Return a reader instance matching ``fformat``.

    The reader classes are imported inside the function. ``None`` is
    returned when ``fformat`` matches none of the known formats.
    """
    from rasa_nlu.training_data.formats import (
        MarkdownReader, WitReader, LuisReader, RasaReader, DialogflowReader)

    # Pick the class first; it is instantiated once at the end.
    reader_cls = None
    if fformat == LUIS:
        reader_cls = LuisReader
    elif fformat == WIT:
        reader_cls = WitReader
    elif fformat in DIALOGFLOW_RELEVANT:
        reader_cls = DialogflowReader
    elif fformat == RASA:
        reader_cls = RasaReader
    elif fformat == MARKDOWN:
        reader_cls = MarkdownReader

    if reader_cls is None:
        return None
    return reader_cls()
def _reader_factory(fformat):
    """Return a reader instance matching ``fformat``.

    ``None`` is returned when ``fformat`` matches none of the known formats.
    """
    # Format identifiers compared against ``fformat`` below.
    WIT = "wit"
    LUIS = "luis"
    RASA = "rasa_nlu"
    MARKDOWN = "md"
    # Both Dialogflow formats are served by the same reader.
    DIALOGFLOW_RELEVANT = {DIALOGFLOW_ENTITIES, DIALOGFLOW_INTENT}

    if fformat == LUIS:
        return LuisReader()
    if fformat == WIT:
        return WitReader()
    if fformat in DIALOGFLOW_RELEVANT:
        return DialogflowReader()
    if fformat == RASA:
        return RasaReader()
    if fformat == MARKDOWN:
        return MarkdownReader()

    # No reader is available for this format.
    return None
def _load(filename, language='en'):
    """Read one training data file using the reader for its guessed format.

    The format is obtained from ``_guess_format`` and logged. Formats whose
    name starts with "dialogflow" are delegated to ``_from_dialogflow_file``
    together with ``language``. A ``ValueError`` is raised when the format
    matches none of the handled cases.
    """
    fformat = _guess_format(filename)
    logger.info("Training data format of {} is {}".format(filename, fformat))

    if fformat == LUIS:
        return LuisReader().read(filename)
    if fformat == WIT:
        return WitReader().read(filename)
    if fformat.startswith("dialogflow"):
        return _from_dialogflow_file(filename, language, fformat)
    if fformat == RASA:
        return RasaReader().read(filename)
    if fformat == MARKDOWN:
        return MarkdownReader().read(filename)

    # None of the known formats matched.
    error_template = ("unknown training file format : {} for "
                      "file {}")
    raise ValueError(error_template.format(fformat, filename))
async def _correct_entities(latest_message: Dict[Text, Any],
                            endpoint: EndpointConfig,
                            sender_id: Text) -> List[Dict[Text, Any]]:
    """Ask the user to re-annotate the entities of the latest message.

    The message's ``parse_data`` is rendered with ``_as_md_message`` and
    offered as the default answer of a text prompt. The answer is parsed
    with ``MarkdownReader`` and then combined with the original parse data
    by ``_merge_annotated_and_original_entities``, whose result is returned.
    """
    from rasa_nlu.training_data.formats import MarkdownReader

    original_parse = latest_message.get("parse_data", {})
    default_markup = _as_md_message(original_parse)

    prompt = questionary.text(
        "Please mark the entities using [value](type) notation",
        default=default_markup)
    user_markup = await _ask_questions(prompt, sender_id, endpoint)

    # noinspection PyProtectedMember
    annotated_parse = MarkdownReader()._parse_training_example(user_markup)

    return _merge_annotated_and_original_entities(annotated_parse,
                                                  original_parse)