def _parse_raw_user_utterance(
    self, step: Dict[Text, Any]
) -> Optional[UserUttered]:
    """Turn one raw user step into a ``UserUttered`` event.

    The intent dictionary is always built from ``_user_intent_from_step``
    (intent name plus full retrieval intent) with a confidence of ``1.0``.

    Args:
        step: The raw step dictionary read from the story file.

    Returns:
        A ``UserUttered`` carrying the plain message text (or ``None`` when
        the step holds no user message), the intent dictionary and the
        entities.
    """
    from rasa.shared.nlu.interpreter import RegexInterpreter

    name, retrieval_intent = self._user_intent_from_step(step)
    intent = {
        INTENT_NAME_KEY: name,
        FULL_RETRIEVAL_INTENT_NAME_KEY: retrieval_intent,
        PREDICTED_CONFIDENCE_KEY: 1.0,
    }

    if KEY_USER_MESSAGE not in step:
        # Only the intent was provided in the stories, so there is no text;
        # entities come from the step's own entity list instead.
        entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
        return UserUttered(None, intent, entities)

    annotated_message = step[KEY_USER_MESSAGE].strip()
    entities = entities_parser.find_entities_in_training_example(
        annotated_message
    )
    plain_text = entities_parser.replace_entities(annotated_message)

    if plain_text.startswith(INTENT_MESSAGE_PREFIX):
        # Messages carrying the intent prefix take their entities from the
        # RegexInterpreter parse result rather than from inline annotations.
        parsed = RegexInterpreter().synchronous_parse(plain_text)
        entities = parsed.get(ENTITIES, [])

    return UserUttered(plain_text, intent, entities)
def test_markdown_entity_regex(
    example: Text,
    expected_entities: List[Dict[Text, Any]],
    expected_text: Text,
):
    """Check entity extraction and entity replacement for one example.

    The arguments are presumably supplied by a parametrize decorator that is
    not part of this block.
    """
    found_entities = entities_parser.find_entities_in_training_example(example)
    assert found_entities == expected_entities

    text_without_markup = entities_parser.replace_entities(example)
    assert text_without_markup == expected_text
async def replace_placeholders(
    self,
    example: Message,
    faker_: Faker,
    matches: List[Tuple[Any, ...]],
    count: int,
) -> AsyncIterator[Message]:
    """Yield ``count`` new messages with the placeholders of ``example`` filled.

    For every generated message the placeholders are filled twice: once in
    the example's ``text`` and once in the text returned by
    ``rebuild_original_text``. Entities are parsed from the latter with
    ``find_entities_in_training_example``.

    Args:
        example: The message to derive new messages from.
        faker_: Passed through to ``replace_placeholders_in_text``.
        matches: Passed through to ``replace_placeholders_in_text``.
        count: Number of messages to generate.

    Yields:
        One ``Message`` built from the filled text, the example's intent and
        the parsed entities, per iteration.
    """
    # The rebuilt template must stay untouched across iterations. Overwriting
    # it with its own filled-in version would make every later iteration
    # start from the previous result instead of from the template.
    annotated_template = await self.rebuild_original_text(example)

    for _ in range(count):
        text = await self.replace_placeholders_in_text(
            example.data.get("text"), faker_, matches
        )
        annotated_text = await self.replace_placeholders_in_text(
            annotated_template, faker_, matches
        )
        entities = find_entities_in_training_example(annotated_text)
        yield Message.build(text, example.get("intent"), entities)
def _parse_training_examples(
    self, examples: Union[Text, List[Dict[Text, Any]]], intent: Text
) -> List[Tuple[Text, List[Dict[Text, Any]], Optional[Any]]]:
    """Convert the examples block of one intent into parsed example tuples.

    ``examples`` may be a list of dictionaries (text under
    ``KEY_INTENT_TEXT``, optional metadata under ``KEY_METADATA``) or a
    single string that is split by ``_parse_multiline_example``. Any other
    type triggers a warning and the block is skipped.

    Args:
        examples: The raw examples block.
        intent: Name of the intent the examples belong to; used in warnings
            and forwarded to ``_parse_multiline_example``.

    Returns:
        A list of ``(text, entities, metadata)`` tuples; empty when the block
        has an unexpected type or contains no examples.
    """
    import rasa.shared.nlu.training_data.entities_parser as entities_parser

    if isinstance(examples, list):
        text_and_metadata = [
            (
                # pytype: disable=attribute-error
                item.get(KEY_INTENT_TEXT, "").strip(STRIP_SYMBOLS),
                item.get(KEY_METADATA),
            )
            for item in examples
            if item  # falsy entries are dropped
        ]
    # pytype: enable=attribute-error
    elif isinstance(examples, str):
        # Plain-string examples carry no metadata.
        text_and_metadata = [
            (text, None)
            for text in self._parse_multiline_example(intent, examples)
        ]
    else:
        rasa.shared.utils.io.raise_warning(
            f"Unexpected block found in '{self.filename}' "
            f"while processing intent '{intent}':\n"
            f"{examples}\n"
            f"This block will be skipped.",
            docs=DOCS_URL_TRAINING_DATA_NLU,
        )
        return []

    if not text_and_metadata:
        # Warn only; the comprehension below then yields an empty list.
        rasa.shared.utils.io.raise_warning(
            f"Issue found while processing '{self.filename}': "
            f"Intent '{intent}' has no examples.",
            docs=DOCS_URL_TRAINING_DATA_NLU,
        )

    return [
        (
            text,
            entities_parser.find_entities_in_training_example(text),
            metadata,
        )
        for text, metadata in text_and_metadata
    ]
def _parse_raw_user_utterance(
    self, step: Dict[Text, Any]
) -> Optional[UserUttered]:
    """Turn one raw user step into a ``UserUttered`` event.

    The intent is the name returned by ``_user_intent_from_step`` with a
    confidence of ``1.0``.

    Args:
        step: The raw step dictionary read from the story file.

    Returns:
        A ``UserUttered`` carrying the plain message text (or ``None`` when
        the step holds no user message), the intent dictionary and the
        entities.
    """
    intent = {"name": self._user_intent_from_step(step), "confidence": 1.0}

    if KEY_USER_MESSAGE not in step:
        # Only the intent was provided in the stories, so there is no text;
        # entities come from the step's own entity list instead.
        entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
        return UserUttered(None, intent, entities)

    annotated_message = step[KEY_USER_MESSAGE].strip()
    entities = entities_parser.find_entities_in_training_example(
        annotated_message
    )
    plain_text = entities_parser.replace_entities(annotated_message)

    if plain_text.startswith(INTENT_MESSAGE_PREFIX):
        # Messages carrying the intent prefix take their entities from the
        # RegexInterpreter parse result rather than from inline annotations.
        parsed = RegexInterpreter().synchronous_parse(plain_text)
        entities = parsed.get(ENTITIES, [])

    return UserUttered(plain_text, intent, entities)
def test_markdown_entity_regex_error_handling_wrong_schema():
    """An entity annotation with an unknown key raises ``SchemaValidationError``."""
    # Schema error: "entiti" instead of "entity"
    malformed_example = (
        'I want to fly from [Berlin]{"entiti": "city", "role": "from"}'
    )

    with pytest.raises(SchemaValidationError):
        entities_parser.find_entities_in_training_example(malformed_example)
def test_markdown_entity_regex_error_handling_not_json():
    """An entity annotation that is not valid JSON raises ``InvalidEntityFormatException``."""
    # JSON syntax error: missing closing " for `role`
    malformed_example = (
        'I want to fly from [Berlin]{"entity": "city", "role: "from"}'
    )

    with pytest.raises(InvalidEntityFormatException):
        entities_parser.find_entities_in_training_example(malformed_example)
dir_path = r"F:\Documents\stopansko\masters\thesis\sig-detect\data\clean\enron_random_clean_signatures" full_d = [] for root, dirs, filenames in os.walk(dir_path): if ".idea" in root: continue for i, filename in enumerate(filenames): # d = defaultdict(list) file_features = [] print(f"{i}. {filename} ...") with open(os.path.join(root, filename), encoding="utf-8") as f: lines = f.readlines() for line in lines: entities = find_entities_in_training_example(line) plain_text = replace_entities(line) doc = nlp(plain_text) for t in doc: low = t.orth_.lower() curr_d = { "token": t.orth_, "filename": filename, "label": get_label(t.idx, t.orth_, entities), "email": t.like_email, "url": t.like_url, "num": t.like_num, "stop": t.is_stop, "alpha": t.is_alpha, "title": t.is_title,