示例#1
0
    def _parse_raw_user_utterance(
            self, step: Dict[Text, Any]) -> Optional[UserUttered]:
        """Turn a raw story step into a ``UserUttered`` event.

        The intent name and full retrieval intent are taken from
        ``self._user_intent_from_step`` and always get a confidence of 1.0.

        Args:
            step: The raw step dictionary read from a story.

        Returns:
            A ``UserUttered`` built from the message text (``None`` when the
            step has no ``KEY_USER_MESSAGE`` entry), the intent dictionary and
            the entities.
        """
        from rasa.shared.nlu.interpreter import RegexInterpreter

        name, retrieval_intent = self._user_intent_from_step(step)
        intent_payload = {
            INTENT_NAME_KEY: name,
            FULL_RETRIEVAL_INTENT_NAME_KEY: retrieval_intent,
            PREDICTED_CONFIDENCE_KEY: 1.0,
        }

        if KEY_USER_MESSAGE not in step:
            # Only an intent was provided in the story, so there is no text;
            # entities come from the step's own entity list instead.
            step_entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
            return UserUttered(None, intent_payload, step_entities)

        message = step[KEY_USER_MESSAGE].strip()
        found_entities = entities_parser.find_entities_in_training_example(
            message)
        text = entities_parser.replace_entities(message)

        if text.startswith(INTENT_MESSAGE_PREFIX):
            # For prefixed messages the entities reported by the regex
            # interpreter replace the ones found in the annotated message.
            parsed = RegexInterpreter().synchronous_parse(text)
            found_entities = parsed.get(ENTITIES, [])

        return UserUttered(text, intent_payload, found_entities)
示例#2
0
    def _parse_intent(self, intent_data: Dict[Text, Any]) -> None:
        """Collect the training examples declared for a single intent.

        If the intent name under ``KEY_INTENT`` is missing or empty, a warning
        is raised and the entry is skipped. Otherwise every parsed example is
        run through ``entities_parser.replace_entities``, its entities are
        handed to the synonyms parser together with ``self.entity_synonyms``,
        and the resulting ``Message`` is appended to ``self.training_examples``.

        Args:
            intent_data: The raw dictionary describing one intent.
        """
        import rasa.shared.nlu.training_data.entities_parser as entities_parser
        import rasa.shared.nlu.training_data.synonyms_parser as synonyms_parser

        intent = intent_data.get(KEY_INTENT, "")
        if not intent:
            warning_text = (
                f"Issue found while processing '{self.filename}': "
                f"The intent has an empty name. "
                f"Intents should have a name defined under the {KEY_INTENT} key. "
                f"It will be skipped."
            )
            rasa.shared.utils.io.raise_warning(
                warning_text, docs=DOCS_URL_TRAINING_DATA)
            return

        raw_examples = intent_data.get(KEY_INTENT_EXAMPLES, "")
        shared_metadata = intent_data.get(KEY_METADATA)
        parsed_examples = self._parse_training_examples(raw_examples, intent)

        for raw_example, example_entities, example_metadata in parsed_examples:
            text = entities_parser.replace_entities(raw_example)

            synonyms_parser.add_synonyms_from_entities(
                text, example_entities, self.entity_synonyms)

            message = Message.build(
                text, intent, example_entities, shared_metadata,
                example_metadata)
            self.training_examples.append(message)
def test_markdown_entity_regex(example: Text,
                               expected_entities: List[Dict[Text, Any]],
                               expected_text: Text):
    """Check both entity-parser helpers against one annotated example.

    The entities found in ``example`` must equal ``expected_entities``, and the
    text returned by ``replace_entities`` must equal ``expected_text``.
    """
    # Entities are checked first, then the replaced text.
    assert (
        entities_parser.find_entities_in_training_example(example)
        == expected_entities
    )
    assert entities_parser.replace_entities(example) == expected_text
示例#4
0
    def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
        """Turn a raw story step into a ``UserUttered`` event.

        The intent name comes from ``self._user_intent_from_step`` and is always
        paired with a confidence of 1.0.

        Args:
            step: The raw step dictionary read from a story.

        Returns:
            A ``UserUttered`` built from the message text (``None`` when the
            step has no ``KEY_USER_MESSAGE`` entry), the intent dictionary and
            the entities.
        """
        intent_payload = {
            "name": self._user_intent_from_step(step),
            "confidence": 1.0,
        }

        if KEY_USER_MESSAGE not in step:
            # Only an intent was provided in the story, so there is no text;
            # entities come from the step's own entity list instead.
            step_entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
            return UserUttered(None, intent_payload, step_entities)

        message = step[KEY_USER_MESSAGE].strip()
        found_entities = entities_parser.find_entities_in_training_example(message)
        text = entities_parser.replace_entities(message)

        if text.startswith(INTENT_MESSAGE_PREFIX):
            # For prefixed messages the entities reported by the regex
            # interpreter replace the ones found in the annotated message.
            parsed = RegexInterpreter().synchronous_parse(text)
            found_entities = parsed.get(ENTITIES, [])

        return UserUttered(text, intent_payload, found_entities)
示例#5
0
dir_path = r"F:\Documents\stopansko\masters\thesis\sig-detect\data\clean\enron_random_clean_signatures"
full_d = []
for root, dirs, filenames in os.walk(dir_path):
    if ".idea" in root:
        continue
    for i, filename in enumerate(filenames):
        # d = defaultdict(list)
        file_features = []
        print(f"{i}. {filename} ...")
        with open(os.path.join(root, filename), encoding="utf-8") as f:
            lines = f.readlines()

        for line in lines:
            entities = find_entities_in_training_example(line)
            plain_text = replace_entities(line)
            doc = nlp(plain_text)
            for t in doc:
                low = t.orth_.lower()
                curr_d = {
                    "token": t.orth_,
                    "filename": filename,
                    "label": get_label(t.idx, t.orth_, entities),

                    "email": t.like_email,
                    "url": t.like_url,
                    "num": t.like_num,
                    "stop": t.is_stop,
                    "alpha": t.is_alpha,
                    "title": t.is_title,
                    "first": low in first_names,