def process(self, message: Message, **kwargs: Any) -> None:
        """Set the predicted intent and an intent ranking on the message."""

        if not self.clf:
            # Classifier unavailable: component was not trained or did not
            # receive enough training data.
            intent = None
            intent_ranking = []
        else:
            features = message.get("text_features").reshape(1, -1)
            label_ids, probs = self.predict(features)
            labels = self.transform_labels_num2str(np.ravel(label_ids))
            # `predict` is batch-oriented and returns a matrix, so collapse
            # the probabilities for this single example to a flat vector.
            probs = probs.flatten()

            if labels.size > 0 and probs.size > 0:
                pairs = list(zip(list(labels), list(probs)))
                top_pairs = pairs[:INTENT_RANKING_LENGTH]

                intent = {"name": labels[0], "confidence": probs[0]}

                intent_ranking = [
                    {"name": name, "confidence": score}
                    for name, score in top_pairs
                ]
            else:
                intent = {"name": None, "confidence": 0.0}
                intent_ranking = []

        message.set("intent", intent, add_to_output=True)
        message.set("intent_ranking", intent_ranking, add_to_output=True)
# --- Exemplo n.º 2 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Featurize the message's tokens with MITIE and store the result."""

        extractor = self._mitie_feature_extractor(**kwargs)
        token_features = self.features_for_tokens(
            message.get("tokens"), extractor)
        combined = self._combine_with_existing_text_features(
            message, token_features)
        message.set("text_features", combined)
# --- Exemplo n.º 3 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Extract entities from the message and append them to `entities`."""

        self._check_spacy_doc(message)

        new_entities = self.add_extractor_name(self.extract_entities(message))
        existing = message.get("entities", [])
        message.set("entities", existing + new_entities, add_to_output=True)
# --- Exemplo n.º 4 ---
 def process(self, message: Message, **kwargs: Any) -> None:
     """Run NER on a freshly parsed doc and append the found entities."""
     # Re-parse the raw text instead of reusing the message's `spacy_doc`:
     # that doc was built from lower-cased tokens, which is bad for NER.
     spacy_nlp = kwargs.get("spacy_nlp", None)
     doc = spacy_nlp(message.text)
     found = self.add_extractor_name(self.extract_entities(doc))
     previous = message.get("entities", [])
     message.set("entities", previous + found, add_to_output=True)
# --- Exemplo n.º 5 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Vectorize the message text and store it as `text_features`."""
        if self.vect is None:
            # Nothing to do without a fitted vectorizer; log and bail out.
            logger.error("There is no trained CountVectorizer: "
                         "component is either not trained or "
                         "didn't receive enough training data")
            return

        text = self._get_message_text(message)
        bag_of_words = self.vect.transform([text]).toarray().squeeze()
        combined = self._combine_with_existing_text_features(
            message, bag_of_words)
        message.set("text_features", combined)
    def process(self, message: Message, **kwargs: Any) -> None:
        """Extract entities with MITIE and append them to the message.

        Raises:
            Exception: if no MITIE feature extractor was passed via kwargs.
        """

        mitie_feature_extractor = kwargs.get("mitie_feature_extractor")
        if not mitie_feature_extractor:
            # Bug fix: the original message was copy-pasted from a training
            # error of 'intent_featurizer_mitie', but this failure happens
            # at prediction time while extracting entities.
            raise Exception("Could not extract entities: missing a proper "
                            "MITIE feature extractor. Make sure the MITIE "
                            "featurizer component provides it.")

        ents = self.extract_entities(message.text, message.get("tokens"),
                                     mitie_feature_extractor)
        extracted = self.add_extractor_name(ents)
        message.set("entities",
                    message.get("entities", []) + extracted,
                    add_to_output=True)
# --- Exemplo n.º 7 ---
    def _parse_training_example(self, example):
        """Extract entities and synonyms, and convert to plain text."""
        from spawn_ai.training_data import Message

        entities = self._find_entities_in_training_example(example)
        # Strip the entity markup, keeping only the annotated surface text.
        plain_text = re.sub(
            ent_regex,
            lambda match: match.groupdict()['entity_text'],
            example)
        self._add_synonyms(plain_text, entities)

        message = Message(plain_text, {'intent': self.current_title})
        if entities:
            message.set('entities', entities)
        return message
    def process(self, message: Message, **kwargs: Any) -> None:
        """Query Duckling over HTTP and attach the extracted entities."""

        if self._url() is None:
            extracted = []
            logger.warning("Duckling HTTP component in pipeline, but no "
                           "`url` configuration in the config "
                           "file nor is `RASA_DUCKLING_HTTP_URL` "
                           "set as an environment variable.")
        else:
            ref_time = self._reference_time_from_message(message)
            matches = self._duckling_parse(message.text, ref_time)
            dims = self.component_config["dimensions"]
            relevant = filter_irrelevant_matches(matches, dims)
            extracted = convert_duckling_format_to_rasa(relevant)

        extracted = self.add_extractor_name(extracted)
        message.set("entities",
                    message.get("entities", []) + extracted,
                    add_to_output=True)
    def process(self, message: Message, **kwargs: Any) -> None:
        """Classify the message's intent with the trained MITIE classifier.

        Raises:
            Exception: if no MITIE feature extractor was passed via kwargs.
        """

        mitie_feature_extractor = kwargs.get("mitie_feature_extractor")
        if not mitie_feature_extractor:
            # Bug fix: the original message was copy-pasted from a training
            # error of 'intent_featurizer_mitie', but this failure happens
            # at prediction time in the intent classifier.
            raise Exception("Could not classify intent: missing a proper "
                            "MITIE feature extractor. Make sure the MITIE "
                            "featurizer component provides it.")

        if self.clf:
            token_strs = self._tokens_of_message(message)
            intent, confidence = self.clf(token_strs, mitie_feature_extractor)
        else:
            # either the model didn't get trained or it wasn't
            # provided with any data
            intent = None
            confidence = 0.0

        message.set("intent",
                    {"name": intent, "confidence": confidence},
                    add_to_output=True)
# --- Exemplo n.º 10 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Tokenize the message's spaCy doc and store the tokens."""
        doc = message.get("spacy_doc")
        message.set("tokens", self.tokenize(doc))
# --- Exemplo n.º 11 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Replace synonyms in a copy of the message's entities."""
        # Work on a shallow copy so the original entity list stays intact.
        entities = list(message.get("entities", []))
        self.replace_synonyms(entities)
        message.set("entities", entities, add_to_output=True)
# --- Exemplo n.º 12 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Set the parsed intent on the message with full confidence."""
        predicted = self.parse(message.text)
        message.set("intent",
                    {"name": predicted, "confidence": 1.0},
                    add_to_output=True)
# --- Exemplo n.º 13 ---
 def process(self, message: Message, **kwargs: Any) -> None:
     """Tokenize the raw message text and store the tokens."""
     tokens = self.tokenize(message.text)
     message.set("tokens", tokens)
# --- Exemplo n.º 14 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Augment the message's text features with regex-match features."""
        features = self._text_features_with_regex(message)
        message.set("text_features", features)
# --- Exemplo n.º 15 ---
    def process(self, message: Message, **kwargs: Any):
        """Augment the message's text features with the best n-grams."""
        features = self._text_features_with_ngrams(
            message, self.best_num_ngrams)
        message.set("text_features", features)
# --- Exemplo n.º 16 ---
    def process(self, message: Message, **kwargs: Any) -> None:
        """Parse the message text with spaCy and store the resulting doc."""
        doc = self.doc_for_text(message.text)
        message.set("spacy_doc", doc)