Exemplo n.º 1
0
    def _set_lm_features(self,
                         message: Message,
                         attribute: Text = TEXT) -> None:
        """Copy the precomputed features for ``attribute`` onto ``message``.

        The doc returned by ``self._get_doc`` supplies both the sequence and
        the sentence features; each is wrapped in its own ``Features`` object
        and added to the message. Nothing is added when no doc is found.
        """
        lm_doc = self._get_doc(message, attribute)
        if lm_doc is None:
            return

        seq_matrix = lm_doc[SEQUENCE_FEATURES]
        sent_matrix = lm_doc[SENTENCE_FEATURES]

        # Sequence features are added first, then sentence features.
        for feature_type, matrix in (
            (FEATURE_TYPE_SEQUENCE, seq_matrix),
            (FEATURE_TYPE_SENTENCE, sent_matrix),
        ):
            message.add_features(
                Features(
                    matrix,
                    feature_type,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
Exemplo n.º 2
0
    def _set_spacy_features(self,
                            message: Message,
                            attribute: Text = TEXT) -> None:
        """Add spaCy-derived sequence and sentence features to ``message``.

        Returns without adding anything when ``self.get_doc`` yields no doc
        for ``attribute`` or when the doc's vocabulary reports a vector
        length of zero (an empty spaCy model).
        """
        spacy_doc = self.get_doc(message, attribute)
        if spacy_doc is None:
            return

        # An empty spaCy model carries no vectors, so there is nothing to add.
        if spacy_doc.vocab.vectors_length == 0:
            logger.debug(
                "No features present. You are using an empty spaCy model.")
            return

        token_vectors = self._features_for_doc(spacy_doc)
        # Sentence features are derived from the per-token features using the
        # configured pooling operation.
        pooled_vector = self._calculate_sentence_features(
            token_vectors, self.pooling_operation)

        for feature_type, matrix in (
            (FEATURE_TYPE_SEQUENCE, token_vectors),
            (FEATURE_TYPE_SENTENCE, pooled_vector),
        ):
            message.add_features(
                Features(
                    matrix,
                    feature_type,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
Exemplo n.º 3
0
    def process(self, message: Message, **kwargs: Any) -> None:
        """Compute MITIE features for ``message`` and add them under ``TEXT``.

        The feature extractor is resolved from ``kwargs`` via
        ``self._mitie_feature_extractor``; tokens are taken from the message
        with ``train_utils.tokens_without_cls``.
        """
        extractor = self._mitie_feature_extractor(**kwargs)
        message_tokens = train_utils.tokens_without_cls(message)
        token_features = self.features_for_tokens(message_tokens, extractor)

        message.add_features(
            Features(
                token_features,
                TEXT,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
Exemplo n.º 4
0
    def process(
        self, message: Message, *, tf_hub_module: Any = None, **kwargs: Any
    ) -> None:
        """Featurize a single ``message`` and add the result under ``TEXT``.

        The message is passed to ``self._compute_features`` as a one-element
        batch together with ``tf_hub_module``; the first entry of the result
        is the one added to the message.
        """
        batch_features = self._compute_features([message], tf_hub_module)
        message_features = batch_features[0]

        message.add_features(
            Features(
                message_features,
                TEXT,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
Exemplo n.º 5
0
    def _text_features_with_regex(self, message: Message,
                                  attribute: Text) -> None:
        """Add pattern-based features for ``attribute`` to ``message``.

        Does nothing when ``self.known_patterns`` is empty/falsy or when
        ``self._features_for_patterns`` returns ``None``.
        """
        if not self.known_patterns:
            return

        pattern_features = self._features_for_patterns(message, attribute)
        if pattern_features is None:
            return

        message.add_features(
            Features(
                pattern_features,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
Exemplo n.º 6
0
    def process_training_example(self, example: Message, attribute: Text,
                                 mitie_feature_extractor: Any):
        """Featurize one training ``example`` for ``attribute``.

        Tokens come from ``train_utils.tokens_without_cls``; when that returns
        ``None`` the example is left untouched. Otherwise the features built
        by ``self.features_for_tokens`` are added to the example.
        """
        example_tokens = train_utils.tokens_without_cls(example, attribute)
        if example_tokens is None:
            return

        token_features = self.features_for_tokens(example_tokens,
                                                  mitie_feature_extractor)
        example.add_features(
            Features(
                token_features,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
    def _create_sparse_features(self, message: Message) -> None:
        """Build sparse features for ``message`` and add them under ``TEXT``.

        The message tokens are turned into features by
        ``self._tokens_to_features``, one-hot encoded by
        ``self._features_to_one_hot``, and stored as a SciPy COO matrix.
        """
        from scipy.sparse import coo_matrix

        all_tokens = message.get(TOKENS_NAMES[TEXT])
        # The final token is the CLS token and is excluded from featurization.
        text_tokens = all_tokens[:-1]

        one_hot = self._features_to_one_hot(
            self._tokens_to_features(text_tokens)
        )
        sparse_matrix = coo_matrix(one_hot)

        message.add_features(
            Features(
                sparse_matrix,
                TEXT,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
Exemplo n.º 8
0
    def process(self, message: Message, **kwargs: Any) -> None:
        """Featurize the text of an incoming ``message``.

        Logs an error and returns when ``self.vectorizers`` is ``None``.
        Otherwise the processed tokens for ``TEXT`` are run through
        ``self._create_sequence`` and, if the first result is not ``None``,
        it is added to the message.
        """
        if self.vectorizers is None:
            logger.error("There is no trained CountVectorizer: "
                         "component is either not trained or "
                         "didn't receive enough training data")
            return

        text_attribute = TEXT
        processed_tokens = self._get_processed_message_tokens_by_attribute(
            message, text_attribute)

        # A one-element batch goes in, so only the first entry of the result
        # belongs to this message.
        sequences = self._create_sequence(text_attribute, [processed_tokens])
        if sequences[0] is None:
            return

        message.add_features(
            Features(
                sequences[0],
                text_attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
Exemplo n.º 9
0
    def _set_features(
        self,
        message: Message,
        sequence_features: np.ndarray,
        sentence_features: np.ndarray,
        attribute: Text,
    ):
        """Add the given sequence and sentence features to ``message``.

        Each array is wrapped in a ``Features`` object tagged with its feature
        type, ``attribute`` and this component's featurizer alias. The
        sequence features are added before the sentence features.
        """
        typed_matrices = (
            (FEATURE_TYPE_SEQUENCE, sequence_features),
            (FEATURE_TYPE_SENTENCE, sentence_features),
        )
        for feature_type, matrix in typed_matrices:
            message.add_features(
                Features(
                    matrix,
                    feature_type,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
Exemplo n.º 10
0
    def _text_features_with_regex(self, message: Message, attribute: Text) -> None:
        """Add pattern-based sequence and sentence features to ``message``.

        Does nothing when ``self.known_patterns`` is empty/falsy. Otherwise
        ``self._features_for_patterns`` supplies a (sequence, sentence) pair;
        each member that is not ``None`` is added to the message.
        """
        if not self.known_patterns:
            return

        seq_matrix, sent_matrix = self._features_for_patterns(message, attribute)

        for feature_type, matrix in (
            (FEATURE_TYPE_SEQUENCE, seq_matrix),
            (FEATURE_TYPE_SENTENCE, sent_matrix),
        ):
            # Either member may be missing independently of the other.
            if matrix is None:
                continue
            message.add_features(
                Features(
                    matrix,
                    feature_type,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
Exemplo n.º 11
0
    def _set_spacy_features(self,
                            message: Message,
                            attribute: Text = TEXT) -> None:
        """Add spaCy-derived features, plus a CLS vector, to ``message``.

        Returns without adding anything when ``self.get_doc`` yields no doc
        for ``attribute`` or when the doc's vocabulary reports a vector
        length of zero (an empty spaCy model).
        """
        spacy_doc = self.get_doc(message, attribute)
        if spacy_doc is None:
            return

        # An empty spaCy model carries no vectors, so there is nothing to add.
        if spacy_doc.vocab.vectors_length == 0:
            logger.debug(
                "No features present. You are using an empty spaCy model.")
            return

        token_vectors = self._features_for_doc(spacy_doc)
        cls_vector = self._calculate_cls_vector(token_vectors,
                                                self.pooling_operation)
        # The CLS vector is appended after the per-token features
        # (np.concatenate joins along the first axis by default).
        combined = np.concatenate([token_vectors, cls_vector])

        message.add_features(
            Features(
                combined,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )