示例#1
0
    def _set_lm_features(
        self, message: Message, attribute: Text = TEXT
    ) -> None:
        """Attach the sequence and sentence features held by the doc to the message.

        The doc is fetched through ``self._get_doc``; when it is ``None`` the
        message is left untouched.

        Args:
            message: The message that receives the features.
            attribute: The message attribute the features belong to.
        """
        doc = self._get_doc(message, attribute)
        if doc is None:
            return

        # Both lookups happen before anything is added to the message.
        typed_features = (
            (doc[SEQUENCE_FEATURES], FEATURE_TYPE_SEQUENCE),
            (doc[SENTENCE_FEATURES], FEATURE_TYPE_SENTENCE),
        )
        origin = self.component_config[FEATURIZER_CLASS_ALIAS]

        # Sequence features are added first, sentence features second.
        for matrix, feature_type in typed_features:
            message.add_features(Features(matrix, feature_type, attribute, origin))
    def _set_attribute_features(
        self,
        attribute: Text,
        sequence_features: List[scipy.sparse.spmatrix],
        sentence_features: List[scipy.sparse.spmatrix],
        examples: List[Message],
    ) -> None:
        """Attach the computed features of an attribute to each example.

        Args:
            attribute: The message attribute the features belong to.
            sequence_features: Sequence features, indexed like ``examples``;
                ``None`` entries are skipped.
            sentence_features: Sentence features, indexed like ``examples``;
                ``None`` entries are skipped.
            examples: The messages that receive the features.
        """
        typed_sources = (
            (sequence_features, FEATURE_TYPE_SEQUENCE),
            (sentence_features, FEATURE_TYPE_SENTENCE),
        )

        for index, example in enumerate(examples):
            # Sequence features are handled before sentence features.
            for source, feature_type in typed_sources:
                matrix = source[index]
                if matrix is None:
                    continue

                example.add_features(
                    Features(
                        matrix,
                        feature_type,
                        attribute,
                        self.component_config[FEATURIZER_CLASS_ALIAS],
                    )
                )
示例#3
0
    def _set_spacy_features(
        self, message: Message, attribute: Text = TEXT
    ) -> None:
        """Attach sequence and sentence features derived from the spaCy doc.

        Nothing is added when ``self.get_doc`` yields ``None`` or when the
        doc's vocabulary reports a vector length of zero.

        Args:
            message: The message that receives the features.
            attribute: The message attribute the features belong to.
        """
        doc = self.get_doc(message, attribute)
        if doc is None:
            return

        has_vectors = doc.vocab.vectors_length != 0
        if not has_vectors:
            # An empty spaCy model carries no vectors to turn into features.
            logger.debug(
                "No features present. You are using an empty spaCy model.")
            return

        per_token = self._features_for_doc(doc)
        # Sentence features are pooled from the sequence features.
        pooled = self._calculate_sentence_features(per_token, self.pooling_operation)
        origin = self.component_config[FEATURIZER_CLASS_ALIAS]

        for matrix, feature_type in (
            (per_token, FEATURE_TYPE_SEQUENCE),
            (pooled, FEATURE_TYPE_SENTENCE),
        ):
            message.add_features(Features(matrix, feature_type, attribute, origin))
示例#4
0
def test_combine_with_existing_dense_features_shape_mismatch():
    """Combining a 2-row dense matrix with a 1-row one must raise ValueError."""
    stored_matrix = np.array([[1, 0, 2, 3], [2, 0, 0, 1]])
    stored = Features(stored_matrix, FEATURE_TYPE_SEQUENCE, TEXT, "test")
    incoming = np.array([[0, 1]])

    with pytest.raises(ValueError):
        stored.combine_with_features(incoming)
示例#5
0
def test_combine_with_existing_dense_features():
    """Combining dense features yields the column-wise joined matrix."""
    stored = Features(
        np.array([[1, 0, 2, 3], [2, 0, 0, 1]]),
        TEXT,
        "test",
    )
    incoming = np.array([[1, 0], [0, 1]])
    # Each row of the stored matrix is followed by the matching incoming row.
    wanted = np.array([[1, 0, 2, 3, 1, 0], [2, 0, 0, 1, 0, 1]])

    combined = stored.combine_with_features(incoming)

    assert np.all(wanted == combined)
示例#6
0
def test_combine_with_existing_sparse_features_shape_mismatch():
    """Combining a 2-row sparse matrix with a 1-row one must raise ValueError."""
    stored_matrix = scipy.sparse.csr_matrix([[1, 0, 2, 3], [2, 0, 0, 1]])
    stored = Features(stored_matrix, TEXT, "test")
    incoming = scipy.sparse.csr_matrix([[0, 1]])

    with pytest.raises(ValueError):
        stored.combine_with_features(incoming)
示例#7
0
def test_combine_with_existing_sparse_features():
    """Combining sparse features yields the column-wise joined matrix."""
    stored_matrix = scipy.sparse.csr_matrix([[1, 0, 2, 3], [2, 0, 0, 1]])
    stored = Features(stored_matrix, TEXT, "test")
    incoming = scipy.sparse.csr_matrix([[1, 0], [0, 1]])
    # Each row of the stored matrix is followed by the matching incoming row.
    wanted = [[1, 0, 2, 3, 1, 0], [2, 0, 0, 1, 0, 1]]

    # Densify the combined result so it can be compared element-wise.
    combined = stored.combine_with_features(incoming).toarray()

    assert np.all(wanted == combined)
示例#8
0
    def process(self, message: Message, **kwargs: Any) -> None:
        """Compute features for the message's tokens and attach them as TEXT features.

        Args:
            message: The message to featurize; it receives the new features.
            **kwargs: Forwarded to ``self._mitie_feature_extractor``.
        """
        extractor = self._mitie_feature_extractor(**kwargs)
        # Tokens are fetched via ``tokens_without_cls`` before featurizing.
        message_tokens = train_utils.tokens_without_cls(message)
        token_features = self.features_for_tokens(message_tokens, extractor)

        message.add_features(
            Features(
                token_features,
                TEXT,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
示例#9
0
    def process(
        self, message: Message, *, tf_hub_module: Any = None, **kwargs: Any
    ) -> None:
        """Compute features for a single message and attach them as TEXT features.

        Args:
            message: The message to featurize; it receives the new features.
            tf_hub_module: Passed through to ``self._compute_features``.
            **kwargs: Accepted but not used by this method.
        """
        # The message is featurized as a batch of one; keep the only result.
        batch_result = self._compute_features([message], tf_hub_module)
        message_features = batch_result[0]

        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        message.add_features(Features(message_features, TEXT, origin))
示例#10
0
    def _text_features_with_regex(self, message: Message,
                                  attribute: Text) -> None:
        """Attach pattern-based features for the attribute to the message.

        Does nothing when ``self.known_patterns`` is empty or when
        ``self._features_for_patterns`` returns ``None``.

        Args:
            message: The message that receives the features.
            attribute: The message attribute to featurize.
        """
        if not self.known_patterns:
            return

        pattern_features = self._features_for_patterns(message, attribute)
        if pattern_features is None:
            return

        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        message.add_features(Features(pattern_features, attribute, origin))
示例#11
0
    def process_training_example(self, example: Message, attribute: Text,
                                 mitie_feature_extractor: Any):
        """Featurize one attribute of a training example and attach the result.

        The example is left untouched when no tokens are available for the
        attribute.

        Args:
            example: The training example that receives the features.
            attribute: The attribute of the example to featurize.
            mitie_feature_extractor: Passed on to ``self.features_for_tokens``.
        """
        attribute_tokens = train_utils.tokens_without_cls(example, attribute)
        if attribute_tokens is None:
            return

        token_features = self.features_for_tokens(
            attribute_tokens, mitie_feature_extractor
        )
        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        example.add_features(Features(token_features, attribute, origin))
 def _set_attribute_features(self, attribute: Text,
                             attribute_features: List,
                             training_data: TrainingData) -> None:
     """Attach the computed features of an attribute to each training example.

     Args:
         attribute: The message attribute the features belong to.
         attribute_features: Features indexed like the training examples;
             ``None`` entries are skipped.
         training_data: Provides the ``training_examples`` to update.
     """
     for position, example in enumerate(training_data.training_examples):
         example_features = attribute_features[position]
         if example_features is None:
             # No features were computed for this example.
             continue

         example.add_features(
             Features(
                 example_features,
                 attribute,
                 self.component_config[FEATURIZER_CLASS_ALIAS],
             )
         )
示例#13
0
    def _set_features(
        self,
        message: Message,
        sequence_features: np.ndarray,
        sentence_features: np.ndarray,
        attribute: Text,
    ):
        """Attach the given sequence and sentence features to the message.

        Args:
            message: The message that receives the features.
            sequence_features: Stored with type ``FEATURE_TYPE_SEQUENCE``.
            sentence_features: Stored with type ``FEATURE_TYPE_SENTENCE``.
            attribute: The message attribute the features belong to.
        """
        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        typed_features = (
            (sequence_features, FEATURE_TYPE_SEQUENCE),
            (sentence_features, FEATURE_TYPE_SENTENCE),
        )

        # Sequence features are added first, sentence features second.
        for matrix, feature_type in typed_features:
            message.add_features(Features(matrix, feature_type, attribute, origin))
示例#14
0
    def _text_features_with_regex(self, message: Message, attribute: Text) -> None:
        """Attach pattern-based sequence and sentence features to the message.

        Does nothing when ``self.known_patterns`` is empty. Each of the two
        feature sets is only added when it is not ``None``.

        Args:
            message: The message that receives the features.
            attribute: The message attribute to featurize.
        """
        if not self.known_patterns:
            return

        per_token, per_sentence = self._features_for_patterns(message, attribute)

        # Sequence features are handled before sentence features.
        for matrix, feature_type in (
            (per_token, FEATURE_TYPE_SEQUENCE),
            (per_sentence, FEATURE_TYPE_SENTENCE),
        ):
            if matrix is None:
                continue

            message.add_features(
                Features(
                    matrix,
                    feature_type,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
示例#15
0
    def _set_features(
        self,
        examples: List[Message],
        sequence_features: np.ndarray,
        sentence_features: np.ndarray,
        attribute: Text,
    ) -> None:
        """Attach per-example sequence and sentence features to the examples.

        Args:
            examples: The messages that receive the features.
            sequence_features: Indexed by example position; stored with type
                ``FEATURE_TYPE_SEQUENCE``.
            sentence_features: Indexed by example position; stored with type
                ``FEATURE_TYPE_SENTENCE``.
            attribute: The message attribute the features belong to.
        """
        for position, current in enumerate(examples):
            # Sequence features first ...
            current.add_features(
                Features(
                    sequence_features[position],
                    FEATURE_TYPE_SEQUENCE,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
            # ... then the sentence features of the same example.
            current.add_features(
                Features(
                    sentence_features[position],
                    FEATURE_TYPE_SENTENCE,
                    attribute,
                    self.component_config[FEATURIZER_CLASS_ALIAS],
                )
            )
示例#16
0
    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        *,
        tf_hub_module: Any = None,
        **kwargs: Any,
    ) -> None:
        """Featurize the training examples in batches and attach the features.

        A warning is raised when a config is given whose language is not
        ``"en"``. For every attribute in ``DENSE_FEATURIZABLE_ATTRIBUTES`` the
        examples with a truthy value for that attribute are processed in
        batches of 64.

        Args:
            training_data: Provides the ``training_examples`` to featurize.
            config: Optional model config; only its ``language`` is read.
            tf_hub_module: Passed through to ``self._compute_features``.
            **kwargs: Accepted but not used by this method.
        """
        language_mismatch = config is not None and config.language != "en"
        if language_mismatch:
            common_utils.raise_warning(
                f"Since ``ConveRT`` model is trained only on an english "
                f"corpus of conversations, this featurizer should only be "
                f"used if your training data is in english language. "
                f"However, you are training in '{config.language}'. ",
                docs=DOCS_URL_COMPONENTS + "#convertfeaturizer",
            )

        batch_size = 64

        for attribute in DENSE_FEATURIZABLE_ATTRIBUTES:
            # Only examples that actually carry this attribute are featurized.
            candidates = [
                candidate
                for candidate in training_data.training_examples
                if candidate.get(attribute)
            ]
            total = len(candidates)

            batch_starts = tqdm(
                range(0, total, batch_size),
                desc=attribute.capitalize() + " batches",
            )
            for start in batch_starts:
                # The last batch may be shorter than ``batch_size``.
                stop = min(start + batch_size, total)
                chunk = candidates[start:stop]

                chunk_features = self._compute_features(
                    chunk, tf_hub_module, attribute
                )

                for offset, member in enumerate(chunk):
                    member.add_features(
                        Features(
                            chunk_features[offset],
                            attribute,
                            self.component_config[FEATURIZER_CLASS_ALIAS],
                        )
                    )
    def _create_sparse_features(self, message: Message) -> None:
        """Build sparse TEXT features from the message's tokens and attach them.

        Args:
            message: The message whose tokens are featurized; it receives the
                resulting features.
        """
        import scipy.sparse

        all_tokens = message.get(TOKENS_NAMES[TEXT])
        # Drop the last token (the CLS token) before featurizing.
        tokens = all_tokens[:-1]

        one_hot = self._features_to_one_hot(self._tokens_to_features(tokens))
        as_sparse = scipy.sparse.coo_matrix(one_hot)

        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        message.add_features(Features(as_sparse, TEXT, origin))
    def process(self, message: Message, **kwargs: Any) -> None:
        """Compute TEXT features for an incoming message and attach them.

        Logs an error and returns without changes when ``self.vectorizers`` is
        ``None``. Features are only added when the created sequence entry is
        not ``None``.

        Args:
            message: The message to featurize; it receives the new features.
            **kwargs: Accepted but not used by this method.
        """
        if self.vectorizers is None:
            logger.error("There is no trained CountVectorizer: "
                         "component is either not trained or "
                         "didn't receive enough training data")
            return

        attribute = TEXT
        processed_tokens = self._get_processed_message_tokens_by_attribute(
            message, attribute)

        # The message is handled as a batch of one; keep the only entry.
        created = self._create_sequence(attribute, [processed_tokens])
        message_features = created[0]
        if message_features is None:
            return

        message.add_features(
            Features(
                message_features,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
        )
示例#19
0
    def _set_spacy_features(
        self, message: Message, attribute: Text = TEXT
    ) -> None:
        """Attach features derived from the spaCy doc, followed by a CLS vector.

        Nothing is added when ``self.get_doc`` yields ``None`` or when the
        doc's vocabulary reports a vector length of zero.

        Args:
            message: The message that receives the features.
            attribute: The message attribute the features belong to.
        """
        doc = self.get_doc(message, attribute)
        if doc is None:
            return

        has_vectors = doc.vocab.vectors_length != 0
        if not has_vectors:
            # An empty spaCy model carries no vectors to turn into features.
            logger.debug(
                "No features present. You are using an empty spaCy model.")
            return

        doc_features = self._features_for_doc(doc)
        cls_vector = self._calculate_cls_vector(doc_features, self.pooling_operation)
        # The CLS vector is placed after the doc features.
        with_cls = np.concatenate([doc_features, cls_vector])

        origin = self.component_config[FEATURIZER_CLASS_ALIAS]
        message.add_features(Features(with_cls, attribute, origin))
示例#20
0
    for i, o in enumerate(output):
        assert isinstance(o, np.ndarray)
        assert o[0][i] == 1
        assert o.shape == (1, len(label_features))


@pytest.mark.parametrize(
    "messages, expected",
    [
        (
            [
                Message(
                    "test a",
                    features=[
                        Features(np.zeros(1), FEATURE_TYPE_SEQUENCE, TEXT, "test"),
                        Features(np.zeros(1), FEATURE_TYPE_SENTENCE, TEXT, "test"),
                    ],
                ),
                Message(
                    "test b",
                    features=[
                        Features(np.zeros(1), FEATURE_TYPE_SEQUENCE, TEXT, "test"),
                        Features(np.zeros(1), FEATURE_TYPE_SENTENCE, TEXT, "test"),
                    ],
                ),
            ],
            True,
        ),
        (
            [
示例#21
0
from typing import Optional, Text, List

import pytest
import numpy as np
import scipy.sparse

from rasa.nlu.featurizers.featurizer import Features
from rasa.nlu.constants import TEXT
from rasa.nlu.training_data import Message


@pytest.mark.parametrize(
    "features, attribute, featurizers, expected_features",
    [
        (None, TEXT, [], None),
        ([Features(np.array([1, 1, 0]), TEXT, "test")], TEXT, [], [1, 1, 0]),
        (
            [
                Features(np.array([1, 1, 0]), TEXT, "c2"),
                Features(np.array([1, 2, 2]), TEXT, "c1"),
                Features(np.array([1, 2, 1]), TEXT, "c1"),
            ],
            TEXT,
            [],
            [1, 2, 1, 1, 2, 2, 1, 1, 0],
        ),
        (
            [
                Features(np.array([1, 1, 0]), TEXT, "c1"),
                Features(np.array([1, 2, 1]), TEXT, "test"),
                Features(np.array([1, 1, 1]), TEXT, "test"),
示例#22
0
import pytest
import numpy as np
import scipy.sparse

from rasa.nlu.featurizers.featurizer import Features
from rasa.nlu.constants import TEXT, FEATURE_TYPE_SEQUENCE, FEATURE_TYPE_SENTENCE
from rasa.nlu.training_data import Message


@pytest.mark.parametrize(
    "features, attribute, featurizers, expected_seq_features, expected_sen_features",
    [
        (None, TEXT, [], None, None),
        (
            [Features(np.array([1, 1, 0]), FEATURE_TYPE_SEQUENCE, TEXT, "test")],
            TEXT,
            [],
            [1, 1, 0],
            None,
        ),
        (
            [
                Features(np.array([1, 1, 0]), FEATURE_TYPE_SEQUENCE, TEXT, "c2"),
                Features(np.array([1, 2, 2]), FEATURE_TYPE_SENTENCE, TEXT, "c1"),
                Features(np.array([1, 2, 1]), FEATURE_TYPE_SEQUENCE, TEXT, "c1"),
            ],
            TEXT,
            [],
            [1, 2, 1, 1, 1, 0],
            [1, 2, 2],