Exemplo n.º 1
0
    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Trains the component on patterns extracted from the training data.

        The patterns are obtained through `pattern_utils.extract_patterns`, using
        the `use_lookup_tables`, `use_regexes` and `use_word_boundaries` entries
        of the component config. In finetune mode they are merged into the
        already known patterns, otherwise they replace `self.known_patterns`.
        Afterwards `_text_features_with_regex` is run for every training example
        on the `TEXT`, `RESPONSE` and `ACTION_TEXT` attributes.

        Args:
            training_data: Data the patterns are extracted from and whose
                training examples are processed.
            config: NLU pipeline config; not used by this method.
            **kwargs: Any other arguments; not used by this method.
        """
        options = self.component_config
        extracted = pattern_utils.extract_patterns(
            training_data,
            use_lookup_tables=options["use_lookup_tables"],
            use_regexes=options["use_regexes"],
            use_word_boundaries=options["use_word_boundaries"],
        )

        if not self.finetune_mode:
            self.known_patterns = extracted
        else:
            # Keep what is already known and add the newly extracted patterns.
            self._merge_new_patterns(extracted)

        attributes = (TEXT, RESPONSE, ACTION_TEXT)
        for message in training_data.training_examples:
            for attribute in attributes:
                self._text_features_with_regex(message, attribute)
Exemplo n.º 2
0
def test_extract_patterns(
    lookup_tables: Dict[Text, List[Text]],
    regex_features: Dict[Text, Text],
    expected_patterns: Dict[Text, Text],
):
    """Checks the patterns extracted with the default extraction options.

    Each truthy input is wrapped in a single-element list and attached to a
    fresh `TrainingData` before `extract_patterns` is called.
    """
    data = TrainingData()
    if lookup_tables:
        data.lookup_tables = [lookup_tables]
    if regex_features:
        data.regex_features = [regex_features]

    extracted = pattern_utils.extract_patterns(data)

    assert extracted == expected_patterns
Exemplo n.º 3
0
def test_regex_validation(
    lookup_tables: Dict[Text, List[Text]],
    regex_features: Dict[Text, Text],
    use_lookup_tables: bool,
    use_regex_features: bool,
):
    """Checks that extracting invalid regex patterns raises a descriptive error."""
    data = TrainingData()
    if lookup_tables:
        data.lookup_tables = [lookup_tables]
    if regex_features:
        data.regex_features = [regex_features]

    with pytest.raises(Exception) as raised:
        pattern_utils.extract_patterns(
            data,
            use_lookup_tables=use_lookup_tables,
            use_regexes=use_regex_features,
        )

    # All three fragments must be present in the raised error's message.
    error_text = str(raised.value)
    for expected_fragment in (
        "Model training failed.",
        "not a valid regex.",
        "Please update your nlu training data configuration",
    ):
        assert expected_fragment in error_text
Exemplo n.º 4
0
    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Extracts the known patterns and processes every training example.

        The result of `pattern_utils.extract_patterns` is stored in
        `self.known_patterns`; afterwards `_text_features_with_regex` is run for
        every training example on the `TEXT` and `RESPONSE` attributes.

        Args:
            training_data: Data the patterns are extracted from and whose
                training examples are processed.
            config: NLU pipeline config; not used by this method.
            **kwargs: Any other arguments; not used by this method.
        """
        options = self.component_config
        self.known_patterns = pattern_utils.extract_patterns(
            training_data,
            use_lookup_tables=options["use_lookup_tables"],
            use_regexes=options["use_regexes"],
        )

        attributes = (TEXT, RESPONSE)
        for message in training_data.training_examples:
            for attribute in attributes:
                self._text_features_with_regex(message, attribute)
Exemplo n.º 5
0
    def train(self, training_data: TrainingData) -> Resource:
        """Trains the component on patterns extracted from the training data.

        In finetune mode the extracted patterns are merged into the already
        known ones; otherwise they replace `self.known_patterns`. The component
        is then persisted via `_persist`.

        Args:
            training_data: Data the patterns are extracted from.

        Returns:
            The component's `_resource`.
        """
        settings = self._config
        extracted = pattern_utils.extract_patterns(
            training_data,
            use_lookup_tables=settings["use_lookup_tables"],
            use_regexes=settings["use_regexes"],
            use_word_boundaries=settings["use_word_boundaries"],
        )

        if not self.finetune_mode:
            self.known_patterns = extracted
        else:
            # Keep what is already known and add the newly extracted patterns.
            self._merge_new_patterns(extracted)

        self._persist()
        return self._resource
Exemplo n.º 6
0
    def train(
        self,
        training_data: TrainingData,
        config: Optional[RasaNLUModelConfig] = None,
        **kwargs: Any,
    ) -> None:
        """Extracts entity-only patterns and warns when none were found.

        Patterns are extracted with `use_only_entities=True` and stored in
        `self.patterns`.

        Args:
            training_data: Data the patterns are extracted from.
            config: NLU pipeline config; not used by this method.
            **kwargs: Any other arguments; not used by this method.
        """
        options = self.component_config
        self.patterns = pattern_utils.extract_patterns(
            training_data,
            use_lookup_tables=options["use_lookup_tables"],
            use_regexes=options["use_regexes"],
            use_only_entities=True,
        )

        if self.patterns:
            return

        # Nothing was extracted, so tell the user what is missing.
        rasa.shared.utils.io.raise_warning(
            "No lookup tables or regexes defined in the training data that have "
            "a name equal to any entity in the training data. In order for this "
            "component to work you need to define valid lookup tables or regexes "
            "in the training data."
        )
Exemplo n.º 7
0
def test_extract_patterns_use_only_entities_lookup_tables(
    entity: Text,
    lookup_tables: Dict[Text, Text],
    expected_patterns: Dict[Text, Text],
):
    """Checks lookup-table extraction when `use_only_entities=True`.

    When `entity` is truthy, a single training example annotated with that
    entity is added to the training data before extraction.
    """
    data = TrainingData()
    if entity:
        annotation = {"entity": entity, "value": "text"}
        example = Message("text", data={"entities": [annotation]})
        data.training_examples = [example]
    if lookup_tables:
        data.lookup_tables = [lookup_tables]

    extracted = pattern_utils.extract_patterns(data, use_only_entities=True)

    assert extracted == expected_patterns
Exemplo n.º 8
0
def test_extract_patterns_use_only_lookup_tables_or_regex_features(
    lookup_tables: Dict[Text, List[Text]],
    regex_features: Dict[Text, Text],
    use_lookup_tables: bool,
    use_regex_features: bool,
    expected_patterns: Dict[Text, Text],
):
    """Checks extraction with the lookup-table and regex switches set explicitly."""
    data = TrainingData()
    if lookup_tables:
        data.lookup_tables = [lookup_tables]
    if regex_features:
        data.regex_features = [regex_features]

    extracted = pattern_utils.extract_patterns(
        data,
        use_lookup_tables=use_lookup_tables,
        use_regexes=use_regex_features,
    )

    assert extracted == expected_patterns
Exemplo n.º 9
0
def test_extract_patterns_use_only_entities_regexes(
    entity: Text,
    regex_features: Dict[Text, Text],
    expected_patterns: Dict[Text, Text],
):
    """Checks regex-feature extraction when `use_only_entities=True`.

    When `entity` is truthy, a single training example annotated with that
    entity is added to the training data before extraction.
    """
    data = TrainingData()
    if entity:
        annotation = {"entity": entity, "value": "text"}
        example = Message(data={"text": "text", "entities": [annotation]})
        data.training_examples = [example]
    if regex_features:
        data.regex_features = [regex_features]

    extracted = pattern_utils.extract_patterns(data, use_only_entities=True)

    assert extracted == expected_patterns
Exemplo n.º 10
0
    def train(self, training_data: TrainingData) -> Resource:
        """Extracts entity-only patterns from the training data and persists.

        Patterns are extracted with `use_only_entities=True` and stored in
        `self.patterns`. A warning is raised when nothing was extracted; the
        component is persisted either way.

        Args:
            training_data: Data the patterns are extracted from.

        Returns:
            The component's `_resource`.
        """
        settings = self._config
        self.patterns = pattern_utils.extract_patterns(
            training_data,
            use_lookup_tables=settings["use_lookup_tables"],
            use_regexes=settings["use_regexes"],
            use_only_entities=True,
            use_word_boundaries=settings["use_word_boundaries"],
        )

        nothing_extracted = not self.patterns
        if nothing_extracted:
            rasa.shared.utils.io.raise_warning(
                "No lookup tables or regexes defined in the training data that have "
                "a name equal to any entity in the training data. In order for this "
                "component to work you need to define valid lookup tables or regexes "
                "in the training data."
            )

        self.persist()
        return self._resource