示例#1
0
def validate_required_components_from_data(
    pipeline: List["Component"], data: TrainingData
) -> None:
    """Warns about training data that no component in the pipeline will consume.

    Each kind of training data (response examples, entity examples, entity
    roles/groups, regexes, lookup tables, synonyms) is checked against the
    component names found in the pipeline. For every kind of data that is
    present without a matching component, a warning is issued through
    `rasa.shared.utils.io.raise_warning`. The function itself returns nothing.

    Args:
        pipeline: The list of the :class:`rasa.nlu.components.Component`s.
        data: The :class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
    """
    if data.response_examples and not any_components_in_pipeline(
        ["ResponseSelector"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with examples for training a response "
            "selector, but your NLU pipeline does not include a response selector "
            "component. To train a model on your response selector data, add a "
            "'ResponseSelector' to your pipeline."
        )

    if data.entity_examples and not any_components_in_pipeline(
        TRAINABLE_EXTRACTORS, pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of entity examples, but "
            "your NLU pipeline does not include an entity extractor trained on "
            "your training data. To extract non-pretrained entities, add one of "
            f"{TRAINABLE_EXTRACTORS} to your pipeline."
        )

    # Roles/groups are only checked when entity examples exist and neither of
    # the two named components is in the pipeline.
    if (
        data.entity_examples
        and not any_components_in_pipeline(
            {"DIETClassifier", "CRFEntityExtractor"}, pipeline
        )
        and data.entity_roles_groups_used()
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with entities that have roles/groups, "
            "but your NLU pipeline does not include a 'DIETClassifier' or a "
            "'CRFEntityExtractor'. To train entities that have roles/groups, "
            "add either 'DIETClassifier' or 'CRFEntityExtractor' to your "
            "pipeline."
        )

    if data.regex_features and not any_components_in_pipeline(
        ["RegexFeaturizer", "RegexEntityExtractor"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with regexes, but "
            "your NLU pipeline does not include a 'RegexFeaturizer' or a "
            "'RegexEntityExtractor'. To use regexes, include either a "
            "'RegexFeaturizer' or a 'RegexEntityExtractor' in your pipeline."
        )

    if data.lookup_tables and not any_components_in_pipeline(
        ["RegexFeaturizer", "RegexEntityExtractor"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of lookup tables, but "
            "your NLU pipeline does not include a 'RegexFeaturizer' or a "
            "'RegexEntityExtractor'. To use lookup tables, include either a "
            "'RegexFeaturizer' or a 'RegexEntityExtractor' in your pipeline."
        )

    if data.lookup_tables:
        if not any_components_in_pipeline(
            ["CRFEntityExtractor", "DIETClassifier"], pipeline
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data consisting of lookup tables, but "
                "your NLU pipeline does not include any components that use these "
                "features. To make use of lookup tables, add a 'DIETClassifier' or a "
                "'CRFEntityExtractor' with the 'pattern' feature to your pipeline."
            )
        elif any_components_in_pipeline(["CRFEntityExtractor"], pipeline):
            crf_components = [c for c in pipeline if c.name == "CRFEntityExtractor"]
            # It is enough for ANY of the CRFEntityExtractors to featurize
            # `pattern`. A plain loop that reassigns the flag would let the last
            # extractor overwrite a positive result from an earlier one.
            # Each `features` entry is a nested [[before], [word], [after]] list,
            # flattened here; a missing/None entry counts as "no features".
            has_pattern_feature = any(
                "pattern"
                in itertools.chain.from_iterable(
                    crf.component_config.get("features") or []
                )
                for crf in crf_components
            )

            if not has_pattern_feature:
                rasa.shared.utils.io.raise_warning(
                    "You have defined training data consisting of lookup tables, but "
                    "your NLU pipeline's 'CRFEntityExtractor' does not include the "
                    "'pattern' feature. To featurize lookup tables, add the 'pattern' "
                    "feature to the 'CRFEntityExtractor' in your pipeline."
                )

    if data.entity_synonyms and not any_components_in_pipeline(
        ["EntitySynonymMapper"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined synonyms in your training data, but "
            "your NLU pipeline does not include an 'EntitySynonymMapper'. "
            "To map synonyms, add an 'EntitySynonymMapper' to your pipeline."
        )
    def _warn_if_some_training_data_is_unused(
        self, training_data: TrainingData
    ) -> None:
        """Warns about training data that no configured component will consume.

        For example, if you specify response examples in your training data, but
        there is no `ResponseSelector` component in your configuration, then this
        method issues a warning. The same kind of check is made for entity
        examples, entity roles/groups, regexes, lookup tables and synonyms, each
        against the component types in `self._component_types`.

        Args:
            training_data: The training data for the NLU components.
        """
        if (
            training_data.response_examples
            and ResponseSelector not in self._component_types
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data with examples for training a response "
                "selector, but your NLU configuration does not include a response "
                "selector component. "
                "To train a model on your response selector data, add a "
                f"'{ResponseSelector.__name__}' to your configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

        if training_data.entity_examples and self._component_types.isdisjoint(
            TRAINABLE_EXTRACTORS
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data consisting of entity examples, but "
                "your NLU configuration does not include an entity extractor "
                "trained on your training data. "
                "To extract non-pretrained entities, add one of "
                f"{_types_to_str(TRAINABLE_EXTRACTORS)} to your configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

        # Roles/groups are only checked when entity examples exist and neither
        # of the two named component types is configured.
        if (
            training_data.entity_examples
            and self._component_types.isdisjoint({DIETClassifier, CRFEntityExtractor})
            and training_data.entity_roles_groups_used()
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data with entities that "
                "have roles/groups, but your NLU configuration does not "
                f"include a '{DIETClassifier.__name__}' "
                f"or a '{CRFEntityExtractor.__name__}'. "
                "To train entities that have roles/groups, "
                f"add either '{DIETClassifier.__name__}' "
                f"or '{CRFEntityExtractor.__name__}' to your "
                "configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

        if training_data.regex_features and self._component_types.isdisjoint(
            [RegexFeaturizer, RegexEntityExtractor]
        ):
            # The fragments are joined with exactly one space between words; the
            # class names come from `__name__` like in the other messages.
            rasa.shared.utils.io.raise_warning(
                "You have defined training data with regexes, but "
                "your NLU configuration does not include a "
                f"'{RegexFeaturizer.__name__}' or a "
                f"'{RegexEntityExtractor.__name__}'. "
                "To use regexes, include either a "
                f"'{RegexFeaturizer.__name__}' or a "
                f"'{RegexEntityExtractor.__name__}' "
                "in your configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

        if training_data.lookup_tables and self._component_types.isdisjoint(
            [RegexFeaturizer, RegexEntityExtractor]
        ):
            # Note the trailing space after "lookup table." so the two sentences
            # do not run together in the emitted warning.
            rasa.shared.utils.io.raise_warning(
                "You have defined training data consisting of lookup tables, but "
                "your NLU configuration does not include a featurizer "
                "or an entity extractor using the lookup table. "
                "To use the lookup tables, include either a "
                f"'{RegexFeaturizer.__name__}' "
                f"or a '{RegexEntityExtractor.__name__}' "
                "in your configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

        if training_data.lookup_tables:
            if self._component_types.isdisjoint([CRFEntityExtractor, DIETClassifier]):
                rasa.shared.utils.io.raise_warning(
                    "You have defined training data consisting of lookup tables, but "
                    "your NLU configuration does not include any components "
                    "that uses the features created from the lookup table. "
                    "To make use of the features that are created with the "
                    "help of the lookup tables, "
                    f"add a '{DIETClassifier.__name__}' or a "
                    f"'{CRFEntityExtractor.__name__}' "
                    "with the 'pattern' feature "
                    "to your configuration.",
                    docs=DOCS_URL_COMPONENTS,
                )
            elif CRFEntityExtractor in self._component_types:
                crf_schema_nodes = [
                    schema_node
                    for schema_node in self._graph_schema.nodes.values()
                    if schema_node.uses == CRFEntityExtractor
                ]
                # One CRF node listing the pattern feature in any of its
                # feature lists is sufficient; a node without a "features"
                # entry contributes nothing.
                has_pattern_feature = any(
                    CRFEntityExtractorOptions.PATTERN in feature_list
                    for crf in crf_schema_nodes
                    for feature_list in crf.config.get("features", [])
                )

                if not has_pattern_feature:
                    rasa.shared.utils.io.raise_warning(
                        "You have defined training data consisting of "
                        "lookup tables, but your NLU configuration's "
                        f"'{CRFEntityExtractor.__name__}' "
                        "does not include the "
                        "'pattern' feature. To featurize lookup tables, "
                        "add the 'pattern' feature to the "
                        f"'{CRFEntityExtractor.__name__}' "
                        "in your configuration.",
                        docs=DOCS_URL_COMPONENTS,
                    )

        if (
            training_data.entity_synonyms
            and EntitySynonymMapper not in self._component_types
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined synonyms in your training data, but "
                "your NLU configuration does not include an "
                f"'{EntitySynonymMapper.__name__}'. "
                "To map synonyms, add an "
                f"'{EntitySynonymMapper.__name__}' to your "
                "configuration.",
                docs=DOCS_URL_COMPONENTS,
            )