def validate_required_components_from_data(
    pipeline: List["Component"], data: TrainingData
) -> None:
    """Warns about training data that no component in the pipeline consumes.

    Each check pairs one kind of training data (response examples, entity
    examples, entities with roles/groups, regexes, lookup tables, synonyms)
    with the names of the components able to use it. When the data is present
    but none of those components is in the pipeline, a warning is issued via
    `rasa.shared.utils.io.raise_warning`.

    Args:
        pipeline: The list of the :class:`rasa.nlu.components.Component`s.
        data: The :class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
    """
    if data.response_examples and not any_components_in_pipeline(
        ["ResponseSelector"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with examples for training a response "
            "selector, but your NLU pipeline does not include a response selector "
            "component. To train a model on your response selector data, add a "
            "'ResponseSelector' to your pipeline."
        )

    if data.entity_examples and not any_components_in_pipeline(
        TRAINABLE_EXTRACTORS, pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of entity examples, but "
            "your NLU pipeline does not include an entity extractor trained on "
            "your training data. To extract non-pretrained entities, add one of "
            f"{TRAINABLE_EXTRACTORS} to your pipeline."
        )

    if data.entity_examples and not any_components_in_pipeline(
        {"DIETClassifier", "CRFEntityExtractor"}, pipeline
    ):
        # Only relevant when the entity annotations actually use roles/groups.
        if data.entity_roles_groups_used():
            rasa.shared.utils.io.raise_warning(
                "You have defined training data with entities that have roles/groups, "
                "but your NLU pipeline does not include a 'DIETClassifier' or a "
                "'CRFEntityExtractor'. To train entities that have roles/groups, "
                "add either 'DIETClassifier' or 'CRFEntityExtractor' to your "
                "pipeline."
            )

    if data.regex_features and not any_components_in_pipeline(
        ["RegexFeaturizer", "RegexEntityExtractor"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with regexes, but "
            "your NLU pipeline does not include a 'RegexFeaturizer' or a "
            "'RegexEntityExtractor'. To use regexes, include either a "
            "'RegexFeaturizer' or a 'RegexEntityExtractor' in your pipeline."
        )

    if data.lookup_tables and not any_components_in_pipeline(
        ["RegexFeaturizer", "RegexEntityExtractor"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of lookup tables, but "
            "your NLU pipeline does not include a 'RegexFeaturizer' or a "
            "'RegexEntityExtractor'. To use lookup tables, include either a "
            "'RegexFeaturizer' or a 'RegexEntityExtractor' in your pipeline."
        )

    if data.lookup_tables:
        if not any_components_in_pipeline(
            ["CRFEntityExtractor", "DIETClassifier"], pipeline
        ):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data consisting of lookup tables, but "
                "your NLU pipeline does not include any components that use these "
                "features. To make use of lookup tables, add a 'DIETClassifier' or a "
                "'CRFEntityExtractor' with the 'pattern' feature to your pipeline."
            )
        elif any_components_in_pipeline(["CRFEntityExtractor"], pipeline):
            crf_components = [c for c in pipeline if c.name == "CRFEntityExtractor"]
            # One CRF extractor featurizing `pattern` is enough. The previous
            # loop overwrote the flag on every iteration, so only the last
            # extractor in the pipeline was taken into account.
            # `features` is a list of [[before], [word], [after]] windows; a
            # missing or empty entry is treated as "no pattern feature" instead
            # of failing while unpacking `None`.
            has_pattern_feature = any(
                "pattern"
                in itertools.chain.from_iterable(
                    crf.component_config.get("features") or []
                )
                for crf in crf_components
            )

            if not has_pattern_feature:
                rasa.shared.utils.io.raise_warning(
                    "You have defined training data consisting of lookup tables, but "
                    "your NLU pipeline's 'CRFEntityExtractor' does not include the "
                    "'pattern' feature. To featurize lookup tables, add the 'pattern' "
                    "feature to the 'CRFEntityExtractor' in your pipeline."
                )

    if data.entity_synonyms and not any_components_in_pipeline(
        ["EntitySynonymMapper"], pipeline
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined synonyms in your training data, but "
            "your NLU pipeline does not include an 'EntitySynonymMapper'. "
            "To map synonyms, add an 'EntitySynonymMapper' to your pipeline."
        )
def _warn_if_some_training_data_is_unused(
    self, training_data: TrainingData
) -> None:
    """Validates that all training data will be consumed by some component.

    For example, if you specify response examples in your training data, but there
    is no `ResponseSelector` component in your configuration, then this method
    issues a warning.

    The checks compare the kinds of data present in `training_data` against
    `self._component_types`; the lookup-table check additionally inspects the
    `features` config of every `CRFEntityExtractor` node in
    `self._graph_schema`. All warnings are issued via
    `rasa.shared.utils.io.raise_warning`.

    Args:
        training_data: The training data for the NLU components.
    """
    if (
        training_data.response_examples
        and ResponseSelector not in self._component_types
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with examples for training a response "
            "selector, but your NLU configuration does not include a response "
            "selector component. "
            "To train a model on your response selector data, add a "
            f"'{ResponseSelector.__name__}' to your configuration.",
            docs=DOCS_URL_COMPONENTS,
        )

    if training_data.entity_examples and self._component_types.isdisjoint(
        TRAINABLE_EXTRACTORS
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of entity examples, but "
            "your NLU configuration does not include an entity extractor "
            "trained on your training data. "
            "To extract non-pretrained entities, add one of "
            f"{_types_to_str(TRAINABLE_EXTRACTORS)} to your configuration.",
            docs=DOCS_URL_COMPONENTS,
        )

    if training_data.entity_examples and self._component_types.isdisjoint(
        {DIETClassifier, CRFEntityExtractor}
    ):
        # Only relevant when the entity annotations actually use roles/groups.
        if training_data.entity_roles_groups_used():
            rasa.shared.utils.io.raise_warning(
                "You have defined training data with entities that "
                "have roles/groups, but your NLU configuration does not "
                f"include a '{DIETClassifier.__name__}' "
                f"or a '{CRFEntityExtractor.__name__}'. "
                "To train entities that have roles/groups, "
                f"add either '{DIETClassifier.__name__}' "
                f"or '{CRFEntityExtractor.__name__}' to your "
                "configuration.",
                docs=DOCS_URL_COMPONENTS,
            )

    if training_data.regex_features and self._component_types.isdisjoint(
        [RegexFeaturizer, RegexEntityExtractor]
    ):
        # The message previously contained a doubled space before "or a".
        rasa.shared.utils.io.raise_warning(
            "You have defined training data with regexes, but "
            "your NLU configuration does not include a 'RegexFeaturizer' "
            "or a "
            "'RegexEntityExtractor'. To use regexes, include either a "
            f"'{RegexFeaturizer.__name__}' or a "
            f"'{RegexEntityExtractor.__name__}' "
            "in your configuration.",
            docs=DOCS_URL_COMPONENTS,
        )

    if training_data.lookup_tables and self._component_types.isdisjoint(
        [RegexFeaturizer, RegexEntityExtractor]
    ):
        # The message previously ran two sentences together
        # ("...lookup table.To use..."); a separating space was added.
        rasa.shared.utils.io.raise_warning(
            "You have defined training data consisting of lookup tables, but "
            "your NLU configuration does not include a featurizer "
            "or an entity extractor using the lookup table. "
            "To use the lookup tables, include either a "
            f"'{RegexFeaturizer.__name__}' "
            f"or a '{RegexEntityExtractor.__name__}' "
            "in your configuration.",
            docs=DOCS_URL_COMPONENTS,
        )

    if training_data.lookup_tables:
        if self._component_types.isdisjoint([CRFEntityExtractor, DIETClassifier]):
            rasa.shared.utils.io.raise_warning(
                "You have defined training data consisting of lookup tables, but "
                "your NLU configuration does not include any components "
                "that uses the features created from the lookup table. "
                "To make use of the features that are created with the "
                "help of the lookup tables, "
                f"add a '{DIETClassifier.__name__}' or a "
                f"'{CRFEntityExtractor.__name__}' "
                "with the 'pattern' feature "
                "to your configuration.",
                docs=DOCS_URL_COMPONENTS,
            )
        elif CRFEntityExtractor in self._component_types:
            crf_schema_nodes = [
                schema_node
                for schema_node in self._graph_schema.nodes.values()
                if schema_node.uses == CRFEntityExtractor
            ]
            # One CRF node listing the pattern feature in any of its feature
            # lists is enough.
            # NOTE(review): a node without an explicit `features` entry is
            # treated as having no pattern feature - confirm this matches the
            # component's default configuration.
            has_pattern_feature = any(
                CRFEntityExtractorOptions.PATTERN in feature_list
                for crf in crf_schema_nodes
                for feature_list in crf.config.get("features", [])
            )

            if not has_pattern_feature:
                rasa.shared.utils.io.raise_warning(
                    "You have defined training data consisting of "
                    "lookup tables, but your NLU configuration's "
                    f"'{CRFEntityExtractor.__name__}' "
                    "does not include the "
                    "'pattern' feature. To featurize lookup tables, "
                    "add the 'pattern' feature to the "
                    f"'{CRFEntityExtractor.__name__}' "
                    "in your configuration.",
                    docs=DOCS_URL_COMPONENTS,
                )

    if (
        training_data.entity_synonyms
        and EntitySynonymMapper not in self._component_types
    ):
        rasa.shared.utils.io.raise_warning(
            "You have defined synonyms in your training data, but "
            "your NLU configuration does not include an "
            f"'{EntitySynonymMapper.__name__}'. "
            "To map synonyms, add an "
            f"'{EntitySynonymMapper.__name__}' to your "
            "configuration.",
            docs=DOCS_URL_COMPONENTS,
        )