Пример #1
0
    def create(
        cls,
        config: Dict[Text, Any],
        model_storage: ModelStorage,
        resource: Resource,
        execution_context: ExecutionContext,
    ) -> MitieNLP:
        """Creates component (see parent class for full docstring).

        Raises:
            InvalidConfigException: if no `model` path is configured, or the
                configured path does not point to an existing file.
        """
        import mitie

        model_file = config.get("model")
        if not model_file:
            # Message fix: there was no separator between the two sentences.
            raise InvalidConfigException(
                "The MITIE component 'MitieNLP' needs "
                "the configuration value for 'model'. "
                "Please take a look at the "
                "documentation in the pipeline section "
                "to get more info about this "
                "parameter."
            )
        if not Path(model_file).is_file():
            raise InvalidConfigException(
                f"The model file configured in the MITIE "
                f"component cannot be found. "
                f"Please ensure the directory path and/or "
                f"filename, '{model_file}', are correct."
            )
        # MITIE expects a plain string path to the feature extractor file.
        extractor = mitie.total_word_feature_extractor(str(model_file))

        return cls(Path(model_file), extractor)
Пример #2
0
    def graph_config_for_recipe(
        self,
        config: Dict,
        cli_parameters: Dict[Text, Any],
        training_type: TrainingType = TrainingType.BOTH,
        is_finetuning: bool = False,
    ) -> GraphModelConfiguration:
        """Converts the default config to graphs (see interface for full docstring)."""
        # The Core (policies) and NLU (pipeline) parts are only used when they
        # are configured AND compatible with the requested training type.
        self._use_core = (
            bool(config.get("policies")) and training_type != TrainingType.NLU
        )
        self._use_nlu = (
            bool(config.get("pipeline")) and training_type != TrainingType.CORE
        )

        if not self._use_nlu and training_type == TrainingType.NLU:
            raise InvalidConfigException(
                "Can't train an NLU model without a specified pipeline. Please make "
                "sure to specify a valid pipeline in your configuration."
            )

        if not self._use_core and training_type == TrainingType.CORE:
            # Message fix: previously read "an Core model".
            raise InvalidConfigException(
                "Can't train a Core model without policies. Please make "
                "sure to specify a valid policy in your configuration."
            )

        # End-to-end training requires both the NLU and the Core part.
        self._use_end_to_end = (
            self._use_nlu
            and self._use_core
            and training_type == TrainingType.END_TO_END
        )

        self._is_finetuning = is_finetuning

        train_nodes, preprocessors = self._create_train_nodes(config, cli_parameters)
        predict_nodes = self._create_predict_nodes(config, preprocessors, train_nodes)

        # The core target only exists when a Core model is part of the graph.
        core_target = "select_prediction" if self._use_core else None

        from rasa.nlu.classifiers.regex_message_handler import RegexMessageHandler

        return GraphModelConfiguration(
            train_schema=GraphSchema(train_nodes),
            predict_schema=GraphSchema(predict_nodes),
            training_type=training_type,
            language=config.get("language"),
            core_target=core_target,
            nlu_target=f"run_{RegexMessageHandler.__name__}",
        )
Пример #3
0
 def _check_data(self) -> None:
     """Validates that the data signature provides what the model needs.

     Raises:
         InvalidConfigException: if text or label features are missing.
         ValueError: if hidden layers are shared but the sentence-level
             signatures of text and label features differ.
     """
     signature = self.data_signature
     if TEXT not in signature:
         raise InvalidConfigException(
             f"No text features specified. "
             f"Cannot train '{self.__class__.__name__}' model.")
     if LABEL not in signature:
         raise InvalidConfigException(
             f"No label features specified. "
             f"Cannot train '{self.__class__.__name__}' model.")
     if self.config[SHARE_HIDDEN_LAYERS]:
         # Shared weights only work if both inputs look the same.
         if signature[TEXT][SENTENCE] != signature[LABEL][SENTENCE]:
             raise ValueError(
                 "If hidden layer weights are shared, data signatures "
                 "for text_features and label_features must coincide.")
Пример #4
0
 def _raise_invalid_speech_model_exception(self) -> None:
     """Signals that the configured `speech_model` is not a supported value."""
     message = (
         f"The value {self.speech_model} for speech_model is invalid. "
         f"You must choose one of 'default', 'numbers_and_commands', "
         f"or 'phone_call'. Refer to the documentation for details "
         f"about the selections."
     )
     raise InvalidConfigException(message)
Пример #5
0
 def _raise_invalid_speech_model_timeout_exception(self) -> None:
     """Signals an incompatible `speech_timeout`/`speech_model` combination."""
     message = (
         "If speech_timeout is 'auto' the speech_model must be "
         "'numbers_and_commands'. Please update your speech_model "
         "to be 'numbers_and_commands' if you would like to continue "
         "using the 'auto' speech_model."
     )
     raise InvalidConfigException(message)
Пример #6
0
    def from_tag_and_sub_config(
            tag: Text,
            sub_config: Any,
            name: Optional[Text] = None) -> ConditionMarker:
        """Builds an atomic (condition) marker from a tag and its parameter.

        Args:
            tag: the tag identifying a condition
            sub_config: a single text parameter expected by all condition markers;
               e.g. for an `intent_detected` marker this should be an intent name
            name: a custom name for this marker
        Returns:
            the configured `ConditionMarker`
        Raises:
            `InvalidMarkerConfig` if the given config or the tag are not well-defined
        """
        base_tag, negated = MarkerRegistry.get_non_negated_tag(tag)
        condition_class = MarkerRegistry.condition_tag_to_marker_class.get(
            base_tag)
        if condition_class is None:
            raise InvalidConfigException(f"Unknown condition '{tag}'.")
        # All condition markers take exactly one text parameter.
        if not isinstance(sub_config, str):
            raise InvalidMarkerConfig(
                f"Expected a text parameter to be specified for marker '{tag}'."
            )
        new_marker = condition_class(sub_config, negated=negated)
        new_marker.name = name
        return new_marker
Пример #7
0
    def _compare_or_memorize(
        self,
        fingerprint_key: Text,
        new_fingerprint: Text,
        error_message: Text,
    ) -> None:
        """Checks a fingerprint when finetuning; otherwise just stores it.

        Args:
           fingerprint_key: name of the fingerprint
           new_fingerprint: a new fingerprint value
           error_message: message of the `InvalidConfigException` raised when,
              in finetuning mode, the stored fingerprint for `fingerprint_key`
              differs from `new_fingerprint`

        Raises:
           `InvalidConfigException` if an old fingerprint exists and differs
           from the new one
        """
        if not self._is_finetuning:
            # Fresh training: record the fingerprint for future finetuning runs.
            self._fingerprints[fingerprint_key] = new_fingerprint
            return
        # Finetuning: the previously stored fingerprint must match exactly.
        if self._fingerprints[fingerprint_key] != new_fingerprint:
            raise InvalidConfigException(error_message)
Пример #8
0
    def _validate_policy_priorities(self) -> None:
        """Checks if every policy has a valid priority value.

        A policy must have a priority value. The priority values of
        the policies used in the configuration should be unique.

        Raises:
            `InvalidConfigException` if any of the policies doesn't have a priority
        """
        policies_by_priority = defaultdict(list)
        for node in self._policy_schema_nodes:
            defaults = node.uses.get_default_config()
            if POLICY_PRIORITY not in defaults:
                raise InvalidConfigException(
                    f"Found a policy {node.uses.__name__} which has no "
                    f"priority. Every policy must have a priority value which you "
                    f"can set in the `get_default_config` method of your policy."
                )
            # A node-level priority overrides the policy's default priority.
            priority = node.config.get(POLICY_PRIORITY, defaults[POLICY_PRIORITY])
            policies_by_priority[priority].append(node.uses)

        # Duplicate priorities are legal but likely unintended: warn.
        for priority, policy_types in policies_by_priority.items():
            if len(policy_types) > 1:
                rasa.shared.utils.io.raise_warning(
                    f"Found policies {_types_to_str(policy_types)} with same "
                    f"priority {priority} in PolicyEnsemble. When personalizing "
                    f"priorities, be sure to give all policies "
                    f"different priorities.",
                    docs=DOCS_URL_POLICIES,
                )
Пример #9
0
 def _raise_invalid_voice_exception(self) -> None:
     """Raises an error if an invalid voice is provided.

     Raises:
         InvalidConfigException: always; called when `assistant_voice` is
             known to be invalid.
     """
     # Message fix: previously read "is an invalid for assistant_voice".
     raise InvalidConfigException(
         f"The value {self.assistant_voice} is invalid for assistant_voice. "
         f"Please refer to the documentation for a list of valid voices "
         f"you can use for your voice assistant."
     )
Пример #10
0
    def _extract_raw_features_from_token(
        cls,
        feature_name: Text,
        token: Token,
        token_position: int,
        num_tokens: int,
    ) -> Text:
        """Extracts one raw feature value for the token at the given position.

        Args:
          feature_name: the name of a supported feature
          token: the token from which we want to extract the feature
          token_position: the position of the token inside the tokenized text
          num_tokens: the total number of tokens in the tokenized text
        Returns:
          the raw feature value as text
        """
        if feature_name not in cls.SUPPORTED_FEATURES:
            raise InvalidConfigException(
                f"Configured feature '{feature_name}' not valid. Please check "
                f"'{DOCS_URL_COMPONENTS}' for valid configuration parameters.")
        # Positional features are computed directly; everything else is
        # delegated to the per-feature function lookup.
        if feature_name == BEGIN_OF_SENTENCE:
            value = token_position == 0
        elif feature_name == END_OF_SENTENCE:
            value = token_position == num_tokens - 1
        else:
            value = cls._FUNCTION_DICT[feature_name](token)
        return str(value)
Пример #11
0
 def load(
     cls,
     config: Dict[Text, Any],
     model_storage: ModelStorage,
     resource: Resource,
     execution_context: ExecutionContext,
     **kwargs: Any,
 ) -> GraphComponent:
     """Loads a `FineTuningValidator` (see parent class for full docstring)."""
     try:
         with model_storage.read_from(resource) as path:
             # Fingerprints were persisted as JSON during training.
             stored_fingerprints = rasa.shared.utils.io.read_json_file(
                 filename=path / cls.FILENAME
             )
             return cls(
                 config=config,
                 model_storage=model_storage,
                 execution_context=execution_context,
                 resource=resource,
                 fingerprints=stored_fingerprints,
             )
     except ValueError as e:
         # Resource missing or unreadable: the validator was never trained.
         raise InvalidConfigException(
             f"Loading {cls.__name__} failed. Ensure that the {cls.__name__} "
             f"is part of your training graph and re-train your models before "
             f"attempting to use the {cls.__name__}."
         ) from e
Пример #12
0
def _check_tolerance_setting(component_config: Dict[Text, Any]) -> None:
    """Validates that the configured tolerance lies within [0.0, 1.0]."""
    tolerance = component_config.get(TOLERANCE, 0.0)
    if not 0.0 <= tolerance <= 1.0:
        raise InvalidConfigException(
            f"`{TOLERANCE}` was set to `{component_config.get(TOLERANCE)}` "
            f"which is an invalid setting. Please set it to a value "
            f"between 0.0 and 1.0 inclusive."
        )
Пример #13
0
    def get_targets(
        self, config: Dict, training_type: TrainingType
    ) -> Tuple[Text, Any]:
        """Return NLU and core targets from config dictionary.

        Note that default recipe has `nlu_target` and `core_target` as
        fixed values of `run_RegexMessageHandler` and `select_prediction`
        respectively. For graph recipe, target values are customizable. These
        can be used in validation (default recipe does this validation check)
        and during execution (all recipes use targets during execution).
        """
        # NLU target is always required because core (prediction) depends on NLU;
        # the core target is only needed when we train more than just NLU.
        nlu_target = config.get("nlu_target")
        if training_type == TrainingType.NLU:
            core_target = None
            core_missing = False
        else:
            core_target = config.get("core_target")
            core_missing = core_target is None
        if nlu_target is None or core_missing:
            raise InvalidConfigException(
                "Can't find target names for NLU and/or core. Please make "
                "sure to provide 'nlu_target' (required for all training types) "
                "and 'core_target' (required if training is not just NLU) values in "
                "your config.yml file."
            )
        return nlu_target, core_target
Пример #14
0
    def _raise_if_more_than_one_tokenizer(self) -> None:
        """Validates that only one tokenizer is present in the configuration.

        Note that the existence of a tokenizer and its position in the graph schema
        will be validated via the validation of required components during
        schema validation.

        Raises:
            `InvalidConfigException` in case there is more than one tokenizer
        """
        types_of_tokenizer_schema_nodes = [
            schema_node.uses
            for schema_node in self._graph_schema.nodes.values()
            if issubclass(schema_node.uses, Tokenizer)
            and schema_node.fn != "train"
        ]

        is_end_to_end = any(
            issubclass(schema_node.uses, CoreFeaturizationInputConverter)
            for schema_node in self._graph_schema.nodes.values())

        # End-to-end graphs legitimately contain a second tokenizer for the
        # core featurization input.
        allowed_number_of_tokenizers = 2 if is_end_to_end else 1
        if len(types_of_tokenizer_schema_nodes) > allowed_number_of_tokenizers:
            # Message fix: removed the duplicated word "configuration".
            raise InvalidConfigException(
                f"The configuration contains more than one tokenizer, "
                f"which is not possible at this time. You can only use one tokenizer. "
                f"The configuration contains the following tokenizers: "
                f"{_types_to_str(types_of_tokenizer_schema_nodes)}. ")
Пример #15
0
def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
    """Validates `MODEL_CONFIDENCE` and its interplay with loss/similarity settings."""
    confidence = component_config[MODEL_CONFIDENCE]
    if confidence == COSINE:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={COSINE} was introduced in Rasa Open Source 2.3.0 "
            f"but post-release experiments revealed that using cosine similarity can "
            f"change the order of predicted labels. "
            f"Since this is not ideal, using `{MODEL_CONFIDENCE}={COSINE}` has been "
            f"removed in versions post `2.3.3`. "
            f"Please use either `{SOFTMAX}` or `{LINEAR_NORM}` as possible values."
        )
    if confidence == INNER:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={INNER} is deprecated as it produces an unbounded "
            f"range of confidences which can break the logic of assistants in various "
            f"other places. "
            f"Please use `{MODEL_CONFIDENCE}={LINEAR_NORM}` which will produce a "
            f"linearly normalized version of dot product similarities with each value "
            f"in the range `[0,1]`."
        )
    if confidence not in (SOFTMAX, LINEAR_NORM, AUTO):
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={confidence} is not a valid "
            f"setting. Possible values: `{SOFTMAX}`, `{LINEAR_NORM}`."
        )
    if confidence == SOFTMAX:
        rasa.shared.utils.io.raise_warning(
            f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended "
            f"to try using `{MODEL_CONFIDENCE}={LINEAR_NORM}` to make it easier to "
            f"tune fallback thresholds.",
            category=UserWarning,
        )
        # softmax confidences only make sense with cross-entropy loss and
        # inner-product similarity.
        loss_type = component_config[LOSS_TYPE]
        if loss_type not in (SOFTMAX, CROSS_ENTROPY):
            raise InvalidConfigException(
                f"{LOSS_TYPE}={loss_type} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {LOSS_TYPE}={CROSS_ENTROPY}."
            )
        similarity_type = component_config[SIMILARITY_TYPE]
        if similarity_type not in (INNER, AUTO):
            raise InvalidConfigException(
                f"{SIMILARITY_TYPE}={similarity_type} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {SIMILARITY_TYPE}={INNER}."
            )
Пример #16
0
 def _validate_credentials(self) -> None:
     """Raises exceptions if the connector is not properly configured."""
     if self.slack_signing_secret:
         return
     # A signing secret is mandatory: without it requests cannot be verified.
     raise InvalidConfigException(
         f"Your slack bot is missing a configured signing secret. Running a "
         f"bot without a signing secret is insecure and was removed. "
         f"You need to add a `slack_signing_secret` parameter to your channel "
         f"configuration. "
         f"More info at {DOCS_URL_CONNECTORS_SLACK} .")
Пример #17
0
 def provide_inference(self) -> Domain:
     """Provides the domain during inference."""
     domain = self._domain
     if domain is None:
         # Should be impossible in practice, but fail early if it happens.
         raise InvalidConfigException(
             "No domain was found. This is required for "
             "making model predictions. Please make sure to "
             "provide a valid domain during training.")
     return domain
Пример #18
0
 def validate_config(cls, config: Dict[Text, Any]) -> None:
     """Validates that the component is configured properly.

     Raises:
         InvalidConfigException: if `features` is not a list of lists of
             supported lexical/syntactic feature names.
     """
     if FEATURES not in config:
         return  # will be replaced with default
     feature_config = config[FEATURES]
     # Message fix: removed the duplicated word "that".
     message = (
         f"Expected configuration of `features` to be a list of lists that "
         f"contain names of lexical and syntactic features "
         f"(i.e. {cls.SUPPORTED_FEATURES}). "
         f"Received {feature_config} instead. ")
     try:
         configured_feature_names = set(feature_name
                                        for pos_config in feature_config
                                        for feature_name in pos_config)
     except TypeError as e:
         # Not a list of iterables at all.
         raise InvalidConfigException(message) from e
     if configured_feature_names.difference(cls.SUPPORTED_FEATURES):
         raise InvalidConfigException(message)
Пример #19
0
    def validate_config(cls, config: Dict[Text, Any]) -> None:
        """Checks whether the given configuration is valid.

        Args:
          config: a configuration for a Mitie entity extractor component

        Raises:
          InvalidConfigException: if `num_threads` is missing or not >= 1.
        """
        num_threads = config.get("num_threads")
        if num_threads is None or num_threads <= 0:
            # Message fix: the two sentence fragments had no separator
            # ("(default: 1).but received ...").
            raise InvalidConfigException(
                f"Expected `num_threads` to be some value >= 1 (default: 1), "
                f"but received {num_threads}")
Пример #20
0
def validate_empty_pipeline(pipeline: List["Component"]) -> None:
    """Ensures the pipeline is not empty.

    Args:
        pipeline: the list of the :class:`rasa.nlu.components.Component`.

    Raises:
        InvalidConfigException: if the pipeline contains no components.
    """
    if pipeline:
        return
    raise InvalidConfigException(
        "Can not train an empty pipeline. "
        "Make sure to specify a proper pipeline in "
        "the configuration using the 'pipeline' key.")
Пример #21
0
def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
    """Warns about `softmax` confidences and validates the related settings."""
    if component_config[MODEL_CONFIDENCE] != SOFTMAX:
        return
    rasa.shared.utils.io.raise_warning(
        f"{MODEL_CONFIDENCE} is set to `softmax`. It is recommended "
        f"to set it to `cosine`. It will be set to `cosine` by default, "
        f"Rasa Open Source 3.0.0 onwards.",
        category=UserWarning,
    )
    # softmax confidences require cross-entropy loss and inner similarity.
    loss_type = component_config[LOSS_TYPE]
    if loss_type not in (SOFTMAX, CROSS_ENTROPY):
        raise InvalidConfigException(
            f"{LOSS_TYPE}={loss_type} and "
            f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
            f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
            f"only with {LOSS_TYPE}={CROSS_ENTROPY}.")
    similarity_type = component_config[SIMILARITY_TYPE]
    if similarity_type not in (INNER, AUTO):
        raise InvalidConfigException(
            f"{SIMILARITY_TYPE}={similarity_type} and "
            f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
            f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
            f"only with {SIMILARITY_TYPE}={INNER}.")
Пример #22
0
def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
    """Validates `MODEL_CONFIDENCE` and related loss/similarity settings.

    Raises:
        InvalidConfigException: if a removed/invalid confidence value is used
            or if `softmax` is combined with an incompatible loss or
            similarity type.
    """
    if component_config[MODEL_CONFIDENCE] == COSINE:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={COSINE} was introduced in Rasa Open Source 2.3.0 "
            f"but post-release experiments revealed that using cosine similarity can "
            f"change the order of predicted labels. "
            f"Since this is not ideal, using `{MODEL_CONFIDENCE}={COSINE}` has been "
            f"removed in versions post `2.3.3`. "
            f"Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead.")
    if component_config[MODEL_CONFIDENCE] == INNER:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={INNER} is deprecated as it produces an unbounded "
            f"range of confidences which can break the logic of assistants in various "
            f"other places. "
            f"Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead. ")
    if component_config[MODEL_CONFIDENCE] not in [SOFTMAX, LINEAR_NORM, AUTO]:
        # Message fix: added missing space before "(deprecated)".
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={component_config[MODEL_CONFIDENCE]} is not a valid "
            f"setting. Possible values: `{SOFTMAX}`, `{LINEAR_NORM}` (deprecated)."
        )
    if component_config[MODEL_CONFIDENCE] == SOFTMAX:
        # softmax confidences require cross-entropy loss and inner similarity.
        if component_config[LOSS_TYPE] not in [SOFTMAX, CROSS_ENTROPY]:
            raise InvalidConfigException(
                f"{LOSS_TYPE}={component_config[LOSS_TYPE]} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {LOSS_TYPE}={CROSS_ENTROPY}.")
        if component_config[SIMILARITY_TYPE] not in [INNER, AUTO]:
            raise InvalidConfigException(
                f"{SIMILARITY_TYPE}={component_config[SIMILARITY_TYPE]} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {SIMILARITY_TYPE}={INNER}.")
    if component_config[MODEL_CONFIDENCE] == LINEAR_NORM:
        # Message fix: the two sentences had no separating space
        # ("...`3.0.0`.Please use...").
        rasa.shared.utils.io.raise_deprecation_warning(
            f"{MODEL_CONFIDENCE} is set to `{LINEAR_NORM}`. We "
            f"introduced this option in Rasa Open Source 2.3.0, "
            f"but have identified multiple problems with it based "
            f"on user feedback. Therefore, `{MODEL_CONFIDENCE}={LINEAR_NORM}` "
            f"is now deprecated and will be removed in Rasa Open Source `3.0.0`. "
            f"Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead.")
Пример #23
0
    def from_tag_and_sub_config(
        tag: Text,
        sub_config: Any,
        name: Optional[Text] = None,
    ) -> OperatorMarker:
        """Builds an operator marker from a tag and a list of sub-marker configs.

        The configuration must consist of a list of marker configurations.
        See `Marker.from_config` for more details.

        Args:
            tag: the tag identifying an operator
            sub_config: a list of marker configs
            name: an optional custom name to be attached to the resulting marker
        Returns:
           the configured operator marker
        Raises:
            `InvalidMarkerConfig` if the given config or the tag are not well-defined
        """
        base_tag, negated = MarkerRegistry.get_non_negated_tag(tag)
        operator_class = MarkerRegistry.operator_tag_to_marker_class.get(
            base_tag)
        if operator_class is None:
            raise InvalidConfigException(f"Unknown operator '{tag}'.")

        if not isinstance(sub_config, list):
            raise InvalidMarkerConfig(
                f"Expected a list of sub-marker configurations under {tag}.")
        sub_markers: List[Marker] = []
        for marker_config in sub_config:
            try:
                # Deliberately not chained: the stack trace would only be
                # printed when running rasa evaluate with the --debug flag.
                sub_markers.append(Marker.from_config(marker_config))
            except InvalidMarkerConfig as e:
                raise InvalidMarkerConfig(
                    f"Could not create sub-marker for operator '{tag}' from "
                    f"{marker_config}. Reason: {str(e)}")
        try:
            operator = operator_class(markers=sub_markers,
                                      negated=negated)
        except InvalidMarkerConfig as e:
            # Same reasoning as above: avoid the chained stack trace.
            raise InvalidMarkerConfig(
                f"Could not create operator '{tag}' with sub-markers "
                f"{sub_markers}. Reason: {str(e)}")
        operator.name = name
        return operator
Пример #24
0
def _check_confidence_setting(component_config: Dict[Text, Any]) -> None:
    """Validates the `MODEL_CONFIDENCE` value and compatible loss/similarity."""
    confidence = component_config[MODEL_CONFIDENCE]
    if confidence == COSINE:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={COSINE} was introduced in Rasa Open Source 2.3.0 "
            f"but post-release experiments revealed that using cosine similarity can "
            f"change the order of predicted labels. "
            f"Since this is not ideal, using `{MODEL_CONFIDENCE}={COSINE}` has been "
            f"removed in versions post `2.3.3`. "
            f"Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead."
        )
    if confidence == INNER:
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={INNER} is deprecated as it produces an unbounded "
            f"range of confidences which can break the logic of assistants in various "
            f"other places. "
            f"Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead. "
        )
    if confidence not in (SOFTMAX, AUTO):
        raise InvalidConfigException(
            f"{MODEL_CONFIDENCE}={confidence} is not a valid "
            f"setting. Please use `{MODEL_CONFIDENCE}={SOFTMAX}` instead."
        )
    if confidence == SOFTMAX:
        # softmax confidences require cross-entropy loss and inner similarity.
        loss_type = component_config[LOSS_TYPE]
        if loss_type != CROSS_ENTROPY:
            raise InvalidConfigException(
                f"{LOSS_TYPE}={loss_type} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {LOSS_TYPE}={CROSS_ENTROPY}."
            )
        similarity_type = component_config[SIMILARITY_TYPE]
        if similarity_type not in (INNER, AUTO):
            raise InvalidConfigException(
                f"{SIMILARITY_TYPE}={similarity_type} and "
                f"{MODEL_CONFIDENCE}={SOFTMAX} is not a valid "
                f"combination. You can use {MODEL_CONFIDENCE}={SOFTMAX} "
                f"only with {SIMILARITY_TYPE}={INNER}."
            )
Пример #25
0
def validate_requirements(component_names: List[Optional[Text]]) -> None:
    """Validates that all required importable python packages are installed.

    Raises:
        InvalidConfigException: If one of the component names is `None`, likely
            indicates that a custom implementation is missing this property
            or that there is an invalid configuration file that we did not
            catch earlier.

    Args:
        component_names: The list of component names.
    """
    from rasa.nlu import registry

    # Map each component to the packages it needs that cannot be imported.
    failed_imports = {}
    for name in component_names:
        if name is None:
            raise InvalidConfigException(
                "Your pipeline configuration contains a component that is missing "
                "a name. Please double check your configuration or if this is a "
                "custom component make sure to implement the name property for "
                "the component."
            )
        component_class = registry.get_component_class(name)
        missing_packages = find_unavailable_packages(
            component_class.required_packages()
        )
        if missing_packages:
            failed_imports[name] = missing_packages
    if failed_imports:  # pragma: no cover
        # Invert the mapping: for each missing dependency, which components
        # need it.
        dependency_component_map = defaultdict(list)
        for component, missing_dependencies in failed_imports.items():
            for dependency in missing_dependencies:
                dependency_component_map[dependency].append(component)

        missing = "\n  - ".join(
            f"{dependency} (needed for {', '.join(components)})"
            for dependency, components in dependency_component_map.items()
        )
        raise MissingDependencyException(
            f"Not all required importable packages are installed to use "
            f"the configured NLU pipeline. "
            f"To use this pipeline, you need to install the "
            f"missing modules: \n"
            f"  - {missing}\n"
            f"Please install the packages that contain the missing modules."
        )
Пример #26
0
    def combine_predictions(
        self,
        predictions: List[PolicyPrediction],
        tracker: DialogueStateTracker,
        domain: Domain,
    ) -> PolicyPrediction:
        """Derives a single prediction from the given list of predictions.

        Note that you might get unexpected results if the priorities are non-unique.
        Moreover, the order of events in the result is determined by the order of the
        predictions passed to this method.

        Args:
            predictions: a list of policy predictions that include "probabilities"
              which are non-negative but *do not* necessarily up to 1
            tracker: dialogue state tracker holding the state of the conversation
            domain: the common domain

        Returns:
            The "best" prediction.
        """
        if not predictions:
            raise InvalidConfigException(
                "Expected at least one prediction. Please check your model "
                "configuration.")
        # Even with a single policy we still go through selection: the final
        # prediction must carry mandatory and optional events in `events`
        # and no optional events.
        best = self._best_policy_prediction(
            predictions=predictions, domain=domain, tracker=tracker)

        if tracker.latest_action_name == ACTION_LISTEN_NAME:
            # Record whether the prediction was made from user text (e2e)
            # or from the recognized intent.
            used_text = best.is_end_to_end_prediction
            if used_text:
                logger.debug("Made e2e prediction using user text.")
            else:
                logger.debug("Made prediction using user intent.")
            logger.debug(
                f"Added `DefinePrevUserUtteredFeaturization({used_text})` event.")
            best.events.append(DefinePrevUserUtteredFeaturization(used_text))

        logger.debug(
            f"Predicted next action using {best.policy_name}.")
        return best
Пример #27
0
def extract_patterns(
    training_data: TrainingData,
    use_lookup_tables: bool = True,
    use_regexes: bool = True,
    use_only_entities: bool = False,
    use_word_boundaries: bool = True,
) -> List[Dict[Text, Text]]:
    r"""Extract a list of patterns from the training data.

    The patterns are constructed using the regex features and lookup tables defined
    in the training data.

    Args:
        training_data: The training data.
        use_lookup_tables: Boolean indicating whether to use lookup tables or not.
        use_regexes: Boolean indicating whether to use regex features or not.
        use_only_entities: If True only lookup tables and regex features with a name
          equal to an entity are considered.
        use_word_boundaries: Boolean indicating whether to use `\b` around the lookup
            table regex expressions.

    Returns:
        The list of regex patterns.

    Raises:
        InvalidConfigException: If any collected pattern is not a valid regex.
    """
    if not training_data.lookup_tables and not training_data.regex_features:
        return []

    patterns = []

    if use_regexes:
        patterns.extend(_collect_regex_features(training_data, use_only_entities))
    if use_lookup_tables:
        patterns.extend(
            _convert_lookup_tables_to_regex(
                training_data, use_only_entities, use_word_boundaries
            )
        )

    # Fail fast on invalid regexes so the problem surfaces during training,
    # not later at inference time.
    for pattern in patterns:
        try:
            re.compile(pattern["pattern"])
        except re.error as e:
            # Chain the original re.error so the root cause of the failure
            # remains visible in the traceback.
            raise InvalidConfigException(
                f"Model training failed. '{pattern['pattern']}' "
                "is not a valid regex. Please update your nlu "
                f"training data configuration at {pattern}."
            ) from e

    return patterns
Пример #28
0
    def _from_registry(cls, name: Text) -> RegisteredComponent:
        """Looks up a registered component by name or by full module path."""
        # Importing all the default Rasa components will automatically register them
        from rasa.engine.recipes.default_components import DEFAULT_COMPONENTS  # noqa

        try:
            # Fast path: the name is the plain class name of a registered component.
            return cls._registered_components[name]
        except KeyError:
            pass

        # A dotted name may be a full module path; resolve the class it points
        # to and retry the lookup with the plain class name.
        if "." in name:
            resolved_class = rasa.shared.utils.common.class_from_module_path(name)
            registered = cls._registered_components.get(resolved_class.__name__)
            if registered is not None:
                return registered

        raise InvalidConfigException(
            f"Can't load class for name '{name}'. Please make sure to provide "
            f"a valid name or module path and to register it using the "
            f"'@DefaultV1Recipe.register' decorator.")
Пример #29
0
    def aggregate_sequence_features(
        dense_sequence_features: np.ndarray,
        pooling_operation: Text,
        only_non_zero_vectors: bool = True,
    ) -> np.ndarray:
        """Aggregates the non-zero vectors of a dense sequence feature matrix.

        Args:
          dense_sequence_features: a 2-dimensional matrix where the first dimension
            is the sequence dimension over which we want to aggregate of shape
            [seq_len, feat_dim]
          pooling_operation: either max pooling or average pooling
          only_non_zero_vectors: determines whether the aggregation is done over
            non-zero vectors only

        Returns:
          a matrix of shape [1, feat_dim]

        Raises:
          ValueError: if the input is not a non-empty 2-dimensional matrix
          InvalidConfigException: if the pooling operation is not one of the
            supported operations
        """
        shape = dense_sequence_features.shape
        if len(shape) != 2 or min(shape) == 0:
            raise ValueError(
                f"Expected a non-empty 2-dimensional matrix (where the first "
                f"dimension is the sequence dimension which we want to aggregate), "
                f"but found a matrix of shape {dense_sequence_features.shape}."
            )

        if only_non_zero_vectors:
            # Vectorized row-wise check (instead of a per-row Python loop):
            # a row is kept if any of its entries is non-zero.
            is_non_zero_vector = np.any(dense_sequence_features != 0, axis=1)
            dense_sequence_features = dense_sequence_features[is_non_zero_vector]

            # if all feature vectors were zero, we must continue with a single
            # zero vector so that pooling still yields a [1, feat_dim] result
            if dense_sequence_features.shape[0] == 0:
                dense_sequence_features = np.zeros([1, shape[-1]])

        if pooling_operation == MEAN_POOLING:
            return np.mean(dense_sequence_features, axis=0, keepdims=True)
        elif pooling_operation == MAX_POOLING:
            return np.max(dense_sequence_features, axis=0, keepdims=True)
        else:
            raise InvalidConfigException(
                f"Invalid pooling operation specified. Available operations are "
                f"'{MEAN_POOLING}' or '{MAX_POOLING}', but provided value is "
                f"'{pooling_operation}'.")
Пример #30
0
def validate_only_one_tokenizer_is_used(pipeline: List["Component"]) -> None:
    """Validates that only one tokenizer is present in the pipeline.

    Args:
        pipeline: the list of the :class:`rasa.nlu.components.Component`.

    Raises:
        InvalidConfigException: if the pipeline contains more than one tokenizer.
    """
    from rasa.nlu.tokenizers.tokenizer import Tokenizer

    # Collect the names of all tokenizer components in the pipeline.
    tokenizer_names = [
        component.name for component in pipeline if isinstance(component, Tokenizer)
    ]

    if len(tokenizer_names) > 1:
        raise InvalidConfigException(
            f"The pipeline configuration contains more than one tokenizer, "
            f"which is not possible at this time. You can only use one tokenizer. "
            f"The pipeline contains the following tokenizers: {tokenizer_names}. "
        )