Example #1
    def provide(self, importer: TrainingDataImporter) -> TrainingData:
        """Provides nlu training data during training."""
        if "language" in self._config:
            training_data = importer.get_nlu_data(language=self._config["language"])
        else:
            training_data = importer.get_nlu_data()
        if self._config["persist"]:
            self._persist(training_data)
        return training_data
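
For context, a minimal sketch of feeding such a provider: build an importer from the project files and inspect the NLU data it returns. The file paths are placeholders, `load_from_config` is the public factory in recent Rasa versions, the optional `language` argument is not available in every release, and in Rasa 2.x the getters are coroutines that have to be awaited.

from rasa.shared.importers.importer import TrainingDataImporter

# Assemble an importer from (placeholder) project files.
importer = TrainingDataImporter.load_from_config(
    config_path="config.yml",
    domain_path="domain.yml",
    training_data_paths=["data/"],
)

# Fetch the combined NLU training data and see how many examples were loaded.
nlu_data = importer.get_nlu_data()
print(len(nlu_data.training_examples))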
Example #2
def test_import_nlu_training_data_with_default_actions(
    default_importer: TrainingDataImporter,
):
    assert isinstance(default_importer, E2EImporter)
    importer_without_e2e = default_importer.importer

    # Check additional NLU training data from domain was added
    nlu_data = default_importer.get_nlu_data()

    assert len(nlu_data.training_examples) > len(
        importer_without_e2e.get_nlu_data().training_examples
    )

    extended_training_data = default_importer.get_nlu_data()
    assert all(
        Message(data={ACTION_NAME: action_name})
        in extended_training_data.training_examples
        for action_name in rasa.shared.core.constants.DEFAULT_ACTION_NAMES
    )
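
The `default_importer` fixture is not shown on this page; a rough sketch of how such an importer could be put together (file paths are placeholders, and the real fixture in the Rasa test suite additionally wraps a combined importer):

from rasa.shared.importers.importer import E2EImporter
from rasa.shared.importers.rasa import RasaFileImporter

# E2EImporter augments the wrapped importer's NLU data with messages for the
# default action names and for events taken from end-to-end stories.
default_importer = E2EImporter(
    RasaFileImporter(
        config_file="config.yml",
        domain_path="domain.yml",
        training_data_paths=["data/"],
    )
)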
Example #3
    @classmethod
    def from_importer(cls, importer: TrainingDataImporter) -> "Validator":
        """Create an instance from the domain, nlu and story files."""
        domain = importer.get_domain()
        story_graph = importer.get_stories()
        intents = importer.get_nlu_data()
        config = importer.get_config()

        return cls(domain, intents, story_graph, config)
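
A hedged usage sketch for `from_importer`: build the importer from project files and run one of the resulting validator's checks. The paths are placeholders; in Rasa 2.x this factory is a coroutine and would need to be awaited, and the exact set of `verify_*` methods varies between versions.

from rasa.shared.importers.rasa import RasaFileImporter
from rasa.validator import Validator

importer = RasaFileImporter(
    config_file="config.yml",
    domain_path="domain.yml",
    training_data_paths=["data/"],
)
validator = Validator.from_importer(importer)

# Warn about intents that are used in stories but missing from the NLU data.
validator.verify_intents_in_stories()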
Example #4
def test_import_nlu_training_data_from_e2e_stories(
    default_importer: TrainingDataImporter,
):
    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(default_importer, E2EImporter)
    importer_without_e2e = default_importer.importer

    stories = StoryGraph(
        [
            StoryStep(
                "name",
                events=[
                    SlotSet("some slot", "doesn't matter"),
                    UserUttered(intent={"name": "greet_from_stories"}),
                    ActionExecuted("utter_greet_from_stories"),
                ],
            ),
            StoryStep(
                "name",
                events=[
                    UserUttered("how are you doing?"),
                    ActionExecuted(action_text="Hi Joey."),
                ],
            ),
        ]
    )

    def mocked_stories(*_: Any, **__: Any) -> StoryGraph:
        return stories

    # Patch to return our test stories
    importer_without_e2e.get_stories = mocked_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (
        importer_without_e2e.get_stories().fingerprint()
        == default_importer.get_stories().fingerprint()
    )
    assert importer_without_e2e.get_config() == default_importer.get_config()

    # Check additional NLU training data from stories was added
    nlu_data = default_importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        importer_without_e2e.get_nlu_data().training_examples
    )

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={INTENT: "greet_from_stories"}),
        Message(data={ACTION_NAME: "utter_greet_from_stories"}),
        Message(data={TEXT: "how are you doing?"}),
        Message(data={ACTION_TEXT: "Hi Joey."}),
    ]

    assert all(m in nlu_data.training_examples for m in expected_additional_messages)
Example #5
async def test_use_of_interface():
    importer = TrainingDataImporter()

    functions_to_test = [
        lambda: importer.get_config(),
        lambda: importer.get_stories(),
        lambda: importer.get_nlu_data(),
        lambda: importer.get_domain(),
    ]
    for f in functions_to_test:
        with pytest.raises(NotImplementedError):
            await f()
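
To get past these `NotImplementedError`s, a concrete importer has to override all four getters. A minimal sketch of such a subclass, assuming the async interface exercised by this test (in Rasa 3.x the same methods are plain synchronous functions):

from typing import Any, Dict, Optional, Text

from rasa.shared.core.domain import Domain
from rasa.shared.core.training_data.structures import StoryGraph
from rasa.shared.importers.importer import TrainingDataImporter
from rasa.shared.nlu.training_data.training_data import TrainingData


class EmptyImporter(TrainingDataImporter):
    """Hypothetical importer returning empty config, domain, stories and NLU data."""

    async def get_config(self) -> Dict:
        return {}

    async def get_domain(self) -> Domain:
        return Domain.empty()

    async def get_stories(self, **kwargs: Any) -> StoryGraph:
        return StoryGraph([])

    async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
        return TrainingData()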
Example #6
    def validate(self, importer: TrainingDataImporter) -> TrainingDataImporter:
        """Validates the current graph schema against the training data and domain.

        Args:
            importer: the training data importer which can also load the domain
        Raises:
            `InvalidConfigException` or `InvalidDomain` in case there is some mismatch
        """
        nlu_data = importer.get_nlu_data()
        self._validate_nlu(nlu_data)

        story_graph = importer.get_stories()
        domain = importer.get_domain()
        self._validate_core(story_graph, domain)
        return importer
Example #7
    def _validate(
        self,
        importer: TrainingDataImporter,
        nlu: bool = True,
        core: bool = True,
    ) -> None:
        """Validate whether the finetuning setting conflicts with other settings.

        Note that this validation always takes into account the configuration of
        nlu *and* core part, while the validation of aspects of the domain and
        the NLU training data only happen if we request to validate finetuning
        with respect to NLU/Core models, respectively.

        For more details, see docstring of this class.

        Args:
            importer: a training data importer
            domain: the domain
            nlu: set to `False` if NLU part should not be validated
            core: set to `False` if Core part should not be validated
        Raises:
            `InvalidConfigException` if there is a conflict
        """
        if self._is_finetuning and not self._fingerprints:
            raise InvalidConfigException(
                f"Finetuning is enabled but the {self.__class__.__name__} "
                f"does not remember seeing a training run. Ensure that you have "
                f"trained your model at least once (with finetuning disabled) "
                f"and ensure that the {self.__class__.__name__} is part of the "
                f"training graph."
            )

        rasa_version = rasa.__version__
        if self._is_finetuning:
            old_rasa_version = self._fingerprints[FINGERPRINT_VERSION]
            if version.parse(old_rasa_version) < version.parse(
                MINIMUM_COMPATIBLE_VERSION
            ):
                raise InvalidConfigException(
                    f"The minimum compatible Rasa Version is "
                    f"{MINIMUM_COMPATIBLE_VERSION} but the model we attempt to "
                    f"finetune has been generated with an older version "
                    f"({old_rasa_version})."
                )
        self._fingerprints[FINGERPRINT_VERSION] = rasa_version

        config = importer.get_config()
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_CONFIG_WITHOUT_EPOCHS_KEY,
            new_fingerprint=self._get_fingerprint_of_config_without_epochs(config),
            error_message=(
                "Cannot finetune because more than just the 'epoch' keys have been "
                "changed in the configuration. "
                "Please revert your configuration and only change "
                "the 'epoch' settings where needed."
            ),
        )

        if core:
            # NOTE: We can drop this check as soon as there is a consistency check
            # which ensures that the domain and the Core training data match.
            domain = importer.get_domain()
            self._compare_or_memorize(
                fingerprint_key=FINGERPRINT_CORE,
                new_fingerprint=self._get_fingerprint_of_domain_without_responses(
                    domain
                ),
                error_message=(
                    "Cannot finetune because more than just the responses have been "
                    "changed in the domain. "
                    "Please revert all settings in your domain file (except the "
                    "'responses')."
                ),
            )

        if nlu:
            nlu_data = importer.get_nlu_data()
            self._compare_or_memorize(
                fingerprint_key=FINGERPRINT_NLU,
                new_fingerprint=nlu_data.label_fingerprint(),
                error_message=(
                    "Cannot finetune because NLU training data contains new labels "
                    "or does not contain any examples for some known labels. "
                    "Please make sure that the NLU data that you use "
                    "for finetuning contains at least one example for every label "
                    "(i.e. intent, action name, ...) that was included in the NLU "
                    "data used for training the model which we attempt to finetune "
                    "now. Moreover, you must not add labels that were not included "
                    "during training before."
                ),
            )

        self.persist()
Example #8
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        dry_run: If `True` then no training will be done, and the information about
            whether the training needs to be done will be printed.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
            with the model.
        core_additional_arguments: Additional training parameters for core training.
        nlu_additional_arguments: Additional training parameters forwarded to training
            method of each NLU component.
        model_to_finetune: Optional path to a model which should be finetuned or
            a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction of the currently specified training
            epochs in the model configuration which should be used for finetuning.

    Returns:
        An instance of `TrainingResult`.
    """
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    fingerprint_comparison = model.should_retrain(
        new_fingerprint, old_model, train_path, force_training=force_training
    )

    if dry_run:
        code, texts = dry_run_result(fingerprint_comparison)
        for text in texts:
            print_warning(text) if code > 0 else print_success(text)
        return TrainingResult(code=code)

    if nlu_data.has_e2e_examples():
        rasa.shared.utils.common.mark_as_experimental_feature("end-to-end training")

    if stories.is_empty() and nlu_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return TrainingResult()

    if stories.is_empty():
        rasa.shared.utils.cli.print_warning(
            "No stories present. Just a Rasa NLU model will be trained."
        )
        trained_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
            model_to_finetune=model_to_finetune,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        return TrainingResult(model=trained_model)

    # We will train NLU if there are any NLU examples, including ones from e2e stories.
    if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
        rasa.shared.utils.cli.print_warning(
            "No NLU data present. Just a Rasa Core model will be trained."
        )
        trained_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            model_to_finetune=model_to_finetune,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )

        return TrainingResult(model=trained_model)

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint,
            old_model,
            train_path,
            has_e2e_examples=nlu_data.has_e2e_examples(),
        )
    else:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)

    if fingerprint_comparison.is_training_required():
        async with telemetry.track_model_training(file_importer, model_type="rasa"):
            await _do_training(
                file_importer,
                output_path=output_path,
                train_path=train_path,
                fingerprint_comparison_result=fingerprint_comparison,
                fixed_model_name=fixed_model_name,
                persist_nlu_training_data=persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                old_model_zip_path=old_model,
                model_to_finetune=model_to_finetune,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )
        trained_model = model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )
        return TrainingResult(model=trained_model)

    rasa.shared.utils.cli.print_success(
        "Nothing changed. You can use the old model stored at "
        "'{}'.".format(os.path.abspath(old_model))
    )
    return TrainingResult(model=old_model)
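
These internal helpers are usually reached through the public training entry point rather than called directly. A rough sketch of that call, assuming the Rasa 2.x `rasa.train` API (argument names and the shape of the return value differ between versions):

import rasa

# Train Core and NLU from (placeholder) project files; internally this ends up
# in `_train_async_internal` via `train_async`.
result = rasa.train(
    domain="domain.yml",
    config="config.yml",
    training_files=["data/"],
    output="models/",
)
print(result.model)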
Example #9
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        core_additional_arguments: Additional training parameters for core training.
        nlu_additional_arguments: Additional training parameters forwarded to training
                                  method of each NLU component.

    Returns:
        Path of the trained model archive.
    """

    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if stories.is_empty():
        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )

    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
        )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    fingerprint_comparison = FingerprintComparisonResult(force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=old_model,
        )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success(
        "Nothing changed. You can use the old model stored at "
        "'{}'.".format(os.path.abspath(old_model))
    )
    return old_model
Example #10
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be persisted
                                   with the model.
        core_additional_arguments: Additional training parameters for core training.
        nlu_additional_arguments: Additional training parameters forwarded to training
                                  method of each NLU component.

    Returns:
        Path of the trained model archive.
    """

    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    # if stories.is_empty() and nlu_data.can_train_nlu_model():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return

    # if stories.is_empty():
    #     print_warning("No stories present. Just a Rasa NLU model will be trained.")
    #     return await _train_nlu_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         persist_nlu_training_data=persist_nlu_training_data,
    #         additional_arguments=nlu_additional_arguments,
    #     )

    # if nlu_data.can_train_nlu_model():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         additional_arguments=core_additional_arguments,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )
    else:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)

    # bf mod >
    if fingerprint_comparison.nlu is True:  # replace True with list of all langs
        fingerprint_comparison.nlu = list(new_fingerprint.get("nlu-config", {}).keys())
    domain = await file_importer.get_domain()
    core_untrainable = domain.is_empty() or stories.is_empty()
    nlu_untrainable = [lang for lang, data in nlu_data.items() if data.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        lang for lang in fingerprint_comparison.nlu if lang not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(lang),
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        async with telemetry.track_model_training(file_importer, model_type="rasa"):
            await _do_training(
                file_importer,
                output_path=output_path,
                train_path=train_path,
                fingerprint_comparison_result=fingerprint_comparison,
                fixed_model_name=fixed_model_name,
                persist_nlu_training_data=persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                old_model_zip_path=old_model,
            )

        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model