def provide(self, importer: TrainingDataImporter,) -> TrainingData:
    """Provides nlu training data during training.

    Args:
        importer: Importer the NLU training data is loaded from.

    Returns:
        The NLU training data (optionally restricted to the configured
        language and optionally persisted, depending on `self._config`).
    """
    # Only forward a `language` argument when one is configured.
    kwargs = {}
    if "language" in self._config:
        kwargs["language"] = self._config["language"]
    training_data = importer.get_nlu_data(**kwargs)

    if self._config["persist"]:
        self._persist(training_data)

    return training_data
def test_import_nlu_training_data_with_default_actions(
    default_importer: TrainingDataImporter,
):
    """The `E2EImporter` must add the default actions to the NLU data."""
    assert isinstance(default_importer, E2EImporter)
    base_importer = default_importer.importer

    # Check additional NLU training data from domain was added: the wrapped
    # importer alone must yield fewer examples than the E2E importer.
    augmented_data = default_importer.get_nlu_data()
    plain_data = base_importer.get_nlu_data()
    assert len(augmented_data.training_examples) > len(plain_data.training_examples)

    # Every default action name must be present as an action-name message.
    extended_training_data = default_importer.get_nlu_data()
    for action_name in rasa.shared.core.constants.DEFAULT_ACTION_NAMES:
        expected = Message(data={ACTION_NAME: action_name})
        assert expected in extended_training_data.training_examples
def from_importer(cls, importer: TrainingDataImporter) -> "Validator":
    """Create an instance from the domain, nlu and story files.

    Args:
        importer: Importer that loads the domain, stories, NLU data and config.

    Returns:
        A new `Validator` built from the imported artifacts.
    """
    # Keep the original load order: domain, stories, NLU data, config.
    domain = importer.get_domain()
    story_graph = importer.get_stories()
    nlu_data = importer.get_nlu_data()
    config = importer.get_config()

    return cls(domain, nlu_data, story_graph, config)
def test_import_nlu_training_data_from_e2e_stories(
    default_importer: TrainingDataImporter,
):
    """The `E2EImporter` must derive NLU training messages from e2e stories."""
    # The `E2EImporter` correctly wraps the underlying `CombinedDataImporter`
    assert isinstance(default_importer, E2EImporter)
    base_importer = default_importer.importer

    intent_step = StoryStep(
        "name",
        events=[
            SlotSet("some slot", "doesn't matter"),
            UserUttered(intent={"name": "greet_from_stories"}),
            ActionExecuted("utter_greet_from_stories"),
        ],
    )
    e2e_step = StoryStep(
        "name",
        events=[
            UserUttered("how are you doing?"),
            ActionExecuted(action_text="Hi Joey."),
        ],
    )
    stories = StoryGraph([intent_step, e2e_step])

    def mocked_stories(*_: Any, **__: Any) -> StoryGraph:
        return stories

    # Patch to return our test stories
    base_importer.get_stories = mocked_stories

    # The wrapping `E2EImporter` simply forwards these method calls
    assert (
        base_importer.get_stories().fingerprint()
        == default_importer.get_stories().fingerprint()
    )
    assert base_importer.get_config() == default_importer.get_config()

    # Check additional NLU training data from stories was added
    nlu_data = default_importer.get_nlu_data()

    # The `E2EImporter` adds NLU training data based on our training stories
    assert len(nlu_data.training_examples) > len(
        base_importer.get_nlu_data().training_examples
    )

    # Check if the NLU training data was added correctly from the story training data
    expected_additional_messages = [
        Message(data={INTENT: "greet_from_stories"}),
        Message(data={ACTION_NAME: "utter_greet_from_stories"}),
        Message(data={TEXT: "how are you doing?"}),
        Message(data={ACTION_TEXT: "Hi Joey."}),
    ]
    for expected in expected_additional_messages:
        assert expected in nlu_data.training_examples
async def test_use_of_interface():
    """Every data accessor on the bare `TrainingDataImporter` must be abstract."""
    importer = TrainingDataImporter()

    # Bound methods can be awaited directly; no lambda indirection needed.
    abstract_accessors = [
        importer.get_config,
        importer.get_stories,
        importer.get_nlu_data,
        importer.get_domain,
    ]
    for accessor in abstract_accessors:
        with pytest.raises(NotImplementedError):
            await accessor()
def validate(self, importer: TrainingDataImporter) -> TrainingDataImporter:
    """Validates the current graph schema against the training data and domain.

    Args:
        importer: the training data importer which can also load the domain

    Returns:
        The unchanged `importer`.

    Raises:
        `InvalidConfigException` or `InvalidDomain` in case there is some mismatch
    """
    # NLU data is checked first, then stories together with the domain
    # (arguments are evaluated left-to-right, preserving the load order).
    self._validate_nlu(importer.get_nlu_data())
    self._validate_core(importer.get_stories(), importer.get_domain())
    return importer
def _validate(
    self,
    importer: TrainingDataImporter,
    nlu: bool = True,
    core: bool = True,
) -> None:
    """Validate whether the finetuning setting conflicts with other settings.

    Note that this validation always takes into account the configuration of
    nlu *and* core part, while the validation of aspects of the domain and
    the NLU training data only happen if we request to validate finetuning
    with respect to NLU/Core models, respectively.

    For more details, see docstring of this class.

    Args:
        importer: a training data importer
        nlu: set to `False` if NLU part should not be validated
        core: set to `False` if Core part should not be validated

    Raises:
        `InvalidConfigException` if there is a conflict
    """
    # Finetuning requires fingerprints remembered from a previous full run.
    if self._is_finetuning and not self._fingerprints:
        raise InvalidConfigException(
            f"Finetuning is enabled but the {self.__class__.__name__} "
            f"does not remember seeing a training run. Ensure that you have "
            f"trained your model at least once (with finetuning disabled) "
            f"and ensure that the {self.__class__.__name__} is part of the "
            f"training graph. "
        )

    rasa_version = rasa.__version__
    if self._is_finetuning:
        old_rasa_version = self._fingerprints[FINGERPRINT_VERSION]
        if version.parse(old_rasa_version) < version.parse(
            MINIMUM_COMPATIBLE_VERSION
        ):
            raise InvalidConfigException(
                f"The minimum compatible Rasa Version is "
                f"{MINIMUM_COMPATIBLE_VERSION} but the model we attempt to "
                f"finetune has been generated with an older version "
                # Fixed: close the parenthesis around the version number.
                f"({old_rasa_version})."
            )
    self._fingerprints[FINGERPRINT_VERSION] = rasa_version

    # Config (ignoring epoch counts) must be unchanged for both NLU and Core.
    config = importer.get_config()
    self._compare_or_memorize(
        fingerprint_key=FINGERPRINT_CONFIG_WITHOUT_EPOCHS_KEY,
        new_fingerprint=self._get_fingerprint_of_config_without_epochs(config),
        error_message=(
            "Cannot finetune because more than just the 'epoch' keys have been "
            "changed in the configuration. "
            "Please revert your configuration and only change "
            "the 'epoch' settings where needed."
        ),
    )

    if core:
        # NOTE: If there's a consistency check between domain and core training
        # data that ensures domain and core training data are consistent, then
        # we can drop this check.
        domain = importer.get_domain()
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_CORE,
            new_fingerprint=self._get_fingerprint_of_domain_without_responses(
                domain
            ),
            error_message=(
                "Cannot finetune because more than just the responses have been "
                # Fixed: missing space between sentences ("domain.Please").
                "changed in the domain. "
                "Please revert all settings in your domain file (except the "
                "'responses')."
            ),
        )

    if nlu:
        nlu_data = importer.get_nlu_data()
        self._compare_or_memorize(
            fingerprint_key=FINGERPRINT_NLU,
            new_fingerprint=nlu_data.label_fingerprint(),
            error_message=(
                "Cannot finetune because NLU training data contains new labels "
                "or does not contain any examples for some known labels. "
                "Please make sure that the NLU data that you use "
                "for finetuning contains at least one example for every label "
                "(i.e. intent, action name, ...) that was included in the NLU "
                "data used for training the model which we attempt to finetune "
                "now. Moreover, you must not add labels that were not included "
                "during training before. "
            ),
        )

    # Persist the (possibly newly memorized) fingerprints for the next run.
    self.persist()
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    dry_run: bool,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
    model_to_finetune: Optional[Text] = None,
    finetuning_epoch_fraction: float = 1.0,
) -> TrainingResult:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        dry_run: If `True` then no training will be done, and the information
            about whether the training needs to be done will be printed.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.
        model_to_finetune: Optional path to a model which should be finetuned or
            a directory in case the latest trained model should be used.
        finetuning_epoch_fraction: The fraction currently specified training
            epochs in the model configuration which should be used for
            finetuning.

    Returns:
        An instance of `TrainingResult`.
    """
    # Load stories and NLU data concurrently.
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    fingerprint_comparison = model.should_retrain(
        new_fingerprint, old_model, train_path, force_training=force_training
    )

    # A dry run only reports what would need retraining and exits.
    if dry_run:
        code, texts = dry_run_result(fingerprint_comparison)
        for text in texts:
            # Non-zero code means at least one part would be retrained.
            print_warning(text) if code > 0 else print_success(text)
        return TrainingResult(code=code)

    if nlu_data.has_e2e_examples():
        rasa.shared.utils.common.mark_as_experimental_feature("end-to-end training")

    # Without stories and without pure NLU examples there is nothing to train.
    if stories.is_empty() and nlu_data.contains_no_pure_nlu_data():
        rasa.shared.utils.cli.print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return TrainingResult()

    # No stories: fall back to an NLU-only model.
    if stories.is_empty():
        rasa.shared.utils.cli.print_warning(
            "No stories present. Just a Rasa NLU model will be trained."
        )
        trained_model = await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
            model_to_finetune=model_to_finetune,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        return TrainingResult(model=trained_model)

    # We will train nlu if there are any nlu example, including from e2e stories.
    if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
        rasa.shared.utils.cli.print_warning(
            "No NLU data present. Just a Rasa Core model will be trained."
        )
        trained_model = await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
            model_to_finetune=model_to_finetune,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        return TrainingResult(model=trained_model)

    # NOTE(review): fingerprint and latest-model lookup are repeated here after
    # the early-return branches; this looks redundant with the computation at
    # the top of the function — confirm before deduplicating.
    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if not force_training:
        # Recompare, this time taking e2e examples into account.
        fingerprint_comparison = model.should_retrain(
            new_fingerprint,
            old_model,
            train_path,
            has_e2e_examples=nlu_data.has_e2e_examples(),
        )
    else:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)

    if fingerprint_comparison.is_training_required():
        # Telemetry context wraps the actual training run.
        async with telemetry.track_model_training(
            file_importer,
            model_type="rasa",
        ):
            await _do_training(
                file_importer,
                output_path=output_path,
                train_path=train_path,
                fingerprint_comparison_result=fingerprint_comparison,
                fixed_model_name=fixed_model_name,
                persist_nlu_training_data=persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                old_model_zip_path=old_model,
                model_to_finetune=model_to_finetune,
                finetuning_epoch_fraction=finetuning_epoch_fraction,
            )
        trained_model = model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )
        return TrainingResult(model=trained_model)

    # Nothing changed: reuse the previously trained model archive.
    rasa.shared.utils.cli.print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return TrainingResult(model=old_model)
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.

    Returns:
        Path of the trained model archive, or `None` if no data was given.
    """
    # Load stories and NLU data concurrently.
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    # Neither stories nor NLU data: nothing can be trained at all.
    if stories.is_empty() and nlu_data.is_empty():
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    # Only NLU data available: train an NLU-only model.
    if stories.is_empty():
        print_warning("No stories present. Just a Rasa NLU model will be trained.")
        return await _train_nlu_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            additional_arguments=nlu_additional_arguments,
        )

    # Only stories available: train a Core-only model.
    if nlu_data.is_empty():
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            file_importer,
            output=output_path,
            fixed_model_name=fixed_model_name,
            additional_arguments=core_additional_arguments,
        )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)
    # Default comparison; replaced by a real data comparison unless forced.
    fingerprint_comparison = FingerprintComparisonResult(force_training=force_training)
    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )

    if fingerprint_comparison.is_training_required():
        await _do_training(
            file_importer,
            output_path=output_path,
            train_path=train_path,
            fingerprint_comparison_result=fingerprint_comparison,
            fixed_model_name=fixed_model_name,
            persist_nlu_training_data=persist_nlu_training_data,
            core_additional_arguments=core_additional_arguments,
            nlu_additional_arguments=nlu_additional_arguments,
            old_model_zip_path=old_model,
        )
        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    # Nothing changed: reuse the previously trained model archive.
    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model
async def _train_async_internal(
    file_importer: TrainingDataImporter,
    train_path: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    persist_nlu_training_data: bool,
    core_additional_arguments: Optional[Dict] = None,
    nlu_additional_arguments: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        file_importer: `TrainingDataImporter` which supplies the training data.
        train_path: Directory in which to train the model.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        persist_nlu_training_data: `True` if the NLU training data should be
            persisted with the model.
        core_additional_arguments: Additional training parameters for core
            training.
        nlu_additional_arguments: Additional training parameters forwarded to
            training method of each NLU component.

    Returns:
        Path of the trained model archive, or `None` if nothing was trained.
    """
    # Load stories and NLU data concurrently. NOTE(review): in this fork
    # `nlu_data` appears to be a mapping of language -> training data (see the
    # `.items()` iteration below) — confirm against the importer used here.
    stories, nlu_data = await asyncio.gather(
        file_importer.get_stories(), file_importer.get_nlu_data()
    )

    # Upstream early-exit checks, disabled in this fork (the multi-language
    # skip logic in the "bf mod" section below replaces them):
    # if stories.is_empty() and nlu_data.can_train_nlu_model():
    #     print_error(
    #         "No training data given. Please provide stories and NLU data in "
    #         "order to train a Rasa model using the '--data' argument."
    #     )
    #     return
    # if stories.is_empty():
    #     print_warning("No stories present. Just a Rasa NLU model will be trained.")
    #     return await _train_nlu_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         persist_nlu_training_data=persist_nlu_training_data,
    #         additional_arguments=nlu_additional_arguments,
    #     )
    # if nlu_data.can_train_nlu_model():
    #     print_warning("No NLU data present. Just a Rasa Core model will be trained.")
    #     return await _train_core_with_validated_data(
    #         file_importer,
    #         output=output_path,
    #         fixed_model_name=fixed_model_name,
    #         additional_arguments=core_additional_arguments,
    #     )

    new_fingerprint = await model.model_fingerprint(file_importer)
    old_model = model.get_latest_model(output_path)

    if not force_training:
        fingerprint_comparison = model.should_retrain(
            new_fingerprint, old_model, train_path
        )
    else:
        fingerprint_comparison = FingerprintComparisonResult(force_training=True)

    # bf mod >
    # Expand a blanket "retrain NLU" flag into the explicit list of languages
    # found in the fingerprint's NLU config.
    if fingerprint_comparison.nlu == True:  # replace True with list of all langs
        fingerprint_comparison.nlu = list(new_fingerprint.get("nlu-config", {}).keys())
    domain = await file_importer.get_domain()
    # Core cannot be trained without both a domain and stories.
    core_untrainable = domain.is_empty() or stories.is_empty()
    # Languages that have no NLU examples cannot be trained either.
    nlu_untrainable = [l for l, d in nlu_data.items() if d.is_empty()]
    fingerprint_comparison.core = fingerprint_comparison.core and not core_untrainable
    fingerprint_comparison.nlu = [
        l for l in fingerprint_comparison.nlu if l not in nlu_untrainable
    ]

    if core_untrainable:
        print_color(
            "Skipping Core training since domain or stories are empty.",
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )

    for lang in nlu_untrainable:
        print_color(
            "No NLU data found for language <{}>, skipping training...".format(lang),
            color=rasa.shared.utils.io.bcolors.OKBLUE,
        )
    # </ bf mod

    if fingerprint_comparison.is_training_required():
        # Telemetry context wraps the actual training run.
        async with telemetry.track_model_training(file_importer, model_type="rasa"):
            await _do_training(
                file_importer,
                output_path=output_path,
                train_path=train_path,
                fingerprint_comparison_result=fingerprint_comparison,
                fixed_model_name=fixed_model_name,
                persist_nlu_training_data=persist_nlu_training_data,
                core_additional_arguments=core_additional_arguments,
                nlu_additional_arguments=nlu_additional_arguments,
                old_model_zip_path=old_model,
            )
        return model.package_model(
            fingerprint=new_fingerprint,
            output_directory=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    # Nothing changed: reuse the previously trained model archive.
    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model