def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Training NLU model...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(
        config, nlu_data_directory, _train_path, fixed_model_name="nlu"
    )
    print_color("NLU model training completed.", color=bcolors.OKBLUE)

    if train_path is None:
        # Only NLU was trained
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output,
            train_path=_train_path,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )

    return _train_path

def trained_nlu_model(request):
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)
    model_path = trainer.persist(NLU_MODEL_PATH)

    nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    new_fingerprint = model.model_fingerprint(
        NLU_DEFAULT_CONFIG_PATH, nlu_data=nlu_data
    )
    model.create_package_rasa(model_path, output_path, new_fingerprint)

    def fin():
        if os.path.exists(NLU_MODEL_PATH):
            shutil.rmtree(NLU_MODEL_PATH)
        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    request.addfinalizer(fin)

    return output_path

def _core_model_for_finetuning(
    model_to_finetune: Text,
    file_importer: TrainingDataImporter,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Agent]:
    path_to_archive = model.get_model_for_finetuning(model_to_finetune)
    if not path_to_archive:
        return None

    rasa.shared.utils.cli.print_info(
        f"Loading Core model from {path_to_archive} for finetuning...",
    )

    with model.unpack_model(path_to_archive) as unpacked:
        new_fingerprint = model.model_fingerprint(file_importer)
        old_fingerprint = model.fingerprint_from_path(unpacked)
        if not model.can_finetune(old_fingerprint, new_fingerprint, core=True):
            rasa.shared.utils.cli.print_error_and_exit(
                "Core model can not be finetuned."
            )

        config = file_importer.get_config()
        agent = Agent.load(
            unpacked,
            new_config=config,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        # Agent might be empty if no underlying Core model was found.
        if agent.domain is not None and agent.policy_ensemble is not None:
            return agent

    return None

def _nlu_model_for_finetuning(
    model_to_finetune: Text,
    file_importer: TrainingDataImporter,
    finetuning_epoch_fraction: float = 1.0,
    called_from_combined_training: bool = False,
) -> Optional[Interpreter]:
    path_to_archive = model.get_model_for_finetuning(model_to_finetune)
    if not path_to_archive:
        return None

    rasa.shared.utils.cli.print_info(
        f"Loading NLU model from {path_to_archive} for finetuning...",
    )

    with model.unpack_model(path_to_archive) as unpacked:
        _, old_nlu = model.get_model_subdirectories(unpacked)
        new_fingerprint = model.model_fingerprint(file_importer)
        old_fingerprint = model.fingerprint_from_path(unpacked)
        if not model.can_finetune(
            old_fingerprint,
            new_fingerprint,
            nlu=True,
            core=called_from_combined_training,
        ):
            rasa.shared.utils.cli.print_error_and_exit(
                "NLU model can not be finetuned."
            )

        config = file_importer.get_config()
        loaded_model_to_finetune = Interpreter.load(
            old_nlu,
            new_config=config,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        if not loaded_model_to_finetune:
            return None

    return loaded_model_to_finetune

def train_nlu(
    config: Text, nlu_data: Text, output: Text, train_path: Optional[Text]
) -> Optional["Interpreter"]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa_nlu

    _train_path = train_path or tempfile.mkdtemp()
    _, nlu_model, _ = rasa_nlu.train(
        config, nlu_data, _train_path, project="", fixed_model_name="nlu"
    )

    if not train_path:
        nlu_data = data.get_nlu_directory(nlu_data)
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa NLU model is trained and saved at '{}'."
            "".format(output_path)
        )

    return nlu_model

async def train(request):
    # if set will not generate a model name but use the passed one
    model_name = request.args.get("model", None)

    try:
        model_config, data_dict = extract_data_and_config(request)
    except Exception as e:
        logger.debug(traceback.format_exc())
        raise ErrorResponse(
            500,
            "ServerError",
            "An unexpected error occurred.",
            details={"error": str(e)},
        )

    data_file = dump_to_data_file(data_dict)
    config_file = dump_to_data_file(model_config, "_config")

    try:
        path_to_model = await data_router.start_train_process(
            data_file, RasaNLUModelConfig(model_config), model_name
        )

        # store trained model as tar.gz file
        output_path = create_model_path(model_name, path_to_model)

        nlu_data = data.get_nlu_directory(data_file)
        new_fingerprint = model.model_fingerprint(config_file, nlu_data=nlu_data)
        model.create_package_rasa(path_to_model, output_path, new_fingerprint)

        logger.info("Rasa NLU model trained and persisted to '{}'.".format(output_path))

        await data_router.load_model(output_path)

        return await response.file(output_path)
    except MaxWorkerProcessError as e:
        raise ErrorResponse(
            403,
            "NoFreeProcess",
            "No process available for training.",
            details={"error": str(e)},
        )
    except InvalidModelError as e:
        raise ErrorResponse(
            404,
            "ModelNotFound",
            "Model '{}' not found.".format(model_name),
            details={"error": str(e)},
        )
    except TrainingException as e:
        logger.debug(traceback.format_exc())
        raise ErrorResponse(
            500,
            "ServerError",
            "An unexpected error occurred.",
            details={"error": str(e)},
        )

def test_can_finetune_min_version(
    project: Text,
    monkeypatch: MonkeyPatch,
    old_model_version: Text,
    min_compatible_version: Text,
    can_tune: bool,
):
    importer = _project_files(project)

    monkeypatch.setattr(
        rasa.constants, "MINIMUM_COMPATIBLE_VERSION", min_compatible_version
    )
    monkeypatch.setattr(rasa, "__version__", old_model_version)
    old_fingerprint = model_fingerprint(importer)

    new_fingerprint = model_fingerprint(importer)

    with mock.patch("rasa.model.MINIMUM_COMPATIBLE_VERSION", min_compatible_version):
        assert can_finetune(old_fingerprint, new_fingerprint) == can_tune

def test_fingerprinting_additional_action(project: Text):
    importer = _project_files(project)

    old_fingerprint = model_fingerprint(importer)
    old_domain = importer.get_domain()

    domain_with_new_action = old_domain.as_dict()
    domain_with_new_action[KEY_RESPONSES]["utter_new"] = [{"text": "hi"}]
    domain_with_new_action = Domain.from_dict(domain_with_new_action)

    importer.get_domain = lambda: domain_with_new_action

    new_fingerprint = model_fingerprint(importer)

    assert (
        old_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY]
        != new_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY]
    )
    assert old_fingerprint[FINGERPRINT_NLG_KEY] != new_fingerprint[FINGERPRINT_NLG_KEY]

def _train_nlu_with_validated_data(
    config: Dict[Text, Text],
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    retrain_nlu: Union[bool, List[Text]] = True,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import re

    import rasa.nlu.train
    from rasa.nlu import config as cfg_loader

    with ExitStack() as stack:
        models = {}

        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # Extract the two-letter language code preceding the file extension,
        # e.g. "nlu_en.md" -> "en".
        pattern = r"(\w\w)*(?=\.)"
        for file in os.listdir(nlu_data_directory):
            lang = re.search(pattern, file).groups()[0]
            if (isinstance(retrain_nlu, bool) and retrain_nlu) or lang in retrain_nlu:
                nlu_file_path = os.path.join(nlu_data_directory, file)
                print_color(
                    "Start training {} NLU model ...".format(lang),
                    color=bcolors.OKBLUE,
                )
                nlu_config = cfg_loader.load(config[lang])
                nlu_config.language = lang
                _, models[lang], _ = rasa.nlu.train(
                    nlu_config,
                    nlu_file_path,
                    _train_path,
                    fixed_model_name="nlu-{}".format(lang),
                )
            else:
                print_color(
                    "{} NLU data didn't change, skipping training...".format(lang),
                    color=bcolors.OKBLUE,
                )

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )
            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path

async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies"""
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:
                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file]
                )

                config_name = os.path.splitext(os.path.basename(policy_config))[0]
                logging.info(
                    "Starting to train {} round {}/{}"
                    " with {}% exclusion"
                    "".format(
                        config_name, current_run, len(exclusion_percentages), percentage
                    )
                )

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    _, new_fingerprint = await asyncio.gather(
                        train(
                            domain,
                            file_importer,
                            train_path,
                            policy_config=policy_config,
                            exclusion_percentage=percentage,
                            additional_arguments=additional_arguments,
                            dump_stories=dump_stories,
                        ),
                        model.model_fingerprint(file_importer),
                    )

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = config_name + PERCENTAGE_KEY + str(percentage)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )

def train_nlu(
    config: Text, nlu_data: Text, output: Text, train_path: Optional[Text]
) -> Optional[Text]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    if not train_path:
        # training NLU only hence the training files still have to be selected
        skill_imports = SkillSelector.load(config)
        nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)
    else:
        nlu_data_directory = nlu_data

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(
        config, nlu_data_directory, _train_path, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path

def test_fingerprinting_changed_response_text(project: Text):
    importer = _project_files(project)

    old_fingerprint = model_fingerprint(importer)
    old_domain = importer.get_domain()

    # Change NLG content but keep actions the same
    domain_with_changed_nlg = old_domain.as_dict()
    domain_with_changed_nlg[KEY_RESPONSES]["utter_greet"].append({"text": "hi"})
    domain_with_changed_nlg = Domain.from_dict(domain_with_changed_nlg)

    importer.get_domain = lambda: domain_with_changed_nlg

    new_fingerprint = model_fingerprint(importer)

    assert (
        old_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY]
        == new_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY]
    )
    assert old_fingerprint[FINGERPRINT_NLG_KEY] != new_fingerprint[FINGERPRINT_NLG_KEY]

async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""
    import rasa.core.train

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(
            output, prefix="core-", fixed_name=fixed_model_name
        )
        new_fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path

def test_create_fingerprint_from_invalid_paths(project, project_files):
    project_files = _project_files(project, *project_files)
    expected = _fingerprint(
        [], [], rasa_version=rasa.__version__, stories=[], nlu=[]
    )
    actual = model_fingerprint(**project_files)

    assert actual[FINGERPRINT_TRAINED_AT_KEY] is not None
    del actual[FINGERPRINT_TRAINED_AT_KEY]
    del expected[FINGERPRINT_TRAINED_AT_KEY]

    assert actual == expected

async def train_core_async(
    domain: Text,
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    core_model = await rasa.core.train(
        domain_file=domain,
        stories_file=data.get_core_directory(stories),
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )

    if not train_path:
        # Only Core was trained.
        stories = data.get_core_directory(stories)
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(config, domain, stories=stories)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
        )

    return core_model

async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""
    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        await rasa.core.train(
            domain_file=domain,
            stories_file=story_directory,
            output_path=os.path.join(_train_path, "core"),
            policy_config=config,
            kwargs=kwargs,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained.
            new_fingerprint = model.model_fingerprint(
                config, domain, stories=story_directory
            )
            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path

def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model."
        )
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(
        config, nlu_data_directory, _train_path, fixed_model_name="nlu"
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only NLU was trained
        output_path = create_output_path(
            output, prefix="nlu-", fixed_name=fixed_model_name
        )
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path

async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""
    import rasa.core.train

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path is None:
        # Only Core was trained.
        new_fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output,
            train_path=_train_path,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
            uncompress=uncompress,
        )

    return _train_path

def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""
    import rasa.nlu.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _, nlu_model, _ = rasa.nlu.train(
            config, nlu_data_directory, _train_path, fixed_model_name="nlu"
        )
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )
            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path

def test_should_not_retrain_core(
    domain_path: Text, tmp_path: Path, stack_config_path: Text
):
    # Don't use `stories_path` as checkpoints currently break fingerprinting
    story_file = tmp_path / "simple_story.yml"
    story_file.write_text(
        """
stories:
- story: test_story
  steps:
  - intent: greet
  - action: utter_greet
"""
    )
    trained_model = train_core(
        domain_path, stack_config_path, str(story_file), str(tmp_path)
    )

    importer = TrainingDataImporter.load_from_config(
        stack_config_path, domain_path, training_data_paths=[str(story_file)]
    )
    new_fingerprint = model.model_fingerprint(importer)

    result = model.should_retrain(new_fingerprint, trained_model, tmp_path)

    assert not result.should_retrain_core()

def test_rasa_packaging(trained_model, project, use_fingerprint):
    unpacked_model_path = get_model(trained_model)

    os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
    if use_fingerprint:
        fingerprint = model_fingerprint(**_project_files(project))
    else:
        fingerprint = None

    tempdir = tempfile.mkdtemp()
    output_path = os.path.join(tempdir, "test.tar.gz")

    create_package_rasa(unpacked_model_path, output_path, fingerprint)

    unpacked = get_model(output_path)

    assert (
        os.path.exists(os.path.join(unpacked, FINGERPRINT_FILE_PATH)) == use_fingerprint
    )
    assert os.path.exists(os.path.join(unpacked, "core"))
    assert os.path.exists(os.path.join(unpacked, "nlu"))

    assert not os.path.exists(unpacked_model_path)

async def train_core_async(
    domain: Text, config: Text, stories: Text, output: Text, train_path: Optional[Text]
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa_core.train

    # normal (not compare) training
    core_model = await rasa_core.train(
        domain_file=domain,
        stories_file=stories,
        output_path=os.path.join(train_path, "core"),
        policy_config=config,
    )

    if not train_path:
        # Only Core was trained.
        stories = data.get_core_directory(stories)
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(config, domain, stories=stories)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa Core model is trained and saved at '{}'."
            "".format(output_path)
        )

    return core_model

def test_rasa_packaging(
    trained_rasa_model: Text, project: Text, use_fingerprint: bool, tmp_path: Path
):
    unpacked_model_path = get_model(trained_rasa_model)

    os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
    if use_fingerprint:
        fingerprint = model_fingerprint(_project_files(project))
    else:
        fingerprint = None

    output_path = str(tmp_path / "test.tar.gz")

    create_package_rasa(unpacked_model_path, output_path, fingerprint)

    unpacked = get_model(output_path)

    assert (
        os.path.exists(os.path.join(unpacked, FINGERPRINT_FILE_PATH)) == use_fingerprint
    )
    assert os.path.exists(os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME))
    assert os.path.exists(os.path.join(unpacked, "nlu"))

    assert not os.path.exists(unpacked_model_path)

def test_create_fingerprint_from_paths(project):
    project_files = _project_files(project)

    assert model_fingerprint(**project_files)

async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.

    Returns:
        Path of the trained model archive.

    """
    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(domain, config, story_directory, output, train_path)
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print(
            "NLU data / configuration did not change. "
            "No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print("Train path: '{}'.".format(train_path))

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print(
            "Nothing changed. You can use the old model stored at {}"
            "".format(os.path.abspath(old_model))
        )
        return old_model

async def _train_async_internal(
    domain: Union[Domain, Text],
    config: Text,
    train_path: Text,
    nlu_data_directory: Text,
    story_directory: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        train_path: Directory in which to train the model.
        nlu_data_directory: Path to NLU training files.
        story_directory: Path to Core training files.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
        )

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model, train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model

async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return train_nlu(config, nlu_data_directory, output, None)

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await train_core_async(
            domain, config, story_directory, output, None, kwargs
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(
            domain, config, story_directory, output, train_path, kwargs
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print("NLU data / configuration did not change. No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )
        return old_model

async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output_path)
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print(
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output_path = create_output_path(output_path, fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa model is trained and saved at '{}'.".format(output_path)
        )

        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )
        return old_model

async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e)
        )
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports
    )
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model, train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success(
        "Nothing changed. You can use the old model stored at '{}'."
        "".format(os.path.abspath(old_model))
    )
    return old_model

async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary directory,
                    otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    _train_path = train_path or tempfile.mkdtemp()

    if isinstance(domain, str) or not train_path:
        skill_imports = SkillSelector.load(config)
        domain = Domain.load(domain, skill_imports)
        story_directory = data.get_core_directory(stories, skill_imports)
    else:
        story_directory = stories

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model."
        )
        return

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(
            config, domain, stories=story_directory
        )
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(output_path)
        )

        return output_path

    return _train_path