def train_rasa():
    """Generate NLU data with chatito, convert it to markdown and train.

    All paths are resolved relative to the directory containing this file:

    1. Run ``npx chatito --format rasa data/`` with ``../rasa/chatito`` as
       the working directory.
    2. Convert ``../rasa/chatito/rasa_dataset_training.json`` into
       ``../rasa/chatito/nlu.md`` via ``convert_training_data``.
    3. Call ``train`` with the domain, config and three training files,
       passing ``../rasa/models`` as ``output``.
    """
    print('TRAIN RASA')
    here = os.path.dirname(__file__)

    def rel(path):
        # Resolve *path* against the directory containing this file.
        return os.path.join(here, path)

    # NOTE(review): presumably `call` is subprocess.call -- confirm. With
    # shell=True the subprocess docs recommend a plain string rather than a
    # list; the list form is kept here to leave behavior untouched.
    # The exit status was never inspected, so it is no longer stored.
    cmd = ['npx chatito --format rasa data/']
    call(cmd, shell=True, cwd=rel('../rasa/chatito'))

    convert_training_data(
        data_file=rel('../rasa/chatito/rasa_dataset_training.json'),
        out_file=rel('../rasa/chatito/nlu.md'),
        output_format="md",
        language="",
    )

    train(
        domain=rel('../rasa/domain.yml'),
        config=rel('../rasa/config.yml'),
        training_files=[
            rel('../rasa/data/nlu.md'),
            rel('../rasa/data/stories.md'),
            rel('../rasa/chatito/nlu.md'),
        ],
        output=rel('../rasa/models'),
    )
def test_train_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
):
    """Check that neither a forced nor a repeated training leaves temp files."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    # Point the tempfile module's default directory at the sandbox so that
    # leftovers can be counted there.
    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    output = str(models_dir)
    training_files = (default_stories_file, default_nlu_data)

    # First run: training is forced.
    train(
        default_domain_path,
        default_stack_config,
        list(training_files),
        output=output,
        force_training=True,
    )
    assert count_temp_rasa_files(tempfile.tempdir) == 0

    # Second run with identical inputs. Nothing has changed, so this shouldn't
    # train a new model, and it also shouldn't create any temp files.
    train(
        default_domain_path,
        default_stack_config,
        list(training_files),
        output=output,
    )
    assert count_temp_rasa_files(tempfile.tempdir) == 0
def test_train_temp_files(
    move_tempdir,
    default_domain_path,
    default_stories_file,
    default_stack_config,
    default_nlu_data,
):
    """Check that ``TEST_TEMP`` stays empty after a forced and a repeated run."""
    training_files = (default_stories_file, default_nlu_data)

    # First run: training is forced.
    train(
        default_domain_path,
        default_stack_config,
        list(training_files),
        force_training=True,
    )
    assert not os.listdir(TEST_TEMP)

    # Second run with identical inputs. Nothing has changed, so this shouldn't
    # train a new model, and it also shouldn't create any temp files.
    train(
        default_domain_path,
        default_stack_config,
        list(training_files),
    )
    assert not os.listdir(TEST_TEMP)
def test_trained_interpreter_passed_to_core_training(
    monkeypatch: MonkeyPatch, tmp_path: Path, unpacked_trained_moodbot_path: Text
):
    """Check that Core training receives a ``RasaNLUInterpreter``.

    NLU training is replaced by a mock that returns the fixture's unpacked
    model path, and Core training is replaced by a mock whose recorded call
    is inspected for the ``interpreter`` keyword argument.

    The mocks are exposed through local ``async def`` wrappers instead of
    ``asyncio.coroutine``, which was deprecated in Python 3.8 and removed in
    Python 3.11.
    """
    # Skip actual NLU training and return trained interpreter path from fixture
    _train_nlu_with_validated_data = Mock(return_value=unpacked_trained_moodbot_path)

    async def _fake_train_nlu(*args, **kwargs):
        # Awaitable stand-in that delegates to the synchronous mock.
        return _train_nlu_with_validated_data(*args, **kwargs)

    # Patching is a bit more complicated as we have a module `train` and a
    # function with the same name, hence the lookup through `sys.modules`.
    monkeypatch.setattr(
        sys.modules["rasa.train"],
        "_train_nlu_with_validated_data",
        _fake_train_nlu,
    )

    # Mock the actual Core training
    _train_core = Mock()

    async def _fake_train_core(*args, **kwargs):
        # Awaitable stand-in that records the call on `_train_core`.
        return _train_core(*args, **kwargs)

    monkeypatch.setattr(rasa.core, "train", _fake_train_core)

    train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_CONFIG_PATH,
        [DEFAULT_STORIES_FILE, DEFAULT_NLU_DATA],
        str(tmp_path),
    )

    _train_core.assert_called_once()
    _, _, kwargs = _train_core.mock_calls[0]
    assert isinstance(kwargs["interpreter"], RasaNLUInterpreter)
def test_interpreter_of_old_model_passed_to_core_training(
    monkeypatch: MonkeyPatch, tmp_path: Path, trained_moodbot_path: Text
):
    """Check that Core training gets a ``RasaNLUInterpreter`` without NLU retraining.

    ``should_retrain_nlu`` is patched to return ``False`` and
    ``get_latest_model`` is patched to return the fixture's trained model
    path. Core training is replaced by a mock whose recorded call is
    inspected for the ``interpreter`` keyword argument.

    The mock is exposed through a local ``async def`` wrapper instead of
    ``asyncio.coroutine``, which was deprecated in Python 3.8 and removed in
    Python 3.11.
    """
    # NLU isn't retrained
    monkeypatch.setattr(
        rasa.model.FingerprintComparisonResult,
        rasa.model.FingerprintComparisonResult.should_retrain_nlu.__name__,
        lambda _: False,
    )

    # An old model with an interpreter exists
    monkeypatch.setattr(
        rasa.model, rasa.model.get_latest_model.__name__, lambda _: trained_moodbot_path
    )

    # Mock the actual Core training
    _train_core = Mock()

    async def _fake_train_core(*args, **kwargs):
        # Awaitable stand-in that records the call on `_train_core`.
        return _train_core(*args, **kwargs)

    monkeypatch.setattr(rasa.core, "train", _fake_train_core)

    train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_CONFIG_PATH,
        [DEFAULT_STORIES_FILE, DEFAULT_NLU_DATA],
        str(tmp_path),
    )

    _train_core.assert_called_once()
    _, _, kwargs = _train_core.mock_calls[0]
    assert isinstance(kwargs["interpreter"], RasaNLUInterpreter)
def test_e2e_gives_experimental_warning(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    default_domain_path: Text,
    default_stack_config: Text,
    default_e2e_stories_file: Text,
    default_nlu_data: Text,
    caplog: LogCaptureFixture,
):
    """Check that training on e2e stories logs the "experimental" warning."""
    # Both training steps are mocked; only the logged records matter here.
    mock_nlu_training(monkeypatch)
    mock_core_training(monkeypatch)

    model_output = new_model_path_in_same_dir(trained_e2e_model)
    with caplog.at_level(logging.WARNING):
        train(
            default_domain_path,
            default_stack_config,
            [default_e2e_stories_file, default_nlu_data],
            output=model_output,
        )

    expected = "The end-to-end training is currently experimental"
    assert any(expected in record.message for record in caplog.records)
def test_model_finetuning_new_domain_label_stops_all_training(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
):
    """Check that finetuning with an extra domain intent exits before training.

    The moodbot domain is copied with one additional intent and used to
    finetune the already trained moodbot model. ``train`` is expected to raise
    ``SystemExit`` without calling Core or NLU training.
    """
    core_training_mock = AsyncMock()
    nlu_training_mock = AsyncMock()
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_mock)
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_mock)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Write a copy of the moodbot domain that has one extra intent.
    domain = rasa.shared.utils.io.read_yaml_file("examples/moodbot/domain.yml")
    domain["intents"].append("a_new_one")
    changed_domain_path = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain, changed_domain_path)

    moodbot_data = [
        "examples/moodbot/data/stories.yml",
        "examples/moodbot/data/nlu.yml",
    ]
    with pytest.raises(SystemExit):
        train(
            domain=str(changed_domain_path),
            config="examples/moodbot/config.yml",
            training_files=moodbot_data,
            output=str(models_dir),
            model_to_finetune=trained_moodbot_path,
        )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()
def test_model_finetuning_with_invalid_model(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """Check that finetuning from an unusable model exits and explains why.

    ``train`` is expected to raise ``SystemExit`` without calling Core or NLU
    training, and to print a message saying that no NLU model was found.
    """
    nlu_training_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_mock)

    core_training_mock = AsyncMock()
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_mock)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    with pytest.raises(SystemExit):
        train(
            default_domain_path,
            default_stack_config,
            [default_stories_file, default_nlu_data],
            output=str(models_dir),
            force_training=True,
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()

    # The reason for exiting is reported on stdout.
    captured_stdout = capsys.readouterr().out
    assert "No NLU model for finetuning found" in captured_stdout
def train_rasa():
    """Generate the chatito dataset and/or train the RASA model.

    ``ARGS.generate`` enables the chatito run followed by the conversion of
    its JSON output to markdown; ``ARGS.train`` enables the ``train`` call.
    All paths are resolved relative to the directory containing this file.
    """
    print('TRAIN RASA')
    here = os.path.dirname(__file__)

    def rel(path):
        # Resolve *path* against the directory containing this file.
        return os.path.join(here, path)

    if ARGS.generate:
        # The command is handed to the shell and run from the chatito folder.
        chatito_cmd = ['npx chatito --format rasa data/']
        call(chatito_cmd, shell=True, cwd=rel('../rasa/chatito'))

        print('CONVERT TO RASA MD')
        convert_training_data(
            data_file=rel('../rasa/chatito/rasa_dataset_training.json'),
            out_file=rel('../rasa/chatito/nlu.md'),
            output_format="md",
            language="",
        )
        print('DONE CONVERT TO RASA MD')

    if ARGS.train:
        nlu_and_story_files = [
            rel('../rasa/data/nlu.md'),
            rel('../rasa/data/stories.md'),
            rel('../rasa/chatito/nlu.md'),
        ]
        train(
            domain=rel('../rasa/domain.yml'),
            config=rel('../rasa/config.yml'),
            training_files=nlu_and_story_files,
            output=rel('../rasa/models'),
        )
def train_nlu_core_model() -> None:
    """Train from the default domain, config and data paths.

    The model name is fixed to ``restaurant_rasa_model`` and
    ``force_training`` is passed as ``False``.
    """
    training_options = {
        "domain": DEFAULT_DOMAIN_PATH,
        "config": DEFAULT_CONFIG_PATH,
        "training_files": DEFAULT_DATA_PATH,
        "fixed_model_name": "restaurant_rasa_model",
        "force_training": False,
    }
    train(**training_options)
def test_retrains_only_core_if_new_e2e_example_seen_before(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    default_domain_path: Text,
    default_stack_config: Text,
    default_e2e_stories_file: Text,
    default_nlu_data: Text,
    tmp_path: Path,
):
    """Check that an extra user step in an e2e story retrains Core only.

    A copy of the e2e stories file gets one more ``user`` step appended to
    its second story. Training with that copy next to the existing model is
    expected to call Core training once and NLU training not at all.
    """
    stories_yaml = rasa.shared.utils.io.read_yaml_file(default_e2e_stories_file)
    stories_yaml["stories"][1]["steps"].append({"user": "******"})

    # The original bound this name twice in one chained assignment
    # (`new_stories_file = new_stories_file = ...`); once is enough.
    new_stories_file = tmp_path / "new_stories.yml"
    rasa.shared.utils.io.write_yaml(stories_yaml, new_stories_file)

    mocked_nlu_training = mock_nlu_training(monkeypatch)
    mocked_core_training = mock_core_training(monkeypatch)

    new_model_path = train(
        default_domain_path,
        default_stack_config,
        [new_stories_file, default_nlu_data],
        output=new_model_path_in_same_dir(trained_e2e_model),
    ).model
    # Remove the model written by this run.
    os.remove(new_model_path)

    mocked_core_training.assert_called_once()
    mocked_nlu_training.assert_not_called()
def train_rasa(pipeline_name):
    """Train a model with the selected pipeline and return the elapsed time.

    :param pipeline_name: either the module-level ``config_file`` value, in
        which case that file is used as the config, or the name of a file
        under ``./pipelines/``. It is also used as ``fixed_model_name``.
    :return: ``timer.took * 0.001`` converted to ``str``
        (presumably milliseconds scaled to seconds -- TODO confirm against
        the ``CodeTimer`` implementation).
    """
    if pipeline_name == config_file:
        # This branch used to leave `pipeline_file` unbound, so the `train`
        # call below raised UnboundLocalError for the standard config.
        pipeline_file = config_file
    else:
        pipeline_file = './pipelines/' + pipeline_name

    with CodeTimer() as timer:
        # No `output` is passed, so `train` uses its own default location.
        train(
            domain=domain_file,
            config=pipeline_file,  # standard config or one of the pipelines
            training_files=nlu_data,
            force_training=True,
            fixed_model_name=pipeline_name,
        )

    # Named `elapsed` rather than `time` to avoid shadowing the stdlib module.
    elapsed = str(timer.took * 0.001)
    return elapsed
def test_new_nlu_data_does_not_retrain_core_if_there_are_no_e2e_stories(
    self,
    monkeypatch: MonkeyPatch,
    trained_simple_rasa_model: Text,
    default_domain_path: Text,
    default_stack_config: Text,
    simple_stories_file: Text,
    default_nlu_data: Text,
    tmp_path: Path,
):
    """Check that a new NLU example retrains NLU only when stories are not e2e.

    A copy of the NLU data gets one more example appended to its first entry.
    Training with that copy next to the existing model is expected to call
    NLU training once and Core training not at all.
    """
    # Write the modified NLU data to a fresh file.
    changed_nlu = rasa.shared.utils.io.read_yaml_file(default_nlu_data)
    changed_nlu["nlu"][0]["examples"] += "- surprise!\n"
    changed_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(changed_nlu, changed_nlu_file)

    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    training_result = train(
        default_domain_path,
        default_stack_config,
        [simple_stories_file, changed_nlu_file],
        output=new_model_path_in_same_dir(trained_simple_rasa_model),
    )
    # Remove the model written by this run.
    os.remove(training_result.model)

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_called_once()
def train_model_for_bot(bot: str):
    """
    Loads the bot's data from mongo into individual files and trains a model.

    The directory written by ``Utility.write_training_data`` is deleted once
    training has finished, including when ``train`` raises.

    :param bot: bot id
    :return: the value returned by ``train`` (the model path, per the
        original documentation)
    :raises AppException: if the bot has no NLU training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")

    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)
    rules = processor.get_rules_for_training(bot)

    directory = Utility.write_training_data(nlu, domain, config, stories, rules)
    output = os.path.join(DEFAULT_MODELS_PATH, bot)

    # try/finally so a failed training run no longer leaves the directory
    # behind. The explicit `del` statements that preceded `return` were
    # dropped: the locals are released when the function returns anyway.
    try:
        return train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        Utility.delete_directory(directory)
def train_model_for_bot(bot: str):
    """
    Trains the rasa model using the bot data that is loaded from Mongo.

    NLU data, domain, stories and config are serialised and handed to
    ``Utility.save_files``; the directory it returns is deleted once training
    has finished, including when ``train`` raises.

    :param bot: bot id
    :return: the value returned by ``train``
    :raises AppException: if the bot has no NLU training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")

    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)

    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )
    output = os.path.join(DEFAULT_MODELS_PATH, bot)

    # try/finally so a failed training run no longer leaves the directory
    # behind.
    try:
        return train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        Utility.delete_directory(directory)
def test_nlu_and_core_trained_if_no_nlu_data_but_e2e_stories(
    self,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stack_config: Text,
    default_e2e_stories_file: Text,
    tmp_path: Path,
):
    """Check that e2e stories alone trigger both NLU and Core training."""
    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    # Only the e2e stories file is passed; there is no separate NLU data.
    only_e2e_stories = [default_e2e_stories_file]
    model_dir = self.make_tmp_model_dir(tmp_path)

    train(
        default_domain_path,
        default_stack_config,
        only_e2e_stories,
        output=model_dir,
    )

    core_training_mock.assert_called_once()
    nlu_training_mock.assert_called_once()
def test_models_not_retrained_if_no_new_data(
    self,
    monkeypatch: MonkeyPatch,
    trained_e2e_model: Text,
    default_domain_path: Text,
    default_stack_config: Text,
    default_e2e_stories_file: Text,
    default_nlu_data: Text,
):
    """Check that training again on unchanged data calls neither trainer."""
    nlu_training_mock = mock_nlu_training(monkeypatch)
    core_training_mock = mock_core_training(monkeypatch)

    # The output sits in the same directory as the already trained model.
    model_output = new_model_path_in_same_dir(trained_e2e_model)
    train(
        default_domain_path,
        default_stack_config,
        [default_e2e_stories_file, default_nlu_data],
        output=model_output,
    )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()
def test_model_finetuning(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
    trained_rasa_model: Text,
    use_latest_model: bool,
):
    """Check that finetuning hands loaded models to Core and NLU training.

    Both training functions are wrapped, not replaced, so real training runs.
    Core is expected to receive an ``Agent`` and NLU an ``Interpreter`` as
    ``model_to_finetune``.
    """
    nlu_training_spy = Mock(wraps=rasa.nlu.train)
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_spy)

    core_training_spy = Mock(wraps=rasa.core.train)
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_spy)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # With `use_latest_model`, pass the model's parent directory and not the
    # model file itself.
    if use_latest_model:
        trained_rasa_model = str(Path(trained_rasa_model).parent)

    train(
        default_domain_path,
        default_stack_config,
        [default_stories_file, default_nlu_data],
        output=str(models_dir),
        force_training=True,
        model_to_finetune=trained_rasa_model,
        finetuning_epoch_fraction=0.1,
    )

    core_training_spy.assert_called_once()
    core_kwargs = core_training_spy.call_args[1]
    assert isinstance(core_kwargs["model_to_finetune"], Agent)

    nlu_training_spy.assert_called_once()
    nlu_kwargs = nlu_training_spy.call_args[1]
    assert isinstance(nlu_kwargs["model_to_finetune"], Interpreter)
def train_model_for_bot(bot: str):
    """
    Loads the bot's data from mongo into individual files and trains a model.

    NLU data, domain, stories and config are serialised and handed to
    ``Utility.save_files``; the directory it returns is deleted once training
    has finished, including when ``train`` raises.

    :param bot: bot id
    :return: the value returned by ``train`` (the model path, per the
        original documentation)
    :raises AppException: if the bot has no NLU training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")

    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)

    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )
    output = os.path.join(DEFAULT_MODELS_PATH, bot)

    # try/finally so a failed training run no longer leaves the directory
    # behind. The explicit `del` statements that preceded `return` were
    # dropped: the locals are released when the function returns anyway.
    try:
        return train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        Utility.delete_directory(directory)