def test_model_finetuning_nlu_new_label_to_domain_only(
    tmp_path: Path, monkeypatch: MonkeyPatch, trained_nlu_moodbot_path: Text,
):
    """Finetuning succeeds when a new intent appears only in the domain."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Extend the moodbot domain with an intent that has no NLU examples yet.
    domain = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/domain.yml")
    domain["intents"].append("a_new_one")
    updated_domain_file = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain, updated_domain_file)

    train_nlu(
        "data/test_moodbot/config.yml",
        "data/test_moodbot/data/nlu.yml",
        domain=str(updated_domain_file),
        output=str(models_dir),
        model_to_finetune=trained_nlu_moodbot_path,
    )

    nlu_training_spy.assert_called()
def test_model_finetuning_nlu_new_label_already_in_domain(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_rasa_model: Text,
    nlu_data_path: Text,
    config_path: Text,
    domain_path: Text,
):
    """Finetuning aborts when the NLU data gains an intent the old model never saw."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    nlu_data = rasa.shared.utils.io.read_yaml_file(nlu_data_path)
    # This intent exists in `domain_path` but not yet in the nlu data
    nlu_data["nlu"].append({"intent": "why", "examples": "whyy??"})
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    with pytest.raises(SystemExit):
        train_nlu(
            config_path,
            str(updated_nlu_file),
            domain=domain_path,
            output=str(models_dir),
            model_to_finetune=trained_rasa_model,
        )

    nlu_training_spy.assert_not_called()
def test_model_finetuning_with_invalid_model_nlu(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    domain_path: Text,
    stack_config_path: Text,
    nlu_data_path: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """An unusable base model aborts finetuning with a clear console message."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    with pytest.raises(SystemExit):
        train_nlu(
            stack_config_path,
            nlu_data_path,
            domain=domain_path,
            output=str(models_dir),
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    nlu_training_spy.assert_not_called()
    assert "No NLU model for finetuning found" in capsys.readouterr().out
def test_model_finetuning_nlu_new_entity(
    tmp_path: Path, monkeypatch: MonkeyPatch, trained_nlu_moodbot_path: Text,
):
    """Finetuning aborts when the NLU data introduces a brand-new entity."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Rewrite the last example to annotate an entity unknown to the old model.
    nlu_data = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/data/nlu.yml")
    nlu_data["nlu"][-1]["examples"] = "-[blah](something)"
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    with pytest.raises(SystemExit):
        train_nlu(
            "data/test_moodbot/config.yml",
            str(updated_nlu_file),
            domain="data/test_moodbot/domain.yml",
            output=str(models_dir),
            model_to_finetune=trained_nlu_moodbot_path,
        )

    nlu_training_spy.assert_not_called()
def test_train_nlu_autoconfig(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    stack_config_path: Text,
    nlu_data_path: Text,
):
    """`train_nlu` resolves its configuration via autoconfig with the NLU training type."""
    monkeypatch.setattr(tempfile, "tempdir", tmp_path)

    # mock function that returns configuration
    get_configuration_spy = Mock()
    monkeypatch.setattr(autoconfig, "get_configuration", get_configuration_spy)
    # Skip the actual training work; only config resolution is under test.
    monkeypatch.setattr(
        rasa.model_training, "_train_nlu_with_validated_data", AsyncMock()
    )

    # do training
    train_nlu(
        stack_config_path, nlu_data_path, output="test_train_nlu_temp_files_models",
    )

    get_configuration_spy.assert_called_once()
    _, call_args, _ = get_configuration_spy.mock_calls[0]
    assert call_args[1] == autoconfig.TrainingType.NLU
def test_model_finetuning_nlu_with_default_epochs(
    tmp_path: Path, monkeypatch: MonkeyPatch, trained_nlu_moodbot_path: Text,
):
    """Without explicit epochs, finetuning scales DIET's default epoch count."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Providing a new config with no epochs will mean the default amount are used
    # and then scaled by `finetuning_epoch_fraction`.
    config = rasa.shared.utils.io.read_yaml_file("data/test_moodbot/config.yml")
    del config["pipeline"][-1][EPOCHS]
    updated_config_file = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, updated_config_file)

    train_nlu(
        str(updated_config_file),
        "data/test_moodbot/data/nlu.yml",
        output=str(models_dir),
        model_to_finetune=trained_nlu_moodbot_path,
        finetuning_epoch_fraction=0.1,
    )

    nlu_training_spy.assert_called_once()
    _, nlu_train_kwargs = nlu_training_spy.call_args
    model_to_finetune = nlu_train_kwargs["model_to_finetune"]
    diet_metadata = model_to_finetune.model_metadata.metadata["pipeline"][-1]
    assert diet_metadata["name"] == "DIETClassifier"
    assert diet_metadata[EPOCHS] == DIETClassifier.defaults[EPOCHS] * 0.1
def test_model_finetuning_nlu_new_label(
    tmp_path: Path, monkeypatch: MonkeyPatch, trained_nlu_moodbot_path: Text,
):
    """Finetuning aborts when the NLU data introduces an intent unknown to the old model."""
    nlu_training_spy = mock_nlu_training(monkeypatch)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    nlu_data = rasa.shared.utils.io.read_yaml_file("examples/moodbot/data/nlu.yml")
    nlu_data["nlu"].append({"intent": "a_new_one", "examples": "-blah"})
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    with pytest.raises(SystemExit):
        train_nlu(
            "examples/moodbot/config.yml",
            str(updated_nlu_file),
            domain="examples/moodbot/domain.yml",
            output=str(models_dir),
            model_to_finetune=trained_nlu_moodbot_path,
        )

    nlu_training_spy.assert_not_called()
def test_model_finetuning_nlu(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
    use_latest_model: bool,
):
    """End-to-end check that NLU finetuning loads the old model and scales epochs.

    Verifies that `train_nlu(..., model_to_finetune=...)` passes
    `should_finetune=True` down through `Interpreter.create` and
    `DIETClassifier.load`, and that the epochs in the loaded model metadata
    equal the new config's epochs (10) times `finetuning_epoch_fraction` (0.2).
    """
    mocked_nlu_training = mock_nlu_training(monkeypatch)

    # Wrap (not replace) the real implementations so training still runs
    # while we can inspect the call arguments afterwards.
    mock_interpreter_create = Mock(wraps=Interpreter.create)
    monkeypatch.setattr(Interpreter, "create", mock_interpreter_create)

    mock_DIET_load = Mock(wraps=DIETClassifier.load)
    monkeypatch.setattr(DIETClassifier, "load", mock_DIET_load)

    (tmp_path / "models").mkdir()
    output = str(tmp_path / "models")

    # Parametrized: point at the models directory instead of a concrete
    # archive so the latest model is picked up automatically.
    if use_latest_model:
        trained_nlu_moodbot_path = str(Path(trained_nlu_moodbot_path).parent)

    # Typically models will be fine-tuned with a smaller number of epochs than training
    # from scratch.
    # Fine-tuning will use the number of epochs in the new config.
    old_config = rasa.shared.utils.io.read_yaml_file(
        "data/test_moodbot/config.yml")
    old_config["pipeline"][-1][EPOCHS] = 10
    new_config_path = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(old_config, new_config_path)

    # Tweak one training example so the data differs from what the old
    # model was trained on (without adding new labels).
    old_nlu = rasa.shared.utils.io.read_yaml_file(
        "data/test_moodbot/data/nlu.yml")
    old_nlu["nlu"][-1]["examples"] = "-something else"
    new_nlu_path = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(old_nlu, new_nlu_path)

    train_nlu(
        str(new_config_path),
        str(new_nlu_path),
        domain="data/test_moodbot/domain.yml",
        output=output,
        model_to_finetune=trained_nlu_moodbot_path,
        finetuning_epoch_fraction=0.2,
    )

    assert mock_interpreter_create.call_args[1]["should_finetune"]

    mocked_nlu_training.assert_called_once()
    _, nlu_train_kwargs = mocked_nlu_training.call_args
    model_to_finetune = nlu_train_kwargs["model_to_finetune"]
    assert isinstance(model_to_finetune, Interpreter)

    _, diet_kwargs = mock_DIET_load.call_args
    assert diet_kwargs["should_finetune"] is True

    # 10 configured epochs * 0.2 fraction == 2 effective finetuning epochs.
    new_diet_metadata = model_to_finetune.model_metadata.metadata["pipeline"][
        -1]
    assert new_diet_metadata["name"] == "DIETClassifier"
    assert new_diet_metadata[EPOCHS] == 2
def test_warn_on_dense_features():
    """Training a sparse-only classifier alongside dense featurizers should warn.

    The config computes dense features that the
    `SparseNaiveBayesIntentClassifier` cannot consume; training must emit a
    `UserWarning` saying exactly that.
    """
    msg = "Dense features are being computed but not used in the SparseNaiveBayesIntentClassifier."
    with pytest.warns(UserWarning) as record:
        train_nlu(
            nlu_data=NLU_DATA_PATH,
            config="tests/configs/sparse-dense-naive-bayes-intent-classifier-config.yml",
            output="models",
        )
    # Generator expression instead of a materialized list: `any` can
    # short-circuit on the first matching warning (ruff C419).
    assert any(str(w.message) == msg for w in record.list)
def test_train_nlu_with_responses_no_domain_warns(tmp_path: Path):
    """Training a ResponseSelector without domain responses emits a warning."""
    data_path = "data/test_nlu_no_responses/nlu_no_responses.yml"

    with pytest.warns(UserWarning) as records:
        train_nlu(
            "data/test_config/config_response_selector_minimal.yml",
            data_path,
            output=str(tmp_path / "models"),
        )

    expected_fragment = "You either need to add a response phrase or correct the intent"
    assert any(expected_fragment in record.message.args[0] for record in records)
def test_train_nlu_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    stack_config_path: Text,
    nlu_data_path: Text,
):
    """NLU training must leave no temporary rasa files behind."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    # Redirect all temp-file creation into our isolated directory.
    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    train_nlu(stack_config_path, nlu_data_path, output=str(models_dir))

    assert count_temp_rasa_files(tempfile.tempdir) == 0
def test_train_nlu_no_nlu_file_error_message(
    capsys: CaptureFixture,
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    stack_config_path: Text,
):
    """Calling train_nlu without NLU data prints a helpful error message."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()
    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    # Empty string == no NLU data supplied.
    train_nlu(stack_config_path, "", output=str(models_dir))

    captured = capsys.readouterr()
    assert "No NLU data given" in captured.out
def _train_nlu(
    *args: Any, output_path: Optional[Text] = None, **kwargs: Any
) -> Optional[Text]:
    """Run `train_nlu`, defaulting `output_path` to a fresh temp models directory."""
    # `tmp_path_factory` is captured from the enclosing fixture scope.
    target = (
        output_path
        if output_path is not None
        else str(tmp_path_factory.mktemp("models"))
    )
    return train_nlu(*args, output=target, **kwargs)
def test_predict():
    """Check the custom sparse Naive Bayes classifier matches sklearn's BernoulliNB.

    Trains a model with the sparse-naive-bayes config, re-fits an equivalent
    `BernoulliNB` on the same featurized data, and asserts both produce
    identical probabilities and class predictions.
    """
    model_path = train_nlu(
        nlu_data=NLU_DATA_PATH,
        config="tests/configs/sparse-naive-bayes-intent-classifier-config.yml",
        output="models",
    )
    interpreter = load_interpreter(model_path)

    # Get features from the pipeline and prepare data in the format sklearn
    # expects.
    training_data = load_data(NLU_DATA_PATH)
    for example in training_data.intent_examples:
        interpreter.featurize_message(example)
    # The classifier under test is the last component of the pipeline.
    model = interpreter.interpreter.pipeline[-1]
    X, y = model.prepare_data(training_data)

    # Fit the equivalent sklearn classifier.
    from sklearn.naive_bayes import BernoulliNB

    # NOTE(review): these hyperparameters presumably mirror the config's
    # defaults for the classifier — confirm against the component defaults.
    clf = BernoulliNB(alpha=0.1, binarize=0.0, fit_prior=True)
    clf.fit(X, y)

    # Check that predictions agree.
    assert (clf.predict_proba(X) == model.predict_prob(X)).all()
    assert (clf.predict(X) == model.predict(X)[0][:, 0]).all()
def run_nlu_training(args: argparse.Namespace) -> Optional[Text]:
    """Trains an NLU model.

    Args:
        args: Namespace arguments.

    Returns:
        Path to a trained model or `None` if training was not successful.
    """
    from rasa.model_training import train_nlu

    # Validate the config against the keys NLU training requires.
    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS_NLU)

    # Resolve the NLU data path, falling back to the default data location;
    # `None` is acceptable and handled downstream.
    nlu_data = rasa.cli.utils.get_validated_path(args.nlu, "nlu",
                                                 DEFAULT_DATA_PATH,
                                                 none_is_valid=True)

    if args.domain:
        # Domain is optional for NLU-only training; validate it if given.
        args.domain = rasa.cli.utils.get_validated_path(args.domain, "domain",
                                                        DEFAULT_DOMAIN_PATH,
                                                        none_is_valid=True)

    return train_nlu(
        config=config,
        nlu_data=nlu_data,
        output=args.out,
        fixed_model_name=args.fixed_model_name,
        persist_nlu_training_data=args.persist_nlu_data,
        additional_arguments=extract_nlu_additional_arguments(args),
        domain=args.domain,
        model_to_finetune=_model_for_finetuning(args),
        finetuning_epoch_fraction=args.epoch_fraction,
    )
def test_train_nlu_wrong_format_error_message(
    capsys: CaptureFixture,
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    stack_config_path: Text,
    incorrect_nlu_data_path: Text,
):
    """Malformed NLU data produces a data-format error message."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()
    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    train_nlu(stack_config_path, incorrect_nlu_data_path, output=str(models_dir))

    captured = capsys.readouterr()
    assert "Please verify the data format" in captured.out
def test_run_train_test_command_english(fp):
    """Train with the given config on English data, then run NLU evaluation."""
    # Configs using flashtext need the dataset that includes lookup tables.
    nlu_data = (
        "tests/data/nlu/en/nlu_w_lookups.md"
        if "flashtext" in fp
        else "tests/data/nlu/en/nlu.md"
    )
    model_name = train_nlu(
        nlu_data=nlu_data, config=f"tests/configs/{fp}", output="models",
    )
    run_nlu(model=f"models/{model_name}", nlu_data="tests/data/nlu/en/nlu.md")
def test_run_train_test_command_non_english(fp, nlu):
    """Train with the given config on non-English data, then run NLU evaluation."""
    model_name = train_nlu(nlu_data=nlu, config=f"tests/configs/{fp}", output="models")
    run_nlu(model=f"models/{model_name}", nlu_data=nlu)