示例#1
0
def test_model_finetuning_nlu_new_label_already_in_domain(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_rasa_model: Text,
    default_nlu_data: Text,
    default_config_path: Text,
    default_domain_path: Text,
):
    """Finetuning must abort when the NLU data adds an intent the base model
    has never seen, even though the domain already declares it."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    # Start from the default NLU data and add the `why` intent, which exists
    # in `default_domain_path` but not yet in the nlu data.
    nlu_data = rasa.shared.utils.io.read_yaml_file(default_nlu_data)
    nlu_data["nlu"].append({"intent": "why", "examples": "whyy??"})
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    with pytest.raises(SystemExit):
        train_nlu(
            default_config_path,
            str(updated_nlu_file),
            domain=default_domain_path,
            output=str(model_dir),
            model_to_finetune=trained_rasa_model,
        )

    nlu_train_mock.assert_not_called()
示例#2
0
def test_model_finetuning_nlu_new_entity(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
):
    """Finetuning must abort when the NLU data annotates an entity label the
    base model was not trained on."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    # Rewrite the last example so it carries a previously unseen entity.
    nlu_data = rasa.shared.utils.io.read_yaml_file(
        "examples/moodbot/data/nlu.yml")
    nlu_data["nlu"][-1]["examples"] = "-[blah](something)"
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    with pytest.raises(SystemExit):
        train_nlu(
            "examples/moodbot/config.yml",
            str(updated_nlu_file),
            domain="examples/moodbot/domain.yml",
            output=str(model_dir),
            model_to_finetune=trained_nlu_moodbot_path,
        )

    nlu_train_mock.assert_not_called()
def train_nlu_model() -> None:
    """Train the restaurant NLU model.

    Trains from the local `config.yml` and the project's default data path,
    fixing the model name to ``restaurant-nlu-model``.  The third positional
    argument is presumably the output directory — confirm against the
    `train_nlu` signature in use.
    """
    train_nlu(
        "config.yml",
        DEFAULT_DATA_PATH,
        DEFAULT_MODELS_PATH,
        fixed_model_name="restaurant-nlu-model",
    )
示例#4
0
def test_model_finetuning_nlu_new_label_to_domain_only(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
):
    """An intent added only to the domain (with no NLU examples) must not
    prevent finetuning from running."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    # Extend the domain with an intent that has no training examples.
    domain = rasa.shared.utils.io.read_yaml_file(
        "examples/moodbot/domain.yml")
    domain["intents"].append("a_new_one")
    updated_domain_file = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain, updated_domain_file)

    train_nlu(
        "examples/moodbot/config.yml",
        "examples/moodbot/data/nlu.yml",
        domain=str(updated_domain_file),
        output=str(model_dir),
        model_to_finetune=trained_nlu_moodbot_path,
    )

    nlu_train_mock.assert_called()
示例#5
0
def test_train_nlu_autoconfig(
    tmp_path: Text,
    monkeypatch: MonkeyPatch,
    default_stack_config: Text,
    default_nlu_data: Text,
):
    """`train_nlu` must resolve its configuration through
    `autoconfig.get_configuration` with `TrainingType.NLU`."""
    monkeypatch.setattr(tempfile, "tempdir", tmp_path)

    # mock function that returns configuration
    mocked_get_configuration = Mock()
    monkeypatch.setattr(autoconfig, "get_configuration", mocked_get_configuration)

    # Skip actual NLU training.  `asyncio.coroutine` was deprecated in
    # Python 3.8 and removed in 3.11; `AsyncMock` provides an awaitable
    # directly (and is what the sibling tests in this file already use).
    mocked_train_nlu = AsyncMock()
    monkeypatch.setattr(
        sys.modules["rasa.train"],
        "_train_nlu_with_validated_data",
        mocked_train_nlu,
    )

    # do training
    train_nlu(
        default_stack_config,
        default_nlu_data,
        output="test_train_nlu_temp_files_models",
    )

    # `get_configuration` receives the training type as its second
    # positional argument.
    mocked_get_configuration.assert_called_once()
    _, args, _ = mocked_get_configuration.mock_calls[0]
    assert args[1] == autoconfig.TrainingType.NLU
示例#6
0
def test_model_finetuning_with_invalid_model_nlu(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """Finetuning from an invalid base model must exit with an error message
    and never invoke NLU training."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    with pytest.raises(SystemExit):
        train_nlu(
            default_stack_config,
            default_nlu_data,
            domain=default_domain_path,
            output=str(model_dir),
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    nlu_train_mock.assert_not_called()

    assert "No NLU model for finetuning found" in capsys.readouterr().out
示例#7
0
def test_model_finetuning_nlu_with_default_epochs(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
):
    """When the finetuning config omits `epochs`, DIET's default epoch count
    is used and scaled by `finetuning_epoch_fraction`."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    # Providing a new config with no epochs will mean the default amount are
    # used and then scaled by `finetuning_epoch_fraction`.
    config = rasa.shared.utils.io.read_yaml_file(
        "examples/moodbot/config.yml")
    del config["pipeline"][-1][EPOCHS]
    updated_config_file = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, updated_config_file)

    train_nlu(
        str(updated_config_file),
        "examples/moodbot/data/nlu.yml",
        output=str(model_dir),
        model_to_finetune=trained_nlu_moodbot_path,
        finetuning_epoch_fraction=0.1,
    )

    nlu_train_mock.assert_called_once()
    _, train_kwargs = nlu_train_mock.call_args
    finetune_model = train_kwargs["model_to_finetune"]
    diet_metadata = finetune_model.model_metadata.metadata["pipeline"][-1]
    assert diet_metadata["name"] == "DIETClassifier"
    assert diet_metadata[EPOCHS] == DIETClassifier.defaults[EPOCHS] * 0.1
示例#8
0
def test_model_finetuning_nlu(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_nlu_moodbot_path: Text,
    use_latest_model: bool,
):
    """End-to-end wiring check for NLU finetuning: interpreter and DIET are
    loaded in finetune mode and the new config's epoch count is scaled by
    `finetuning_epoch_fraction`."""
    nlu_train_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_train_mock)

    interpreter_create_spy = Mock(wraps=Interpreter.create)
    monkeypatch.setattr(Interpreter, "create", interpreter_create_spy)

    diet_load_spy = Mock(wraps=DIETClassifier.load)
    monkeypatch.setattr(DIETClassifier, "load", diet_load_spy)

    model_dir = tmp_path / "models"
    model_dir.mkdir()

    if use_latest_model:
        trained_nlu_moodbot_path = str(Path(trained_nlu_moodbot_path).parent)

    # Typically models will be fine-tuned with a smaller number of epochs
    # than training from scratch.
    # Fine-tuning will use the number of epochs in the new config.
    config = rasa.shared.utils.io.read_yaml_file(
        "examples/moodbot/config.yml")
    config["pipeline"][-1][EPOCHS] = 10
    updated_config_file = tmp_path / "new_config.yml"
    rasa.shared.utils.io.write_yaml(config, updated_config_file)

    nlu_data = rasa.shared.utils.io.read_yaml_file(
        "examples/moodbot/data/nlu.yml")
    nlu_data["nlu"][-1]["examples"] = "-something else"
    updated_nlu_file = tmp_path / "new_nlu.yml"
    rasa.shared.utils.io.write_yaml(nlu_data, updated_nlu_file)

    train_nlu(
        str(updated_config_file),
        str(updated_nlu_file),
        domain="examples/moodbot/domain.yml",
        output=str(model_dir),
        model_to_finetune=trained_nlu_moodbot_path,
        finetuning_epoch_fraction=0.2,
    )

    assert interpreter_create_spy.call_args[1]["should_finetune"]

    nlu_train_mock.assert_called_once()
    _, train_kwargs = nlu_train_mock.call_args
    finetune_interpreter = train_kwargs["model_to_finetune"]
    assert isinstance(finetune_interpreter, Interpreter)

    _, diet_kwargs = diet_load_spy.call_args
    assert diet_kwargs["should_finetune"] is True

    # 10 configured epochs * 0.2 fraction == 2.
    diet_metadata = finetune_interpreter.model_metadata.metadata["pipeline"][-1]
    assert diet_metadata["name"] == "DIETClassifier"
    assert diet_metadata[EPOCHS] == 2
示例#9
0
 def train(self):
     """Train the English NLU model from the multilang workspace.

     NOTE(review): the workspace prefix is a machine-specific absolute path
     (`/pi/ws/...`) — consider making it configurable.
     """
     from rasa.train import train_nlu
     prefix = '/pi/ws/sagas-ai/nlu_multilang'
     train_nlu(config=f"{prefix}/config_en.yml",
               nlu_data=f"{prefix}/en/",
               output=f'{prefix}/models',
               fixed_model_name='en_current',
               persist_nlu_training_data=True)
示例#10
0
def test_warn_on_dense_features():
    """Training a config that computes dense features alongside the sparse
    naive Bayes classifier should warn that they are unused."""
    msg = "Dense features are being computed but not used in the SparseNaiveBayesIntentClassifier."
    with pytest.warns(UserWarning) as record:
        train_nlu(
            nlu_data=NLU_DATA_PATH,
            config="tests/configs/sparse-dense-naive-bayes-intent-classifier-config.yml",
            output="models",
        )

    # Assert after the context manager exits so the complete set of recorded
    # warnings is inspected; use a generator instead of a throwaway list.
    assert any(str(w.message) == msg for w in record.list)
示例#11
0
def train_mod(lang):
    """Train the NLU model for `lang`.

    For ja/zh the corpus datasets are (re)generated first.  Removed the
    unused `train_async` import from the original.
    """
    from rasa.train import train_nlu
    from saai.tools.corpus_procs import CorpusProcs
    if lang in ('ja', 'zh'):
        # saai.tools.corpus_procs gen_datasets cn '/pi/ws/sagas-ai/nlu_multilang/zh/'
        CorpusProcs().gen_datasets('cn' if lang == 'zh' else lang,
                                   f'{prefix}/{lang}/')
    train_nlu(config=f"{prefix}/config_{lang}.yml",
              nlu_data=f"{prefix}/{lang}/",
              output=f'{prefix}/models',
              fixed_model_name=f'{lang}_current',
              persist_nlu_training_data=True)
示例#12
0
def test_train_nlu_with_responses_no_domain_warns(tmp_path: Path):
    """Training a response selector without a domain should warn about the
    missing response phrases."""
    nlu_file = "data/test_nlu_no_responses/nlu_no_responses.yml"

    with pytest.warns(UserWarning) as records:
        train_nlu(
            "data/test_config/config_response_selector_minimal.yml",
            nlu_file,
            output=str(tmp_path / "models"),
        )

    expected = "You either need to add a response phrase or correct the intent"
    assert any(expected in record.message.args[0] for record in records)
示例#13
0
def test_train_nlu_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_stack_config: Text,
    default_nlu_data: Text,
):
    """No temporary rasa files may be left behind after NLU training."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    train_nlu(default_stack_config, default_nlu_data, output=str(models_dir))

    assert count_temp_rasa_files(tempfile.tempdir) == 0
示例#14
0
def test_train_nlu_no_nlu_file_error_message(
    capsys: CaptureFixture,
    tmp_path: Text,
    monkeypatch: MonkeyPatch,
    default_stack_config: Text,
):
    """Calling `train_nlu` with an empty data path should print a helpful
    message instead of crashing."""
    monkeypatch.setattr(tempfile, "tempdir", tmp_path)

    train_nlu(
        default_stack_config,
        "",
        output="test_train_nlu_temp_files_models",
    )

    assert "No NLU data given" in capsys.readouterr().out
示例#15
0
def test_train_nlu_temp_files(
    tmp_path: Text,
    monkeypatch: MonkeyPatch,
    default_stack_config: Text,
    default_nlu_data: Text,
):
    """Temporary rasa files must be cleaned up after NLU training."""
    monkeypatch.setattr(tempfile, "tempdir", tmp_path)

    train_nlu(default_stack_config, default_nlu_data,
              output="test_train_nlu_temp_files_models")

    assert count_temp_rasa_files(tempfile.tempdir) == 0
示例#16
0
def train_nlu(
    args: argparse.Namespace, train_path: Optional[Text] = None
) -> Optional[Text]:
    """CLI entry point: validate the paths taken from `args` and delegate to
    `rasa.train.train_nlu` (the shadowing import is deliberate)."""
    from rasa.train import train_nlu

    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS_NLU)
    nlu_data = rasa.cli.utils.get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True
    )

    # The domain is optional for pure NLU training; validate only if given.
    if args.domain:
        args.domain = rasa.cli.utils.get_validated_path(
            args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
        )

    return train_nlu(
        config=config,
        nlu_data=nlu_data,
        output=train_path or args.out,
        train_path=train_path,
        fixed_model_name=args.fixed_model_name,
        persist_nlu_training_data=args.persist_nlu_data,
        additional_arguments=extract_nlu_additional_arguments(args),
        domain=args.domain,
    )
示例#17
0
def test_predict():
    """The sparse naive Bayes intent classifier must agree with a
    directly-fitted sklearn BernoulliNB on both labels and probabilities."""
    model_path = train_nlu(
        nlu_data=NLU_DATA_PATH,
        config="tests/configs/sparse-naive-bayes-intent-classifier-config.yml",
        output="models",
    )

    interpreter = load_interpreter(model_path)

    # Get features from the pipeline and prepare data in the format sklearn
    # expects.
    training_data = load_data(NLU_DATA_PATH)
    for message in training_data.intent_examples:
        interpreter.featurize_message(message)
    classifier = interpreter.interpreter.pipeline[-1]
    X, y = classifier.prepare_data(training_data)

    # Fit the equivalent sklearn classifier.
    from sklearn.naive_bayes import BernoulliNB
    sk_clf = BernoulliNB(alpha=0.1, binarize=0.0, fit_prior=True)
    sk_clf.fit(X, y)

    # Check that predictions agree.
    assert (sk_clf.predict_proba(X) == classifier.predict_prob(X)).all()
    assert (sk_clf.predict(X) == classifier.predict(X)[0][:, 0]).all()
示例#18
0
def test_run_train_test_command_english(fp):
    """Smoke test: train with the given config and evaluate the result."""
    model_name = train_nlu(
        nlu_data="tests/data/nlu/en/nlu.md",
        config=f"tests/configs/{fp}",
        output="models",
    )
    run_nlu(model=f"models/{model_name}", nlu_data="tests/data/nlu/en/nlu.md")
示例#19
0
def test_train_nlu_wrong_format_error_message(
    capsys: CaptureFixture,
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_stack_config: Text,
    incorrect_nlu_data: Text,
):
    """Training on malformed NLU data should print a data-format hint."""
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    monkeypatch.setattr(tempfile, "tempdir", training_dir)

    train_nlu(default_stack_config, incorrect_nlu_data, output=str(models_dir))

    assert "Please verify the data format" in capsys.readouterr().out
def test_base_predict():
    """A RasaClassifier built on a freshly trained model returns one
    prediction per input sentence."""
    model_name = train_nlu(
        nlu_data="tests/data/nlu/en/nlu.md",
        config="tests/configs/printer-config.yml",
        output="models",
    )
    classifier = RasaClassifier(model_path=f"{model_name}")
    predictions = classifier.predict(["hello world", "hello there"])
    assert len(predictions) == 2
示例#21
0
def train_nlu(args: argparse.Namespace, train_path: Optional[Text] = None
              ) -> Optional["Interpreter"]:
    """CLI entry point: validate config/data paths and train an NLU model."""
    # Deliberately shadows this wrapper with the library implementation.
    from rasa.train import train_nlu

    output = train_path or args.out
    config = get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)
    nlu_data = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)

    return train_nlu(config, nlu_data, output, train_path)
示例#22
0
def test_run_train_test_command_english(fp):
    """Smoke test: train with each config (lookup-table data for flashtext
    configs) and evaluate the result."""
    if "flashtext" in fp:
        data_path = "tests/data/nlu/en/nlu_w_lookups.md"
    else:
        data_path = "tests/data/nlu/en/nlu.md"
    model_name = train_nlu(
        nlu_data=data_path,
        config=f"tests/configs/{fp}",
        output="models",
    )
    run_nlu(model=f"models/{model_name}", nlu_data="tests/data/nlu/en/nlu.md")
示例#23
0
def train_nlu(args: argparse.Namespace,
              train_path: Optional[Text] = None) -> Optional[Text]:
    """CLI entry point: train an NLU model and return the model path."""
    from rasa.train import train_nlu  # shadows this wrapper on purpose

    output = train_path or args.out
    config = args.config or DEFAULT_CONFIG_PATH
    nlu_data = get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True
    )

    return train_nlu(config, nlu_data, output, train_path)
示例#24
0
def train_nlu(args: argparse.Namespace,
              train_path: Optional[Text] = None) -> Optional[Text]:
    """CLI entry point: validate config and data, then train an NLU model."""
    from rasa.train import train_nlu  # shadows this wrapper on purpose

    output = train_path or args.out
    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS_NLU)
    nlu_data = get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True
    )

    return train_nlu(
        config=config,
        nlu_data=nlu_data,
        output=output,
        train_path=train_path,
        fixed_model_name=args.fixed_model_name,
    )
示例#25
0
def test_base_predict():
    """RasaClassifier predictions and probabilities both have one row per
    input sentence."""
    model_name = train_nlu(
        nlu_data="tests/data/nlu/en/nlu.md",
        config="tests/configs/printer-config.yml",
        output="models",
    )
    classifier = RasaClassifier(model_path=f"{model_name}")
    classifier.class_names_ = [
        "greet",
        "goodbye",
        "out_of_scope",
        "bot_challenge",
        "talk_code",
    ]
    sentences = ["hello world", "hello there"]

    predictions = classifier.predict(sentences)
    assert len(predictions) == 2

    probabilities = classifier.predict_proba(sentences)
    assert probabilities.shape[0] == 2
示例#26
0
def train_nlu(
    args: argparse.Namespace, train_path: Optional[Text] = None
) -> Optional[Text]:
    """CLI entry point: train an NLU model, optionally stored uncompressed."""
    from rasa.train import train_nlu  # shadows this wrapper on purpose

    output = train_path or args.out
    config = args.config or DEFAULT_CONFIG_PATH
    nlu_data = get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True
    )

    return train_nlu(
        config=config,
        nlu_data=nlu_data,
        output=output,
        train_path=train_path,
        fixed_model_name=args.fixed_model_name,
        uncompress=args.store_uncompressed,
    )
示例#27
0
File: train.py  Project: sohailalam2/rasa
def train_nlu(args: argparse.Namespace,
              train_path: Optional[Text] = None) -> Optional[Text]:
    """Trains an NLU model from CLI arguments.

    Args:
        args: Namespace arguments.
        train_path: Directory where models should be stored.

    Returns:
        Path to a trained model or `None` if training was not successful.
    """
    from rasa.train import train_nlu  # shadows this wrapper on purpose

    output = train_path or args.out
    config = _get_valid_config(args.config, CONFIG_MANDATORY_KEYS_NLU)
    nlu_data = rasa.cli.utils.get_validated_path(
        args.nlu, "nlu", DEFAULT_DATA_PATH, none_is_valid=True
    )

    # The domain is optional for pure NLU training; validate only if given.
    if args.domain:
        args.domain = rasa.cli.utils.get_validated_path(
            args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
        )

    return train_nlu(
        config=config,
        nlu_data=nlu_data,
        output=output,
        train_path=train_path,
        fixed_model_name=args.fixed_model_name,
        persist_nlu_training_data=args.persist_nlu_data,
        additional_arguments=extract_nlu_additional_arguments(args),
        domain=args.domain,
        model_to_finetune=_model_for_finetuning(args),
        finetuning_epoch_fraction=args.epoch_fraction,
    )
示例#28
0
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """
    Trains and compares multiple NLU models.
    For each run and exclusion percentage a model per config file is trained.
    Thereby, the model is trained only on the current percentage of training data.
    Afterwards, the model is tested on the complete test data of that run.
    All results are stored in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns: training examples per run
    """

    training_examples_per_run = []

    for run in range(runs):

        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        # Each run gets its own directory with a fresh train/test split.
        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())

        # NOTE(review): re-initialised on every run, so the returned list
        # reflects only the final run's example counts.
        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            # Exclusions accumulate across iterations because `train` is
            # re-assigned to its own remaining split each time.
            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".
                    format(model_name, percent_string))

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    # A failed training counts as an f-score of 0.0 so the
                    # comparison can continue with the remaining configs.
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)))
                    f_score_results[model_name][run].append(0.0)
                    continue

                # Evaluate the trained model's NLU subdirectory against the
                # run's held-out test data.
                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(model_output_path,
                                           "{}_report".format(model_name))
                errors_path = os.path.join(report_path, "errors.json")
                result = run_evaluation(test_path,
                                        model_path,
                                        report=report_path,
                                        errors=errors_path)

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run
def test_run_train_test_command_non_english(fp, nlu):
    """Smoke test: train with the given config/data and evaluate the model."""
    model_name = train_nlu(nlu_data=nlu, config=f"tests/configs/{fp}", output="models")
    run_nlu(model=f"models/{model_name}", nlu_data=nlu)