Пример #1
0
def train_rasa():
    """Generate NLU data with chatito, convert it to Markdown and train Rasa.

    All paths are resolved relative to the directory of this file:

    1. run ``npx chatito --format rasa data/`` inside ``../rasa/chatito``;
    2. convert ``../rasa/chatito/rasa_dataset_training.json`` into
       ``../rasa/chatito/nlu.md``;
    3. call ``train`` with the domain, the config and the three training
       files, writing the result to ``../rasa/models``.
    """
    print('TRAIN RASA')

    here = os.path.dirname(__file__)

    def _path(relative):
        # Single place that anchors a relative path at this file's directory.
        return os.path.join(here, relative)

    # With shell=True the command is handed to the shell as one string; the
    # subprocess docs recommend passing a string rather than a sequence here.
    # NOTE(review): the exit status of chatito is not checked, so a failed
    # generation step does not stop the conversion/training below.
    call('npx chatito --format rasa data/',
         shell=True,
         cwd=_path('../rasa/chatito'))

    convert_training_data(
        data_file=_path('../rasa/chatito/rasa_dataset_training.json'),
        out_file=_path('../rasa/chatito/nlu.md'),
        output_format="md",
        language="")

    train(domain=_path('../rasa/domain.yml'),
          config=_path('../rasa/config.yml'),
          training_files=[
              _path('../rasa/data/nlu.md'),
              _path('../rasa/data/stories.md'),
              _path('../rasa/chatito/nlu.md'),
          ],
          output=_path('../rasa/models'))
Пример #2
0
def test_train_temp_files(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
):
    """Check that ``train`` leaves no temporary rasa files behind.

    ``tempfile.tempdir`` is redirected to ``tmp_path / "training"`` so that
    leftovers can be counted with ``count_temp_rasa_files``.
    """
    training_dir = tmp_path / "training"
    models_dir = tmp_path / "models"
    training_dir.mkdir()
    models_dir.mkdir()

    monkeypatch.setattr(tempfile, "tempdir", training_dir)
    output = str(models_dir)

    # First pass: forced training. Second pass: same inputs again; this
    # shouldn't try to train a new model because nothing has been changed,
    # and it also shouldn't create any temp files.
    for extra_options in ({"force_training": True}, {}):
        train(
            default_domain_path,
            default_stack_config,
            [default_stories_file, default_nlu_data],
            output=output,
            **extra_options,
        )

        assert count_temp_rasa_files(tempfile.tempdir) == 0
Пример #3
0
def test_train_temp_files(
    move_tempdir,
    default_domain_path,
    default_stories_file,
    default_stack_config,
    default_nlu_data,
):
    """Check that ``TEST_TEMP`` stays empty after training and re-training."""

    def _run_training(**extra_options):
        # Same positional inputs for both runs; only the options differ.
        train(
            default_domain_path,
            default_stack_config,
            [default_stories_file, default_nlu_data],
            **extra_options,
        )

    _run_training(force_training=True)
    assert not os.listdir(TEST_TEMP)

    # After training the model, try to do it again. This shouldn't try to
    # train a new model because nothing has been changed. It also shouldn't
    # create any temp files.
    _run_training()
    assert not os.listdir(TEST_TEMP)
Пример #4
0
def test_trained_interpreter_passed_to_core_training(
    monkeypatch: MonkeyPatch, tmp_path: Path, unpacked_trained_moodbot_path: Text
):
    """Check that Core training is called with a ``RasaNLUInterpreter``.

    NLU training and Core training are both replaced by mocks; the test then
    inspects the keyword arguments of the single Core training call.
    """
    # Skip actual NLU training and return trained interpreter path from fixture
    _train_nlu_with_validated_data = Mock(return_value=unpacked_trained_moodbot_path)

    # `asyncio.coroutine` was deprecated in Python 3.8 and removed in 3.11,
    # so the mocks are exposed through plain `async def` wrappers instead.
    async def _fake_train_nlu(*args, **kwargs):
        return _train_nlu_with_validated_data(*args, **kwargs)

    # Patching is a bit more complicated as we have a module `train` and a
    # function with the same name, hence the lookup through `sys.modules`.
    monkeypatch.setattr(
        sys.modules["rasa.train"],
        "_train_nlu_with_validated_data",
        _fake_train_nlu,
    )

    # Mock the actual Core training
    _train_core = Mock()

    async def _fake_train_core(*args, **kwargs):
        return _train_core(*args, **kwargs)

    monkeypatch.setattr(rasa.core, "train", _fake_train_core)

    train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_CONFIG_PATH,
        [DEFAULT_STORIES_FILE, DEFAULT_NLU_DATA],
        str(tmp_path),
    )

    _train_core.assert_called_once()
    _, _, kwargs = _train_core.mock_calls[0]
    assert isinstance(kwargs["interpreter"], RasaNLUInterpreter)
Пример #5
0
def test_interpreter_of_old_model_passed_to_core_training(
    monkeypatch: MonkeyPatch, tmp_path: Path, trained_moodbot_path: Text
):
    """Check that Core training gets a ``RasaNLUInterpreter`` without NLU retraining.

    ``should_retrain_nlu`` is patched to return ``False`` and
    ``get_latest_model`` to return the fixture's model path; Core training is
    mocked and its keyword arguments are inspected.
    """
    # NLU isn't retrained
    monkeypatch.setattr(
        rasa.model.FingerprintComparisonResult,
        rasa.model.FingerprintComparisonResult.should_retrain_nlu.__name__,
        lambda _: False,
    )

    # An old model with an interpreter exists
    monkeypatch.setattr(
        rasa.model, rasa.model.get_latest_model.__name__, lambda _: trained_moodbot_path
    )

    # Mock the actual Core training. `asyncio.coroutine` was deprecated in
    # Python 3.8 and removed in 3.11, so an `async def` wrapper is used.
    _train_core = Mock()

    async def _fake_train_core(*args, **kwargs):
        return _train_core(*args, **kwargs)

    monkeypatch.setattr(rasa.core, "train", _fake_train_core)

    train(
        DEFAULT_DOMAIN_PATH_WITH_SLOTS,
        DEFAULT_CONFIG_PATH,
        [DEFAULT_STORIES_FILE, DEFAULT_NLU_DATA],
        str(tmp_path),
    )

    _train_core.assert_called_once()
    _, _, kwargs = _train_core.mock_calls[0]
    assert isinstance(kwargs["interpreter"], RasaNLUInterpreter)
Пример #6
0
    def test_e2e_gives_experimental_warning(
        self,
        monkeypatch: MonkeyPatch,
        trained_e2e_model: Text,
        default_domain_path: Text,
        default_stack_config: Text,
        default_e2e_stories_file: Text,
        default_nlu_data: Text,
        caplog: LogCaptureFixture,
    ):
        """Check that training with e2e stories logs the experimental warning."""
        mock_nlu_training(monkeypatch)
        mock_core_training(monkeypatch)

        expected_warning = "The end-to-end training is currently experimental"

        with caplog.at_level(logging.WARNING):
            training_files = [default_e2e_stories_file, default_nlu_data]
            train(
                default_domain_path,
                default_stack_config,
                training_files,
                output=new_model_path_in_same_dir(trained_e2e_model),
            )

        # At least one captured record must carry the warning text.
        logged_messages = [record.message for record in caplog.records]
        assert any(expected_warning in message for message in logged_messages)
Пример #7
0
def test_model_finetuning_new_domain_label_stops_all_training(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    trained_moodbot_path: Text,
):
    """Check that finetuning with an added intent exits before any training.

    The moodbot domain is copied with one extra intent; ``train`` is expected
    to raise ``SystemExit`` and neither Core nor NLU training may be called.
    """
    core_training_mock = AsyncMock()
    nlu_training_mock = AsyncMock()
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_mock)
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_mock)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # Write a modified copy of the moodbot domain that contains a new intent.
    domain_data = rasa.shared.utils.io.read_yaml_file("examples/moodbot/domain.yml")
    domain_data["intents"].append("a_new_one")
    modified_domain_file = tmp_path / "new_domain.yml"
    rasa.shared.utils.io.write_yaml(domain_data, modified_domain_file)

    moodbot_training_files = [
        "examples/moodbot/data/stories.yml",
        "examples/moodbot/data/nlu.yml",
    ]

    with pytest.raises(SystemExit):
        train(
            domain=str(modified_domain_file),
            config="examples/moodbot/config.yml",
            training_files=moodbot_training_files,
            output=str(models_dir),
            model_to_finetune=trained_moodbot_path,
        )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()
Пример #8
0
def test_model_finetuning_with_invalid_model(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
    model_to_fine_tune: Text,
    capsys: CaptureFixture,
):
    """Check that finetuning from an unusable model exits with a message.

    ``train`` is expected to raise ``SystemExit`` without calling the mocked
    Core or NLU training, and stdout must mention the missing NLU model.
    """
    nlu_training_mock = AsyncMock(return_value="")
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_mock)

    core_training_mock = AsyncMock()
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_mock)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    with pytest.raises(SystemExit):
        train(
            default_domain_path,
            default_stack_config,
            [default_stories_file, default_nlu_data],
            output=str(models_dir),
            force_training=True,
            model_to_finetune=model_to_fine_tune,
            finetuning_epoch_fraction=1,
        )

    core_training_mock.assert_not_called()
    nlu_training_mock.assert_not_called()

    captured_stdout = capsys.readouterr().out
    assert "No NLU model for finetuning found" in captured_stdout
Пример #9
0
def train_rasa():
    """Generate and/or train the RASA model, depending on ``ARGS``.

    * ``ARGS.generate``: run ``npx chatito --format rasa data/`` inside
      ``../rasa/chatito`` and convert the resulting
      ``rasa_dataset_training.json`` into ``nlu.md`` in the same directory.
    * ``ARGS.train``: call ``train`` with the domain, the config and the
      three training files, writing the result to ``../rasa/models``.

    All paths are resolved relative to the directory of this file.
    """
    print('TRAIN RASA')

    here = os.path.dirname(__file__)

    def _path(relative):
        # Single place that anchors a relative path at this file's directory.
        return os.path.join(here, relative)

    if ARGS.generate:
        # With shell=True the command is handed to the shell as one string;
        # the subprocess docs recommend a string rather than a sequence here.
        # NOTE(review): the exit status of chatito is not checked.
        call('npx chatito --format rasa data/',
             shell=True,
             cwd=_path('../rasa/chatito'))
        print('CONVERT TO RASA MD')
        convert_training_data(
            data_file=_path('../rasa/chatito/rasa_dataset_training.json'),
            out_file=_path('../rasa/chatito/nlu.md'),
            output_format="md",
            language="")
        print('DONE CONVERT TO RASA MD')

    if ARGS.train:
        train(domain=_path('../rasa/domain.yml'),
              config=_path('../rasa/config.yml'),
              training_files=[
                  _path('../rasa/data/nlu.md'),
                  _path('../rasa/data/stories.md'),
                  _path('../rasa/chatito/nlu.md'),
              ],
              output=_path('../rasa/models'))
def train_nlu_core_model() -> None:
    """Call ``train`` on the default domain, config and data paths.

    The model name is fixed to ``restaurant_rasa_model`` and training is not
    forced (``force_training=False``). The result of ``train`` is discarded.
    """
    training_options = {
        "domain": DEFAULT_DOMAIN_PATH,
        "config": DEFAULT_CONFIG_PATH,
        "training_files": DEFAULT_DATA_PATH,
        "fixed_model_name": "restaurant_rasa_model",
        "force_training": False,
    }
    train(**training_options)
Пример #11
0
    def test_retrains_only_core_if_new_e2e_example_seen_before(
        self,
        monkeypatch: MonkeyPatch,
        trained_e2e_model: Text,
        default_domain_path: Text,
        default_stack_config: Text,
        default_e2e_stories_file: Text,
        default_nlu_data: Text,
        tmp_path: Path,
    ):
        """Check that an extra user step in the e2e stories retrains only Core.

        A copy of the e2e stories file with one appended ``user`` step is
        written to ``tmp_path`` and used for training next to the already
        trained e2e model; Core training must run once, NLU training not at
        all.
        """
        stories_yaml = rasa.shared.utils.io.read_yaml_file(
            default_e2e_stories_file)
        stories_yaml["stories"][1]["steps"].append({"user": "******"})

        # The original had a duplicated chained assignment here
        # (`new_stories_file = new_stories_file = ...`); one target suffices.
        new_stories_file = tmp_path / "new_stories.yml"
        rasa.shared.utils.io.write_yaml(stories_yaml, new_stories_file)

        mocked_nlu_training = mock_nlu_training(monkeypatch)
        mocked_core_training = mock_core_training(monkeypatch)

        new_model_path = train(
            default_domain_path,
            default_stack_config,
            [new_stories_file, default_nlu_data],
            output=new_model_path_in_same_dir(trained_e2e_model),
        ).model
        # Remove the freshly written model file before asserting.
        os.remove(new_model_path)

        mocked_core_training.assert_called_once()
        mocked_nlu_training.assert_not_called()
Пример #12
0
def train_rasa(pipeline_name):
    """Train a model with the given pipeline and return the elapsed time.

    :param pipeline_name: either the module-level ``config_file`` value, in
        which case that file is used as the config, or the name of a file
        below ``./pipelines/``. It is also used as the fixed model name.
    :return: ``timer.took * 0.001`` formatted as a string
        (presumably milliseconds converted to seconds; confirm against
        ``CodeTimer``).
    """
    # The original branch reassigned `pipeline_name` to itself and left
    # `pipeline_file` unbound, so passing `config_file` raised
    # UnboundLocalError at the `train` call below.
    if pipeline_name == config_file:
        pipeline_file = config_file
    else:
        pipeline_file = './pipelines/' + pipeline_name

    with CodeTimer() as timer:
        # No `output` is passed, so `train` uses its own default location.
        train(
            domain=domain_file,
            config=pipeline_file,
            training_files=nlu_data,
            force_training=True,
            fixed_model_name=pipeline_name)

    # Named `elapsed` rather than `time` to avoid shadowing the stdlib module.
    elapsed = str(timer.took * 0.001)
    return elapsed
Пример #13
0
    def test_new_nlu_data_does_not_retrain_core_if_there_are_no_e2e_stories(
        self,
        monkeypatch: MonkeyPatch,
        trained_simple_rasa_model: Text,
        default_domain_path: Text,
        default_stack_config: Text,
        simple_stories_file: Text,
        default_nlu_data: Text,
        tmp_path: Path,
    ):
        """Check that changed NLU data retrains NLU only when stories are not e2e.

        One example is appended to the first NLU entry, the result is written
        to ``tmp_path`` and used for training next to the already trained
        simple model.
        """
        nlu_content = rasa.shared.utils.io.read_yaml_file(default_nlu_data)
        first_entry = nlu_content["nlu"][0]
        first_entry["examples"] += "- surprise!\n"

        changed_nlu_file = tmp_path / "new_nlu.yml"
        rasa.shared.utils.io.write_yaml(nlu_content, changed_nlu_file)

        nlu_training_mock = mock_nlu_training(monkeypatch)
        core_training_mock = mock_core_training(monkeypatch)

        training_result = train(
            default_domain_path,
            default_stack_config,
            [simple_stories_file, changed_nlu_file],
            output=new_model_path_in_same_dir(trained_simple_rasa_model),
        )
        # Remove the freshly written model file before asserting.
        os.remove(training_result.model)

        core_training_mock.assert_not_called()
        nlu_training_mock.assert_called_once()
Пример #14
0
def train_model_for_bot(bot: str):
    """
    Load the bot's data through ``MongoProcessor``, write it to a working
    directory and train a model from those files.

    The working directory is removed afterwards, also when training fails.

    :param bot: bot id
    :return: the value returned by ``train`` (model path, per the original
        documentation)
    :raises AppException: if the loaded NLU data has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)
    rules = processor.get_rules_for_training(bot)

    directory = Utility.write_training_data(nlu, domain, config, stories,
                                            rules)

    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    try:
        model = train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        # Previously the directory was leaked whenever `train` raised.
        Utility.delete_directory(directory)
    # The explicit `del` of locals right before returning was dropped: the
    # names go out of scope on return anyway.
    return model
Пример #15
0
def train_model_for_bot(bot: str):
    """Train the rasa model from the bot data loaded through ``MongoProcessor``.

    The NLU data, domain, stories and config are serialised and handed to
    ``Utility.save_files``; the directory it returns is used as the training
    input and removed afterwards, also when training fails.

    :param bot: bot id
    :return: the value returned by ``train``
    :raises AppException: if the loaded NLU data has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)

    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )

    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    try:
        model = train(domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
                      config=os.path.join(directory, DEFAULT_CONFIG_PATH),
                      training_files=os.path.join(directory, DEFAULT_DATA_PATH),
                      output=output)
    finally:
        # Previously the directory was leaked whenever `train` raised.
        Utility.delete_directory(directory)
    return model
Пример #16
0
    def test_nlu_and_core_trained_if_no_nlu_data_but_e2e_stories(
        self,
        monkeypatch: MonkeyPatch,
        default_domain_path: Text,
        default_stack_config: Text,
        default_e2e_stories_file: Text,
        tmp_path: Path,
    ):
        """Check that e2e stories alone trigger both NLU and Core training."""
        nlu_training_mock = mock_nlu_training(monkeypatch)
        core_training_mock = mock_core_training(monkeypatch)

        # Only the e2e stories file is supplied; there is no NLU data file.
        model_dir = self.make_tmp_model_dir(tmp_path)
        only_e2e_stories = [default_e2e_stories_file]
        train(
            default_domain_path,
            default_stack_config,
            only_e2e_stories,
            output=model_dir,
        )

        core_training_mock.assert_called_once()
        nlu_training_mock.assert_called_once()
Пример #17
0
    def test_models_not_retrained_if_no_new_data(
        self,
        monkeypatch: MonkeyPatch,
        trained_e2e_model: Text,
        default_domain_path: Text,
        default_stack_config: Text,
        default_e2e_stories_file: Text,
        default_nlu_data: Text,
    ):
        """Check that unchanged inputs trigger neither NLU nor Core training."""
        nlu_training_mock = mock_nlu_training(monkeypatch)
        core_training_mock = mock_core_training(monkeypatch)

        # Train next to the already trained e2e model with the same data.
        unchanged_training_files = [default_e2e_stories_file, default_nlu_data]
        train(
            default_domain_path,
            default_stack_config,
            unchanged_training_files,
            output=new_model_path_in_same_dir(trained_e2e_model),
        )

        core_training_mock.assert_not_called()
        nlu_training_mock.assert_not_called()
Пример #18
0
def test_model_finetuning(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    default_domain_path: Text,
    default_stories_file: Text,
    default_stack_config: Text,
    default_nlu_data: Text,
    trained_rasa_model: Text,
    use_latest_model: bool,
):
    """Check that finetuning passes loaded models to Core and NLU training.

    Both training functions are wrapped in mocks (the real ones still run);
    each must be called once, with ``model_to_finetune`` being an ``Agent``
    for Core and an ``Interpreter`` for NLU.
    """
    nlu_training_spy = Mock(wraps=rasa.nlu.train)
    monkeypatch.setattr(rasa.nlu, rasa.nlu.train.__name__, nlu_training_spy)

    core_training_spy = Mock(wraps=rasa.core.train)
    monkeypatch.setattr(rasa.core, rasa.core.train.__name__, core_training_spy)

    models_dir = tmp_path / "models"
    models_dir.mkdir()

    # With `use_latest_model` the directory that contains the trained model
    # is passed instead of the model file itself.
    base_model = (
        str(Path(trained_rasa_model).parent)
        if use_latest_model
        else trained_rasa_model
    )

    train(
        default_domain_path,
        default_stack_config,
        [default_stories_file, default_nlu_data],
        output=str(models_dir),
        force_training=True,
        model_to_finetune=base_model,
        finetuning_epoch_fraction=0.1,
    )

    core_training_spy.assert_called_once()
    core_kwargs = core_training_spy.call_args[1]
    assert isinstance(core_kwargs["model_to_finetune"], Agent)

    nlu_training_spy.assert_called_once()
    nlu_kwargs = nlu_training_spy.call_args[1]
    assert isinstance(nlu_kwargs["model_to_finetune"], Interpreter)
Пример #19
0
def train_model_for_bot(bot: str):
    """
    Load the bot's data through ``MongoProcessor``, save it as individual
    files and train a model from them.

    The directory returned by ``Utility.save_files`` is removed afterwards,
    also when training fails.

    :param bot: bot id
    :return: the value returned by ``train`` (model path, per the original
        documentation)
    :raises AppException: if the loaded NLU data has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)

    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )

    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    try:
        model = train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        # Previously the directory was leaked whenever `train` raised.
        Utility.delete_directory(directory)
    # The explicit `del` of locals right before returning was dropped: the
    # names go out of scope on return anyway.
    return model