示例#1
0
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Training NLU model...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(
        config, nlu_data_directory, _train_path, fixed_model_name="nlu"
    )
    print_color("NLU model training completed.", color=bcolors.OKBLUE)

    if train_path is None:
        # Only NLU was trained
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data_directory)

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output,
            train_path=_train_path,
            fixed_model_name=fixed_model_name,
            model_prefix="nlu-",
        )

    return _train_path
示例#2
0
def trained_nlu_model(request):
    cfg = RasaNLUModelConfig({"pipeline": "keyword"})
    trainer = Trainer(cfg)
    td = training_data.load_data(DEFAULT_DATA_PATH)

    trainer.train(td)

    model_path = trainer.persist(NLU_MODEL_PATH)

    nlu_data = data.get_nlu_directory(DEFAULT_DATA_PATH)
    output_path = os.path.join(NLU_MODEL_PATH, NLU_MODEL_NAME)
    new_fingerprint = model.model_fingerprint(NLU_DEFAULT_CONFIG_PATH,
                                              nlu_data=nlu_data)
    model.create_package_rasa(model_path, output_path, new_fingerprint)

    def fin():
        if os.path.exists(NLU_MODEL_PATH):
            shutil.rmtree(NLU_MODEL_PATH)

        if os.path.exists(output_path):
            shutil.rmtree(output_path)

    request.addfinalizer(fin)

    return output_path
示例#3
0
def _core_model_for_finetuning(
    model_to_finetune: Text,
    file_importer: TrainingDataImporter,
    finetuning_epoch_fraction: float = 1.0,
) -> Optional[Agent]:
    path_to_archive = model.get_model_for_finetuning(model_to_finetune)
    if not path_to_archive:
        return None

    rasa.shared.utils.cli.print_info(
        f"Loading Core model from {path_to_archive} for finetuning...", )

    with model.unpack_model(path_to_archive) as unpacked:
        new_fingerprint = model.model_fingerprint(file_importer)
        old_fingerprint = model.fingerprint_from_path(unpacked)
        if not model.can_finetune(old_fingerprint, new_fingerprint, core=True):
            rasa.shared.utils.cli.print_error_and_exit(
                "Core model can not be finetuned.")

        config = file_importer.get_config()
        agent = Agent.load(
            unpacked,
            new_config=config,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        # Agent might be empty if no underlying Core model was found.
        if agent.domain is not None and agent.policy_ensemble is not None:
            return agent

        return None
示例#4
0
def _nlu_model_for_finetuning(
    model_to_finetune: Text,
    file_importer: TrainingDataImporter,
    finetuning_epoch_fraction: float = 1.0,
    called_from_combined_training: bool = False,
) -> Optional[Interpreter]:

    path_to_archive = model.get_model_for_finetuning(model_to_finetune)
    if not path_to_archive:
        return None

    rasa.shared.utils.cli.print_info(
        f"Loading NLU model from {path_to_archive} for finetuning...", )
    with model.unpack_model(path_to_archive) as unpacked:
        _, old_nlu = model.get_model_subdirectories(unpacked)
        new_fingerprint = model.model_fingerprint(file_importer)
        old_fingerprint = model.fingerprint_from_path(unpacked)
        if not model.can_finetune(
                old_fingerprint,
                new_fingerprint,
                nlu=True,
                core=called_from_combined_training,
        ):
            rasa.shared.utils.cli.print_error_and_exit(
                "NLU model can not be finetuned.")

        config = file_importer.get_config()
        loaded_model_to_finetune = Interpreter.load(
            old_nlu,
            new_config=config,
            finetuning_epoch_fraction=finetuning_epoch_fraction,
        )
        if not loaded_model_to_finetune:
            return None
    return loaded_model_to_finetune
示例#5
0
文件: train.py 项目: zhfneu/rasa_core
def train_nlu(config: Text, nlu_data: Text, output: Text,
              train_path: Optional[Text]) -> Optional["Interpreter"]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa_nlu

    _train_path = train_path or tempfile.mkdtemp()
    _, nlu_model, _ = rasa_nlu.train(config, nlu_data, _train_path,
                                     project="",
                                     fixed_model_name="nlu")

    if not train_path:
        nlu_data = data.get_nlu_directory(nlu_data)
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config, nlu_data=nlu_data)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success("Your Rasa NLU model is trained and saved at '{}'."
                      "".format(output_path))

    return nlu_model
示例#6
0
    async def train(request):
        # if set will not generate a model name but use the passed one
        model_name = request.args.get("model", None)

        try:
            model_config, data_dict = extract_data_and_config(request)
        except Exception as e:
            logger.debug(traceback.format_exc())
            raise ErrorResponse(
                500,
                "ServerError",
                "An unexpected error occurred.",
                details={"error": str(e)},
            )

        data_file = dump_to_data_file(data_dict)
        config_file = dump_to_data_file(model_config, "_config")

        try:
            path_to_model = await data_router.start_train_process(
                data_file, RasaNLUModelConfig(model_config), model_name)

            # store trained model as tar.gz file
            output_path = create_model_path(model_name, path_to_model)

            nlu_data = data.get_nlu_directory(data_file)
            new_fingerprint = model.model_fingerprint(config_file,
                                                      nlu_data=nlu_data)
            model.create_package_rasa(path_to_model, output_path,
                                      new_fingerprint)
            logger.info("Rasa NLU model trained and persisted to '{}'.".format(
                output_path))

            await data_router.load_model(output_path)

            return await response.file(output_path)
        except MaxWorkerProcessError as e:
            raise ErrorResponse(
                403,
                "NoFreeProcess",
                "No process available for training.",
                details={"error": str(e)},
            )
        except InvalidModelError as e:
            raise ErrorResponse(
                404,
                "ModelNotFound",
                "Model '{}' not found.".format(model_name),
                details={"error": str(e)},
            )
        except TrainingException as e:
            logger.debug(traceback.format_exc())
            raise ErrorResponse(
                500,
                "ServerError",
                "An unexpected error occurred.",
                details={"error": str(e)},
            )
示例#7
0
def test_can_finetune_min_version(
    project: Text,
    monkeypatch: MonkeyPatch,
    old_model_version: Text,
    min_compatible_version: Text,
    can_tune: bool,
):
    importer = _project_files(project)

    monkeypatch.setattr(rasa.constants, "MINIMUM_COMPATIBLE_VERSION",
                        min_compatible_version)
    monkeypatch.setattr(rasa, "__version__", old_model_version)
    old_fingerprint = model_fingerprint(importer)
    new_fingerprint = model_fingerprint(importer)

    with mock.patch("rasa.model.MINIMUM_COMPATIBLE_VERSION",
                    min_compatible_version):
        assert can_finetune(old_fingerprint, new_fingerprint) == can_tune
示例#8
0
def test_fingerprinting_additional_action(project: Text):
    importer = _project_files(project)

    old_fingerprint = model_fingerprint(importer)
    old_domain = importer.get_domain()

    domain_with_new_action = old_domain.as_dict()
    domain_with_new_action[KEY_RESPONSES]["utter_new"] = [{"text": "hi"}]
    domain_with_new_action = Domain.from_dict(domain_with_new_action)

    importer.get_domain = lambda: domain_with_new_action

    new_fingerprint = model_fingerprint(importer)

    assert (old_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY] !=
            new_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY])
    assert old_fingerprint[FINGERPRINT_NLG_KEY] != new_fingerprint[
        FINGERPRINT_NLG_KEY]
示例#9
0
def _train_nlu_with_validated_data(
    config: Dict[Text, Text],
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    retrain_nlu: Union[bool, List[Text]] = True
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train
    import re

    with ExitStack() as stack:
        models = {}
        from rasa.nlu import config as cfg_loader

        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(TempDirectoryPath(tempfile.mkdtemp()))

        pattern = r'(\w\w)*(?=\.)'
        for file in os.listdir(nlu_data_directory):
            lang = re.search(pattern, file).groups()[0]
            if isinstance(retrain_nlu, bool) and retrain_nlu or lang in retrain_nlu:
                nlu_file_path = os.path.join(nlu_data_directory, file)
                print_color("Start training {} NLU model ...".format(lang), color=bcolors.OKBLUE)
                nlu_config = cfg_loader.load(config[lang])
                nlu_config.language = lang
                _, models[lang], _ = rasa.nlu.train(
                    nlu_config, nlu_file_path, _train_path, fixed_model_name="nlu-{}".format(lang)
                )
            else:
                print_color("{} NLU data didn't change, skipping training...".format(lang), color=bcolors.OKBLUE)

        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory
            )

            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
示例#10
0
async def train_comparison_models(
    story_file: Text,
    domain: Text,
    output_path: Text = "",
    exclusion_percentages: Optional[List] = None,
    policy_configs: Optional[List] = None,
    runs: int = 1,
    dump_stories: bool = False,
    additional_arguments: Optional[Dict] = None,
):
    """Train multiple models for comparison of policies"""
    from rasa import model
    from rasa.importers.importer import TrainingDataImporter

    exclusion_percentages = exclusion_percentages or []
    policy_configs = policy_configs or []

    for r in range(runs):
        logging.info("Starting run {}/{}".format(r + 1, runs))

        for current_run, percentage in enumerate(exclusion_percentages, 1):
            for policy_config in policy_configs:

                file_importer = TrainingDataImporter.load_core_importer_from_config(
                    policy_config, domain, [story_file])

                config_name = os.path.splitext(
                    os.path.basename(policy_config))[0]
                logging.info("Starting to train {} round {}/{}"
                             " with {}% exclusion"
                             "".format(config_name, current_run,
                                       len(exclusion_percentages), percentage))

                with TempDirectoryPath(tempfile.mkdtemp()) as train_path:
                    _, new_fingerprint = await asyncio.gather(
                        train(
                            domain,
                            file_importer,
                            train_path,
                            policy_config=policy_config,
                            exclusion_percentage=percentage,
                            additional_arguments=additional_arguments,
                            dump_stories=dump_stories,
                        ),
                        model.model_fingerprint(file_importer),
                    )

                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
                    model_name = config_name + PERCENTAGE_KEY + str(percentage)
                    model.package_model(
                        fingerprint=new_fingerprint,
                        output_directory=output_dir,
                        train_path=train_path,
                        fixed_model_name=model_name,
                    )
示例#11
0
文件: train.py 项目: anusalva-md/rasa
def train_nlu(config: Text, nlu_data: Text, output: Text,
              train_path: Optional[Text]) -> Optional[Text]:
    """Trains a NLU model.

    Args:
        config: Path to the config file for NLU.
        nlu_data: Path to the NLU training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.nlu.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_NLU)

    if not train_path:
        # training NLU only hence the training files still have to be selected
        skill_imports = SkillSelector.load(config)
        nlu_data_directory = data.get_nlu_directory(nlu_data, skill_imports)
    else:
        nlu_data_directory = nlu_data

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model.")
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(config,
                                     nlu_data_directory,
                                     _train_path,
                                     fixed_model_name="nlu")
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        output_path = create_output_path(output, prefix="nlu-")
        new_fingerprint = model.model_fingerprint(config,
                                                  nlu_data=nlu_data_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(
                output_path))

        return output_path

    return _train_path
示例#12
0
def test_fingerprinting_changed_response_text(project: Text):
    importer = _project_files(project)

    old_fingerprint = model_fingerprint(importer)
    old_domain = importer.get_domain()

    # Change NLG content but keep actions the same
    domain_with_changed_nlg = old_domain.as_dict()
    domain_with_changed_nlg[KEY_RESPONSES]["utter_greet"].append(
        {"text": "hi"})
    domain_with_changed_nlg = Domain.from_dict(domain_with_changed_nlg)

    importer.get_domain = lambda: domain_with_changed_nlg

    new_fingerprint = model_fingerprint(importer)

    assert (old_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY] ==
            new_fingerprint[FINGERPRINT_DOMAIN_WITHOUT_NLG_KEY])
    assert old_fingerprint[FINGERPRINT_NLG_KEY] != new_fingerprint[
        FINGERPRINT_NLG_KEY]
示例#13
0
文件: train.py 项目: quickyue/rasa
async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""

    import rasa.core.train

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model.")
        return

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(output,
                                         prefix="core-",
                                         fixed_name=fixed_model_name)
        new_fingerprint = model.model_fingerprint(config,
                                                  domain,
                                                  stories=story_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(
                output_path))

        return output_path

    return _train_path
示例#14
0
def test_create_fingerprint_from_invalid_paths(project, project_files):
    project_files = _project_files(project, *project_files)

    expected = _fingerprint([], [], rasa_version=rasa.__version__, stories=[], nlu=[])

    actual = model_fingerprint(**project_files)
    assert actual[FINGERPRINT_TRAINED_AT_KEY] is not None

    del actual[FINGERPRINT_TRAINED_AT_KEY]
    del expected[FINGERPRINT_TRAINED_AT_KEY]

    assert actual == expected
示例#15
0
async def train_core_async(
    domain: Text,
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    core_model = await rasa.core.train(
        domain_file=domain,
        stories_file=data.get_core_directory(stories),
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )

    if not train_path:
        # Only Core was trained.
        stories = data.get_core_directory(stories)
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(config,
                                                  domain,
                                                  stories=stories)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(
                output_path))

    return core_model
示例#16
0
async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""

    import rasa.core.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        # normal (not compare) training
        print_color("Training Core model...", color=bcolors.OKBLUE)
        await rasa.core.train(
            domain_file=domain,
            stories_file=story_directory,
            output_path=os.path.join(_train_path, "core"),
            policy_config=config,
            kwargs=kwargs,
        )
        print_color("Core model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only Core was trained.
            new_fingerprint = model.model_fingerprint(config,
                                                      domain,
                                                      stories=story_directory)
            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="core-",
            )

        return _train_path
示例#17
0
文件: train.py 项目: quickyue/rasa
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train

    if not os.listdir(nlu_data_directory):
        print_error(
            "No NLU data given. Please provide NLU data in order to train "
            "a Rasa NLU model.")
        return

    _train_path = train_path or tempfile.mkdtemp()

    print_color("Start training NLU model ...", color=bcolors.OKBLUE)
    _, nlu_model, _ = rasa.nlu.train(config,
                                     nlu_data_directory,
                                     _train_path,
                                     fixed_model_name="nlu")
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only NLU was trained
        output_path = create_output_path(output,
                                         prefix="nlu-",
                                         fixed_name=fixed_model_name)
        new_fingerprint = model.model_fingerprint(config,
                                                  nlu_data=nlu_data_directory)

        model.create_package_rasa(_train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success(
            "Your Rasa NLU model is trained and saved at '{}'.".format(
                output_path))

        return output_path

    return _train_path
示例#18
0
async def _train_core_with_validated_data(
    domain: Domain,
    config: Text,
    story_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Train Core with validated training and config data."""

    import rasa.core.train

    _train_path = train_path or tempfile.mkdtemp()

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if train_path is None:
        # Only Core was trained.
        new_fingerprint = model.model_fingerprint(config,
                                                  domain,
                                                  stories=story_directory)
        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output,
            train_path=_train_path,
            fixed_model_name=fixed_model_name,
            model_prefix="core-",
            uncompress=uncompress,
        )

    return _train_path
示例#19
0
def _train_nlu_with_validated_data(
    config: Text,
    nlu_data_directory: Text,
    output: Text,
    train_path: Optional[Text] = None,
    fixed_model_name: Optional[Text] = None,
) -> Optional[Text]:
    """Train NLU with validated training and config data."""

    import rasa.nlu.train

    with ExitStack() as stack:
        if train_path:
            # If the train path was provided, do nothing on exit.
            _train_path = train_path
        else:
            # Otherwise, create a temp train path and clean it up on exit.
            _train_path = stack.enter_context(
                TempDirectoryPath(tempfile.mkdtemp()))

        print_color("Training NLU model...", color=bcolors.OKBLUE)
        _, nlu_model, _ = rasa.nlu.train(config,
                                         nlu_data_directory,
                                         _train_path,
                                         fixed_model_name="nlu")
        print_color("NLU model training completed.", color=bcolors.OKBLUE)

        if train_path is None:
            # Only NLU was trained
            new_fingerprint = model.model_fingerprint(
                config, nlu_data=nlu_data_directory)

            return _package_model(
                new_fingerprint=new_fingerprint,
                output_path=output,
                train_path=_train_path,
                fixed_model_name=fixed_model_name,
                model_prefix="nlu-",
            )

        return _train_path
示例#20
0
def test_should_not_retrain_core(domain_path: Text, tmp_path: Path,
                                 stack_config_path: Text):
    # Don't use `stories_path` as checkpoints currently break fingerprinting
    story_file = tmp_path / "simple_story.yml"
    story_file.write_text("""
stories:
- story: test_story
  steps:
  - intent: greet
  - action: utter_greet
    """)
    trained_model = train_core(domain_path, stack_config_path, str(story_file),
                               str(tmp_path))

    importer = TrainingDataImporter.load_from_config(
        stack_config_path, domain_path, training_data_paths=[str(story_file)])

    new_fingerprint = model.model_fingerprint(importer)

    result = model.should_retrain(new_fingerprint, trained_model, tmp_path)

    assert not result.should_retrain_core()
示例#21
0
def test_rasa_packaging(trained_model, project, use_fingerprint):
    unpacked_model_path = get_model(trained_model)

    os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
    if use_fingerprint:
        fingerprint = model_fingerprint(**_project_files(project))
    else:
        fingerprint = None

    tempdir = tempfile.mkdtemp()
    output_path = os.path.join(tempdir, "test.tar.gz")

    create_package_rasa(unpacked_model_path, output_path, fingerprint)

    unpacked = get_model(output_path)

    assert (os.path.exists(os.path.join(
        unpacked, FINGERPRINT_FILE_PATH)) == use_fingerprint)
    assert os.path.exists(os.path.join(unpacked, "core"))
    assert os.path.exists(os.path.join(unpacked, "nlu"))

    assert not os.path.exists(unpacked_model_path)
示例#22
0
async def train_core_async(domain: Text, config: Text, stories: Text,
                           output: Text,
                           train_path: Optional[Text]) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa_core.train

    # normal (not compare) training
    core_model = await rasa_core.train(domain_file=domain,
                                       stories_file=stories,
                                       output_path=os.path.join(
                                           train_path, "core"),
                                       policy_config=config)

    if not train_path:
        # Only Core was trained.
        stories = data.get_core_directory(stories)
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(config,
                                                  domain,
                                                  stories=stories)
        model.create_package_rasa(train_path, output_path, new_fingerprint)
        print_success("Your Rasa Core model is trained and saved at '{}'."
                      "".format(output_path))

    return core_model
示例#23
0
def test_rasa_packaging(trained_rasa_model: Text, project: Text,
                        use_fingerprint: bool, tmp_path: Path):
    unpacked_model_path = get_model(trained_rasa_model)

    os.remove(os.path.join(unpacked_model_path, FINGERPRINT_FILE_PATH))
    if use_fingerprint:
        fingerprint = model_fingerprint(_project_files(project))
    else:
        fingerprint = None

    output_path = str(tmp_path / "test.tar.gz")

    create_package_rasa(unpacked_model_path, output_path, fingerprint)

    unpacked = get_model(output_path)

    assert (os.path.exists(os.path.join(
        unpacked, FINGERPRINT_FILE_PATH)) == use_fingerprint)
    assert os.path.exists(
        os.path.join(unpacked, DEFAULT_CORE_SUBDIRECTORY_NAME))
    assert os.path.exists(os.path.join(unpacked, "nlu"))

    assert not os.path.exists(unpacked_model_path)
示例#24
0
def test_create_fingerprint_from_paths(project):
    project_files = _project_files(project)

    assert model_fingerprint(**project_files)
示例#25
0
文件: train.py 项目: zhfneu/rasa_core
async def train_async(domain: Text,
                      config: Text,
                      training_files: Union[Text, List[Text]],
                      output: Text = DEFAULT_MODELS_PATH,
                      force_training: bool = False) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.

    Returns:
        Path of the trained model archive.
    """

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)
    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint,
                                              new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(domain, config, story_directory,
                               output, train_path)
    else:
        print("Dialogue data / configuration did not change. "
              "No need to retrain dialogue model.")

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print("NLU data / configuration did not change. "
              "No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print("Train path: '{}'.".format(train_path))

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print("Nothing changed. You can use the old model stored at {}"
              "".format(os.path.abspath(old_model)))

        return old_model
示例#26
0
async def _train_async_internal(
    domain: Union[Domain, Text],
    config: Text,
    train_path: Text,
    nlu_data_directory: Text,
    story_directory: Text,
    output_path: Text,
    force_training: bool,
    fixed_model_name: Optional[Text],
    kwargs: Optional[Dict],
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU). Use only from `train_async`.

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        train_path: Directory in which to train the model.
        nlu_data_directory: Path to NLU training files.
        story_directory: Path to Core training files.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument.")
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
        )

    if nlu_data_not_present:
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model,
                                               train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
示例#27
0
async def train_async(
    domain: Text,
    config: Text,
    training_files: Union[Text, List[Text]],
    output: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output: Output path.
        force_training: If `True` retrain model even if data has not changed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output)
    retrain_core = True
    retrain_nlu = True

    story_directory, nlu_data_directory = data.get_core_nlu_directories(training_files)
    new_fingerprint = model.model_fingerprint(
        config, domain, nlu_data_directory, story_directory
    )

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model."
        )
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained."
        )
        return train_nlu(config, nlu_data_directory, output, None)

    if nlu_data_not_present:
        print_warning("No NLU data present. Just a Rasa Core model will be trained.")
        return await train_core_async(
            domain, config, story_directory, output, None, kwargs
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint, new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await train_core_async(
            domain, config, story_directory, output, train_path, kwargs
        )
    else:
        print (
            "Dialogue data / configuration did not change. "
            "No need to retrain dialogue model."
        )

    if force_training or retrain_nlu:
        train_nlu(config, nlu_data_directory, output, train_path)
    else:
        print ("NLU data / configuration did not change. No need to retrain NLU model.")

    if retrain_core or retrain_nlu:
        output = create_output_path(output)
        model.create_package_rasa(train_path, output, new_fingerprint)

        print_success("Your bot is trained and ready to take for a spin!")

        return output
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model))
        )

        return old_model
示例#28
0
文件: train.py 项目: quickyue/rasa
async def train_async(
    domain: Optional,
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = get_valid_config(config, CONFIG_MANDATORY_KEYS)

    train_path = tempfile.mkdtemp()
    old_model = model.get_latest_model(output_path)
    retrain_core = True
    retrain_nlu = True

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(e)
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide dialogue and NLU data in "
            "order to train a Rasa model.")
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    if not force_training and old_model:
        unpacked = model.unpack_model(old_model)
        old_core, old_nlu = model.get_model_subdirectories(unpacked)
        last_fingerprint = model.fingerprint_from_path(unpacked)

        if not model.core_fingerprint_changed(last_fingerprint,
                                              new_fingerprint):
            target_path = os.path.join(train_path, "core")
            retrain_core = not model.merge_model(old_core, target_path)

        if not model.nlu_fingerprint_changed(last_fingerprint,
                                             new_fingerprint):
            target_path = os.path.join(train_path, "nlu")
            retrain_nlu = not model.merge_model(old_nlu, target_path)

    if force_training or retrain_core:
        await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )
    else:
        print("Dialogue data / configuration did not change. "
              "No need to retrain dialogue model.")

    if force_training or retrain_nlu:
        _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )
    else:
        print(
            "NLU data / configuration did not change. No need to retrain NLU model."
        )

    if retrain_core or retrain_nlu:
        output_path = create_output_path(output_path,
                                         fixed_name=fixed_model_name)
        model.create_package_rasa(train_path, output_path, new_fingerprint)

        if uncompress:
            output_path = decompress(output_path)

        print_success("Your Rasa model is trained and saved at '{}'.".format(
            output_path))

        return output_path
    else:
        print_success(
            "Nothing changed. You can use the old model stored at '{}'"
            "".format(os.path.abspath(old_model)))

        return old_model
示例#29
0
async def train_async(
    domain: Union[Domain, Text],
    config: Text,
    training_files: Optional[Union[Text, List[Text]]],
    output_path: Text = DEFAULT_MODELS_PATH,
    force_training: bool = False,
    fixed_model_name: Optional[Text] = None,
    uncompress: bool = False,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Rasa model (Core and NLU).

    Args:
        domain: Path to the domain file.
        config: Path to the config for Core and NLU.
        training_files: Paths to the training data for Core and NLU.
        output_path: Output path.
        force_training: If `True` retrain model even if data has not changed.
        fixed_model_name: Name of model to be stored.
        uncompress: If `True` the model will not be compressed.
        kwargs: Additional training parameters.

    Returns:
        Path of the trained model archive.
    """
    config = _get_valid_config(config, CONFIG_MANDATORY_KEYS)
    train_path = tempfile.mkdtemp()

    skill_imports = SkillSelector.load(config)
    try:
        domain = Domain.load(domain, skill_imports)
    except InvalidDomain as e:
        print_error(
            "Could not load domain due to: '{}'. To specify a valid domain path use "
            "the '--domain' argument.".format(e))
        return None

    story_directory, nlu_data_directory = data.get_core_nlu_directories(
        training_files, skill_imports)
    new_fingerprint = model.model_fingerprint(config, domain,
                                              nlu_data_directory,
                                              story_directory)

    dialogue_data_not_present = not os.listdir(story_directory)
    nlu_data_not_present = not os.listdir(nlu_data_directory)

    if dialogue_data_not_present and nlu_data_not_present:
        print_error(
            "No training data given. Please provide stories and NLU data in "
            "order to train a Rasa model using the '--data' argument.")
        return

    if dialogue_data_not_present:
        print_warning(
            "No dialogue data present. Just a Rasa NLU model will be trained.")
        return _train_nlu_with_validated_data(
            config=config,
            nlu_data_directory=nlu_data_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    if nlu_data_not_present:
        print_warning(
            "No NLU data present. Just a Rasa Core model will be trained.")
        return await _train_core_with_validated_data(
            domain=domain,
            config=config,
            story_directory=story_directory,
            output=output_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

    old_model = model.get_latest_model(output_path)
    retrain_core, retrain_nlu = should_retrain(new_fingerprint, old_model,
                                               train_path)

    if force_training or retrain_core or retrain_nlu:
        await _do_training(
            domain=domain,
            config=config,
            output_path=output_path,
            train_path=train_path,
            nlu_data_directory=nlu_data_directory,
            story_directory=story_directory,
            force_training=force_training,
            retrain_core=retrain_core,
            retrain_nlu=retrain_nlu,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
            kwargs=kwargs,
        )

        return _package_model(
            new_fingerprint=new_fingerprint,
            output_path=output_path,
            train_path=train_path,
            fixed_model_name=fixed_model_name,
            uncompress=uncompress,
        )

    print_success("Nothing changed. You can use the old model stored at '{}'."
                  "".format(os.path.abspath(old_model)))
    return old_model
示例#30
0
文件: train.py 项目: anusalva-md/rasa
async def train_core_async(
    domain: Union[Domain, Text],
    config: Text,
    stories: Text,
    output: Text,
    train_path: Optional[Text] = None,
    kwargs: Optional[Dict] = None,
) -> Optional[Text]:
    """Trains a Core model.

    Args:
        domain: Path to the domain file.
        config: Path to the config file for Core.
        stories: Path to the Core training data.
        output: Output path.
        train_path: If `None` the model will be trained in a temporary
            directory, otherwise in the provided directory.
        kwargs: Additional training parameters.

    Returns:
        If `train_path` is given it returns the path to the model archive,
        otherwise the path to the directory with the trained model files.

    """
    import rasa.core.train

    config = get_valid_config(config, CONFIG_MANDATORY_KEYS_CORE)

    _train_path = train_path or tempfile.mkdtemp()

    if isinstance(Domain, str) or not train_path:
        skill_imports = SkillSelector.load(config)
        domain = Domain.load(domain, skill_imports)
        story_directory = data.get_core_directory(stories, skill_imports)
    else:
        story_directory = stories

    if not os.listdir(story_directory):
        print_error(
            "No dialogue data given. Please provide dialogue data in order to "
            "train a Rasa Core model.")
        return

    # normal (not compare) training
    print_color("Start training dialogue model ...", color=bcolors.OKBLUE)
    await rasa.core.train(
        domain_file=domain,
        stories_file=story_directory,
        output_path=os.path.join(_train_path, "core"),
        policy_config=config,
        kwargs=kwargs,
    )
    print_color("Done.", color=bcolors.OKBLUE)

    if not train_path:
        # Only Core was trained.
        output_path = create_output_path(output, prefix="core-")
        new_fingerprint = model.model_fingerprint(config,
                                                  domain,
                                                  stories=story_directory)
        model.create_package_rasa(_train_path, output_path, new_fingerprint)
        print_success(
            "Your Rasa Core model is trained and saved at '{}'.".format(
                output_path))

        return output_path

    return _train_path