def _get_train_files_cmd():
    """Get the raw train data by fetching the train file given in the command
    line arguments to the train script.

    When training the NLU model explicitly the training data lives in the
    "nlu" argument; otherwise it lives in the "data" argument.
    """
    parsed_args = create_argument_parser().parse_args()
    try:
        candidates = list_files(parsed_args.nlu)
    except AttributeError:
        # No "nlu" argument on this parser: fall back to the generic
        # "data" argument and take the NLU portion of it.
        candidates = list(get_core_nlu_files(parsed_args.data)[1])
    return [path for path in candidates if guess_format(path) == RASA_NLU]
def test_train_with_only_core_data(run_in_simple_project: Callable[..., RunResult]):
    """Training must still produce a model after the NLU data is removed."""
    project_dir = os.getcwd()

    nlu_file = os.path.join(project_dir, "data/nlu.yml")
    assert os.path.exists(nlu_file)
    os.remove(nlu_file)

    run_in_simple_project("train", "--fixed-model-name", "test-model")

    models_dir = os.path.join(project_dir, "models")
    assert os.path.exists(models_dir)

    model_files = io_utils.list_files(models_dir)
    assert len(model_files) == 1
    assert os.path.basename(model_files[0]) == "test-model.tar.gz"
def test_nlu(args: argparse.Namespace) -> None:
    """Run the `rasa test nlu` CLI command for the parsed arguments.

    Depending on the arguments this dispatches to one of three modes:
    comparison of multiple configs, cross validation, or plain evaluation
    of a single trained model.
    """
    from rasa import data
    from rasa.test import compare_nlu_models, perform_nlu_cross_validation, test_nlu

    # Resolve the NLU training-data path and normalise it to a directory.
    nlu_data = cli_utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    nlu_data = data.get_nlu_directory(nlu_data)
    output = args.out or DEFAULT_RESULTS_PATH
    # CLI exposes the inverse flag; store the positive form on args.
    args.errors = not args.no_errors

    io_utils.create_directory(output)

    # A single config entry that points at a directory expands to every
    # file inside it, which triggers comparison mode below.
    if args.config is not None and len(args.config) == 1:
        args.config = os.path.abspath(args.config[0])
        if os.path.isdir(args.config):
            args.config = io_utils.list_files(args.config)

    if isinstance(args.config, list):
        logger.info(
            "Multiple configuration files specified, running nlu comparison mode."
        )

        config_files = []
        for file in args.config:
            try:
                # Keep only files that validate against the config schema;
                # anything else (e.g. stray YAML) is silently skipped.
                validation_utils.validate_yaml_schema(
                    io_utils.read_file(file),
                    CONFIG_SCHEMA_FILE,
                    show_validation_errors=False,
                )
                config_files.append(file)
            except validation_utils.InvalidYamlFileError:
                logger.debug(
                    "Ignoring file '{}' as it is not a valid config file.".format(file)
                )
                continue

        compare_nlu_models(
            configs=config_files,
            nlu=nlu_data,
            output=output,
            runs=args.runs,
            exclusion_percentages=args.percentages,
        )
    elif args.cross_validation:
        logger.info("Test model using cross validation.")
        config = cli_utils.get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)
        perform_nlu_cross_validation(config, nlu_data, output, vars(args))
    else:
        model_path = cli_utils.get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
        test_nlu(model_path, nlu_data, output, vars(args))
def test_train_core_compare(run_in_simple_project: Callable[..., RunResult]):
    """Core comparison training must produce one result directory per run."""
    project_dir = os.getcwd()

    # Two identical configs are enough to exercise comparison mode.
    for config_file in ("config_1.yml", "config_2.yml"):
        io_utils.write_yaml_file(
            {
                "language": "en",
                "pipeline": "supervised_embeddings",
                "policies": [{"name": "MemoizationPolicy"}],
            },
            config_file,
        )

    run_in_simple_project(
        "train",
        "core",
        "-c",
        "config_1.yml",
        "config_2.yml",
        "--stories",
        "data/stories.md",
        "--out",
        "core_comparison_results",
        "--runs",
        "2",
        "--percentages",
        "25",
        "75",
        "--augmentation",
        "5",
    )

    comparison_dir = os.path.join(project_dir, "core_comparison_results")
    assert os.path.exists(comparison_dir)

    run_directories = io_utils.list_subdirectories(comparison_dir)
    assert len(run_directories) == 2

    model_files = io_utils.list_files(os.path.join(comparison_dir, run_directories[0]))
    assert len(model_files) == 4
    assert model_files[0].endswith("tar.gz")
def test_train_with_only_core_data(run_in_default_project_without_models):
    """Training must still produce a model after the Markdown NLU data is removed."""
    project_dir = os.getcwd()

    nlu_file = os.path.join(project_dir, "data/nlu.md")
    assert os.path.exists(nlu_file)
    os.remove(nlu_file)

    run_in_default_project_without_models("train", "--fixed-model-name", "test-model")

    models_dir = os.path.join(project_dir, "models")
    assert os.path.exists(models_dir)

    model_files = io_utils.list_files(models_dir)
    assert len(model_files) == 1
    assert os.path.basename(model_files[0]) == "test-model.tar.gz"
def test_train_with_only_nlu_data(run_in_simple_project: Callable[..., RunResult]):
    """Training must still produce a model after all Core data is removed."""
    project_dir = Path.cwd()

    # Delete every Core training file so only NLU data remains.
    for core_file in ("stories.yml", "rules.yml"):
        core_path = project_dir / "data" / core_file
        assert core_path.exists()
        core_path.unlink()

    run_in_simple_project("train", "--fixed-model-name", "test-model")

    assert os.path.exists(os.path.join(project_dir, "models"))
    model_files = io_utils.list_files(os.path.join(project_dir, "models"))
    assert len(model_files) == 1
    assert os.path.basename(model_files[0]) == "test-model.tar.gz"
def test_test_core_comparison(run_in_default_project):
    """`rasa test core` with two models must write a comparison results file."""
    model_files = list_files("models")
    # Duplicate the trained model so two models can be compared.
    copyfile(model_files[0], "models/copy-model.tar.gz")

    run_in_default_project(
        "test",
        "core",
        "-m",
        model_files[0],
        "models/copy-model.tar.gz",
        "--stories",
        "data/stories.md",
    )

    assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
def test_test_core_comparison(
    run_in_simple_project_with_model: Callable[..., RunResult]
):
    """`rasa test core` with two models must write a comparison results file."""
    model_files = list_files("models")
    # Duplicate the trained model so two models can be compared.
    copyfile(model_files[0], "models/copy-model.tar.gz")

    run_in_simple_project_with_model(
        "test",
        "core",
        "-m",
        model_files[0],
        "models/copy-model.tar.gz",
        "--stories",
        "data/stories.md",
    )

    assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
def test_train_nlu(run_in_default_project):
    """`rasa train nlu` must write exactly one NLU model archive."""
    run_in_default_project(
        "train",
        "nlu",
        "-c",
        "config.yml",
        "--nlu",
        "data/nlu.md",
        "--out",
        "train_models",
    )

    assert os.path.exists("train_models")
    trained_models = io_utils.list_files("train_models")
    assert len(trained_models) == 1
    assert os.path.basename(trained_models[0]).startswith("nlu-")
def get_file_format(resource_name: Text) -> Text:
    """Determine the training-data file format of a resource.

    Args:
        resource_name: Path to a training-data file or directory.

    Returns:
        "md" if every file in the resource is Markdown, otherwise "json"
        (which is also the default for an empty resource).

    Raises:
        AttributeError: If `resource_name` is `None` or does not exist on
            disk (kept as AttributeError — callers rely on this type).
    """
    from rasa.nlu.training_data import loading

    if resource_name is None or not os.path.exists(resource_name):
        raise AttributeError(f"Resource '{resource_name}' does not exist.")

    files = io_utils.list_files(resource_name)
    # Comprehension instead of list(map(lambda ...)) — same result, idiomatic.
    file_formats = [loading.guess_format(f) for f in files]

    if not file_formats:
        return "json"

    first_format = file_formats[0]
    # Only report "md" when the resource is uniformly Markdown; any mix
    # falls back to "json".
    if first_format == "md" and all(f == first_format for f in file_formats):
        return first_format
    return "json"
def load_data(resource_name: Text, language: Optional[Text] = "en") -> "TrainingData":
    """Load training data from disk.

    Merges them if loaded from disk and multiple files are found."""
    from rasa.nlu.training_data import TrainingData

    if not os.path.exists(resource_name):
        raise ValueError(f"File '{resource_name}' does not exist.")

    loaded = [_load(f, language) for f in io_utils.list_files(resource_name)]
    # Drop files that produced no training data.
    non_empty = [ds for ds in loaded if ds]

    if not non_empty:
        return TrainingData()

    first, *rest = non_empty
    if not rest:
        return first
    return first.merge(*rest)
def test_train(run_in_default_project):
    """A full `rasa train` run must produce exactly one named model archive."""
    project_dir = os.getcwd()

    run_in_default_project(
        "train",
        "-c",
        "config.yml",
        "-d",
        "domain.yml",
        "--data",
        "data",
        "--out",
        "train_models",
        "--fixed-model-name",
        "test-model",
    )

    out_dir = os.path.join(project_dir, "train_models")
    assert os.path.exists(out_dir)

    trained_models = io_utils.list_files(out_dir)
    assert len(trained_models) == 1
    assert os.path.basename(trained_models[0]) == "test-model.tar.gz"
def test_train_nlu(run_in_simple_project: Callable[..., RunResult]):
    """`rasa train nlu` must produce one model that does not embed training data."""
    run_in_simple_project(
        "train",
        "nlu",
        "-c",
        "config.yml",
        "--nlu",
        "data/nlu.md",
        "--out",
        "train_models",
    )

    assert os.path.exists("train_models")
    trained_models = io_utils.list_files("train_models")
    assert len(trained_models) == 1
    assert os.path.basename(trained_models[0]).startswith("nlu-")

    model_dir = model.get_model("train_models")
    assert model_dir is not None

    # The persisted metadata must not reference raw training data, and no
    # training-data file may be written into the model.
    metadata = Metadata.load(os.path.join(model_dir, "nlu"))
    assert metadata.get("training_data") is None
    assert not os.path.exists(
        os.path.join(model_dir, "nlu", training_data.DEFAULT_TRAINING_DATA_OUTPUT_PATH)
    )
async def read_from_folder(
    resource_name: Text,
    domain: Domain,
    # NOTE(review): shared default instance across calls — presumably
    # RegexInterpreter is stateless; confirm before relying on it.
    interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
    template_variables: Optional[Dict] = None,
    use_e2e: bool = False,
    exclusion_percentage: Optional[int] = None,
) -> List[StoryStep]:
    """Given a path reads all contained story files."""
    if not os.path.exists(resource_name):
        raise ValueError("Story file or folder could not be found. Make "
                         "sure '{}' exists and points to a story folder "
                         "or file.".format(os.path.abspath(resource_name)))

    story_files = io_utils.list_files(resource_name)
    return await StoryFileReader.read_from_files(
        story_files,
        domain,
        interpreter,
        template_variables,
        use_e2e,
        exclusion_percentage,
    )
def test_train_no_domain_exists(run_in_default_project):
    """Without a domain, training must still produce an NLU-only model."""
    os.remove("domain.yml")

    run_in_default_project(
        "train",
        "-c",
        "config.yml",
        "--data",
        "data",
        "--out",
        "train_models_no_domain",
        "--fixed-model-name",
        "nlu-model-only",
    )

    assert os.path.exists("train_models_no_domain")
    trained_models = io_utils.list_files("train_models_no_domain")
    assert len(trained_models) == 1

    # The archive must unpack and contain NLU metadata.
    unpacked = model.unpack_model("train_models_no_domain/nlu-model-only.tar.gz")
    assert os.path.exists(os.path.join(unpacked, "nlu", "metadata.json"))
def test_list_files_invalid_resource():
    """A non-string resource must be rejected with an informative error."""
    with pytest.raises(ValueError) as error_info:
        io_utils.list_files(None)

    assert "must be a string type" in str(error_info.value)
def test_list_files_non_existing_dir():
    """A path that does not exist on disk must raise a lookup error."""
    with pytest.raises(ValueError) as error_info:
        io_utils.list_files("my/made_up/path")

    assert "Could not locate the resource" in str(error_info.value)