def test_removing_no_longer_compatible_cache_entries(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    local_cache_creator: Callable[..., LocalTrainingCache],
    default_model_storage: ModelStorage,
):
    """Entries cached by an incompatible Rasa version are purged on cache load."""
    cache = local_cache_creator(tmp_path)

    # Entry including serialized output which will be incompatible later.
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key1, output1, output_fingerprint1, default_model_storage
    )

    # Entry without serialized output which will be incompatible later.
    fingerprint_key2 = uuid.uuid4().hex
    output_fingerprint2 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key2, None, output_fingerprint2, default_model_storage
    )

    # A third entry cached under a newer Rasa version.
    monkeypatch.setattr(rasa, "__version__", "99999.9.5")
    fingerprint_key3 = uuid.uuid4().hex
    output3 = TestCacheableOutput({"something to cache2": "dasdaasda"})
    output_fingerprint3 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key3, output3, output_fingerprint3, default_model_storage
    )

    # Pretend we updated Rasa Open Source to a no longer compatible version.
    monkeypatch.setattr(
        rasa.engine.caching, "MINIMUM_COMPATIBLE_VERSION", "99999.8.10"
    )
    future_cache = LocalTrainingCache()

    # Fingerprints of the incompatible entries can no longer be retrieved.
    for key in (fingerprint_key1, fingerprint_key2):
        assert future_cache.get_cached_output_fingerprint(key) is None
    for fingerprint in (output_fingerprint1, output_fingerprint2):
        assert (
            future_cache.get_cached_result(
                fingerprint, "some_node", default_model_storage
            )
            is None
        )

    # Entry 3 wasn't deleted from cache as it's still compatible.
    assert (
        future_cache.get_cached_output_fingerprint(fingerprint_key3)
        == output_fingerprint3
    )
    restored = future_cache.get_cached_result(
        output_fingerprint3, "some_node", default_model_storage
    )
    assert isinstance(restored, TestCacheableOutput)
    assert restored == output3

    # Cached output of the no longer compatible entries was deleted from disk.
    assert set(tmp_path.glob("*")) == {
        tmp_path / DEFAULT_CACHE_NAME,
        restored.cache_dir,
    }
def test_cache_creates_location_if_missing(tmp_path: Path, monkeypatch: MonkeyPatch):
    """Creating the cache also creates its location directory if it's missing."""
    missing_location = tmp_path / "directory does not exist yet"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(missing_location))

    _ = LocalTrainingCache()

    assert missing_location.is_dir()
def test_get_cached_result_when_result_no_longer_available(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """A cache whose on-disk payload vanished returns `None` instead of crashing."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))
    cache = LocalTrainingCache()

    output = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint = uuid.uuid4().hex
    cache.cache_output(
        uuid.uuid4().hex, output, output_fingerprint, default_model_storage
    )

    # Pretend something deleted the cached directories in the meantime.
    for item in tmp_path.glob("*"):
        if item.is_dir():
            shutil.rmtree(item)

    assert (
        cache.get_cached_result(output_fingerprint, "some_node", default_model_storage)
        is None
    )
def test_cache_exceeds_size_but_not_in_database(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """Untracked files in the cache location don't prevent caching new results."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    max_cache_size = 5
    # Pretend we have a cache of size `max_cache_size`.
    monkeypatch.setenv(CACHE_SIZE_ENV, str(max_cache_size))

    # Fill the cache location with content which is not in the cache metadata:
    # one subdirectory which needs deletion ...
    sub_dir = tmp_path / "some dir"
    sub_dir.mkdir()
    tests.conftest.create_test_file_with_size(sub_dir, max_cache_size)
    # ... and one file which needs deletion.
    tests.conftest.create_test_file_with_size(tmp_path, max_cache_size)

    cache = LocalTrainingCache()

    # Cache an item.
    fingerprint_key = uuid.uuid4().hex
    output = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key, output, output_fingerprint, default_model_storage
    )

    assert cache.get_cached_output_fingerprint(fingerprint_key) == output_fingerprint
    assert cache.get_cached_result(
        output_fingerprint, "some_node", default_model_storage
    )
def test_skip_caching_if_cache_size_is_zero(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """Setting the cache size to 0 disables caching entirely."""
    cache_location = tmp_path / "cache"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(cache_location))
    # Disable the cache.
    monkeypatch.setenv(CACHE_SIZE_ENV, "0")

    cache = LocalTrainingCache()

    # Try to cache something.
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key1, output1, output_fingerprint1, default_model_storage
    )

    # Not even the database and no subdirectory was created ⛔️
    assert list(tmp_path.glob("*")) == []
    assert cache.get_cached_output_fingerprint(fingerprint_key1) is None
    assert (
        cache.get_cached_result(output_fingerprint1, "some_node", default_model_storage)
        is None
    )
def test_clean_up_of_cached_result_if_database_fails(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """If persisting the metadata fails, the cached payload is cleaned up again."""
    database_name = "test.db"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))
    monkeypatch.setenv(CACHE_DB_NAME_ENV, database_name)

    cache = LocalTrainingCache()

    # Deleting the database will cause an error when caching the result.
    (tmp_path / database_name).unlink()

    # Try to cache an item.
    fingerprint_key = uuid.uuid4().hex
    output = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint = uuid.uuid4().hex

    with pytest.raises(OperationalError):
        cache.cache_output(
            fingerprint_key, output, output_fingerprint, default_model_storage
        )

    # Only the (recreated) database file remains — the payload was cleaned up.
    assert list(tmp_path.glob("*")) == [tmp_path / database_name]
def test_skip_caching_if_result_exceeds_max_size(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """Fingerprints are stored even when the output itself is too big to cache."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))
    # Pretend we have a cache of size "1".
    monkeypatch.setenv(CACHE_SIZE_ENV, "1")

    cache = LocalTrainingCache()

    # Try to cache something larger than the whole cache.
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key1, output1, output_fingerprint1, default_model_storage
    )

    # The fingerprint is retrievable, the oversized output is not.
    assert cache.get_cached_output_fingerprint(fingerprint_key1) == output_fingerprint1
    assert (
        cache.get_cached_result(output_fingerprint1, "some_node", default_model_storage)
        is None
    )
def test_delete_using_lru_if_cache_exceeds_size(
    tmp_path: Path, monkeypatch: MonkeyPatch, default_model_storage: ModelStorage
):
    """Least recently used entries are evicted when the cache exceeds its size."""
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))
    # Pretend we have a cache of certain size.
    monkeypatch.setenv(CACHE_SIZE_ENV, "5")

    cache = LocalTrainingCache()

    # Cache an item.
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key1, output1, output_fingerprint1, default_model_storage
    )

    # Cache a non cacheable item to spice it up 🔥
    fingerprint_key2 = uuid.uuid4().hex
    output2 = TestCacheableOutput(None)
    output_fingerprint2 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key2, output2, output_fingerprint2, default_model_storage
    )

    # Cache another item.
    fingerprint_key3 = uuid.uuid4().hex
    output3 = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint3 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key3, output3, output_fingerprint3, default_model_storage
    )

    # Both of the first two entries are retrievable at this point.
    for fingerprint in (output_fingerprint1, output_fingerprint2):
        assert cache.get_cached_result(
            fingerprint, "some_node", default_model_storage
        )

    # Checkout the first item as this updates `last_used` and hence affects LRU.
    cache.get_cached_output_fingerprint(fingerprint_key1)

    # Now store something which requires a deletion.
    fingerprint_key4 = uuid.uuid4().hex
    output4 = TestCacheableOutput({"something to cache": "dasdaasda"}, size_in_mb=2)
    output_fingerprint4 = uuid.uuid4().hex
    cache.cache_output(
        fingerprint_key4, output4, output_fingerprint4, default_model_storage
    )

    # Cached results 1 and 4 survived the eviction.
    for fingerprint in (output_fingerprint1, output_fingerprint4):
        assert cache.get_cached_result(
            fingerprint, "some_node", default_model_storage
        )

    # Cached results 2 and 3 were deleted.
    assert cache.get_cached_output_fingerprint(fingerprint_key2) is None
    assert (
        cache.get_cached_result(
            output_fingerprint3, "some_node", default_model_storage
        )
        is None
    )
def create_local_cache(path: Path) -> LocalTrainingCache:
    """Builds a `LocalTrainingCache` rooted at `path`.

    NOTE(review): `monkeypatch` is a free variable here — this function
    presumably closes over a fixture's `monkeypatch`; confirm the enclosing
    scope, since as a standalone function this would raise `NameError`.
    """
    location = str(path)
    monkeypatch.setenv(CACHE_LOCATION_ENV, location)
    return LocalTrainingCache()
def _train_graph(
    file_importer: TrainingDataImporter,
    training_type: TrainingType,
    output_path: Text,
    fixed_model_name: Text,
    model_to_finetune: Optional[Text] = None,
    force_full_training: bool = False,
    dry_run: bool = False,
    **kwargs: Any,
) -> TrainingResult:
    """Trains (or, for a dry run, fingerprints) a model graph and packages it.

    Args:
        file_importer: Provides the config and training data.
        training_type: The type of training to perform.
        output_path: Directory the packaged model is written to.
        fixed_model_name: Name to use for the packaged model.
        model_to_finetune: Optional path to a previous model to finetune.
        force_full_training: If `True`, retrain every graph node from scratch.
        dry_run: If `True`, only report what would be retrained.
        **kwargs: Additional parameters forwarded to the recipe.

    Returns:
        The result of training, containing the packaged model's path.
    """
    if model_to_finetune:
        model_to_finetune = rasa.model.get_model_for_finetuning(model_to_finetune)
        if not model_to_finetune:
            rasa.shared.utils.cli.print_error_and_exit(
                f"No model for finetuning found. Please make sure to either "
                f"specify a path to a previous model or to have a finetunable "
                f"model within the directory '{output_path}'."
            )
        rasa.shared.utils.common.mark_as_experimental_feature(
            "Incremental Training feature"
        )
    is_finetuning = model_to_finetune is not None

    # Resolve the recipe and let it auto-configure any missing config keys.
    config = file_importer.get_config()
    recipe = Recipe.recipe_for_name(config.get("recipe"))
    config, _missing_keys, _configured_keys = recipe.auto_configure(
        file_importer.get_config_file_for_auto_config(),
        config,
        training_type,
    )
    model_configuration = recipe.graph_config_for_recipe(
        config,
        kwargs,
        training_type=training_type,
        is_finetuning=is_finetuning,
    )
    rasa.engine.validation.validate(model_configuration)

    with tempfile.TemporaryDirectory() as temp_model_dir:
        model_storage = _create_model_storage(
            is_finetuning, model_to_finetune, Path(temp_model_dir)
        )
        cache = LocalTrainingCache()
        trainer = GraphTrainer(model_storage, cache, DaskGraphRunner)

        # A dry run only reports which graph nodes would be retrained.
        if dry_run:
            fingerprint_status = trainer.fingerprint(
                model_configuration.train_schema, file_importer
            )
            return _dry_run_result(fingerprint_status, force_full_training)

        model_name = _determine_model_name(fixed_model_name, training_type)
        full_model_path = Path(output_path, model_name)

        with telemetry.track_model_training(
            file_importer, model_type=training_type.model_type
        ):
            trainer.train(
                model_configuration,
                file_importer,
                full_model_path,
                force_retraining=force_full_training,
                is_finetuning=is_finetuning,
            )
            rasa.shared.utils.cli.print_success(
                f"Your Rasa model is trained and saved at '{full_model_path}'."
            )

        return TrainingResult(str(full_model_path), 0)
def create_local_cache(path: Path) -> LocalTrainingCache:
    """Builds a `LocalTrainingCache` whose location is patched to `path`.

    NOTE(review): `monkeypatch` is a free variable here — this function
    presumably closes over a fixture's `monkeypatch`; confirm the enclosing
    scope, since as a standalone function this would raise `NameError`.
    """
    # Patch the class-level location lookup so the new cache lands in `path`.
    monkeypatch.setattr(LocalTrainingCache, "_get_cache_location", lambda: path)
    return LocalTrainingCache()