Example #1
def test_removing_no_longer_compatible_cache_entries(
    tmp_path: Path,
    monkeypatch: MonkeyPatch,
    local_cache_creator: Callable[..., LocalTrainingCache],
    default_model_storage: ModelStorage,
):
    cache = local_cache_creator(tmp_path)

    # Cache an entry including serialized output which will be incompatible later
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key1, output1, output_fingerprint1,
                       default_model_storage)

    # Cache an entry without serialized output which will be incompatible later
    fingerprint_key2 = uuid.uuid4().hex
    output_fingerprint2 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key2, None, output_fingerprint2,
                       default_model_storage)

    # Cache another entry with a newer Rasa version
    monkeypatch.setattr(rasa, "__version__", "99999.9.5")
    fingerprint_key3 = uuid.uuid4().hex
    output3 = TestCacheableOutput({"something to cache2": "dasdaasda"})
    output_fingerprint3 = uuid.uuid4().hex

    cache.cache_output(fingerprint_key3, output3, output_fingerprint3,
                       default_model_storage)

    # Pretend we updated Rasa Open Source to a no longer compatible version
    monkeypatch.setattr(rasa.engine.caching, "MINIMUM_COMPATIBLE_VERSION",
                        "99999.8.10")

    cache_run_by_future_rasa = LocalTrainingCache()

    # Cached fingerprints can no longer be retrieved
    assert (cache_run_by_future_rasa.get_cached_output_fingerprint(
        fingerprint_key1) is None)
    assert (cache_run_by_future_rasa.get_cached_output_fingerprint(
        fingerprint_key2) is None)

    assert (cache_run_by_future_rasa.get_cached_result(
        output_fingerprint1, "some_node", default_model_storage) is None)
    assert (cache_run_by_future_rasa.get_cached_result(
        output_fingerprint2, "some_node", default_model_storage) is None)

    # Entry 3 wasn't deleted from the cache as it's still compatible
    assert (cache_run_by_future_rasa.get_cached_output_fingerprint(
        fingerprint_key3) == output_fingerprint3)
    restored = cache_run_by_future_rasa.get_cached_result(
        output_fingerprint3, "some_node", default_model_storage)
    assert isinstance(restored, TestCacheableOutput)
    assert restored == output3

    # Cached output of the no-longer-compatible entries was deleted from disk
    assert set(tmp_path.glob("*")) == {
        tmp_path / DEFAULT_CACHE_NAME,
        restored.cache_dir,
    }
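
All of these examples lean on a `TestCacheableOutput` helper and a `default_model_storage` fixture defined elsewhere in the test suite, along with imports such as `LocalTrainingCache`, `CACHE_LOCATION_ENV`, `CACHE_SIZE_ENV` and `DEFAULT_CACHE_NAME`, presumably from `rasa.engine.caching`. The sketch below shows roughly what the helper could look like; the field names, the `to_cache`/`from_cache` signatures and the import paths are assumptions inferred from how the examples use the object, not the suite's actual definitions.

import dataclasses
import json
from pathlib import Path
from typing import Any, Optional, Text

from rasa.engine.storage.storage import ModelStorage


@dataclasses.dataclass
class TestCacheableOutput:
    """Hypothetical dummy cacheable value used by the tests."""

    value: Any
    size_in_mb: int = 0
    # Set when the object is restored from the cache; excluded from equality
    # so that `restored == output3` in Example #1 still holds.
    cache_dir: Optional[Path] = dataclasses.field(default=None, compare=False)

    def to_cache(self, directory: Path, model_storage: ModelStorage) -> None:
        # Persist the value so `from_cache` can restore it later.
        (directory / "cached.json").write_text(json.dumps(self.value))
        # Optionally pad the cached directory to a given size (in MB) so that
        # the size-limit and LRU tests have something worth evicting.
        if self.size_in_mb:
            with open(directory / "fixed_size_file", mode="wb") as f:
                f.truncate(self.size_in_mb * 1024 * 1024)

    @classmethod
    def from_cache(
        cls, node_name: Text, directory: Path, model_storage: ModelStorage
    ) -> "TestCacheableOutput":
        # The real `Cacheable` interface may expect additional arguments (for
        # example an output fingerprint); this keeps only what the sketch needs.
        restored = cls(json.loads((directory / "cached.json").read_text()))
        restored.cache_dir = directory
        return restored

The `{"something to cache": "dasdaasda"}` payloads used throughout are arbitrary; only the size and the ability to round-trip through `to_cache`/`from_cache` matter.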
Example #2
def test_delete_using_lru_if_cache_exceeds_size(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    # Pretend we have a cache of a certain size
    monkeypatch.setenv(CACHE_SIZE_ENV, "5")

    cache = LocalTrainingCache()

    # Cache an item
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key1, output1, output_fingerprint1,
                       default_model_storage)

    # Cache a non-cacheable item to spice it up 🔥
    fingerprint_key2 = uuid.uuid4().hex
    output2 = TestCacheableOutput(None)
    output_fingerprint2 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key2, output2, output_fingerprint2,
                       default_model_storage)

    # Cache another item
    fingerprint_key3 = uuid.uuid4().hex
    output3 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint3 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key3, output3, output_fingerprint3,
                       default_model_storage)

    # Assert cached results 1 and 2 can be retrieved
    for output_fingerprint in [output_fingerprint1, output_fingerprint2]:
        assert cache.get_cached_result(output_fingerprint, "some_node",
                                       default_model_storage)

    # Check out the first item as this updates `last_used` and hence affects LRU
    cache.get_cached_output_fingerprint(fingerprint_key1)

    # Now store something which requires a deletion
    fingerprint_key4 = uuid.uuid4().hex
    output4 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint4 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key4, output4, output_fingerprint4,
                       default_model_storage)
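
    # Size arithmetic: the budget is 5 MB, items 1, 3 and 4 are 2 MB each, and
    # item 1 was just touched. Fitting item 4 therefore pushes the least
    # recently used entries (2 and 3) out of the cache.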

    # Assert cached results 1 and 4 are there
    for output_fingerprint in [output_fingerprint1, output_fingerprint4]:
        assert cache.get_cached_result(output_fingerprint, "some_node",
                                       default_model_storage)

    # Entries 2 and 3 were evicted: neither fingerprint 2 nor result 3 can be retrieved
    assert cache.get_cached_output_fingerprint(fingerprint_key2) is None
    assert (cache.get_cached_result(output_fingerprint3, "some_node",
                                    default_model_storage) is None)
Example #3
def test_cache_exceeds_size_but_not_in_database(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    max_cache_size = 5
    # Pretend we have a cache of size `max_cache_size`
    monkeypatch.setenv(CACHE_SIZE_ENV, str(max_cache_size))

    # Fill the cache location with data that is not tracked in the cache metadata
    sub_dir = tmp_path / "some dir"
    sub_dir.mkdir()

    # one subdirectory which needs deletion
    tests.conftest.create_test_file_with_size(sub_dir, max_cache_size)
    # one file which needs deletion
    tests.conftest.create_test_file_with_size(tmp_path, max_cache_size)

    cache = LocalTrainingCache()

    # Cache an item
    fingerprint_key = uuid.uuid4().hex
    output = TestCacheableOutput({"something to cache": "dasdaasda"},
                                 size_in_mb=2)
    output_fingerprint = uuid.uuid4().hex
    cache.cache_output(fingerprint_key, output, output_fingerprint,
                       default_model_storage)

    assert cache.get_cached_output_fingerprint(
        fingerprint_key) == output_fingerprint
    assert cache.get_cached_result(output_fingerprint, "some_node",
                                   default_model_storage)
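
Example #3 also calls `tests.conftest.create_test_file_with_size`, another helper that is not shown here. A plausible stand-in, assuming it does nothing more than create a file of the requested size in megabytes:

from pathlib import Path


def create_test_file_with_size(directory: Path, size_in_mb: float) -> Path:
    # Create a file of exactly `size_in_mb` megabytes so a directory on disk
    # can be made to exceed the configured cache size.
    test_file = directory / "fixed_size_file"
    with open(test_file, mode="wb") as f:
        f.truncate(int(size_in_mb * 1024 * 1024))
    return test_file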
Example #4
def test_skip_caching_if_cache_size_is_zero(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    cache_location = tmp_path / "cache"
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(cache_location))

    # Disable cache
    monkeypatch.setenv(CACHE_SIZE_ENV, "0")

    cache = LocalTrainingCache()

    # Cache something
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key1, output1, output_fingerprint1,
                       default_model_storage)

    # Neither the database nor any subdirectory was created ⛔️
    assert list(tmp_path.glob("*")) == []

    assert cache.get_cached_output_fingerprint(fingerprint_key1) is None

    assert (cache.get_cached_result(output_fingerprint1, "some_node",
                                    default_model_storage) is None)
Example #5
def test_get_cached_result_when_result_no_longer_available(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    cache = LocalTrainingCache()

    output = TestCacheableOutput({"something to cache": "dasdaasda"})
    output_fingerprint = uuid.uuid4().hex

    cache.cache_output(uuid.uuid4().hex, output, output_fingerprint,
                       default_model_storage)

    # Pretend something deleted the cache in between
    for path in tmp_path.glob("*"):
        if path.is_dir():
            shutil.rmtree(path)

    assert (cache.get_cached_result(output_fingerprint, "some_node",
                                    default_model_storage) is None)
Example #6
def test_skip_caching_if_result_exceeds_max_size(
        tmp_path: Path, monkeypatch: MonkeyPatch,
        default_model_storage: ModelStorage):
    monkeypatch.setenv(CACHE_LOCATION_ENV, str(tmp_path))

    # Pretend we have a cache with a maximum size of 1 MB
    monkeypatch.setenv(CACHE_SIZE_ENV, "1")

    cache = LocalTrainingCache()

    # Cache something
    fingerprint_key1 = uuid.uuid4().hex
    output1 = TestCacheableOutput({"something to cache": "dasdaasda"},
                                  size_in_mb=2)
    output_fingerprint1 = uuid.uuid4().hex
    cache.cache_output(fingerprint_key1, output1, output_fingerprint1,
                       default_model_storage)
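
    # The fingerprint mapping is still recorded, but the 2 MB payload itself is
    # skipped because it exceeds the 1 MB budget, so only the result lookup
    # comes back empty.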

    assert cache.get_cached_output_fingerprint(
        fingerprint_key1) == output_fingerprint1

    assert (cache.get_cached_result(output_fingerprint1, "some_node",
                                    default_model_storage) is None)