def test_cache_miss_value_error_size_of_table():
    """
    Requesting the size of a table that is not in cache should raise a ValueError.
    """
    # Stand-in connection whose fetch() hands back an empty result.
    empty_connection = Mock(**{"fetch.return_value": []})
    with pytest.raises(ValueError):
        get_size_of_table(empty_connection, "DUMMY_SCHEMA", "DUMMY_NAME")
def test_size_of_table(flowmachine_connect):
    """
    Store one query and check that the size reported for its table
    matches the size reported for the whole cache.
    """
    stored_query = daily_location("2016-01-01").store().result()
    whole_cache_size = get_size_of_cache(flowmachine_connect)
    assert whole_cache_size == get_size_of_table(
        flowmachine_connect, stored_query.table_name, "cache"
    )
def test_scoring(flowmachine_connect):
    """
    Check the cache score updates against cachey's Scorer, which is used
    here as the reference implementation.
    """
    first = daily_location("2016-01-01").store().result()
    compute_time = get_compute_time(get_db(), first.query_id)
    table_size = get_size_of_table(get_db(), first.table_name, "cache")
    stored_score = get_score(get_db(), first.query_id)

    # The reference scorer is fed the same cost (compute time per unit size).
    reference = Scorer(halflife=1000.0)
    reference_score = reference.touch("dl", compute_time / table_size)
    assert reference_score == pytest.approx(stored_score)

    # A second touch of the same record must move both scores identically.
    touched_score = touch_cache(get_db(), first.query_id)
    reference_touched = reference.touch("dl")
    assert reference_touched == pytest.approx(touched_score)

    # An unrelated cache record, added afterwards, is scored against the
    # same reference scorer.
    second = daily_location("2016-01-02").store().result()
    compute_time = get_compute_time(get_db(), second.query_id)
    table_size = get_size_of_table(get_db(), second.table_name, "cache")
    reference_score = reference.touch("dl_2", compute_time / table_size)
    second_stored_score = get_score(get_db(), second.query_id)
    assert reference_score == pytest.approx(second_stored_score)
def test_shrink_to_size_dry_run_reflects_wet_run(flowmachine_connect):
    """
    A dry run of shrink_below_size should report exactly the queries
    that a real run then removes.
    """
    first = daily_location("2016-01-01").store().result()
    # Stored only so that the cache holds a second record; never read again.
    second = daily_location("2016-01-02").store().result()

    target_size = get_size_of_table(flowmachine_connect, first.table_name, "cache")
    predicted = shrink_below_size(flowmachine_connect, target_size, dry_run=True)
    actual = shrink_below_size(flowmachine_connect, target_size, dry_run=False)

    actual_ids = [removed.md5 for removed in actual]
    predicted_ids = [candidate.md5 for candidate in predicted]
    assert actual_ids == predicted_ids
def test_shrink_to_size_uses_score(flowmachine_connect):
    """
    shrink_below_size should discard cache records lowest score first.
    """
    dl = daily_location("2016-01-01").store().result()
    dl_aggregate = dl.aggregate().store().result()

    # Skew the score multipliers so the aggregate is favoured over the
    # daily location.
    execute = flowmachine_connect.engine.execute
    execute(
        f"UPDATE cache.cached SET cache_score_multiplier = 100 WHERE query_id='{dl_aggregate.md5}'"
    )
    execute(
        f"UPDATE cache.cached SET cache_score_multiplier = 0.5 WHERE query_id='{dl.md5}'"
    )

    size_limit = get_size_of_table(flowmachine_connect, dl.table_name, "cache")
    evicted = shrink_below_size(flowmachine_connect, size_limit)

    # Exactly one record goes, and it is the one given the smaller multiplier.
    assert len(evicted) == 1
    assert not dl.is_stored
    assert dl_aggregate.is_stored