示例#1
0
    def test_export_import_cache_with_queries_list(self, tmp_path, query,
                                                   metadata, results):
        """Export a subset of cached queries and import it into another store.

        Four queries are dumped into the first store, only the first two are
        exported to ``cache.zip``, and that file is imported into a second
        store. The second store must then list exactly the exported queries.
        """
        all_queries = [
            "select top 10 * from Receipts",
            "select top 20 * from Receipts",
            "select top 5 * from Receipts",
            "select top 25 * from Receipts",
        ]
        exported_queries = all_queries[:2]
        archive = tmp_path / "cache.zip"

        source = store.FileStore(cache_store=tmp_path / "cache1")
        target = store.FileStore(cache_store=tmp_path / "cache2")

        # Populate only the source store; every entry shares the same
        # results/metadata fixtures.
        for sql in all_queries:
            source.dump(sql, results, metadata)

        source.export(archive, queries=exported_queries)
        target.import_cache(archive)

        source_listing = source.list()
        target_listing = target.list()

        # The source keeps everything; the target only holds what was exported.
        assert source_listing.shape[0] == len(all_queries)
        assert target_listing.shape[0] == len(exported_queries)
        assert set(target_listing.loc[:, "query"]) == set(exported_queries)
示例#2
0
    def test_export_import_cache(self, tmp_path, query, metadata, results):
        """Round-trip a whole cache through export/import and compare listings."""
        export_path = tmp_path / "cache.zip"

        origin = store.FileStore(cache_store=tmp_path / "cache1")
        replica = store.FileStore(cache_store=tmp_path / "cache2")

        # One cached entry in the origin, then export everything and import
        # it into the replica.
        origin.dump(query, results, metadata)
        origin.export(export_path)
        replica.import_cache(export_path)

        origin_listing = origin.list()
        replica_listing = replica.list()
        assert origin_listing.equals(replica_listing)
示例#3
0
 def test_get_filepaths_joblib(self, tmp_path, query):
     """Check metadata and cache file locations for the joblib backend."""
     joblib_store = store.FileStore(cache_store=tmp_path, backend="joblib")

     metadata_path = joblib_store.get_metadata_filepath(query)
     cache_path = joblib_store.get_cache_filepath(query)
     digest = store.hash_query(query)

     # Both files are named after the query hash ...
     assert metadata_path.stem == digest
     assert cache_path.stem == digest

     # ... and live in a sub-directory named after the serializer format.
     expected_dir = tmp_path / joblib_store.serializer.fmt
     assert metadata_path == expected_dir / (digest + ".json")
     assert cache_path == expected_dir / (digest + ".joblib")
示例#4
0
    def test_cache_independent_from_format(self, tmp_path, metadata, results):
        """Queries that normalize to the same text share one cache entry.

        Only the first spelling is dumped; with ``normalize=True`` the store
        must report both spellings as cached afterwards.
        """
        lower_sql = "select top 3 * from receipts"
        mixed_sql = "SELECT top 3 * FROM receipts"
        variants = (lower_sql, mixed_sql)

        normalized_lower = utils.normalize_query(lower_sql)
        normalized_mixed = utils.normalize_query(mixed_sql)
        assert normalized_lower == normalized_mixed

        cache = store.FileStore(cache_store=tmp_path, normalize=True)

        # Before dumping: nothing exists for either spelling.
        for sql in variants:
            metadata_path = cache.get_metadata_filepath(sql)
            assert not metadata_path.exists()
            results_path = cache.get_cache_filepath(sql)
            assert not results_path.exists()
            assert not cache.exists(sql)

        cache.dump(lower_sql, results, metadata)

        # After dumping one spelling: both spellings resolve to existing files.
        for sql in variants:
            metadata_path = cache.get_metadata_filepath(sql)
            assert metadata_path.exists()
            results_path = cache.get_cache_filepath(sql)
            assert results_path.exists()
            assert cache.exists(sql)
示例#5
0
 def test_init(self, tmp_path):
     """A FileStore built on ``tmp_path`` exposes an existing ``cache_store``."""
     cache_dir = store.FileStore(cache_store=tmp_path).cache_store
     assert cache_dir.exists()
示例#6
0
 def test_init_joblib(self, tmp_path):
     """Check that ``backend="joblib"`` selects a ``JoblibSerializer``.

     Also verifies that the serializer's ``compression`` is ``0`` and that
     the store's ``cache_store`` path exists after construction.
     """
     s = store.FileStore(cache_store=tmp_path, backend="joblib")
     assert isinstance(s.serializer, serializer.JoblibSerializer)
     # Must be an ``assert``: a bare ``==`` expression is evaluated and its
     # result discarded, so it would never fail the test.
     assert s.serializer.compression == 0
     assert s.cache_store.exists()
示例#7
0
 def test_init_parquet(self, tmp_path):
     """Check that ``backend="parquet"`` selects a ``ParquetSerializer``.

     Also verifies that the serializer's ``compression`` is ``"snappy"`` and
     that the store's ``cache_store`` path exists after construction.
     """
     s = store.FileStore(cache_store=tmp_path, backend="parquet")
     assert isinstance(s.serializer, serializer.ParquetSerializer)
     # Must be an ``assert``: a bare ``==`` expression is evaluated and its
     # result discarded, so it would never fail the test.
     assert s.serializer.compression == "snappy"
     assert s.cache_store.exists()
示例#8
0
def file_store(tmp_path):
    """Return a ``FileStore`` rooted at ``tmp_path`` with ``normalize=True``."""
    options = {"cache_store": tmp_path, "normalize": True}
    return store.FileStore(**options)