def setUp(self):
    """Constructs two LocalFileCache layers pointed at temporary directories.

    Also builds an empty LayeredCache; individual tests compose the two
    file caches into it as needed.
    """
    self.layered_cache = resource_cache.LayeredCache()
    self.test_dir_1 = tempfile.mkdtemp()
    self.test_dir_2 = tempfile.mkdtemp()
    # Wrap in Path for consistency with the other fixtures in this file,
    # which construct LocalFileCache from Path objects.
    self.cache_1 = resource_cache.LocalFileCache(Path(self.test_dir_1))
    self.cache_2 = resource_cache.LocalFileCache(Path(self.test_dir_2))
def __init__(
        self,
        local_cache_path: Optional[Path] = None,
        gcs_cache_path: Optional[str] = None,
        sandbox: bool = False,
        timeout: float = 15):
    # TODO(rousik): figure out an efficient way to configure datastore caching
    """Datastore manages file retrieval for PUDL datasets.

    Args:
        local_cache_path (Path): if provided, LocalFileCache pointed at the data
          subdirectory of this path will be used with this Datastore.
        gcs_cache_path (str): if provided, GoogleCloudStorageCache will be used
          to retrieve data files. The path is expected to have the following
          format: gs://bucket[/path_prefix]
        sandbox (bool): if True, use sandbox zenodo backend when retrieving files,
          otherwise use production. This affects which zenodo servers are contacted
          as well as dois used for each dataset.
        timeout (float): connection timeouts (in seconds) to use when connecting
          to Zenodo servers.
    """
    self._cache = resource_cache.LayeredCache()
    # Lazily-populated map of dataset name -> DatapackageDescriptor.
    self._datapackage_descriptors: Dict[str, "DatapackageDescriptor"] = {}
    if local_cache_path:
        self._cache.add_cache_layer(
            resource_cache.LocalFileCache(local_cache_path))
    if gcs_cache_path:
        self._cache.add_cache_layer(
            resource_cache.GoogleCloudStorageCache(gcs_cache_path))
    self._zenodo_fetcher = ZenodoFetcher(sandbox=sandbox, timeout=timeout)
def test_that_two_cache_objects_share_storage(self):
    """Two LocalFileCache instances with the same path share the object storage."""
    alias_cache = resource_cache.LocalFileCache(Path(self.test_dir))
    key = PudlResourceKey("dataset", "doi", "file.txt")

    # Neither view of the shared directory holds the resource yet.
    self.assertFalse(self.cache.contains(key))
    self.assertFalse(alias_cache.contains(key))

    # Writing through one instance makes it visible through the other.
    self.cache.add(key, b"testContents")
    self.assertTrue(self.cache.contains(key))
    self.assertTrue(alias_cache.contains(key))
    self.assertEqual(b"testContents", alias_cache.get(key))
def test_read_only_layers_skipped_when_adding(self):
    """When add() is called, layers that are marked as read_only are skipped."""
    frozen_layer = resource_cache.LocalFileCache(self.test_dir_1, read_only=True)
    writable_layer = resource_cache.LocalFileCache(self.test_dir_2)
    stack = resource_cache.LayeredCache(frozen_layer, writable_layer)
    key = PudlResourceKey("a", "b", "c")

    # Everything starts empty.
    self.assertFalse(stack.contains(key))
    self.assertFalse(frozen_layer.contains(key))
    self.assertFalse(writable_layer.contains(key))

    # add() lands in the writable layer only, never the read-only one.
    stack.add(key, b"test")
    self.assertTrue(stack.contains(key))
    self.assertFalse(frozen_layer.contains(key))
    self.assertTrue(writable_layer.contains(key))

    # delete() removes it from the writable layer, leaving all layers empty.
    stack.delete(key)
    self.assertFalse(stack.contains(key))
    self.assertFalse(frozen_layer.contains(key))
    self.assertFalse(writable_layer.contains(key))
def test_read_only_add_and_delete_do_nothing(self):
    """When cache is in read_only mode, add() and delete() calls should be ignored."""
    key = PudlResourceKey("a", "b", "c")
    frozen = resource_cache.LocalFileCache(Path(self.test_dir), read_only=True)
    self.assertTrue(frozen.is_read_only())

    # add() on a read-only cache is a no-op.
    frozen.add(key, b"sample")
    self.assertFalse(frozen.contains(key))

    # Insert via the writable fixture cache instead; both views see it.
    self.cache.add(key, b"sample")
    self.assertFalse(self.cache.is_read_only())
    self.assertTrue(frozen.contains(key))

    # delete() on a read-only cache is likewise ignored.
    frozen.delete(key)
    self.assertTrue(frozen.contains(key))
def setUp(self):
    """Prepares temporary directory for storing cache contents."""
    scratch = tempfile.mkdtemp()
    self.test_dir = scratch
    self.cache = resource_cache.LocalFileCache(Path(scratch))