示例#1
0
 def setUp(self):
     """Build the fixtures: an empty LayeredCache and two LocalFileCache objects.

     Each LocalFileCache is constructed from its own freshly created temporary
     directory; the directory paths are stored as ``test_dir_1`` and
     ``test_dir_2`` so other test methods can reuse them.
     """
     self.layered_cache = resource_cache.LayeredCache()
     # Directories are created in numbering order: test_dir_1 first.
     self.test_dir_1, self.test_dir_2 = tempfile.mkdtemp(), tempfile.mkdtemp()
     self.cache_1, self.cache_2 = (
         resource_cache.LocalFileCache(tmp_root)
         for tmp_root in (self.test_dir_1, self.test_dir_2)
     )
示例#2
0
    def __init__(self,
                 local_cache_path: Optional[Path] = None,
                 gcs_cache_path: Optional[str] = None,
                 sandbox: bool = False,
                 timeout: float = 15):
        """
        Datastore manages file retrieval for PUDL datasets.

        Builds a LayeredCache (optionally populated with a local and/or a
        Google Cloud Storage layer, in that order) and a ZenodoFetcher.

        Args:
            local_cache_path (Path): if provided, a LocalFileCache constructed
              from this path is added as a cache layer.
            gcs_cache_path (str): if provided, a GoogleCloudStorageCache
              constructed from this path is added as a cache layer. The path
              is expected to have the following format:
              gs://bucket[/path_prefix]
            sandbox (bool): forwarded to ZenodoFetcher. If True, the sandbox
              zenodo backend is used when retrieving files, otherwise
              production.
            timeout (float): forwarded to ZenodoFetcher; connection timeout
              (in seconds) to use when connecting to Zenodo servers.

        """
        # TODO(rousik): figure out an efficient way to configure datastore caching
        layers = resource_cache.LayeredCache()

        # A layer is only registered when its path argument is truthy, so
        # both None and empty values leave the layer out.
        if local_cache_path:
            local_layer = resource_cache.LocalFileCache(local_cache_path)
            layers.add_cache_layer(local_layer)
        if gcs_cache_path:
            gcs_layer = resource_cache.GoogleCloudStorageCache(gcs_cache_path)
            layers.add_cache_layer(gcs_layer)

        self._cache = layers
        self._datapackage_descriptors: Dict[str, DatapackageDescriptor] = {}
        self._zenodo_fetcher = ZenodoFetcher(sandbox=sandbox, timeout=timeout)
示例#3
0
 def test_that_two_cache_objects_share_storage(self):
     """A resource added via one LocalFileCache is visible via another on the same path."""
     key = PudlResourceKey("dataset", "doi", "file.txt")
     payload = b"testContents"
     twin = resource_cache.LocalFileCache(Path(self.test_dir))
     both_caches = (self.cache, twin)

     # Nothing is stored yet, regardless of which instance is asked.
     for cache in both_caches:
         self.assertFalse(cache.contains(key))

     # Write through the fixture cache only ...
     self.cache.add(key, payload)

     # ... and expect the resource to show up through both instances.
     for cache in both_caches:
         self.assertTrue(cache.contains(key))
     self.assertEqual(payload, twin.get(key))
示例#4
0
    def test_read_only_layers_skipped_when_adding(self):
        """LayeredCache.add() must bypass layers that are flagged read_only."""
        frozen_layer = resource_cache.LocalFileCache(
            self.test_dir_1, read_only=True)
        writable_layer = resource_cache.LocalFileCache(self.test_dir_2)
        stack = resource_cache.LayeredCache(frozen_layer, writable_layer)
        key = PudlResourceKey("a", "b", "c")

        def assert_absent_everywhere():
            # Checked in the order: layered cache, read-only layer, writable layer.
            for holder in (stack, frozen_layer, writable_layer):
                self.assertFalse(holder.contains(key))

        assert_absent_everywhere()

        # Adding through the stack should land only in the writable layer.
        stack.add(key, b"test")
        self.assertTrue(stack.contains(key))
        self.assertFalse(frozen_layer.contains(key))
        self.assertTrue(writable_layer.contains(key))

        # Deleting through the stack removes the resource again.
        stack.delete(key)
        assert_absent_everywhere()
示例#5
0
    def test_read_only_add_and_delete_do_nothing(self):
        """A read_only LocalFileCache must ignore both add() and delete()."""
        key = PudlResourceKey("a", "b", "c")
        frozen = resource_cache.LocalFileCache(
            Path(self.test_dir), read_only=True)
        self.assertTrue(frozen.is_read_only())

        # add() through the read-only instance leaves the storage untouched.
        frozen.add(key, b"sample")
        self.assertFalse(frozen.contains(key))

        # The writable fixture cache points at the same directory, so a
        # resource added through it becomes visible to the read-only one.
        self.cache.add(key, b"sample")
        self.assertFalse(self.cache.is_read_only())
        self.assertTrue(frozen.contains(key))

        # delete() through the read-only instance must not remove it.
        frozen.delete(key)
        self.assertTrue(frozen.contains(key))
示例#6
0
 def setUp(self):
     """Create a temporary directory and a LocalFileCache constructed from it."""
     scratch_dir = tempfile.mkdtemp()
     self.test_dir = scratch_dir
     # The cache receives the directory as a Path; test_dir keeps the raw string.
     self.cache = resource_cache.LocalFileCache(Path(scratch_dir))