Exemplo n.º 1
0
    def test_read_only_cache_ignores_modifications(self):
        """A LayeredCache built with read_only=True drops add() and delete() requests."""
        first_key = PudlResourceKey("a", "b", "r1")
        second_key = PudlResourceKey("a", "b", "r2")
        both_keys = (first_key, second_key)

        # Seed the two underlying caches directly, one resource each.
        self.cache_1.add(first_key, b"xxx")
        self.cache_2.add(second_key, b"yyy")
        self.assertTrue(self.cache_1.contains(first_key))
        self.assertTrue(self.cache_2.contains(second_key))

        layered = resource_cache.LayeredCache(
            self.cache_1, self.cache_2, read_only=True)
        for key in both_keys:
            self.assertTrue(layered.contains(key))

        # Deletions issued through the read-only wrapper must leave
        # both the wrapper and the underlying caches untouched.
        for key in both_keys:
            layered.delete(key)
        for key in both_keys:
            self.assertTrue(layered.contains(key))
        self.assertTrue(self.cache_1.contains(first_key))
        self.assertTrue(self.cache_2.contains(second_key))

        # Likewise, an add() through the wrapper must not store anything.
        new_key = PudlResourceKey("a", "b", "new")
        layered.add(new_key, b"xyz")
        for cache in (layered, self.cache_1, self.cache_2):
            self.assertFalse(cache.contains(new_key))
Exemplo n.º 2
0
 def test_get_resources_filtering(self):
     """Checks get_resources() unfiltered, with a matching filter, and with a non-matching filter."""
     red_key = PudlResourceKey("epacems", "123", "first-red")
     blue_key = PudlResourceKey("epacems", "123", "second-blue")

     # No filters: both resources are expected, in this order.
     unfiltered = list(self.descriptor.get_resources())
     self.assertEqual([red_key, blue_key], unfiltered)

     # color="red" is expected to select only the first resource.
     only_red = list(self.descriptor.get_resources(color="red"))
     self.assertEqual([red_key], only_red)

     # A filter that matches nothing yields no resources.
     no_match = list(self.descriptor.get_resources(flavor="blueberry"))
     self.assertEqual([], no_match)
Exemplo n.º 3
0
 def test_add_single_resource(self):
     """After add(), contains() reports the resource and get() returns its content."""
     key = PudlResourceKey("ds", "doi", "file.txt")
     payload = b"blah"

     # The resource must be absent before it is added.
     self.assertFalse(self.cache.contains(key))

     self.cache.add(key, payload)

     self.assertTrue(self.cache.contains(key))
     self.assertEqual(payload, self.cache.get(key))
Exemplo n.º 4
0
 def test_get_resource(self):
     """get_resource() returns the body served for the registered URL."""
     # Register the canned HTTP response for the resource's URL.
     responses.add(responses.GET, "http://localhost/first", body="blah")

     key = PudlResourceKey("epacems", self.PROD_EPACEMS_DOI, "first")
     content = self.fetcher.get_resource(key)

     self.assertEqual(b"blah", content)
Exemplo n.º 5
0
 def test_add_with_no_layers_does_nothing(self):
     """add() on a layered cache without any layers stores nothing."""
     key = PudlResourceKey("a", "b", "c")
     cache = self.layered_cache

     self.assertFalse(cache.contains(key))
     cache.add(key, b"sample")
     self.assertFalse(cache.contains(key))

     # delete() is exercised too; no assertion follows it.
     cache.delete(key)
Exemplo n.º 6
0
 def test_get_resource_with_invalid_checksum(self):
     """Retrieving a resource whose content does not match the checksum raises ChecksumMismatch."""
     # Serve a body that is expected to fail checksum validation.
     responses.add(
         responses.GET, "http://localhost/first", body="wrongContent")
     key = PudlResourceKey("epacems", self.PROD_EPACEMS_DOI, "first")

     with self.assertRaises(datastore.ChecksumMismatch):
         self.fetcher.get_resource(key)
Exemplo n.º 7
0
 def test_deletion(self):
     """After delete(), contains() no longer reports a previously added resource."""
     key = PudlResourceKey("a", "b", "c")
     cache = self.cache

     # Absent -> present after add() -> absent again after delete().
     self.assertFalse(cache.contains(key))
     cache.add(key, b"sampleContents")
     self.assertTrue(cache.contains(key))
     cache.delete(key)
     self.assertFalse(cache.contains(key))
Exemplo n.º 8
0
 def test_that_two_cache_objects_share_storage(self):
     """A second LocalFileCache on the same path sees what the first one stores."""
     other = resource_cache.LocalFileCache(Path(self.test_dir))
     key = PudlResourceKey("dataset", "doi", "file.txt")
     both = (self.cache, other)

     for cache in both:
         self.assertFalse(cache.contains(key))

     # Write through the first instance only.
     self.cache.add(key, b"testContents")

     for cache in both:
         self.assertTrue(cache.contains(key))
     self.assertEqual(b"testContents", other.get(key))
Exemplo n.º 9
0
 def test_add_to_first_layer(self):
     """Adding to a layered cache by default stores entries in the first layer."""
     for layer in (self.cache_1, self.cache_2):
         self.layered_cache.add_cache_layer(layer)

     key = PudlResourceKey("a", "b", "x.txt")
     self.assertFalse(self.layered_cache.contains(key))

     self.layered_cache.add(key, b"sampleContent")

     # Visible through the layered cache, held by layer one, absent from layer two.
     self.assertTrue(self.layered_cache.contains(key))
     self.assertTrue(self.cache_1.contains(key))
     self.assertFalse(self.cache_2.contains(key))
Exemplo n.º 10
0
 def get_datapackage_descriptor(self, dataset: str) -> DatapackageDescriptor:
     """Return the datapackage descriptor for the given dataset.

     The descriptor is memoized per DOI in ``self._datapackage_descriptors``.
     On a miss it is parsed from the cached ``datapackage.json`` when the
     cache contains it; otherwise it is obtained from the zenodo fetcher and
     its JSON form is written to the cache.
     """
     doi = self._zenodo_fetcher.get_doi(dataset)

     # Already resolved for this DOI: nothing more to do.
     if doi in self._datapackage_descriptors:
         return self._datapackage_descriptors[doi]

     key = PudlResourceKey(dataset, doi, "datapackage.json")
     if not self._cache.contains(key):
         # Not cached yet: fetch it, remember it, then store its JSON form.
         fetched = self._zenodo_fetcher.get_descriptor(dataset)
         self._datapackage_descriptors[doi] = fetched
         self._cache.add(key, bytes(fetched.get_json_string(), "utf-8"))
     else:
         raw_json = self._cache.get(key).decode('utf-8')
         self._datapackage_descriptors[doi] = DatapackageDescriptor(
             json.loads(raw_json), dataset=dataset, doi=doi)

     return self._datapackage_descriptors[doi]
Exemplo n.º 11
0
    def get_resources(self, name: str = None, **filters: Any) -> Iterator[PudlResourceKey]:
        """Yields a PudlResourceKey for every resource that matches the query.

        Args:
          name (str): if given (and non-empty), only resources with exactly
            this name are considered.
          filters (dict): key=value constraints forwarded to ``self._matches()``
            together with the resource entry. Per the original documentation
            these are matched against the 'parts' field of the resource entry
            in the datapackage.json.
        """
        for entry in self.datapackage_json["resources"]:
            # A falsy name means "do not filter by name".
            name_ok = not name or entry["name"] == name
            if name_ok and self._matches(entry, **filters):
                yield PudlResourceKey(
                    dataset=self.dataset, doi=self.doi, name=entry["name"])
Exemplo n.º 12
0
    def test_read_only_add_and_delete_do_nothing(self):
        """A LocalFileCache created with read_only=True ignores add() and delete()."""
        key = PudlResourceKey("a", "b", "c")
        frozen = resource_cache.LocalFileCache(
            Path(self.test_dir), read_only=True)
        self.assertTrue(frozen.is_read_only())

        # add() on the read-only cache must not store the resource.
        frozen.add(key, b"sample")
        self.assertFalse(frozen.contains(key))

        # Store it through the writable cache on the same directory instead.
        self.cache.add(key, b"sample")
        self.assertFalse(self.cache.is_read_only())
        self.assertTrue(frozen.contains(key))

        # delete() on the read-only cache must leave the resource in place.
        frozen.delete(key)
        self.assertTrue(frozen.contains(key))
Exemplo n.º 13
0
    def test_read_only_layers_skipped_when_adding(self):
        """add() on a layered cache bypasses layers that are read_only."""
        frozen_layer = resource_cache.LocalFileCache(
            self.test_dir_1, read_only=True)
        writable_layer = resource_cache.LocalFileCache(self.test_dir_2)
        layered = resource_cache.LayeredCache(frozen_layer, writable_layer)
        all_caches = (layered, frozen_layer, writable_layer)

        key = PudlResourceKey("a", "b", "c")

        # Nothing holds the resource initially.
        for cache in all_caches:
            self.assertFalse(cache.contains(key))

        # The write lands in the writable layer, not the read-only one.
        layered.add(key, b"test")
        self.assertTrue(layered.contains(key))
        self.assertFalse(frozen_layer.contains(key))
        self.assertTrue(writable_layer.contains(key))

        # Deleting through the layered cache removes it again everywhere.
        layered.delete(key)
        for cache in all_caches:
            self.assertFalse(cache.contains(key))
Exemplo n.º 14
0
    def test_get_uses_innermost_layer(self):
        """The resource is served from the first layer (in insertion order) that contains it."""
        key = PudlResourceKey("a", "b", "x.txt")
        layered = self.layered_cache
        layered.add_cache_layer(self.cache_1)
        layered.add_cache_layer(self.cache_2)

        # Only the second layer holds the resource, so it serves the read.
        self.cache_2.add(key, b"secondLayer")
        self.assertEqual(b"secondLayer", layered.get(key))

        # Once the first layer holds it too, the first layer takes precedence.
        self.cache_1.add(key, b"firstLayer")
        self.assertEqual(b"firstLayer", layered.get(key))

        # Writing through the layered cache changes the first layer only.
        layered.add(key, b"newContents")
        self.assertEqual(b"newContents", layered.get(key))
        self.assertEqual(b"newContents", self.cache_1.get(key))
        self.assertEqual(b"secondLayer", self.cache_2.get(key))

        # Deleting through the layered cache also affects the first layer only,
        # so the second layer's copy becomes visible again.
        layered.delete(key)
        self.assertTrue(layered.contains(key))
        self.assertFalse(self.cache_1.contains(key))
        self.assertTrue(self.cache_2.contains(key))
        self.assertEqual(b"secondLayer", layered.get(key))
0
 def test_get_resources_by_name(self):
     """get_resources(name=...) yields only the resource with that name."""
     wanted = [PudlResourceKey("epacems", "123", "second-blue")]
     found = list(self.descriptor.get_resources(name="second-blue"))
     self.assertEqual(wanted, found)
Exemplo n.º 16
0
 def get_resource_key(self, dataset: str, name: str) -> PudlResourceKey:
     """Builds the PudlResourceKey for ``name`` within ``dataset``.

     The DOI is looked up in ``self._dataset_to_doi``; a dataset missing
     from that mapping propagates whatever error the lookup raises.
     """
     doi = self._dataset_to_doi[dataset]
     return PudlResourceKey(dataset, doi, name)
Exemplo n.º 17
0
 def test_get_resource_with_nonexistent_resource_fails(self):
     """get_resource() raises KeyError when the requested resource does not exist."""
     missing = PudlResourceKey(
         "epacems", self.PROD_EPACEMS_DOI, "nonexistent")
     with self.assertRaises(KeyError):
         self.fetcher.get_resource(missing)
Exemplo n.º 18
0
 def test_get_resource_key(self):
     """get_resource_key() combines dataset, the production DOI and the file name."""
     expected = PudlResourceKey(
         "epacems", self.PROD_EPACEMS_DOI, "blob.zip")
     actual = self.fetcher.get_resource_key("epacems", "blob.zip")
     self.assertEqual(expected, actual)