Exemplo n.º 1
0
 def __enter__(self):
     """
     Load the checksums to work on when the context is entered.

     Uses the checksums from the listing returned by ``read_checksum_file()``;
     when that returns nothing, starts from a fresh ``ZarrChecksums()``.
     """
     listing = self.read_checksum_file()
     self._checksums = listing.checksums if listing else ZarrChecksums()
     return self
Exemplo n.º 2
0
def test_zarr_checkums_is_empty():
    """``is_empty`` holds only when both the file and directory lists are empty."""

    def make_entry():
        # A fresh entry per use, so no instance is shared between listings.
        return ZarrChecksum(digest="checksum", name="name", size=1)

    nothing = ZarrChecksums(directories=[], files=[])
    assert nothing.is_empty

    only_directory = ZarrChecksums(directories=[make_entry()], files=[])
    assert not only_directory.is_empty

    only_file = ZarrChecksums(directories=[], files=[make_entry()])
    assert not only_file.is_empty
Exemplo n.º 3
0
def test_zarr_checkums_remove_checksums(
    initial_files,
    initial_directories,
    removed_checksums,
    expected_files,
    expected_directories,
):
    """Removing paths from a listing leaves exactly the expected files and directories."""
    listing = ZarrChecksums(
        files=initial_files,
        directories=initial_directories,
    )

    listing.remove_checksums(removed_checksums)

    remaining_files = listing.files
    assert remaining_files == expected_files
    remaining_directories = listing.directories
    assert remaining_directories == expected_directories
Exemplo n.º 4
0
def test_zarr_serialize():
    """Serializing a listing with one file and one directory yields the exact JSON text."""
    json_serializer = ZarrJSONChecksumSerializer()

    file_entry = ZarrChecksum(
        name="bar",
        digest="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        size=1,
    )
    directory_entry = ZarrChecksum(
        name="foo",
        digest="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-1--2",
        size=2,
    )
    listing = ZarrChecksumListing(
        checksums=ZarrChecksums(files=[file_entry], directories=[directory_entry]),
        digest="cccccccccccccccccccccccccccccccc-2--3",
        size=3,
    )

    # The expected text lists "directories" before "files" and has no whitespace.
    expected_json = '{"checksums":{"directories":[{"digest":"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-1--2","name":"foo","size":2}],"files":[{"digest":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa","name":"bar","size":1}]},"digest":"cccccccccccccccccccccccccccccccc-2--3","size":3}'  # noqa: E501

    assert json_serializer.serialize(listing) == expected_json
Exemplo n.º 5
0
def test_zarr_checksum_serializer_aggregate_digest(file_checksums,
                                                   directory_checksums,
                                                   digest):
    """The aggregate digest of the given file/directory checksums equals ``digest``."""
    json_serializer = ZarrJSONChecksumSerializer()
    listing_input = ZarrChecksums(
        files=file_checksums,
        directories=directory_checksums,
    )
    computed = json_serializer.aggregate_digest(listing_input)
    assert computed == digest
Exemplo n.º 6
0
def test_zarr_checksum_serializer_generate_listing():
    """``generate_listing`` wraps the checksums with their aggregate digest and size."""
    json_serializer = ZarrJSONChecksumSerializer()

    file_entry = ZarrChecksum(
        name="bar",
        digest="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        size=1,
    )
    directory_entry = ZarrChecksum(
        name="baz",
        digest="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb-1--2",
        size=2,
    )
    checksums = ZarrChecksums(files=[file_entry], directories=[directory_entry])

    # Generate first, then build the expectation, in the same order as before.
    generated = json_serializer.generate_listing(checksums)
    expected_listing = ZarrChecksumListing(
        checksums=checksums,
        digest="baf791d7bac84947c14739b1684ec5ab-2--3",
        size=3,
    )

    assert generated == expected_listing
Exemplo n.º 7
0
def test_zarr_checkums_add_directory_checksums(initial, new_checksums,
                                               expected):
    """Adding directory checksums updates the directories and leaves the files empty."""
    listing = ZarrChecksums(files=[], directories=initial)

    listing.add_directory_checksums(new_checksums)

    resulting_directories = listing.directories
    assert resulting_directories == expected
    resulting_files = listing.files
    assert resulting_files == []
Exemplo n.º 8
0
class ZarrChecksumFileUpdater(AbstractContextManager):
    """
    A utility class specifically for updating zarr checksum files.

    When used as a context manager, the checksum file is loaded on entry. On a clean exit
    the updated listing is written back, or the file is deleted if no checksums remain.
    If the block raises, nothing is written and the exception propagates.
    """

    _default_serializer = ZarrJSONChecksumSerializer()

    def __init__(
        self,
        zarr_archive: ZarrArchive | EmbargoedZarrArchive,
        zarr_directory_path: str | Path,
        serializer=_default_serializer,
    ):
        """
        Create an updater for one directory of a zarr archive.

        :param zarr_archive: The archive whose ``storage`` and ``zarr_id`` are used.
        :param zarr_directory_path: Directory inside the zarr; a trailing ``/`` is appended.
        :param serializer: Object used to (de)serialize and generate checksum listings.
        """
        self.zarr_archive = zarr_archive
        self.zarr_directory_path = f'{zarr_directory_path}/'
        # '' and '.' (which become '/' and './' above) are stored as an empty prefix.
        if self.zarr_directory_path in ('/', './'):
            self.zarr_directory_path = ''
        self._serializer = serializer

        # This is loaded when an instance is used as a context manager,
        # then saved when the context manager exits.
        self._checksums = None

    def __enter__(self):
        """Load the existing checksums, or start from an empty ``ZarrChecksums``."""
        existing_zarr_checksum = self.read_checksum_file()
        if existing_zarr_checksum:
            self._checksums = existing_zarr_checksum.checksums
        else:
            self._checksums = ZarrChecksums()
        return self

    def __exit__(self, exc_type, *exc):
        """Persist the checksums on a clean exit; do nothing if an exception occurred."""
        # If there was an exception, do not write anything
        if exc_type:
            return None  # this means throw the exception as normal

        # Generate the listing once and reuse it for both the emptiness check and the write.
        listing = self.checksum_listing
        if listing.checksums.is_empty:
            # If there are no checksums to write, simply delete the checksum file.
            self.delete_checksum_file()
        else:
            self.write_checksum_file(listing)
        return None

    def _require_checksums(self) -> ZarrChecksums:
        """Return the loaded checksums, raising ``ValueError`` if none have been loaded."""
        if self._checksums is None:
            raise ValueError(
                'This method is only valid when used by a context manager')
        return self._checksums

    @property
    def checksum_file_path(self):
        """Generate the path of the checksum file to update."""
        return f'{settings.DANDI_DANDISETS_BUCKET_PREFIX}{settings.DANDI_ZARR_CHECKSUM_PREFIX_NAME}/{self.zarr_archive.zarr_id}/{self.zarr_directory_path}.checksum'  # noqa: E501

    @property
    def checksum_listing(self) -> ZarrChecksumListing:
        """
        Get the current state of the updater.

        A new listing is generated by the serializer on every access.

        :raises ValueError: If no checksums have been loaded via the context manager.
        """
        return self._serializer.generate_listing(self._require_checksums())

    def read_checksum_file(self) -> Optional[ZarrChecksumListing]:
        """
        Load a checksum listing from the checksum file.

        :return: The deserialized listing, or ``None`` if the file does not exist.
        """
        storage = self.zarr_archive.storage
        checksum_path = self.checksum_file_path
        if not storage.exists(checksum_path):
            return None
        with storage.open(checksum_path) as f:
            serialized = f.read()
            return self._serializer.deserialize(serialized)

    def write_checksum_file(self, zarr_checksum: ZarrChecksumListing):
        """Write a checksum listing to the checksum file, encoded as UTF-8."""
        storage = self.zarr_archive.storage
        content_file = ContentFile(
            self._serializer.serialize(zarr_checksum).encode('utf-8'))
        # save() will never overwrite an existing file, it simply appends some garbage to ensure
        # uniqueness. _save() is an internal storage API that will overwrite existing files.
        storage._save(self.checksum_file_path, content_file)

    def delete_checksum_file(self):
        """Delete the checksum file."""
        storage = self.zarr_archive.storage
        storage.delete(self.checksum_file_path)

    def add_file_checksums(self, checksums: List[ZarrChecksum]):
        """
        Add a list of file checksums to the listing.

        :raises ValueError: If no checksums have been loaded via the context manager.
        """
        self._require_checksums().add_file_checksums(checksums)

    def add_directory_checksums(self, checksums: List[ZarrChecksum]):
        """
        Add a list of directory checksums to the listing.

        :raises ValueError: If no checksums have been loaded via the context manager.
        """
        self._require_checksums().add_directory_checksums(checksums)

    def remove_checksums(self, paths: List[str]):
        """
        Remove a list of paths from the listing.

        :raises ValueError: If no checksums have been loaded via the context manager.
        """
        self._require_checksums().remove_checksums(paths)