def placements_to_filtered_paths_set(placements_list): """ Returns the list of paths to remove from storage, filtered from the given placements query by removing any CAS paths that are still referenced by storage(s) in the database. """ if not placements_list: return set() with ensure_under_transaction(): # Find the content checksums not referenced by other storages. Any that are, we cannot # remove. content_checksums = set([ placement.storage.content_checksum for placement in placements_list if placement.storage.cas_path ]) unreferenced_checksums = set() if content_checksums: # Check the current image storage. query = ImageStorage.select( ImageStorage.content_checksum ).where( ImageStorage.content_checksum << list(content_checksums)) is_referenced_checksums = set([ image_storage.content_checksum for image_storage in query ]) if is_referenced_checksums: logger.warning( "GC attempted to remove CAS checksums %s, which are still IS referenced", is_referenced_checksums, ) # Check the ApprBlob table as well. query = ApprBlob.select(ApprBlob.digest).where( ApprBlob.digest << list(content_checksums)) appr_blob_referenced_checksums = set( [blob.digest for blob in query]) if appr_blob_referenced_checksums: logger.warning( "GC attempted to remove CAS checksums %s, which are ApprBlob referenced", appr_blob_referenced_checksums, ) unreferenced_checksums = (content_checksums - appr_blob_referenced_checksums - is_referenced_checksums) # Return all placements for all image storages found not at a CAS path or with a content # checksum that is referenced. return { ( get_image_location_for_id(placement.location_id).name, get_layer_path(placement.storage), placement.storage.content_checksum, ) for placement in placements_list if not placement.storage.cas_path or placement.storage.content_checksum in unreferenced_checksums }
def assert_gc_integrity(expect_storage_removed=True): """ Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes the callback for images removed and doesn't invoke the callback for images *not* removed. """ # Add a callback for when images are removed. removed_image_storages = [] remove_callback = model.config.register_image_cleanup_callback( removed_image_storages.extend) # Store existing storages. We won't verify these for existence because they # were likely created as test data. existing_digests = set() for storage_row in ImageStorage.select(): if storage_row.cas_path: existing_digests.add(storage_row.content_checksum) for blob_row in ApprBlob.select(): existing_digests.add(blob_row.digest) # Store the number of dangling objects. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() existing_manifest_count = _get_dangling_manifest_count() # Yield to the GC test. with check_transitive_modifications(): try: yield finally: remove_callback() # Ensure the number of dangling storages, manifests and labels has not changed. updated_storage_count = _get_dangling_storage_count() assert updated_storage_count == existing_storage_count updated_label_count = _get_dangling_label_count() assert updated_label_count == existing_label_count, _get_dangling_labels() updated_manifest_count = _get_dangling_manifest_count() assert updated_manifest_count == existing_manifest_count # Ensure that for each call to the image+storage cleanup callback, the image and its # storage is not found *anywhere* in the database. for removed_image_and_storage in removed_image_storages: assert isinstance(removed_image_and_storage, Image) try: # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so if we find a row # with the same ID, make sure it does not have the same Docker Image ID. # See: https://www.sqlite.org/autoinc.html found_image = Image.get(id=removed_image_and_storage.id) assert (found_image.docker_image_id != removed_image_and_storage.docker_image_id ), "Found unexpected removed image %s under repo %s" % ( found_image.id, found_image.repository, ) except Image.DoesNotExist: pass # Ensure that image storages are only removed if not shared. shared = Image.select().where( Image.storage == removed_image_and_storage.storage_id).count() if shared == 0: shared = (ManifestBlob.select().where( ManifestBlob.blob == removed_image_and_storage.storage_id).count()) if shared == 0: shared = (UploadedBlob.select().where( UploadedBlob.blob == removed_image_and_storage.storage_id).count()) if shared == 0: with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(id=removed_image_and_storage.storage_id) with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(uuid=removed_image_and_storage.storage.uuid) # Ensure all CAS storage is in the storage engine. preferred = storage.preferred_locations[0] for storage_row in ImageStorage.select(): if storage_row.content_checksum in existing_digests: continue if storage_row.cas_path: storage.get_content({preferred}, storage.blob_path( storage_row.content_checksum)) for blob_row in ApprBlob.select(): if blob_row.digest in existing_digests: continue storage.get_content({preferred}, storage.blob_path(blob_row.digest)) # Ensure all tags have valid manifests. for manifest in {t.manifest for t in Tag.select()}: # Ensure that the manifest's blobs all exist. found_blobs = { b.blob.content_checksum for b in ManifestBlob.select().where( ManifestBlob.manifest == manifest) } parsed = parse_manifest_from_bytes( Bytes.for_string_or_unicode(manifest.manifest_bytes), manifest.media_type.name) assert set(parsed.local_blob_digests) == found_blobs
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True): """ Specialized assertion for ensuring that GC cleans up all dangling storages and labels, invokes the callback for images removed and doesn't invoke the callback for images *not* removed. """ # Add a callback for when images are removed. removed_image_storages = [] model.config.register_image_cleanup_callback(removed_image_storages.extend) # Store the number of dangling storages and labels. existing_storage_count = _get_dangling_storage_count() existing_label_count = _get_dangling_label_count() existing_manifest_count = _get_dangling_manifest_count() yield # Ensure the number of dangling storages, manifests and labels has not changed. updated_storage_count = _get_dangling_storage_count() assert updated_storage_count == existing_storage_count updated_label_count = _get_dangling_label_count() assert updated_label_count == existing_label_count, _get_dangling_labels() updated_manifest_count = _get_dangling_manifest_count() assert updated_manifest_count == existing_manifest_count # Ensure that for each call to the image+storage cleanup callback, the image and its # storage is not found *anywhere* in the database. for removed_image_and_storage in removed_image_storages: with pytest.raises(Image.DoesNotExist): Image.get(id=removed_image_and_storage.id) # Ensure that image storages are only removed if not shared. shared = Image.select().where( Image.storage == removed_image_and_storage.storage_id).count() if shared == 0: shared = (ManifestBlob.select().where( ManifestBlob.blob == removed_image_and_storage.storage_id).count()) if shared == 0: with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(id=removed_image_and_storage.storage_id) with pytest.raises(ImageStorage.DoesNotExist): ImageStorage.get(uuid=removed_image_and_storage.storage.uuid) # Ensure all CAS storage is in the storage engine. preferred = storage.preferred_locations[0] for storage_row in ImageStorage.select(): if storage_row.cas_path: storage.get_content({preferred}, storage.blob_path( storage_row.content_checksum)) for blob_row in ApprBlob.select(): storage.get_content({preferred}, storage.blob_path(blob_row.digest)) # Ensure there are no danglings OCI tags. if check_oci_tags: oci_tags = {t.id for t in Tag.select()} referenced_oci_tags = {t.tag_id for t in TagToRepositoryTag.select()} assert not oci_tags - referenced_oci_tags # Ensure all tags have valid manifests. for manifest in {t.manifest for t in Tag.select()}: # Ensure that the manifest's blobs all exist. found_blobs = { b.blob.content_checksum for b in ManifestBlob.select().where( ManifestBlob.manifest == manifest) } parsed = parse_manifest_from_bytes( Bytes.for_string_or_unicode(manifest.manifest_bytes), manifest.media_type.name) assert set(parsed.local_blob_digests) == found_blobs