def test_images_shared_cas(default_tag_policy, initialized_db):
    """
    A repository with two tags, each pointing to its own image, where the two images'
    storages share the same *CAS path* but are *distinct records*.

    Deleting the first tag should delete the first image and its storage, but not the
    file in storage, as it shares its CAS path with the second storage.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create two image storage records with the same content checksum.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
        is2 = database.ImageStorage.create(content_checksum=digest, uploading=False)

        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=is1)
        database.ImageStoragePlacement.create(location=location, storage=is2)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create two images in the repository, and two tags, each pointing to one of the storages.
        first_image = Image.create(
            docker_image_id="i1", repository=repository, storage=is1, ancestors="/"
        )
        second_image = Image.create(
            docker_image_id="i2", repository=repository, storage=is2, ancestors="/"
        )
        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )
        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "second",
            second_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1", "i2")

        # Delete the first tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")
        assert_not_deleted(repository, "i2")

        # Ensure the CAS path still exists.
        assert storage.exists({preferred}, storage.blob_path(digest))
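# Why the file survives above: the CAS path is derived purely from the content
# checksum, so two distinct ImageStorage rows with equal `content_checksum` map
# to the same object on disk. A minimal sketch of that derivation; this helper
# is illustrative only, not Quay's `blob_path` implementation.
def _example_blob_path(digest):
    # "sha256:<hex>" -> "sha256/<first two hex chars>/<hex>"
    algo, hex_digest = digest.split(":", 1)
    return "{}/{}/{}".format(algo, hex_digest[:2], hex_digest)


assert _example_blob_path("sha256:aaff99") == "sha256/aa/aaff99"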
def test_image_with_cas(default_tag_policy, initialized_db):
    """
    A repository with a tag pointing to an image backed by CAS.

    Deleting and GCing the tag should result in the storage and its CAS data being removed.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create an image storage record under CAS.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        image_storage = database.ImageStorage.create(content_checksum=digest)
        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=image_storage)

        # Add a temporary link so the blob is available.
        model.blob.store_blob_record_and_temp_link_in_repo(
            repository, digest, location, len(content), 120
        )

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Store a manifest pointing to that path.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "first"
        )
        builder.insert_layer(
            digest,
            json.dumps(
                {
                    "id": "i1",
                }
            ),
        )

        # Store the manifest.
        manifest = builder.build(docker_v2_signing_key)

        repo_ref = RepositoryReference.for_repo_obj(repository)
        registry_model.create_manifest_and_retarget_tag(
            repo_ref, manifest, "first", storage, raise_on_error=True
        )

        # Delete the temp reference.
        _delete_temp_links(repository)

        # Delete the tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")

        # Ensure the CAS path is gone.
        assert not storage.exists({preferred}, storage.blob_path(digest))
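# Hedged note on the temp link above: the trailing 120 is the link's lifetime in
# seconds. While the link is live it counts as a reference and keeps GC from
# collecting the storage, which is why the test deletes it explicitly before
# deleting the tag. A conceptual sketch of the collectibility rule follows; the
# helper name and `expires_at` shape are hypothetical, not Quay's API.
def _storage_is_collectible(references, now):
    # `references` is any iterable of objects with an `expires_at` attribute;
    # None means a permanent reference (e.g. a live tag or manifest).
    return all(ref.expires_at is not None and ref.expires_at <= now for ref in references)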
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that each existing uploadedblob has an object in storage.
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything.
        for repo in database.Repository.select():
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check whether only this repository references the uploadedblob.
                # If so, the blob should be removed from storage.
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )
                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
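# The three checks above reduce to one question. A condensed, hypothetical helper
# (it reuses the test's peewee models but is not part of Quay) makes the GC rule
# explicit: a blob's file may leave storage only when no *other* repository can
# still reach it through a manifest, a legacy image, or an upload record.
def _blob_referenced_elsewhere(repo, blob):
    return bool(
        ManifestBlob.select()
        .where(ManifestBlob.blob == blob, ManifestBlob.repository != repo)
        .count()
        + Image.select().where(Image.storage == blob, Image.repository != repo).count()
        + UploadedBlob.select()
        .where(UploadedBlob.blob == blob, UploadedBlob.repository != repo)
        .count()
    )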
def test_images_shared_cas_with_new_blob_table(default_tag_policy, initialized_db):
    """
    A repository with a tag and image that shares its CAS path with a record in the new Blob
    table.

    Deleting the tag should delete the image and its storage, but not the file in storage,
    as it shares its CAS path with the blob row.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create an image storage record and a blob record with the same content checksum.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        media_type = database.MediaType.get(name="text/plain")

        is1 = database.ImageStorage.create(content_checksum=digest, uploading=False)
        database.ApprBlob.create(digest=digest, size=0, media_type=media_type)

        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=is1)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create the image in the repository, and the tag.
        first_image = Image.create(
            docker_image_id="i1", repository=repository, storage=is1, ancestors="/"
        )
        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1")

        # Delete the tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")

        # Ensure the CAS path still exists, as it is referenced by the Blob table.
        assert storage.exists({preferred}, storage.blob_path(digest))
def exportedlogs(file_id):
    # Only enable this endpoint if local storage is available.
    has_local_storage = False
    for storage_type, _ in app.config.get("DISTRIBUTED_STORAGE_CONFIG", {}).values():
        if storage_type == "LocalStorage":
            has_local_storage = True
            break

    if not has_local_storage:
        abort(404)

    JSON_MIMETYPE = "application/json"
    exported_logs_storage_path = app.config.get(
        "EXPORT_ACTION_LOGS_STORAGE_PATH", "exportedactionlogs"
    )
    export_storage_path = os.path.join(exported_logs_storage_path, file_id)
    if not storage.exists(storage.preferred_locations, export_storage_path):
        abort(404)

    try:
        return send_file(
            storage.stream_read_file(storage.preferred_locations, export_storage_path),
            mimetype=JSON_MIMETYPE,
        )
    except IOError:
        logger.exception("Could not read exported logs")
        abort(403)
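# Hedged example of the DISTRIBUTED_STORAGE_CONFIG shape the loop above iterates:
# each value is a (driver name, driver arguments) pair, which is why unpacking
# yields the driver name first. The location names and arguments here are
# illustrative only.
#
# DISTRIBUTED_STORAGE_CONFIG = {
#     "local_us": ("LocalStorage", {"storage_path": "/datastorage/registry"}),
#     "s3_us_east": ("S3Storage", {"s3_bucket": "quay-blobs"}),
# }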
def test_store_blob_on_first_time_download(self, proxy_manifest_response):
    proxy_mock = proxy_manifest_response(
        self.tag, HELLO_WORLD_SCHEMA2_MANIFEST_JSON, DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE
    )
    params = {
        "repository": self.repository,
        "digest": self.digest,
    }

    with patch(
        "data.registry_model.registry_proxy_model.Proxy", MagicMock(return_value=proxy_mock)
    ):
        with patch("endpoints.v2.blob.model_cache", NoopDataModelCache(TEST_CACHE_CONFIG)):
            conduct_call(
                self.client,
                "v2.download_blob",
                url_for,
                "GET",
                params,
                expected_code=200,
                headers=self.headers,
            )

    path = get_layer_path(self.blob)
    assert path is not None

    placements = ImageStoragePlacement.filter(ImageStoragePlacement.storage == self.blob)
    locations = [placements.get().location.name]
    assert storage.exists(locations, path), f"blob not found in storage at path {path}"
def test_logrotateworker_with_cutoff(logs_model):
    days = 60
    start_timestamp = datetime(2019, 1, 1)

    # Make sure there are no existing logs.
    found = _lookup_logs(
        logs_model, start_timestamp - timedelta(days=365), start_timestamp + timedelta(days=365)
    )
    assert not found

    # Create a new set of logs/indices.
    for day in range(0, days):
        logs_model.log_action(
            "push_repo",
            namespace_name="devtable",
            repository_name="simple",
            ip="1.2.3.4",
            timestamp=start_timestamp + timedelta(days=day),
        )

    # Get all logs.
    logs = _lookup_logs(
        logs_model,
        start_timestamp - timedelta(days=days - 1),
        start_timestamp + timedelta(days=days + 1),
    )
    assert len(logs) == days

    # Set the cutoff datetime to be the midpoint of the logs.
    midpoint = logs[0 : len(logs) // 2]
    assert midpoint
    assert len(midpoint) < len(logs)

    worker = LogRotateWorker()
    cutoff_date = midpoint[-1].datetime

    # Archive the indices at or older than the cutoff date.
    archived_files = worker._perform_archiving(cutoff_date)

    # Ensure the earlier logs were archived.
    found = _lookup_logs(logs_model, start_timestamp, cutoff_date - timedelta(seconds=1))
    assert not found

    # Check that the files were written to storage.
    for archived_file in archived_files:
        assert storage.exists([SAVE_LOCATION], os.path.join(SAVE_PATH, archived_file))

    # If the current model uses ES, check that the indices were also deleted.
    if isinstance(logs_model, DocumentLogsModel):
        assert len(logs_model.list_indices()) == days - (len(logs) // 2)
        for index in logs_model.list_indices():
            dt = datetime.strptime(index[len(INDEX_NAME_PREFIX) :], INDEX_DATE_FORMAT)
            assert dt >= cutoff_date
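# Illustrative example of the index-name parsing above (the constant values are
# assumed, not taken from this file): with INDEX_NAME_PREFIX = "logentry_" and
# INDEX_DATE_FORMAT = "%Y-%m-%d", an index named "logentry_2019-01-31" strips to
# "2019-01-31" and parses to datetime(2019, 1, 31), which is then compared
# against cutoff_date to confirm only newer indices survived the rotation.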
def test_image_with_cas(default_tag_policy, initialized_db):
    """
    A repository with a tag pointing to an image backed by CAS.

    Deleting and GCing the tag should result in the storage and its CAS data being removed.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create an image storage record under CAS.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        image_storage = database.ImageStorage.create(content_checksum=digest, uploading=False)
        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=image_storage)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        # Create the image and the tag.
        first_image = Image.create(
            docker_image_id="i1", repository=repository, storage=image_storage, ancestors="/"
        )
        store_tag_manifest(
            repository.namespace_user.username,
            repository.name,
            "first",
            first_image.docker_image_id,
        )

        assert_not_deleted(repository, "i1")

        # Delete the tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")

        # Ensure the CAS path is gone.
        assert not storage.exists({preferred}, storage.blob_path(digest))
@contextmanager
def populate_storage_for_gc():
    """
    Populate FakeStorage with dummy data for each ImageStorage row.
    """
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        content = b"hello world"
        storage.put_content({preferred}, storage.blob_path(storage_row.content_checksum), content)
        assert storage.exists({preferred}, storage.blob_path(storage_row.content_checksum))

    yield
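# Usage sketch: because of the @contextmanager decorator, the generator runs up
# to `yield` on entry (seeding FakeStorage for every ImageStorage row) and
# resumes on exit, which is how the GC tests in this file consume it:
#
# with populate_storage_for_gc():
#     model.storage.garbage_collect_storage([...])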
def process_queue_item(self, job_details):
    logger.debug("Got chunk cleanup queue item: %s", job_details)
    storage_location = job_details["location"]
    storage_path = job_details["path"]

    if not storage.exists([storage_location], storage_path):
        logger.debug("Chunk already deleted")
        return

    try:
        storage.remove([storage_location], storage_path)
    except IOError:
        raise JobException()
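# Hedged sketch of the queue payload consumed above; the keys mirror the lookups
# in process_queue_item, and the values are illustrative only:
#
# {"location": "local_us", "path": "uploads/deadbeef/chunk_0"}
#
# Raising JobException on IOError signals the queue framework to retry the item
# later rather than dropping it; an already-missing chunk is treated as success.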
def find_broken_storages():
    broken_storages = set()

    print("Checking storages...")
    placement_count = ImageStoragePlacement.select().count()
    placements = (
        ImageStoragePlacement.select()
        .join(ImageStorage)
        .switch(ImageStoragePlacement)
        .join(ImageStorageLocation)
    )

    for placement in tqdm(placements, total=placement_count):
        path = model.storage.get_layer_path(placement.storage)
        if not storage_system.exists([placement.location.name], path):
            broken_storages.add(placement.storage.id)

    return list(broken_storages)
def test_garbage_collect_storage(default_tag_policy, initialized_db):
    with populate_storage_for_gc():
        preferred = storage.preferred_locations[0]

        # Get a random sample of storages.
        uploadedblobs = list(UploadedBlob.select())
        random_uploadedblobs = random.sample(
            uploadedblobs, random.randrange(1, len(uploadedblobs) + 1)
        )

        model.storage.garbage_collect_storage([b.blob.id for b in random_uploadedblobs])

        # Ensure the blobs' storage wasn't removed: the UploadedBlob rows still
        # reference these storages, so garbage_collect_storage should have left
        # them in place.
        for uploadedblob in random_uploadedblobs:
            assert storage.exists(
                {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
            )
def verify_placements():
    encountered = set()

    iterator = yield_random_entries(
        lambda: ImageStorage.select().where(ImageStorage.uploading == False),
        ImageStorage.id,
        1000,
        ImageStorage.select(fn.Max(ImageStorage.id)).scalar(),
        1,
    )

    for storage_row, abt, _ in iterator:
        if storage_row.id in encountered:
            continue

        encountered.add(storage_row.id)

        logger.info("Checking placements for storage `%s`", storage_row.uuid)
        try:
            with_locations = model.storage.get_storage_by_uuid(storage_row.uuid)
        except model.InvalidImageException:
            logger.exception("Could not find storage `%s`", storage_row.uuid)
            continue

        storage_path = model.storage.get_layer_path(storage_row)
        locations_to_check = set(with_locations.locations)
        if locations_to_check:
            logger.info(
                "Checking locations `%s` for storage `%s`", locations_to_check, storage_row.uuid
            )
            for location in locations_to_check:
                logger.info("Checking location `%s` for storage `%s`", location, storage_row.uuid)
                if not storage.exists([location], storage_path):
                    location_row = _get_location_row(location)
                    logger.info(
                        "Location `%s` is missing for storage `%s`; removing",
                        location,
                        storage_row.uuid,
                    )
                    (
                        ImageStoragePlacement.delete()
                        .where(
                            ImageStoragePlacement.storage == storage_row,
                            ImageStoragePlacement.location == location_row,
                        )
                        .execute()
                    )
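# For readability, a hedged reading of the positional arguments passed to
# yield_random_entries above, inferred from this call site rather than the
# helper's definition: the query factory, the primary-key field to range over,
# the batch size (1000), the maximum id to scan up to, and the minimum id (1).
# The iterator yields (row, abort_signal, num_remaining) tuples, of which this
# function uses only the row.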
def test_logarchiving(app):
    worker = ArchiveBuildLogsWorker()
    logs_mock = Mock()
    logs_mock.get_log_entries = Mock(return_value=(1, [{"some": "entry"}]))

    # Add a build that is ready for archiving.
    build = model.create_build_for_testing()

    with patch("workers.buildlogsarchiver.buildlogsarchiver.build_logs", logs_mock):
        worker._archive_redis_buildlogs()

    # Ensure the get method was called.
    logs_mock.get_log_entries.assert_called_once()
    logs_mock.expire_status.assert_called_once()
    logs_mock.delete_log_entries.assert_called_once()

    # Ensure the build was marked as archived.
    assert model.get_build(build.uuid).logs_archived

    # Ensure a file was written to storage.
    assert storage.exists(["local_us"], "logarchive/%s" % build.uuid)
def test_images_shared_cas(default_tag_policy, initialized_db):
    """
    A repository with two tags, each pointing to its own manifest, where the two manifests'
    storages share the same *CAS path* but are *distinct records*.

    Deleting the first tag should delete the first image and its storage, but not the
    file in storage, as it shares its CAS path with the second storage.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create two image storage records with the same content checksum.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        is1 = database.ImageStorage.create(content_checksum=digest)
        is2 = database.ImageStorage.create(content_checksum=digest)

        location = database.ImageStorageLocation.get(name=preferred)
        database.ImageStoragePlacement.create(location=location, storage=is1)
        database.ImageStoragePlacement.create(location=location, storage=is2)

        # Add a temporary link so the blob is available.
        model.blob.store_blob_record_and_temp_link_in_repo(
            repository, digest, location, len(content), 120
        )

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        repo_ref = RepositoryReference.for_repo_obj(repository)

        # Store a manifest pointing to that path as `first`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "first"
        )
        builder.insert_layer(
            digest,
            json.dumps(
                {
                    "id": "i1",
                }
            ),
        )
        manifest = builder.build(docker_v2_signing_key)
        registry_model.create_manifest_and_retarget_tag(
            repo_ref, manifest, "first", storage, raise_on_error=True
        )

        tag_ref = registry_model.get_repo_tag(repo_ref, "first")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(manifest_ref, storage)

        # Store another as `second`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "second"
        )
        builder.insert_layer(
            digest,
            json.dumps(
                {
                    "id": "i2",
                }
            ),
        )
        manifest = builder.build(docker_v2_signing_key)
        created, _ = registry_model.create_manifest_and_retarget_tag(
            repo_ref, manifest, "second", storage, raise_on_error=True
        )

        tag_ref = registry_model.get_repo_tag(repo_ref, "second")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(manifest_ref, storage)

        # Manually retarget the second manifest's blob to the second storage row.
        # Blob lookup by digest may have resolved to either duplicate row, so flip
        # whichever pairing was created.
        try:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is1)
            second_blob.blob = is2
            second_blob.save()
        except ManifestBlob.DoesNotExist:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is2)
            second_blob.blob = is1
            second_blob.save()

        # Delete the temp reference.
        _delete_temp_links(repository)

        # Ensure the legacy images exist.
        assert_not_deleted(repository, "i1", "i2")

        # Delete the first tag.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")
        assert_not_deleted(repository, "i2")

        # Ensure the CAS path still exists.
        assert storage.exists({preferred}, storage.blob_path(digest))