def _check_image_used(legacy_image_id):
    """
    Returns True if the legacy image with the given ID is still referenced by a manifest, by a
    tag, or by another image (as that image's parent); returns False otherwise.
    """
    assert legacy_image_id is not None

    # Each entry pairs a model with the column on it that may point at the image. The order
    # matches the lookup order: manifests first, then tags, then child images.
    reference_columns = (
        (ManifestLegacyImage, ManifestLegacyImage.image),
        (RepositoryTag, RepositoryTag.image),
        (Image, Image.parent),
    )

    with db_transaction():
        for model_cls, column in reference_columns:
            try:
                model_cls.select().where(column == legacy_image_id).get()
            except model_cls.DoesNotExist:
                # No reference of this kind; move on to the next kind.
                continue

            return True

        return False
def find_repository_with_garbage(limit_to_gc_policy_s):
    """
    Returns a randomly selected repository that has at least one tag whose lifetime ended at or
    before (now - limit_to_gc_policy_s) and whose namespace has removed_tag_expiration_s equal to
    limit_to_gc_policy_s. Returns None if no such repository is found.
    """
    expiration_timestamp = get_epoch_timestamp() - limit_to_gc_policy_s

    expired_tag_clauses = (
        ~(RepositoryTag.lifetime_end_ts >> None),
        (RepositoryTag.lifetime_end_ts <= expiration_timestamp),
        (Namespace.removed_tag_expiration_s == limit_to_gc_policy_s),
    )

    try:
        # Bounded, de-duplicated set of candidate repository IDs.
        candidates = (
            RepositoryTag.select(RepositoryTag.repository)
            .join(Repository)
            .join(Namespace, on=(Repository.namespace_user == Namespace.id))
            .where(*expired_tag_clauses)
            .limit(500)
            .distinct()
            .alias("candidates")
        )

        # Pick one candidate at random.
        random_pick = (
            RepositoryTag.select(candidates.c.repository_id)
            .from_(candidates)
            .order_by(db_random_func())
        )
        found = random_pick.get()
        if found is None:
            return None

        return Repository.get(Repository.id == found.repository_id)
    except (RepositoryTag.DoesNotExist, Repository.DoesNotExist):
        return None
def list_active_repo_tags(repo, start_id=None, limit=None, include_images=True):
    """
    Returns a query for all of the active, non-hidden tags in a repository, ordered by tag ID.

    When include_images is set, each tag is joined to its image and image storage and (if
    present) the digest of its tag manifest. If start_id is given, only tags with an ID greater
    than or equal to it are returned; if limit is given, the query is limited to that many rows.
    """
    visible_in_repo = (RepositoryTag.repository == repo, RepositoryTag.hidden == False)

    if include_images:
        base_query = (
            RepositoryTag.select(RepositoryTag, Image, ImageStorage, TagManifest.digest)
            .join(Image)
            .join(ImageStorage)
            .where(*visible_in_repo)
            .switch(RepositoryTag)
            .join(TagManifest, JOIN.LEFT_OUTER)
            .order_by(RepositoryTag.id)
        )
    else:
        base_query = (
            RepositoryTag.select(RepositoryTag)
            .where(*visible_in_repo)
            .order_by(RepositoryTag.id)
        )

    query = _tag_alive(base_query)

    if start_id is not None:
        query = query.where(RepositoryTag.id >= start_id)

    if limit is not None:
        query = query.limit(limit)

    return query
def create_temporary_hidden_tag(repo, image_obj, expiration_s):
    """
    Creates a hidden tag pointing at image_obj whose lifetime starts now and ends expiration_s
    seconds from now, so that it will not appear in the UI or CLI.

    Returns the (randomly generated) name of the temporary tag, or None if the repository is
    marked for deletion.
    """
    start_ts = get_epoch_timestamp()
    temp_name = str(uuid4())

    tag_fields = dict(
        repository=repo,
        image=image_obj,
        name=temp_name,
        lifetime_start_ts=start_ts,
        lifetime_end_ts=start_ts + expiration_s,
        hidden=True,
    )

    with db_transaction():
        # Ensure the repository is not marked for deletion.
        if Repository.get(id=repo).state == RepositoryState.MARKED_FOR_DELETION:
            return None

        RepositoryTag.create(**tag_fields)
        return temp_name
def create_temporary_hidden_tag(repo, image_obj, expiration_s):
    """
    Creates a hidden tag pointing at image_obj whose lifetime starts now and ends expiration_s
    seconds from now, so that it will not appear in the UI or CLI.

    Returns the (randomly generated) name of the temporary tag.
    """
    start_ts = get_epoch_timestamp()
    temp_name = str(uuid4())

    tag_fields = dict(
        repository=repo,
        image=image_obj,
        name=temp_name,
        lifetime_start_ts=start_ts,
        lifetime_end_ts=start_ts + expiration_s,
        hidden=True,
    )
    RepositoryTag.create(**tag_fields)
    return temp_name
def get_matching_tags(docker_image_id, storage_uuid, *args):
    """
    Returns a query for all alive, non-hidden tags whose image is the image with the given
    docker_image_id and storage_uuid, or has that image among its ancestors.

    Any extra positional arguments are passed through to RepositoryTag.select() to choose the
    selected columns. If the image cannot be found, an empty query is returned.
    """
    found = image.get_image_with_storage(docker_image_id, storage_uuid)
    if found is None:
        # Empty query.
        return RepositoryTag.select().where(RepositoryTag.id < 0)

    # Pattern matching any ancestors string that starts with this image's own ancestry chain
    # followed by this image's ID.
    descendant_pattern = "%s%s/%%" % (found.ancestors, found.id)
    is_image_or_descendant = (Image.id == found.id) | (Image.ancestors ** descendant_pattern)

    visible_tags = (
        RepositoryTag.select(*args)
        .distinct()
        .join(Image)
        .join(ImageStorage)
        .where(RepositoryTag.hidden == False)
        .where(is_image_or_descendant)
    )
    return _tag_alive(visible_tags)
def restore_tag_to_image(repo_obj, tag_name, docker_image_id):
    """
    Restores a tag to a specific image ID.

    Raises a DataModelException if the tag never pointed at that image within the repository.
    Returns the image the tag pointed to before the restore, or None if looking it up raised a
    DataModelException.
    """
    with db_transaction():
        # Verify that the image ID already existed under this repository under the tag.
        prior_tag_query = (
            RepositoryTag.select()
            .join(Image)
            .where(RepositoryTag.repository == repo_obj)
            .where(RepositoryTag.name == tag_name)
            .where(Image.docker_image_id == docker_image_id)
        )
        try:
            prior_tag_query.get()
        except RepositoryTag.DoesNotExist:
            raise DataModelException("Cannot restore to unknown or invalid image")

        # Lookup the existing image, if any.
        try:
            previous_image = get_repo_tag_image(repo_obj, tag_name)
        except DataModelException:
            previous_image = None

        create_or_update_tag_for_repo(repo_obj, tag_name, docker_image_id, reversion=True)
        return previous_image
def get_active_tag(namespace, repo_name, tag_name):
    """
    Returns the alive tag with the given name in the repository identified by namespace and
    repo_name. The lookup uses .get(), so a missing tag surfaces as the query's DoesNotExist
    error rather than as None.
    """
    named_tag_query = (
        RepositoryTag.select()
        .join(Repository)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(
            RepositoryTag.name == tag_name,
            Repository.name == repo_name,
            Namespace.username == namespace,
        )
    )
    return _tag_alive(named_tag_query).get()
def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Tests that a tag whose image references a missing parent image can still be backfilled.
    """
    # Delete existing tag manifests (and their labels) so we can reuse the tag.
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # Create an image whose ancestry points at an image ID that does not exist, and tag it.
    repo = model.repository.get_repository("devtable", "gargantuan")
    orphaned_image = Image.create(
        docker_image_id="foo",
        repository=repo,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    orphaned_tag = RepositoryTag.create(repository=repo, image=orphaned_image, name="broken")

    # Backfill the tag.
    assert backfill_tag(orphaned_tag)

    # Ensure we backfilled, even though we reference a broken manifest.
    legacy_manifest = TagManifest.get(tag=orphaned_tag)
    backfilled_manifest = TagManifestToManifest.get(tag_manifest=legacy_manifest).manifest
    assert backfilled_manifest.manifest_bytes == legacy_manifest.json_data

    backfilled_tag = TagToRepositoryTag.get(repository_tag=orphaned_tag).tag
    assert backfilled_tag.name == "broken"
    assert backfilled_tag.manifest == backfilled_manifest
def get_possibly_expired_tag(namespace, repo_name, tag_name):
    """
    Returns the tag with the given name in the repository identified by namespace and repo_name,
    without filtering on whether the tag is still alive. The lookup uses .get(), so a missing tag
    surfaces as the query's DoesNotExist error rather than as None.
    """
    named_tag_query = (
        RepositoryTag.select()
        .join(Repository)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(
            RepositoryTag.name == tag_name,
            Repository.name == repo_name,
            Namespace.username == namespace,
        )
    )
    return named_tag_query.get()
def set_tag_end_ts(tag, end_ts):
    """
    Sets the end timestamp for a tag, and for the OCI tag mapped to it (if any).

    Should only be called by change_tag_expiration or tests. Returns a tuple of the tag's
    lifetime_end_ts as held on the passed-in object and whether the RepositoryTag row was
    actually updated.
    """
    # The OCI tag stores its end time in milliseconds.
    if end_ts is None:
        end_ms = None
    else:
        end_ms = end_ts * 1000

    with db_transaction():
        # Note: We check not just the ID of the tag but also its lifetime_end_ts, to ensure that
        # it has not changed while we were updating its expiration.
        updated_rows = (
            RepositoryTag.update(lifetime_end_ts=end_ts)
            .where(
                RepositoryTag.id == tag.id,
                RepositoryTag.lifetime_end_ts == tag.lifetime_end_ts,
            )
            .execute()
        )

        # Check for a mapping to an OCI tag.
        try:
            mapped_tag = (
                Tag.select()
                .join(TagToRepositoryTag)
                .where(TagToRepositoryTag.repository_tag == tag)
                .get()
            )
            oci_update = Tag.update(lifetime_end_ms=end_ms).where(
                Tag.id == mapped_tag.id,
                Tag.lifetime_end_ms == mapped_tag.lifetime_end_ms,
            )
            oci_update.execute()
        except Tag.DoesNotExist:
            pass

        return (tag.lifetime_end_ts, updated_rows > 0)
def get_min_id_for_sec_scan(version):
    """
    Returns the minimum ID among alive tags whose image has a security_indexed_engine lower than
    the given version.
    """
    unindexed_tag_ids = (
        RepositoryTag.select(fn.Min(RepositoryTag.id))
        .join(Image)
        .where(Image.security_indexed_engine < version)
    )
    return _tag_alive(unindexed_tag_ids).scalar()
def _purge_pre_oci_tag(tag, context, allow_non_expired=False):
    """
    Deletes a pre-OCI (RepositoryTag) tag and queues what it referenced for garbage collection.

    The tag's manifest (if it has one) is garbage collected and its legacy image ID is added to
    the given context. The tag row and its TagToRepositoryTag mapping rows are then deleted
    inside a transaction.

    Unless allow_non_expired is set, the tag must already have a lifetime_end_ts that is not in
    the future.

    Returns True if the tag was deleted. Returns False if the tag no longer exists or if its
    lifetime_end_ts changed since it was loaded, in which case it is left untouched.
    """
    assert tag.repository_id == context.repository.id

    if not allow_non_expired:
        assert tag.lifetime_end_ts is not None
        assert tag.lifetime_end_ts <= pre_oci_tag.get_epoch_timestamp()

    # If it exists, GC the tag manifest.
    try:
        tag_manifest = TagManifest.select().where(TagManifest.tag == tag).get()
        _garbage_collect_legacy_manifest(tag_manifest.id, context)
    except TagManifest.DoesNotExist:
        pass

    # Add the tag's legacy image to be GCed.
    context.add_legacy_image_id(tag.image_id)

    with db_transaction():
        # Reload the tag and verify its lifetime_end_ts has not changed.
        try:
            reloaded_tag = db_for_update(
                RepositoryTag.select().where(RepositoryTag.id == tag.id)
            ).get()
        except RepositoryTag.DoesNotExist:
            return False

        assert reloaded_tag.id == tag.id
        assert reloaded_tag.repository_id == context.repository.id

        if reloaded_tag.lifetime_end_ts != tag.lifetime_end_ts:
            return False

        # Delete mapping rows.
        TagToRepositoryTag.delete().where(
            TagToRepositoryTag.repository_tag == reloaded_tag
        ).execute()

        # Delete the tag.
        reloaded_tag.delete_instance()

    # Report success explicitly. Previously this path fell off the end and returned None, which
    # a truthiness check could not tell apart from the False returned on the failure paths.
    return True
def test_get_matching_tag_ids_images_filtered(initialized_db):
    """
    Checks that get_matching_tags_for_images, given image and tag filters restricting to the
    "simple" repository, finds exactly the alive tags of that repository.
    """

    def only_simple_repo(query):
        return query.join(Repository).where(Repository.name == "simple")

    images_in_repo = only_simple_repo(
        Image.select(Image, ImageStorage)
        .join(RepositoryTag)
        .switch(Image)
        .join(ImageStorage)
        .switch(Image)
    )
    alive_tags_in_repo = _tag_alive(only_simple_repo(RepositoryTag.select()))

    pairs = [(row.docker_image_id, row.storage.uuid) for row in images_in_repo]

    found_tags = get_matching_tags_for_images(
        pairs, filter_images=only_simple_repo, filter_tags=only_simple_repo
    )

    expected_ids = {tag.id for tag in alive_tags_in_repo}
    found_ids = {tag.id for tag in found_tags}

    # Ensure every alive tag was found.
    assert found_ids == expected_ids
def _candidates_to_backfill(self):
    """
    Returns the iterator produced by yield_random_entries over the non-hidden tags (as narrowed
    by self._filter) that have no TagToRepositoryTag row yet, spanning the minimum to maximum
    filtered tag ID.
    """

    def missing_tmt_query():
        # Left-outer-join against the mapping table and keep only rows without a mapping.
        filtered_tags = self._filter(RepositoryTag.select())
        return filtered_tags.join(TagToRepositoryTag, JOIN.LEFT_OUTER).where(
            TagToRepositoryTag.id >> None, RepositoryTag.hidden == False
        )

    lowest_id = self._filter(RepositoryTag.select(fn.Min(RepositoryTag.id))).scalar()
    highest_id = self._filter(RepositoryTag.select(fn.Max(RepositoryTag.id))).scalar()
    logger.info("Found candidate range %s-%s", lowest_id, highest_id)

    # NOTE(review): 1000 is presumably the batch size expected by yield_random_entries; confirm
    # against that helper's signature.
    return yield_random_entries(missing_tmt_query, RepositoryTag.id, 1000, highest_id, lowest_id)
def _delete_temp_links(repo):
    """
    Deletes every hidden tag in the repository, along with the image each of them points to.
    """
    hidden_tag_query = RepositoryTag.select().where(
        RepositoryTag.hidden == True, RepositoryTag.repository == repo
    )

    # Materialize the rows up front, since they are deleted while being walked.
    for temp_tag in list(hidden_tag_query):
        temp_tag.delete_instance()
        temp_tag.image.delete_instance()
def list_repository_tag_history(
    repo_obj, page=1, size=100, specific_tag=None, active_tags_only=False, since_time=None
):
    """
    Returns one page of the non-hidden tag history of a repository, newest first.

    The result is a tuple of (tags, manifest digest map, whether more results exist). One extra
    row beyond the page size is fetched to detect whether a further page exists. Filtering by
    since_time is not supported here and raises NotImplementedError.
    """
    # Only available on OCI model
    if since_time is not None:
        raise NotImplementedError

    rows_to_skip = size * (page - 1)
    history = (
        RepositoryTag.select(RepositoryTag, Image, ImageStorage)
        .join(Image)
        .join(ImageStorage)
        .switch(RepositoryTag)
        .where(RepositoryTag.repository == repo_obj)
        .where(RepositoryTag.hidden == False)
        .order_by(RepositoryTag.lifetime_start_ts.desc(), RepositoryTag.name)
        .limit(size + 1)
        .offset(rows_to_skip)
    )

    if active_tags_only:
        history = _tag_alive(history)

    if specific_tag:
        history = history.where(RepositoryTag.name == specific_tag)

    tags = list(history)
    if not tags:
        return [], {}, False

    manifest_map = get_tag_manifest_digests(tags)
    has_more = len(tags) > size
    return tags[:size], manifest_map, has_more
def _get_expected_tags(image):
    """
    Returns the set of IDs of alive, non-hidden tags whose image is the given image or lists the
    given image's ID in its ancestors.
    """
    ancestor_pattern = "%%/%s/%%" % image.id
    points_at_image = (Image.id == image.id) | (Image.ancestors ** ancestor_pattern)

    candidate_tags = (
        RepositoryTag.select()
        .join(Image)
        .where(RepositoryTag.hidden == False)
        .where(points_at_image)
    )
    return {tag.id for tag in _tag_alive(candidate_tags)}
def upgrade(tables, tester, progress_reporter):
    """
    Backfills every non-hidden RepositoryTag that has no TagToRepositoryTag row.

    Tags are scanned in ID windows from 0 up to the maximum tag ID. The window starts at 1000
    IDs and doubles whenever a window yields fewer than 1000 tags, until it reaches at least
    100000. Nothing is done if setup is not complete or there are no tags.
    """
    if not app.config.get("SETUP_COMPLETE", False):
        return

    max_id = RepositoryTag.select(fn.Max(RepositoryTag.id)).scalar()
    if max_id is None:
        return

    logger.info("Found maximum ID %s" % max_id)

    size = 1000
    start_id, end_id = 0, 1000

    while start_id <= max_id:
        logger.info("Checking tag range %s - %s", start_id, end_id)
        unmapped = list(
            RepositoryTag.select()
            .join(Repository)
            .switch(RepositoryTag)
            .join(TagToRepositoryTag, JOIN.LEFT_OUTER)
            .where(TagToRepositoryTag.id >> None)
            .where(
                RepositoryTag.hidden == False,
                RepositoryTag.id >= start_id,
                RepositoryTag.id < end_id,
            )
        )
        found_count = len(unmapped)

        # Sparse window: widen the next one (up to the cap) to get through gaps faster.
        if found_count < 1000 and size < 100000:
            size *= 2

        # Advance the window before deciding whether there is anything to process.
        start_id = end_id
        end_id = start_id + size

        if not unmapped:
            continue

        logger.info("Found %s tags to backfill", found_count)
        for index, t in enumerate(unmapped):
            logger.info("Backfilling tag %s of %s", index, found_count)
            backfill_tag(t)
def purge_repository(repo, force=False):
    """
    Completely delete all traces of the repository.

    The repository must be marked for deletion unless force is set. Will return True upon
    complete success, and False if the repository row no longer exists by the time its metadata
    is to be deleted. Garbage collection is incremental and repeatable, so this return value does
    not need to be checked or responded to.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent
    # tags, and MySQL is... particular... about such relationships when deleting.
    if repo.kind.name == "application":
        ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
        ApprTag.delete().where(ApprTag.repository == repo).execute()
    else:
        # GC to remove the images and storage.
        _purge_repository_contents(repo)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    for content_model in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
        assert content_model.select().where(content_model.repository == repo).count() == 0

    # Delete any repository build triggers, builds, and any other large-ish reference tables for
    # the repository.
    reference_models = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for reference_model in reference_models:
        _chunk_delete_all(repo, reference_model, force=force)

    # Delete any marker rows for the repository.
    DeletedRepository.delete().where(DeletedRepository.repository == repo).execute()

    # Delete the rest of the repository metadata.
    try:
        # Make sure the repository still exists.
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    fetched.delete_instance(recursive=True, delete_nullable=False, force=force)
    return True
def _purge_repository_contents(repo):
    """
    Purges all the contents of a repository, removing all of its tags, manifests and images.

    Runs three phases in order: OCI tags (Tag), then pre-OCI tags (RepositoryTag), then any
    images still attached to the repository. Each phase repeats until a pass finds nothing left
    to process.
    """
    logger.debug('Purging repository %s', repo)

    # Purge via all the tags.
    while True:
        found = False
        for tags in _chunk_iterate_for_deletion(Tag.select().where(Tag.repository == repo)):
            logger.debug('Found %s tags to GC under repository %s', len(tags), repo)
            found = True
            # A fresh context per chunk, so each chunk is garbage collected on its own.
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug('Deleting tag %s under repository %s', tag, repo)
                assert tag.repository_id == repo.id
                # Tags are purged regardless of whether they have expired.
                _purge_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        # Stop once a full pass yields no chunks at all.
        if not found:
            break

    # TODO: remove this once we're fully on the OCI data model.
    while True:
        found = False
        repo_tag_query = RepositoryTag.select().where(RepositoryTag.repository == repo)
        for tags in _chunk_iterate_for_deletion(repo_tag_query):
            logger.debug('Found %s tags to GC under repository %s', len(tags), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug('Deleting tag %s under repository %s', tag, repo)
                assert tag.repository_id == repo.id
                _purge_pre_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Add all remaining images to a new context. We do this here to minimize the number of images
    # we need to load.
    # NOTE(review): this loop only exits once a pass finds no image rows; it presumably relies on
    # _run_garbage_collection deleting every image added to the context - confirm that it cannot
    # leave images behind, otherwise this would not terminate.
    while True:
        found_image = False
        image_context = _GarbageCollectorContext(repo)
        for image in Image.select().where(Image.repository == repo):
            found_image = True
            logger.debug('Deleting image %s under repository %s', image, repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)

        if not found_image:
            break
def get_most_recent_tag(repo_id):
    """
    Returns the alive, non-hidden tag in the repository with the latest lifetime_start_ts, or
    None if the repository has no such tag.
    """
    newest_first = (
        RepositoryTag.select()
        .where(RepositoryTag.repository == repo_id, RepositoryTag.hidden == False)
        .order_by(RepositoryTag.lifetime_start_ts.desc())
    )

    try:
        return _tag_alive(newest_first).get()
    except RepositoryTag.DoesNotExist:
        return None
def lookup_unrecoverable_tags(repo):
    """
    Returns a query for the tags in a repository that are expired and past their time machine
    recovery period, i.e. whose lifetime ended at least the namespace's removed_tag_expiration_s
    seconds ago.
    """
    # Evaluated per row in SQL, since the expiration window is a column on the namespace.
    recovery_cutoff = get_epoch_timestamp() - Namespace.removed_tag_expiration_s

    tags_in_repo = (
        RepositoryTag.select()
        .join(Repository)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(RepositoryTag.repository == repo)
    )
    return tags_in_repo.where(
        ~(RepositoryTag.lifetime_end_ts >> None),
        RepositoryTag.lifetime_end_ts <= recovery_cutoff,
    )
def find_matching_tag(repo_id, tag_names):
    """
    Returns the alive tag in the repository that has one of the given names and the latest
    lifetime_start_ts, or None if no alive tag has any of those names.
    """
    wanted_names = list(tag_names)
    newest_first = (
        RepositoryTag.select()
        .where(RepositoryTag.repository == repo_id, RepositoryTag.name << wanted_names)
        .order_by(RepositoryTag.lifetime_start_ts.desc())
    )

    try:
        return _tag_alive(newest_first).get()
    except RepositoryTag.DoesNotExist:
        return None
def test_get_matching_tag_ids_for_all_images(max_subqueries, max_image_lookup_count, initialized_db):
    """
    Checks that get_matching_tags_for_images, given every image in the database, finds every
    alive tag, under the patched sub-query and image lookup limits.
    """
    with patch('data.model.tag._MAX_SUB_QUERIES', max_subqueries):
        with patch('data.model.tag._MAX_IMAGE_LOOKUP_COUNT', max_image_lookup_count):
            all_images = Image.select(Image, ImageStorage).join(ImageStorage)
            pairs = [(row.docker_image_id, row.storage.uuid) for row in all_images]

            alive_tag_ids = {tag.id for tag in _tag_alive(RepositoryTag.select())}
            found_tag_ids = {tag.id for tag in get_matching_tags_for_images(pairs)}

            # Ensure every alive tag was found.
            assert found_tag_ids == alive_tag_ids
def get_active_tag_for_repo(repo, tag_name):
    """
    Returns the alive, non-hidden tag with the given name in the repository, joined to its image
    and image storage, or None if there is no such tag.
    """
    named_tag_query = (
        RepositoryTag.select(RepositoryTag, Image, ImageStorage)
        .join(Image)
        .join(ImageStorage)
        .where(
            RepositoryTag.name == tag_name,
            RepositoryTag.repository == repo,
            RepositoryTag.hidden == False,
        )
    )

    try:
        return _tag_alive(named_tag_query).get()
    except RepositoryTag.DoesNotExist:
        return None
def upgrade_progress():
    """
    Returns a JSON response describing tag upgrade progress: the fraction of non-hidden
    RepositoryTag rows covered by TagToRepositoryTag rows, the number remaining, and the total.
    Progress is reported as complete when there are no non-hidden tags.
    """
    total_tags = RepositoryTag.select().where(RepositoryTag.hidden == False).count()

    if total_tags == 0:
        # Nothing to upgrade; skip the mapping count and avoid dividing by zero.
        progress, remaining = 1.0, 0
    else:
        upgraded_tags = TagToRepositoryTag.select().count()
        progress = float(upgraded_tags) / total_tags
        remaining = total_tags - upgraded_tags

    return jsonify({
        'progress': progress,
        'tags_remaining': remaining,
        'total_tags': total_tags,
    })
def delete_manifest_by_digest(namespace, repo_name, digest):
    """
    Deletes every alive tag in the repository whose tag manifest has the given digest.

    Returns the tags of all tag manifests matching the digest, including those that were already
    not alive and were therefore skipped.
    """
    matching_manifests = list(
        _load_repo_manifests(namespace, repo_name).where(TagManifest.digest == digest)
    )

    # Use a single timestamp so all deleted tags share the same deletion time.
    now_ms = get_epoch_timestamp_ms()

    for manifest_row in matching_manifests:
        alive_lookup = _tag_alive(
            RepositoryTag.select().where(RepositoryTag.id == manifest_row.tag_id)
        )
        try:
            # The lookup only verifies that the tag is still alive; its result is not needed.
            alive_lookup.get()
            delete_tag(namespace, repo_name, manifest_row.tag.name, now_ms)
        except RepositoryTag.DoesNotExist:
            continue

    return [manifest_row.tag for manifest_row in matching_manifests]
def get_tags_images_eligible_for_scan(clair_version):
    """
    Returns a query for the alive, non-hidden tags whose image has a security_indexed_engine
    lower than clair_version. Each row also selects the image, its storage and, when the image
    has a parent, the parent image and the parent's storage.
    """
    Parent = Image.alias()
    ParentImageStorage = ImageStorage.alias()

    tag_to_image = RepositoryTag.image == Image.id
    image_to_storage = Image.storage == ImageStorage.id
    image_to_parent = Image.parent == Parent.id
    parent_to_storage = ParentImageStorage.id == Parent.storage

    scannable = (
        RepositoryTag.select(Image, ImageStorage, Parent, ParentImageStorage, RepositoryTag)
        .join(Image, on=tag_to_image)
        .join(ImageStorage, on=image_to_storage)
        .switch(Image)
        .join(Parent, JOIN.LEFT_OUTER, on=image_to_parent)
        .join(ParentImageStorage, JOIN.LEFT_OUTER, on=parent_to_storage)
        .where(RepositoryTag.hidden == False)
        .where(Image.security_indexed_engine < clair_version)
    )
    return _tag_alive(scannable)
def purge_repository(namespace_name, repository_name):
    """
    Completely delete all traces of the repository.

    Will return True upon complete success, and False if the repository cannot be found, either
    up front or when its metadata is about to be deleted. Garbage collection is incremental and
    repeatable, so this return value does not need to be checked or responded to. On success the
    configured repository cleanup callbacks are invoked with the namespace and repository names.
    """
    try:
        repo = _basequery.get_existing_repository(namespace_name, repository_name)
    except Repository.DoesNotExist:
        return False

    assert repo.name == repository_name

    # Delete the repository of all Appr-referenced entries.
    # Note that new-model Tag's must be deleted in *two* passes, as they can reference parent
    # tags, and MySQL is... particular... about such relationships when deleting.
    if repo.kind.name == 'application':
        ApprTag.delete().where(ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)).execute()
        ApprTag.delete().where(ApprTag.repository == repo).execute()
    else:
        # GC to remove the images and storage.
        _purge_repository_contents(repo)

    # Ensure there are no additional tags, manifests, images or blobs in the repository.
    for content_model in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
        assert content_model.select().where(content_model.repository == repo).count() == 0

    # Delete the rest of the repository metadata.
    try:
        # Make sure the repository still exists.
        fetched = _basequery.get_existing_repository(namespace_name, repository_name)
    except Repository.DoesNotExist:
        return False

    fetched.delete_instance(recursive=True, delete_nullable=False)

    # Run callbacks
    for cleanup_callback in config.repo_cleanup_callbacks:
        cleanup_callback(namespace_name, repository_name)

    return True