def test_manifest_backfill_broken_tag(clear_rows, initialized_db):
    """
    Tests that a tag whose image references missing parent images is still backfilled.
    """
    # Remove the existing old-style manifests (labels first) so the tag can be reused.
    for legacy_model in (TagManifestLabel, TagManifest):
        legacy_model.delete().execute()

    # Create an image whose ancestors string references a missing parent, and tag it.
    repository = model.repository.get_repository("devtable", "gargantuan")
    orphan_image = Image.create(
        docker_image_id="foo",
        repository=repository,
        ancestors="/348723847234/",
        storage=ImageStorage.get(),
    )
    orphan_tag = RepositoryTag.create(
        repository=repository,
        image=orphan_image,
        name="broken",
    )

    # The backfill must still report that it did work.
    assert backfill_tag(orphan_tag)

    # The old-style manifest must be mapped to a new-style manifest with the same bytes,
    # even though the manifest it references is broken.
    legacy_manifest = TagManifest.get(tag=orphan_tag)
    oci_manifest = TagManifestToManifest.get(tag_manifest=legacy_manifest).manifest
    assert oci_manifest.manifest_bytes == legacy_manifest.json_data

    # The new-style tag must exist under the same name and point at that manifest.
    oci_tag = TagToRepositoryTag.get(repository_tag=orphan_tag).tag
    assert oci_tag.name == "broken"
    assert oci_tag.manifest == oci_manifest
def store_tag_manifest_for_repo(
    repository_id, tag_name, manifest, leaf_layer_id, storage_id_map, reversion=False
):
    """
    Stores a tag manifest for a specific tag name in the database.

    Returns a tuple of the TagManifest object and a boolean indicating whether the
    TagManifest was created by this call.
    """
    # Build the new-style OCI manifest row together with its blobs.
    oci_manifest = _populate_manifest_and_blobs(
        repository_id,
        manifest,
        storage_id_map,
        leaf_layer_id=leaf_layer_id,
    )

    # Create (or move) the tag that the tag manifest will point at.
    tag = create_or_update_tag_for_repo(
        repository_id,
        tag_name,
        leaf_layer_id,
        reversion=reversion,
        oci_manifest=oci_manifest,
    )

    # Re-point an existing tag manifest with this digest at the tag; if there is none,
    # associate a fresh one.
    try:
        existing = TagManifest.get(digest=manifest.digest)
        existing.tag = tag
        existing.save()
    except TagManifest.DoesNotExist:
        return _associate_manifest(tag, oci_manifest), True

    return existing, False
def _namespace_from_kwargs(self, args_dict):
    """
    Returns the namespace name implied by the given keyword-argument dict.

    The keys are checked in a fixed order and the first one present wins. Blob and blob
    upload arguments yield an empty string. Raises Exception when no known key is present.
    """

    def manifest_namespace(manifest):
        # Tag manifests reach their repository through the tag; new-style manifests
        # reference the repository directly.
        if manifest._is_tag_manifest:
            repository = TagManifest.get(id=manifest._db_id).tag.repository
        else:
            repository = Manifest.get(id=manifest._db_id).repository
        return repository.namespace_user.username

    if "namespace_name" in args_dict:
        return args_dict["namespace_name"]

    if "repository_ref" in args_dict:
        return args_dict["repository_ref"].namespace_name

    if "tag" in args_dict:
        return args_dict["tag"].repository.namespace_name

    if "manifest" in args_dict:
        return manifest_namespace(args_dict["manifest"])

    if "manifest_or_legacy_image" in args_dict:
        target = args_dict["manifest_or_legacy_image"]
        if isinstance(target, LegacyImage):
            return Image.get(id=target._db_id).repository.namespace_user.username
        return manifest_namespace(target)

    if "derived_image" in args_dict:
        derived_row = DerivedStorageForImage.get(id=args_dict["derived_image"]._db_id)
        return derived_row.source_image.repository.namespace_user.username

    # Blob functions are shared, so no need to do anything.
    if "blob" in args_dict or "blob_upload" in args_dict:
        return ""

    raise Exception("Unknown namespace for dict `%s`" % args_dict)
def _namespace_from_kwargs(self, args_dict):
    """
    Resolves the namespace name from whichever known argument is present in `args_dict`.

    Resolvers are tried in the listed order and the first key found decides the result.
    Raises Exception if none of the known keys is present.
    """

    def from_manifest(manifest):
        if manifest._is_tag_manifest:
            return TagManifest.get(id=manifest._db_id).tag.repository.namespace_user.username
        return Manifest.get(id=manifest._db_id).repository.namespace_user.username

    def from_manifest_or_legacy_image(value):
        if isinstance(value, LegacyImage):
            return Image.get(id=value._db_id).repository.namespace_user.username
        return from_manifest(value)

    def from_derived_image(derived_image):
        row = DerivedStorageForImage.get(id=derived_image._db_id)
        return row.source_image.repository.namespace_user.username

    def shared_blob(_value):
        # Blob functions are shared, so no need to do anything.
        return ''

    # Order matters: earlier keys take precedence when several are present.
    resolvers = (
        ('namespace_name', lambda value: value),
        ('repository_ref', lambda value: value.namespace_name),
        ('tag', lambda value: value.repository.namespace_name),
        ('manifest', from_manifest),
        ('manifest_or_legacy_image', from_manifest_or_legacy_image),
        ('derived_image', from_derived_image),
        ('blob', shared_blob),
        ('blob_upload', shared_blob),
    )

    for key, resolve in resolvers:
        if key in args_dict:
            return resolve(args_dict[key])

    raise Exception('Unknown namespace for dict `%s`' % args_dict)
def _get_manifest_id(repositorytag): repository_tag_datatype = TagDataType.for_repository_tag(repositorytag) # Retrieve the TagManifest for the RepositoryTag, backfilling if necessary. with db_transaction(): manifest_datatype = None try: manifest_datatype = pre_oci_model.get_manifest_for_tag( repository_tag_datatype, backfill_if_necessary=True) except MalformedSchema1Manifest: logger.exception('Error backfilling manifest for tag `%s`', repositorytag.id) if manifest_datatype is None: logger.error('Could not load or backfill manifest for tag `%s`', repositorytag.id) # Create a broken manifest for the tag. tag_manifest = TagManifest.create(tag=repositorytag, digest='BROKEN-%s' % repositorytag.id, json_data='{}') else: # Retrieve the new-style Manifest for the TagManifest, if any. try: tag_manifest = TagManifest.get(id=manifest_datatype._db_id) except TagManifest.DoesNotExist: logger.exception('Could not find tag manifest') return None try: found = TagManifestToManifest.get(tag_manifest=tag_manifest).manifest # Verify that the new-style manifest has the same contents as the old-style manifest. # If not, update and then return. This is an extra check put in place to ensure unicode # manifests have been correctly copied. if found.manifest_bytes != tag_manifest.json_data: logger.warning('Fixing manifest `%s`', found.id) found.manifest_bytes = tag_manifest.json_data found.save() return found.id except TagManifestToManifest.DoesNotExist: # Could not find the new style manifest, so backfill. _backfill_manifest(tag_manifest) # Try to retrieve the manifest again, since we've performed a backfill. try: return TagManifestToManifest.get(tag_manifest=tag_manifest).manifest_id except TagManifestToManifest.DoesNotExist: return None
def verify_backfill(namespace_name):
    """
    Asserts that every non-hidden RepositoryTag under the namespace has a matching
    new-style Tag, manifest and legacy image link.

    Raises AssertionError on the first mismatch found.
    """
    logger.info('Checking namespace %s', namespace_name)

    namespace_user = model.user.get_namespace_user(namespace_name)
    assert namespace_user

    # `== False` is intentional: inside `where` it builds a query expression rather than
    # testing truthiness.
    visible_tags = (
        RepositoryTag.select()
        .join(Repository)
        .where(Repository.namespace_user == namespace_user)
        .where(RepositoryTag.hidden == False)
    )
    repo_tags = list(visible_tags)
    total = len(repo_tags)
    logger.info('Found %s tags', total)

    for position, repo_tag in enumerate(repo_tags, start=1):
        logger.info('Checking tag %s under repository %s (%s/%s)', repo_tag.name,
                    repo_tag.repository.name, position, total)

        # Basic tag attributes must agree between the old and the new row.
        tag = TagToRepositoryTag.get(repository_tag=repo_tag).tag
        assert not tag.hidden
        assert tag.repository == repo_tag.repository
        assert tag.name == repo_tag.name, _vs(tag.name, repo_tag.name)
        assert tag.repository == repo_tag.repository, _vs(tag.repository_id,
                                                          repo_tag.repository_id)
        assert tag.reversion == repo_tag.reversion, _vs(tag.reversion, repo_tag.reversion)

        # The `*_ms` values on the new row are compared to the `*_ts` values on the old
        # row after dividing by 1000.
        start_seconds = int(tag.lifetime_start_ms / 1000)
        assert start_seconds == repo_tag.lifetime_start_ts, _vs(tag.lifetime_start_ms,
                                                                repo_tag.lifetime_start_ts)

        if repo_tag.lifetime_end_ts is None:
            assert tag.lifetime_end_ms is None
        else:
            end_seconds = int(tag.lifetime_end_ms / 1000)
            assert end_seconds == repo_tag.lifetime_end_ts, _vs(tag.lifetime_end_ms,
                                                                repo_tag.lifetime_end_ts)

        # Manifest contents are compared only when an old-style tag manifest exists.
        tag_manifest = tag.manifest
        try:
            repo_tag_manifest = TagManifest.get(tag=repo_tag)
        except TagManifest.DoesNotExist:
            logger.info('No tag manifest found for repository tag %s', repo_tag.id)
        else:
            assert tag_manifest.digest == repo_tag_manifest.digest, _vs(
                tag_manifest.digest, repo_tag_manifest.digest)
            assert tag_manifest.manifest_bytes == repo_tag_manifest.json_data, _vs(
                tag_manifest.manifest_bytes, repo_tag_manifest.json_data)

        # The manifest must be linked to the same legacy image as the old tag.
        mli = ManifestLegacyImage.get(manifest=tag_manifest)
        assert mli.repository == repo_tag.repository

        linked_image = mli.image
        assert linked_image == repo_tag.image, _vs(linked_image.id, repo_tag.image_id)
def compute_layer_id(layer):
    """
    Returns the ID for the layer in the security scanner, formatted as
    `<docker_image_id>.<storage uuid>`.

    Returns None when `layer` is a new-style manifest without a ManifestLegacyImage row.
    """
    # NOTE: this is temporary until we switch to Clair V3.
    image = layer

    if isinstance(layer, ManifestDataType):
        if layer._is_tag_manifest:
            image = TagManifest.get(id=layer._db_id).tag.image
        else:
            manifest_row = Manifest.get(id=layer._db_id)
            try:
                image = ManifestLegacyImage.get(manifest=manifest_row).image
            except ManifestLegacyImage.DoesNotExist:
                return None
    elif isinstance(layer, LegacyImage):
        image = Image.get(id=layer._db_id)

    # Any other kind of value is used as the image row as-is.
    assert image.docker_image_id
    assert image.storage.uuid
    return '%s.%s' % (image.docker_image_id, image.storage.uuid)
def test_retarget_tag(initialized_db):
    """
    Tests that retargeting `latest` as a reversion adds a history entry and keeps the
    old-style tables in sync.
    """
    repo = get_repository("devtable", "history")

    def latest_history():
        entries, _ = list_repository_tag_history(repo, 1, 100, specific_tag_name="latest")
        return entries

    history = latest_history()
    assert len(history) == 2

    live_entry, expired_entry = history[0], history[1]
    assert live_entry.lifetime_end_ms is None
    assert expired_entry.lifetime_end_ms is not None

    # Retarget the tag as a reversion, 10 seconds after the expired entry ended.
    created = retarget_tag(
        "latest",
        live_entry.manifest,
        is_reversion=True,
        now_ms=expired_entry.lifetime_end_ms + 10000,
    )
    assert created.lifetime_end_ms is None
    assert created.reversion
    assert created.name == "latest"
    assert created.manifest == live_entry.manifest

    # The history must now start with the newly created tag, followed by two ended ones.
    history = latest_history()
    assert len(history) == 3
    assert history[0].lifetime_end_ms is None
    for ended_entry in history[1:]:
        assert ended_entry.lifetime_end_ms is not None
    assert history[0] == created

    # The old-style tables must mirror the new tag.
    legacy_tag = TagToRepositoryTag.get(tag=created).repository_tag
    assert legacy_tag.lifetime_start_ts == int(created.lifetime_start_ms / 1000)

    legacy_manifest = TagManifest.get(tag=legacy_tag)
    mapped_manifest = TagManifestToManifest.get(tag_manifest=legacy_manifest).manifest
    assert mapped_manifest == created.manifest
def get_tag_manifest(tag):
    """
    Returns the TagManifest row for the given tag, or None if there is none.
    """
    try:
        found = TagManifest.get(tag=tag)
    except TagManifest.DoesNotExist:
        found = None
    return found
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """
    Tests that backfilling every RepositoryTag produces matching new-style tag, manifest,
    blob and label rows, optionally after clearing the new-style manifest rows as well.
    """
    # Remove the new-style tag rows so that there is something to backfill.
    TagToRepositoryTag.delete().execute()
    Tag.delete().execute()

    if clear_all_rows:
        # Also remove the new-style manifest rows, in this order.
        for oci_model in (
            TagManifestLabelMap,
            ManifestLabel,
            ManifestBlob,
            ManifestLegacyImage,
            TagManifestToManifest,
            Manifest,
        ):
            oci_model.delete().execute()

    saw_ended_tag = False

    for legacy_tag in list(RepositoryTag.select()):
        # The first call must backfill; a second call must be skipped.
        assert backfill_tag(legacy_tag)
        assert not backfill_tag(legacy_tag)

        # The new-style tag row must mirror the old one.
        oci_tag = TagToRepositoryTag.get(repository_tag=legacy_tag).tag
        assert oci_tag.name == legacy_tag.name
        assert oci_tag.repository == legacy_tag.repository
        assert not oci_tag.hidden
        assert oci_tag.reversion == legacy_tag.reversion

        # The new row's `*_ms` values are expected to be 1000x the old row's `*_ts` values.
        if legacy_tag.lifetime_start_ts is not None:
            assert oci_tag.lifetime_start_ms == (legacy_tag.lifetime_start_ts * 1000)
        else:
            assert oci_tag.lifetime_start_ms is None

        if legacy_tag.lifetime_end_ts is not None:
            assert oci_tag.lifetime_end_ms == (legacy_tag.lifetime_end_ts * 1000)
            saw_ended_tag = True
        else:
            assert oci_tag.lifetime_end_ms is None

        assert oci_tag.manifest

        # Tags without an old-style manifest have nothing further to compare.
        try:
            legacy_manifest = TagManifest.get(tag=legacy_tag)
        except TagManifest.DoesNotExist:
            continue

        mapping = TagManifestToManifest.get(tag_manifest=legacy_manifest)
        assert not mapping.broken

        oci_manifest = mapping.manifest
        assert oci_manifest.manifest_bytes == legacy_manifest.json_data
        assert oci_manifest.digest == legacy_manifest.digest
        assert oci_manifest.repository == legacy_manifest.tag.repository
        assert oci_tag.manifest == mapping.manifest

        mapped_image = ManifestLegacyImage.get(manifest=oci_manifest).image
        assert legacy_manifest.tag.image == mapped_image

        # The manifest's blobs must be the storages of the image and all of its ancestors.
        leaf_image = legacy_manifest.tag.image
        expected_storages = {leaf_image.storage.id} | {
            Image.get(id=ancestor_id).storage_id
            for ancestor_id in leaf_image.ancestor_id_list()
        }
        blob_rows = ManifestBlob.select().where(ManifestBlob.manifest == oci_manifest)
        found_storages = {blob_row.blob_id for blob_row in blob_rows}
        assert expected_storages == found_storages

        # Ensure the labels were copied over.
        legacy_label_rows = TagManifestLabel.select().where(
            TagManifestLabel.annotated == legacy_manifest)
        expected_labels = {label_row.label_id for label_row in legacy_label_rows}
        oci_label_rows = ManifestLabel.select().where(ManifestLabel.manifest == oci_manifest)
        found_labels = {label_row.label_id for label_row in oci_label_rows}
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        legacy_tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository,
            RepositoryTag.hidden == False,
        )
        oci_tags = Tag.select().where(Tag.repository == repository)
        assert len(legacy_tags) == len(oci_tags)
        assert {t.name for t in legacy_tags} == {t.name for t in oci_tags}

        for legacy_tag in legacy_tags:
            legacy_manifest = TagManifest.get(tag=legacy_tag)
            link = TagToRepositoryTag.get(repository_tag=legacy_tag)
            oci_manifest = link.tag.manifest

            assert legacy_manifest.json_data == oci_manifest.manifest_bytes
            assert legacy_manifest.digest == oci_manifest.digest
            assert legacy_tag.image == ManifestLegacyImage.get(manifest=oci_manifest).image
            assert legacy_tag.lifetime_start_ts == (link.tag.lifetime_start_ms / 1000)

            if legacy_tag.lifetime_end_ts:
                assert legacy_tag.lifetime_end_ts == (link.tag.lifetime_end_ms / 1000)
            else:
                assert link.tag.lifetime_end_ms is None

    # At least one tag with an end time must have been exercised.
    assert saw_ended_tag
def _backfill_manifest(tag_manifest):
    """
    Creates the new-style manifest rows and the mapping row for an old-style TagManifest.

    Returns False when a mapping row already exists or when row creation hits an
    IntegrityError (another worker got there first). Returns True when the rows were
    written, and also when the TagManifest no longer exists by the time the transaction
    starts.

    A manifest that cannot be parsed, whose parent images cannot be loaded, or whose blobs
    cannot all be found is still written, with `broken=True` on the mapping row.
    """
    logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)

    # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
    if lookup_manifest_map_row(tag_manifest):
        return False

    # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
    # without additional rows or data, as it will eventually not be useful.
    is_broken = False
    try:
        manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(
            tag_manifest.json_data), validate=False)
    except ManifestException:
        logger.exception('Exception when trying to parse manifest %s', tag_manifest.id)
        manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data)
        is_broken = True

    # Lookup the storages for the digests.
    root_image = tag_manifest.tag.image
    repository = tag_manifest.tag.repository

    # Maps content checksum -> storage ID, seeded with the tag's own image.
    image_storage_id_map = {
        root_image.storage.content_checksum: root_image.storage.id
    }

    try:
        parent_images = get_parent_images(repository.namespace_user.username,
                                          repository.name, root_image)
    except DataModelException:
        logger.exception(
            'Exception when trying to load parent images for manifest `%s`',
            tag_manifest.id)
        # Continue with no parents; the manifest is recorded as broken.
        parent_images = {}
        is_broken = True

    for parent_image in parent_images:
        image_storage_id_map[
            parent_image.storage.content_checksum] = parent_image.storage.id

    # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
    # and add its storage ID. If the blob is not found, we mark the manifest as broken.
    storage_ids = set()
    try:
        for blob_digest in manifest.get_blob_digests_for_translation():
            if blob_digest in image_storage_id_map:
                storage_ids.add(image_storage_id_map[blob_digest])
            else:
                logger.debug(
                    'Blob `%s` not found in images for manifest `%s`; checking repo',
                    blob_digest, tag_manifest.id)
                try:
                    blob_storage = get_repo_blob_by_digest(
                        repository.namespace_user.username, repository.name,
                        blob_digest)
                    storage_ids.add(blob_storage.id)
                except BlobDoesNotExist:
                    logger.debug(
                        'Blob `%s` not found in repo for manifest `%s`',
                        blob_digest, tag_manifest.id)
                    is_broken = True
    except MalformedSchema1Manifest:
        # Stops collecting blobs; storage IDs gathered so far are kept.
        logger.warning(
            'Found malformed schema 1 manifest during blob backfill')
        is_broken = True

    with db_transaction():
        # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
        try:
            tag_manifest = TagManifest.get(id=tag_manifest.id)
        except TagManifest.DoesNotExist:
            # Nothing left to backfill; this is reported as success.
            return True

        # Ensure it wasn't already created.
        if lookup_manifest_map_row(tag_manifest):
            return False

        # Check for a pre-existing manifest matching the digest in the repository. This can happen
        # if we've already created the manifest row (typically for tag reversion).
        try:
            manifest_row = Manifest.get(digest=manifest.digest,
                                        repository=tag_manifest.tag.repository)
        except Manifest.DoesNotExist:
            # Create the new-style rows for the manifest.
            try:
                manifest_row = populate_manifest(tag_manifest.tag.repository,
                                                 manifest, tag_manifest.tag.image,
                                                 storage_ids)
            except IntegrityError:
                # Pre-empted.
                return False

        # Create the mapping row. If we find another was created for this tag manifest in the
        # meantime, then we've been preempted.
        try:
            TagManifestToManifest.create(tag_manifest=tag_manifest,
                                         manifest=manifest_row,
                                         broken=is_broken)
        except IntegrityError:
            return False

    # Backfill any labels on the manifest.
    # NOTE(review): assumed to run after the transaction block above has exited -- confirm
    # the intended nesting.
    _backfill_labels(tag_manifest, manifest_row, repository)
    return True