示例#1
0
文件: tag.py 项目: zhill/quay
def populate_manifest(repository, manifest, legacy_image, storage_ids):
    """
    Return the Manifest row for `manifest` in `repository`, creating it when absent.

    When a row with the same repository and digest already exists it is returned
    untouched. Otherwise, inside a single transaction, the Manifest row is created
    together with its ManifestLegacyImage row and one ManifestBlob row per entry in
    `storage_ids`.
    """
    media_type = Manifest.media_type.get_id(manifest.media_type)

    # Fast path: the manifest has already been written for this repository.
    try:
        existing_row = Manifest.get(repository=repository, digest=manifest.digest)
    except Manifest.DoesNotExist:
        pass
    else:
        return existing_row

    with db_transaction():
        try:
            created_row = Manifest.create(
                digest=manifest.digest,
                repository=repository,
                manifest_bytes=manifest.bytes.as_encoded_str(),
                media_type=media_type,
            )
        except IntegrityError as ie:
            # Presumably another writer inserted the same manifest in the meantime;
            # fall back to returning whatever row is there now.
            logger.debug("Got integrity error when trying to write manifest: %s", ie)
            return Manifest.get(repository=repository, digest=manifest.digest)

        ManifestLegacyImage.create(
            manifest=created_row, repository=repository, image=legacy_image
        )

        blob_rows = [
            {"manifest": created_row, "repository": repository, "blob": storage_id}
            for storage_id in storage_ids
        ]
        # Only issue the bulk insert when there is at least one blob to link.
        if blob_rows:
            ManifestBlob.insert_many(blob_rows).execute()

        return created_row
示例#2
0
 def delete_manifests():
     """
     Delete every row from the manifest-related tables and return "OK".

     Rows that point at manifests (legacy image links, blob links and the
     TagManifest-to-Manifest mapping) are removed before the Manifest rows
     themselves, and the mapping is removed before the TagManifest rows it also
     points at, so the deletes do not trip over references to rows that are
     still present.
     """
     ManifestLegacyImage.delete().execute()
     ManifestBlob.delete().execute()
     # The mapping table references both Manifest and TagManifest; clear it first.
     TagManifestToManifest.delete().execute()
     Manifest.delete().execute()
     TagManifest.delete().execute()
     return "OK"
示例#3
0
def _is_storage_orphaned(candidate_id):
    """
    Report whether the candidate storage ID is no longer referenced by any row.

    A ManifestBlob, Image or UploadedBlob row (checked in that order) pointing at the
    storage means it is still in use and False is returned. The lookups run inside
    `ensure_under_transaction()`, so callers must already hold a transaction.
    """
    with ensure_under_transaction():
        # (model, name of the column that points at the storage row)
        reference_lookups = (
            (ManifestBlob, "blob"),
            (Image, "storage"),
            (UploadedBlob, "blob"),
        )
        for model_cls, column_name in reference_lookups:
            try:
                model_cls.get(**{column_name: candidate_id})
            except model_cls.DoesNotExist:
                continue
            # A referencing row was found: the storage is still in use.
            return False

    return True
示例#4
0
def connect_blobs(manifest: ManifestInterface, blob_ids: set[int],
                  repository_id: int):
    """
    Link each blob in `blob_ids` to `manifest` by inserting ManifestBlob rows.

    Nothing is written when `blob_ids` is empty.

    Raises:
        _ManifestAlreadyExists: when the bulk insert fails with an IntegrityError;
            the original error is attached as the cause.
    """
    manifest_blobs = [
        dict(manifest=manifest, repository=repository_id, blob=blob_id)
        for blob_id in blob_ids
    ]
    # A bulk insert with no rows has nothing meaningful to write (and, depending on
    # the ORM version, may attempt a default-values insert), so skip it entirely.
    if not manifest_blobs:
        return

    try:
        ManifestBlob.insert_many(manifest_blobs).execute()
    except IntegrityError as e:
        raise _ManifestAlreadyExists(e) from e
示例#5
0
def clear_rows(initialized_db):
    """Empty every new-style tag and manifest table so a backfill starts from scratch."""
    # Deletion order matters: tables holding references are emptied before the
    # tables they point at, with Manifest itself going last.
    new_style_models = (
        TagToRepositoryTag,
        Tag,
        TagManifestLabelMap,
        ManifestLabel,
        ManifestBlob,
        ManifestLegacyImage,
        TagManifestToManifest,
        Manifest,
    )
    for model_cls in new_style_models:
        model_cls.delete().execute()
示例#6
0
def test_perform_indexing_invalid_manifest(initialized_db, set_secscan_config):
    """Indexing manifests that have no blob rows records them all as MANIFEST_UNSUPPORTED."""
    scanner = V4SecurityScanner(app, instance_keys, storage)
    scanner._secscan_api = mock.Mock()

    # With every ManifestBlob row gone, the manifests become invalid for indexing.
    ManifestBlob.delete().execute()

    scanner.perform_indexing()

    # Exactly one status row per manifest is expected ...
    status_total = ManifestSecurityStatus.select().count()
    manifest_total = Manifest.select().count()
    assert status_total == manifest_total

    # ... and each of them must carry the "unsupported" status.
    for status_row in ManifestSecurityStatus.select():
        assert status_row.index_status == IndexStatus.MANIFEST_UNSUPPORTED
示例#7
0
def test_store_tag_manifest(get_storages, initialized_db):
  """ Storing a tag manifest also writes the new-model manifest, blob and legacy image rows. """
  manifest_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'sometag')

  storages = get_storages()
  assert storages

  # Add one layer (and one legacy image) per available storage row.
  repository = model.repository.get_repository('devtable', 'simple')
  checksum_to_storage_id = {}
  for position, storage_row in enumerate(storages):
    legacy_id = 'someimage%s' % position
    manifest_builder.add_layer(storage_row.content_checksum, json.dumps({'id': legacy_id}))
    find_create_or_link_image(legacy_id, repository, 'devtable', {}, 'local_us')
    checksum_to_storage_id[storage_row.content_checksum] = storage_row.id

  built = manifest_builder.build(docker_v2_signing_key)
  tag_manifest, _ = store_tag_manifest_for_testing(
    'devtable',
    'simple',
    'sometag',
    built,
    built.leaf_layer_v1_image_id,
    checksum_to_storage_id,
  )

  # The old-style tag manifest must be mapped onto a new-style manifest row
  # carrying the same bytes and digest.
  mapping = TagManifestToManifest.get(tag_manifest=tag_manifest)
  new_manifest = mapping.manifest

  assert new_manifest is not None
  assert new_manifest.manifest_bytes == built.bytes.as_encoded_str()
  assert new_manifest.digest == str(built.digest)

  # Every storage row used as a layer must be linked as a manifest blob.
  linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == new_manifest)
  linked_blob_ids = {row.blob_id for row in linked_rows}
  expected_blob_ids = {storage_row.id for storage_row in storages}
  assert linked_blob_ids == expected_blob_ids

  # The legacy image link must match the image of the tag.
  legacy_link = ManifestLegacyImage.get(manifest=new_manifest)
  assert legacy_link.image == tag_manifest.tag.image
示例#8
0
def test_manifestbackfillworker_mislinked_invalid_manifest(clear_rows, initialized_db):
    """
    Backfilling a mislinked manifest whose layer names an invalid blob marks it broken.

    The legacy image link must still point at the tag's image, and no ManifestBlob
    rows may exist for the backfilled manifest.
    """
    # The tag is reused below, so drop the existing tag manifests (labels first).
    TagManifestLabel.delete().execute()
    TagManifest.delete().execute()

    # NOTE(review): `repo` is never read afterwards; only the lookup itself is kept.
    repo = model.repository.get_repository("devtable", "complex")
    v50_tag = model.tag.get_active_tag("devtable", "gargantuan", "v5.0")

    # Mislink on purpose: the only layer references an invalid blob digest while
    # the tag manifest is attached to the v5.0 tag (and therefore its image).
    layer_builder = DockerSchema1ManifestBuilder("devtable", "gargantuan", "sometag")
    layer_builder.add_layer("sha256:deadbeef", '{"id": "foo"}')
    built = layer_builder.build(docker_v2_signing_key)

    broken_manifest = TagManifest.create(
        json_data=built.bytes.as_encoded_str(),
        digest=built.digest,
        tag=v50_tag,
    )

    # The backfill itself must report success, but flag the mapping as broken.
    assert _backfill_manifest(broken_manifest)

    mapping = TagManifestToManifest.get(tag_manifest=broken_manifest)
    assert mapping.broken

    backfilled = mapping.manifest
    legacy_link = ManifestLegacyImage.get(manifest=backfilled)
    assert legacy_link.image == v50_tag.image

    # No blob could be resolved, so nothing may have been linked.
    blob_links = list(ManifestBlob.select().where(ManifestBlob.manifest == backfilled))
    assert not blob_links
 def test_create_placeholder_blobs_for_new_manifest(self, create_repo):
     """
     Creating a manifest through the proxy model links one blob row per layer plus
     one for the config, with digests matching those named in the input manifest.
     """
     repo_ref = create_repo(self.orgname, self.upstream_repository, self.user)

     raw_manifest = Bytes.for_string_or_unicode(UBI8_8_4_MANIFEST_SCHEMA2)
     input_manifest = parse_manifest_from_bytes(
         raw_manifest, DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE
     )

     proxy_model = ProxyModel(self.orgname, self.upstream_repository, self.user)
     manifest, _ = proxy_model._create_manifest_and_retarget_tag(
         repo_ref, input_manifest, self.tag
     )
     assert manifest is not None

     # Schema 2 manifests carry one config blob on top of their layers.
     layers = input_manifest.manifest_dict["layers"]
     expected_blob_total = 1 + len(layers)

     linked_blobs = ManifestBlob.select().where(ManifestBlob.manifest == manifest.id)
     assert expected_blob_total == linked_blobs.count()

     # Compare digests irrespective of the order in which rows come back.
     expected_digests = [layer["digest"] for layer in layers]
     expected_digests.append(input_manifest.config.digest)
     created_digests = [row.blob.content_checksum for row in linked_blobs]
     assert sorted(expected_digests) == sorted(created_digests)
示例#10
0
def backfill_replication():
    """
    Enqueue replication for manifest blobs missing from any required location.

    For every ManifestBlob row, the required locations are the region locations of
    the owning namespace plus the storage engine's default locations. A blob lacking
    a placement in one of them is reported on stdout and, unless the replication
    queue already has a live item for it, queued for replication. Blobs already
    found to be missing locations are skipped when seen again.
    """
    encountered = set()

    blob_rows = (
        ManifestBlob.select(ManifestBlob, Repository, User)
        .join(ImageStorage)
        .switch(ManifestBlob)
        .join(Repository)
        .join(User)
    )

    for blob_row in blob_rows:
        blob = blob_row.blob
        if blob.uuid in encountered:
            continue

        # Required locations depend on the namespace, not just on the blob.
        namespace = blob_row.repository.namespace_user
        region_locations = model.user.get_region_locations(namespace)
        locations_required = region_locations | set(storage.default_locations)

        placements = (
            ImageStoragePlacement.select(ImageStoragePlacement, ImageStorageLocation)
            .where(ImageStoragePlacement.storage == blob)
            .join(ImageStorageLocation)
        )
        existing_locations = {placement.location.name for placement in placements}

        locations_missing = locations_required - existing_locations
        if not locations_missing:
            continue

        print("Enqueueing manifest blob %s to be replicated" %
              (blob.uuid))
        encountered.add(blob.uuid)

        # Avoid queueing a second item while one is still alive for this blob.
        if not image_replication_queue.alive([blob.uuid]):
            queue_storage_replication(namespace.username, blob)
示例#11
0
def test_manifestbackfillworker_mislinked_manifest(clear_rows, initialized_db):
  """ Backfilling a mislinked manifest links the blob named by its layer, not the image's. """
  # The tag is reused below, so drop the existing tag manifests (labels first).
  TagManifestLabel.delete().execute()
  TagManifest.delete().execute()

  # NOTE(review): `repo` is never read afterwards; only the lookup itself is kept.
  repo = model.repository.get_repository('devtable', 'complex')
  v30_tag = model.tag.get_active_tag('devtable', 'gargantuan', 'v3.0')
  v50_tag = model.tag.get_active_tag('devtable', 'gargantuan', 'v5.0')

  # Mislink on purpose: the layer references the blob of the v3.0 image while the
  # tag manifest is attached to the v5.0 tag (and therefore its image).
  layer_builder = DockerSchema1ManifestBuilder('devtable', 'gargantuan', 'sometag')
  layer_builder.add_layer(v30_tag.image.storage.content_checksum, '{"id": "foo"}')
  built = layer_builder.build(docker_v2_signing_key)

  mislinked_manifest = TagManifest.create(
    json_data=built.bytes.as_encoded_str(),
    digest=built.digest,
    tag=v50_tag,
  )

  # The backfill must succeed without flagging the mapping as broken.
  assert _backfill_manifest(mislinked_manifest)

  mapping = TagManifestToManifest.get(tag_manifest=mislinked_manifest)
  assert not mapping.broken

  backfilled = mapping.manifest
  legacy_link = ManifestLegacyImage.get(manifest=backfilled)
  assert legacy_link.image == v50_tag.image

  # Exactly one blob is linked, and it is the one the layer actually named.
  blob_links = list(ManifestBlob.select().where(ManifestBlob.manifest == backfilled))
  assert len(blob_links) == 1
  assert blob_links[0].blob.content_checksum == v30_tag.image.storage.content_checksum
示例#12
0
def _get_dangling_storage_count():
    """
    Count the ImageStorage rows that no Image, ManifestBlob or DerivedStorageForImage
    row refers to.
    """
    all_storage_ids = {row.id for row in ImageStorage.select()}
    image_refs = {row.storage_id for row in Image.select()}
    manifest_refs = {row.blob_id for row in ManifestBlob.select()}
    derived_refs = {row.derivative_id for row in DerivedStorageForImage.select()}

    # Whatever remains after removing every referenced ID is dangling.
    dangling_ids = all_storage_ids - image_refs - derived_refs - manifest_refs
    return len(dangling_ids)
示例#13
0
 def _create_blob(self, digest: str, size: int, manifest_id: int,
                  repo_id: int):
     """
     Return the ImageStorage row for `digest`, linked to the given manifest.

     The storage row is looked up by content checksum and created (with `size` used
     for both the image size and the compressed size) when missing. A ManifestBlob
     row tying it to `manifest_id` and `repo_id` is then created unless one exists.
     """
     try:
         storage_row = ImageStorage.get(content_checksum=digest)
     except ImageStorage.DoesNotExist:
         # TODO: which size should we really be setting here?
         storage_row = ImageStorage.create(
             content_checksum=digest, image_size=size, compressed_size=size
         )

     # The same column values are used both to look for and to create the link row.
     link_columns = dict(manifest_id=manifest_id, blob=storage_row, repository_id=repo_id)
     try:
         ManifestBlob.get(**link_columns)
     except ManifestBlob.DoesNotExist:
         ManifestBlob.create(**link_columns)

     return storage_row
示例#14
0
文件: gc.py 项目: Mulecharda/quay
def purge_repository(repo, force=False):
    """
    Remove the repository and everything that belongs to it.

    Returns True when the repository row was deleted and False when it had already
    disappeared by the time the final delete was attempted. Garbage collection is
    incremental and repeatable, so callers are free to ignore the result.

    The repository must be marked for deletion unless `force` is set.
    """
    assert repo.state == RepositoryState.MARKED_FOR_DELETION or force

    if repo.kind.name != "application":
        # Regular repositories: GC removes the images and storage.
        _purge_repository_contents(repo)
    else:
        # Application repositories: new-model tags go in *two* passes, since they
        # can reference parent tags and MySQL is particular about deleting such
        # self-referencing rows. Tags with a linked tag go first, the rest after.
        linked_tags = ApprTag.delete().where(
            ApprTag.repository == repo, ~(ApprTag.linked_tag >> None)
        )
        linked_tags.execute()
        ApprTag.delete().where(ApprTag.repository == repo).execute()

    # Nothing tag-, manifest-, image- or blob-like may be left in the repository.
    for model_cls in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
        leftover = model_cls.select().where(model_cls.repository == repo).count()
        assert leftover == 0

    # Clear the larger reference tables (build triggers, builds, permissions, ...)
    # through the chunked helper.
    chunked_models = (
        RepositoryPermission,
        RepositoryBuild,
        RepositoryBuildTrigger,
        RepositoryActionCount,
        Star,
        AccessToken,
        RepositoryNotification,
        BlobUpload,
        RepoMirrorConfig,
        RepositoryAuthorizedEmail,
    )
    for model_cls in chunked_models:
        _chunk_delete_all(repo, model_cls, force=force)

    # Drop the deletion marker rows for the repository.
    DeletedRepository.delete().where(DeletedRepository.repository == repo).execute()

    # Finally remove the repository row itself, provided it still exists.
    try:
        fetched = Repository.get(id=repo.id)
    except Repository.DoesNotExist:
        return False

    fetched.delete_instance(recursive=True, delete_nullable=False, force=force)
    return True
示例#15
0
def test_purge_repository_storage_blob(default_tag_policy, initialized_db):
    """
    Purging repositories removes unreferenced uploaded blobs from storage.

    Before each repository is purged, every uploaded blob of that repository which
    no other repository references (through a ManifestBlob, Image or UploadedBlob
    row) is recorded. After all repositories have been purged, none of the recorded
    blobs may still exist in the preferred storage location.
    """
    with populate_storage_for_gc():
        expected_blobs_removed_from_storage = set()
        preferred = storage.preferred_locations[0]

        # Check that every existing uploaded blob has an object in storage.
        for repo in database.Repository.select().order_by(database.Repository.id):
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                assert storage.exists(
                    {preferred}, storage.blob_path(uploadedblob.blob.content_checksum)
                )

        # Remove everything.
        for repo in database.Repository.select():
            for uploadedblob in UploadedBlob.select().where(UploadedBlob.repository == repo):
                # Check if only this repository is referencing the uploaded blob.
                # If so, the blob should be removed from storage.
                has_dependent_manifestblob = (
                    ManifestBlob.select()
                    .where(
                        ManifestBlob.blob == uploadedblob.blob,
                        ManifestBlob.repository != repo,
                    )
                    .count()
                )
                has_dependent_image = (
                    Image.select()
                    .where(
                        Image.storage == uploadedblob.blob,
                        Image.repository != repo,
                    )
                    .count()
                )
                # Compare the `blob` column with the underlying blob. Comparing the
                # model class itself with a row instance is not a column filter and
                # would never match uploads of the same blob in other repositories.
                has_dependent_uploadedblobs = (
                    UploadedBlob.select()
                    .where(
                        UploadedBlob.blob == uploadedblob.blob,
                        UploadedBlob.repository != repo,
                    )
                    .count()
                )

                if (
                    not has_dependent_manifestblob
                    and not has_dependent_image
                    and not has_dependent_uploadedblobs
                ):
                    expected_blobs_removed_from_storage.add(uploadedblob.blob)

            assert model.gc.purge_repository(repo, force=True)

        for removed_blob_from_storage in expected_blobs_removed_from_storage:
            assert not storage.exists(
                {preferred}, storage.blob_path(removed_blob_from_storage.content_checksum)
            )
示例#16
0
def purge_repository(namespace_name, repository_name):
  """ Remove the named repository and everything that belongs to it.

      Returns True when the repository was deleted and False when it could not be
      found, either up front or right before the final delete. Garbage collection
      is incremental and repeatable, so callers are free to ignore the result.
      """
  try:
    repo = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    return False

  assert repo.name == repository_name

  if repo.kind.name != 'application':
    # Regular repositories: GC removes the images and storage.
    _purge_repository_contents(repo)
  else:
    # Application repositories: new-model tags go in *two* passes, since they can
    # reference parent tags and MySQL is particular about deleting such rows.
    linked_tags = ApprTag.delete().where(ApprTag.repository == repo,
                                         ~(ApprTag.linked_tag >> None))
    linked_tags.execute()
    ApprTag.delete().where(ApprTag.repository == repo).execute()

  # Nothing tag-, manifest-, image- or blob-like may be left in the repository.
  for model_cls in (ApprTag, Tag, RepositoryTag, Manifest, ManifestBlob, Image):
    leftover = model_cls.select().where(model_cls.repository == repo).count()
    assert leftover == 0

  # Remove the repository row itself, provided it still exists.
  try:
    fetched = _basequery.get_existing_repository(namespace_name, repository_name)
  except Repository.DoesNotExist:
    return False

  fetched.delete_instance(recursive=True, delete_nullable=False)

  # Let every registered cleanup callback know the repository is gone.
  for cleanup_callback in config.repo_cleanup_callbacks:
    cleanup_callback(namespace_name, repository_name)

  return True
示例#17
0
def test_get_or_create_manifest(schema_version, initialized_db):
    """
    get_or_create_manifest creates the manifest once and returns it on later calls.

    Covers the created row's fields, the hidden temporary tag, the legacy image,
    the linked blobs and the labels taken from the image config. Only schema
    versions 1 and 2 are handled by this test.
    """
    repository = create_repository('devtable', 'newrepo', None)

    expected_labels = {
        'Foo': 'Bar',
        'Baz': 'Meh',
    }

    # Image config / v1 compatibility JSON carrying the labels to be applied.
    config_payload = {
        'id': 'somelegacyid',
        'config': {
            'Labels': expected_labels,
        },
        "rootfs": {
            "type": "layers",
            "diff_ids": []
        },
        "history": [
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
        ],
    }
    layer_json = json.dumps(config_payload)

    # A legacy image the manifest's layer refers to.
    find_create_or_link_image('somelegacyid', repository, 'devtable', {}, 'local_us')

    # One blob holding the config ...
    _, config_digest = _populate_blob(layer_json)

    # ... and one holding arbitrary layer data.
    random_data = 'hello world'
    _, random_digest = _populate_blob(random_data)

    # Build the manifest for the requested schema version.
    if schema_version == 1:
        v1_builder = DockerSchema1ManifestBuilder('devtable', 'simple', 'anothertag')
        v1_builder.add_layer(random_digest, layer_json)
        sample_manifest_instance = v1_builder.build(docker_v2_signing_key)
    elif schema_version == 2:
        v2_builder = DockerSchema2ManifestBuilder()
        v2_builder.set_config_digest(config_digest, len(layer_json))
        v2_builder.add_layer(random_digest, len(random_data))
        sample_manifest_instance = v2_builder.build()

    # First call: the manifest must be newly created.
    first_result = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created = first_result.manifest
    newly_created = first_result.newly_created

    assert newly_created
    assert created is not None
    assert created.media_type.name == sample_manifest_instance.media_type
    assert created.digest == sample_manifest_instance.digest
    assert created.manifest_bytes == sample_manifest_instance.bytes.as_encoded_str()
    assert first_result.labels_to_apply == expected_labels

    # A hidden, expiring tag must point at the new manifest.
    assert Tag.get(manifest=created, hidden=True).lifetime_end_ms

    # The legacy image must exist and be backed by the layer blob.
    legacy_image = get_legacy_image_for_manifest(created)
    assert legacy_image is not None
    assert legacy_image.storage.content_checksum == random_digest

    # The layer blob (and, for schema 2, the config blob) must be linked.
    linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == created)
    blob_digests = [row.blob.content_checksum for row in linked_rows]

    assert random_digest in blob_digests
    if schema_version == 2:
        assert config_digest in blob_digests

    # Second call: the very same manifest comes back and is not flagged as new.
    second_result = get_or_create_manifest(repository, sample_manifest_instance, storage)
    created2 = second_result.manifest
    newly_created2 = second_result.newly_created

    assert not newly_created2
    assert created2 == created

    # It must again have a hidden, expiring tag.
    assert Tag.get(manifest=created2, hidden=True).lifetime_end_ms

    # Both labels from the config must have been applied to the manifest.
    labels = list(list_manifest_labels(created))
    assert len(labels) == 2

    applied_labels = {label.key: label.value for label in labels}
    assert applied_labels == expected_labels
示例#18
0
def test_images_shared_cas(default_tag_policy, initialized_db):
    """
    GC of one of two tags whose blobs share a CAS path must keep the stored file.

    A repository gets two tags, `first` and `second`, whose manifests each have a single
    layer with the same digest. Two distinct ImageStorage records are created for that
    digest, so they share the same *CAS path* while being *distinct records*, and the
    second manifest's blob row is retargeted so the two manifests use different records.

    Deleting the first tag should delete the first image (`i1`) but leave the second (`i2`)
    alone, and the file in storage must survive, as its CAS path is still shared.
    """
    with assert_gc_integrity(expect_storage_removed=True):
        repository = create_repository()

        # Create two image storage records with the same content checksum.
        content = b"hello world"
        digest = "sha256:" + hashlib.sha256(content).hexdigest()
        preferred = storage.preferred_locations[0]
        storage.put_content({preferred}, storage.blob_path(digest), content)

        is1 = database.ImageStorage.create(content_checksum=digest)
        is2 = database.ImageStorage.create(content_checksum=digest)

        location = database.ImageStorageLocation.get(name=preferred)

        # Give both records a placement in the preferred location.
        database.ImageStoragePlacement.create(location=location, storage=is1)
        database.ImageStoragePlacement.create(location=location, storage=is2)

        # Temporarily link the blob into the repository so it is available.
        model.blob.store_blob_record_and_temp_link_in_repo(
            repository, digest, location, len(content), 120)

        # Ensure the CAS path exists.
        assert storage.exists({preferred}, storage.blob_path(digest))

        repo_ref = RepositoryReference.for_repo_obj(repository)

        # Store a manifest pointing to that path as `first`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "first")
        builder.insert_layer(
            digest,
            json.dumps({
                "id": "i1",
            }),
        )
        manifest = builder.build(docker_v2_signing_key)
        registry_model.create_manifest_and_retarget_tag(repo_ref,
                                                        manifest,
                                                        "first",
                                                        storage,
                                                        raise_on_error=True)

        # Create the legacy image rows for the manifest now behind `first`.
        tag_ref = registry_model.get_repo_tag(repo_ref, "first")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(
            manifest_ref, storage)

        # Store another as `second`.
        builder = DockerSchema1ManifestBuilder(
            repository.namespace_user.username, repository.name, "second")
        builder.insert_layer(
            digest,
            json.dumps({
                "id": "i2",
            }),
        )
        manifest = builder.build(docker_v2_signing_key)
        created, _ = registry_model.create_manifest_and_retarget_tag(
            repo_ref, manifest, "second", storage, raise_on_error=True)

        # Create the legacy image rows for the manifest now behind `second`.
        tag_ref = registry_model.get_repo_tag(repo_ref, "second")
        manifest_ref = registry_model.get_manifest_for_tag(tag_ref)
        registry_model.populate_legacy_images_for_testing(
            manifest_ref, storage)

        # Manually retarget the second manifest's blob to the other storage record:
        # whichever of the two records it currently points at, switch it to the other one.
        try:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is1)
            second_blob.blob = is2
            second_blob.save()
        except ManifestBlob.DoesNotExist:
            second_blob = ManifestBlob.get(manifest=created._db_id, blob=is2)
            second_blob.blob = is1
            second_blob.save()

        # Delete the temp reference.
        _delete_temp_links(repository)

        # Ensure the legacy images exist.
        assert_not_deleted(repository, "i1", "i2")

        # Delete the first tag; only its image may go away.
        delete_tag(repository, "first")
        assert_deleted(repository, "i1")
        assert_not_deleted(repository, "i2")

        # Ensure the CAS path still exists.
        assert storage.exists({preferred}, storage.blob_path(digest))
示例#19
0
def _garbage_collect_manifest(manifest_id, context):
  """ Deletes the manifest with the given ID, together with its dependent rows, if unused.

      Before deleting, the IDs of the manifest's blobs, legacy image, child manifests and
      labels are added to `context` so they can be considered for collection as well.

      Returns False when the manifest is still referenced (as reported by
      `_check_manifest_used`) or no longer exists; otherwise deletes it, marks it as removed
      on `context` and returns True.
  """
  assert manifest_id is not None

  # Make sure the manifest isn't referenced.
  if _check_manifest_used(manifest_id):
    return False

  # Add the manifest's blobs to the context to be GCed.
  for manifest_blob in ManifestBlob.select().where(ManifestBlob.manifest == manifest_id):
    context.add_blob_id(manifest_blob.blob_id)

  # Retrieve the manifest's associated image, if any.
  try:
    legacy_image_id = ManifestLegacyImage.get(manifest=manifest_id).image_id
    context.add_legacy_image_id(legacy_image_id)
  except ManifestLegacyImage.DoesNotExist:
    # No legacy image link; remembered so the link-row delete below is skipped.
    legacy_image_id = None

  # Add child manifests to be GCed.
  for connector in ManifestChild.select().where(ManifestChild.manifest == manifest_id):
    context.add_manifest_id(connector.child_manifest_id)

  # Add the labels to be GCed.
  for manifest_label in ManifestLabel.select().where(ManifestLabel.manifest == manifest_id):
    context.add_label_id(manifest_label.label_id)

  # Delete the manifest.
  with db_transaction():
    try:
      manifest = Manifest.select().where(Manifest.id == manifest_id).get()
    except Manifest.DoesNotExist:
      return False

    assert manifest.id == manifest_id
    assert manifest.repository_id == context.repository.id
    # Check usage again now that we are inside the transaction; presumably this guards
    # against a reference having been added since the first check above.
    if _check_manifest_used(manifest_id):
      return False

    # Delete any label mappings.
    (TagManifestLabelMap
     .delete()
     .where(TagManifestLabelMap.manifest == manifest_id)
     .execute())

    # Delete any mapping rows for the manifest.
    TagManifestToManifest.delete().where(TagManifestToManifest.manifest == manifest_id).execute()

    # Delete any label rows.
    ManifestLabel.delete().where(ManifestLabel.manifest == manifest_id,
                                 ManifestLabel.repository == context.repository).execute()

    # Delete any child manifest rows.
    ManifestChild.delete().where(ManifestChild.manifest == manifest_id,
                                 ManifestChild.repository == context.repository).execute()

    # Delete the manifest blobs for the manifest.
    ManifestBlob.delete().where(ManifestBlob.manifest == manifest_id,
                                ManifestBlob.repository == context.repository).execute()

    # Delete the manifest legacy image row.
    if legacy_image_id:
      (ManifestLegacyImage
       .delete()
       .where(ManifestLegacyImage.manifest == manifest_id,
              ManifestLegacyImage.repository == context.repository)
       .execute())

    # Delete the manifest.
    manifest.delete_instance()

  # Only reached once the transaction block has completed with the manifest deleted.
  context.mark_manifest_removed(manifest)
  return True
示例#20
0
文件: test_gc.py 项目: zhill/quay
def assert_gc_integrity(expect_storage_removed=True, check_oci_tags=True):
    """
    Generator that brackets a GC run with integrity assertions.

    Before yielding, it registers an image cleanup callback and records how many
    dangling storages, labels and manifests exist. After the yield it asserts that
    those counts are unchanged, that every image reported to the callback is gone
    (along with its storage, when nothing else uses it), that all CAS content can
    still be read from the storage engine, that every OCI tag is mapped when
    `check_oci_tags` is set, and that each tag's manifest has exactly the blobs it
    declares.

    NOTE(review): `expect_storage_removed` is not read anywhere in this body.
    """
    # Collect everything handed to the image cleanup callback.
    removed_image_storages = []
    model.config.register_image_cleanup_callback(removed_image_storages.extend)

    # Snapshot the dangling counts before the GC under test runs.
    storages_before = _get_dangling_storage_count()
    labels_before = _get_dangling_label_count()
    manifests_before = _get_dangling_manifest_count()
    yield

    # None of the dangling counts may have changed.
    storages_after = _get_dangling_storage_count()
    assert storages_after == storages_before

    labels_after = _get_dangling_label_count()
    assert labels_after == labels_before, _get_dangling_labels()

    manifests_after = _get_dangling_manifest_count()
    assert manifests_after == manifests_before

    # Every image passed to the callback must be gone, and so must its storage
    # unless another image or a manifest blob still uses it.
    for removed in removed_image_storages:
        with pytest.raises(Image.DoesNotExist):
            Image.get(id=removed.id)

        storage_id = removed.storage_id
        still_shared = (
            Image.select().where(Image.storage == storage_id).count()
            or ManifestBlob.select().where(ManifestBlob.blob == storage_id).count()
        )
        if still_shared:
            continue

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(id=storage_id)

        with pytest.raises(ImageStorage.DoesNotExist):
            ImageStorage.get(uuid=removed.storage.uuid)

    # All CAS content still recorded in the database must be readable from storage.
    preferred = storage.preferred_locations[0]
    for storage_row in ImageStorage.select():
        if not storage_row.cas_path:
            continue
        cas_path = storage.blob_path(storage_row.content_checksum)
        storage.get_content({preferred}, cas_path)

    for blob_row in ApprBlob.select():
        storage.get_content({preferred}, storage.blob_path(blob_row.digest))

    # No OCI tag may be left without a TagToRepositoryTag mapping.
    if check_oci_tags:
        oci_tag_ids = {tag.id for tag in Tag.select()}
        mapped_tag_ids = {mapping.tag_id for mapping in TagToRepositoryTag.select()}
        assert oci_tag_ids.issubset(mapped_tag_ids)

    # Every tagged manifest must have exactly the blobs it declares locally.
    tagged_manifests = {tag.manifest for tag in Tag.select()}
    for manifest in tagged_manifests:
        linked_rows = ManifestBlob.select().where(ManifestBlob.manifest == manifest)
        found_blobs = {row.blob.content_checksum for row in linked_rows}

        manifest_bytes = Bytes.for_string_or_unicode(manifest.manifest_bytes)
        parsed = parse_manifest_from_bytes(manifest_bytes, manifest.media_type.name)
        assert set(parsed.local_blob_digests) == found_blobs
示例#21
0
def assert_gc_integrity(expect_storage_removed=True):
    """
    Generator that brackets a GC test with database integrity checks.

    Before yielding, it registers an image cleanup callback and snapshots the existing CAS digests
    and the dangling storage/label/manifest counts. Once the test body has run it asserts that:

      * the dangling counts are unchanged;
      * every image handed to the cleanup callback is gone, along with its storage when no
        Image, ManifestBlob or UploadedBlob row still points at that storage;
      * CAS content that did not exist before the test is readable from the storage engine;
      * each tagged manifest is linked to exactly its local blob digests.

    NOTE: `expect_storage_removed` is accepted but is not read anywhere in this body.
    """
    # Every image passed to the cleanup callback during the test lands in this list.
    cleaned_up_images = []
    unregister_callback = model.config.register_image_cleanup_callback(
        cleaned_up_images.extend)

    # Digests that exist before the test runs. They are excluded from the storage engine check
    # further down because they were likely created as test data.
    preexisting_digests = {
        row.content_checksum
        for row in ImageStorage.select() if row.cas_path
    }
    preexisting_digests.update(row.digest for row in ApprBlob.select())

    # Baseline counts of dangling objects.
    storage_baseline = _get_dangling_storage_count()
    label_baseline = _get_dangling_label_count()
    manifest_baseline = _get_dangling_manifest_count()

    # Hand control to the GC test; the callback is always unregistered afterwards.
    with check_transitive_modifications():
        try:
            yield
        finally:
            unregister_callback()

    # The number of dangling storages, labels and manifests must not have changed.
    assert _get_dangling_storage_count() == storage_baseline
    assert _get_dangling_label_count() == label_baseline, _get_dangling_labels()
    assert _get_dangling_manifest_count() == manifest_baseline

    # Each image reported to the callback, and its storage, must be gone from the database.
    for removed in cleaned_up_images:
        assert isinstance(removed, Image)

        try:
            # NOTE: SQLite can and will reuse AUTOINCREMENT IDs occasionally, so a row with the
            # same ID is tolerated as long as it carries a different Docker Image ID.
            # See: https://www.sqlite.org/autoinc.html
            reused_row = Image.get(id=removed.id)
            assert (reused_row.docker_image_id != removed.docker_image_id
                    ), "Found unexpected removed image %s under repo %s" % (
                        reused_row.id,
                        reused_row.repository,
                    )
        except Image.DoesNotExist:
            pass

        # A storage may only disappear when nothing references it any more. The `or` chain stops
        # querying at the first table that still holds a reference.
        still_referenced = (
            Image.select().where(
                Image.storage == removed.storage_id).count()
            or ManifestBlob.select().where(
                ManifestBlob.blob == removed.storage_id).count()
            or UploadedBlob.select().where(
                UploadedBlob.blob == removed.storage_id).count())

        if not still_referenced:
            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(id=removed.storage_id)

            with pytest.raises(ImageStorage.DoesNotExist):
                ImageStorage.get(uuid=removed.storage.uuid)

    # All CAS storage created during the test must be present in the storage engine.
    preferred = storage.preferred_locations[0]
    for row in ImageStorage.select():
        if row.content_checksum not in preexisting_digests and row.cas_path:
            storage.get_content({preferred},
                                storage.blob_path(row.content_checksum))

    for appr_blob in ApprBlob.select():
        if appr_blob.digest not in preexisting_digests:
            storage.get_content({preferred},
                                storage.blob_path(appr_blob.digest))

    # Every tag must point at a manifest whose linked blobs match its local blob digests.
    tagged_manifests = {tag.manifest for tag in Tag.select()}
    for tagged in tagged_manifests:
        linked_digests = set()
        for link in ManifestBlob.select().where(
                ManifestBlob.manifest == tagged):
            linked_digests.add(link.blob.content_checksum)

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(tagged.manifest_bytes),
            tagged.media_type.name)
        assert set(parsed.local_blob_digests) == linked_digests
示例#22
0
def test_get_or_create_manifest(schema_version, initialized_db):
    """
    Builds a schema 1 or schema 2 manifest, stores it with `get_or_create_manifest` and checks
    the stored row, its temporary tag, its linked blobs and its labels. A second call with the
    same manifest must return the existing row rather than create a new one.
    """
    repo = create_repository("devtable", "newrepo", None)

    wanted_labels = {"Foo": "Bar", "Baz": "Meh"}
    config_json = json.dumps({
        "id": "somelegacyid",
        "config": {"Labels": wanted_labels},
        "rootfs": {"type": "layers", "diff_ids": []},
        "history": [
            {
                "created": "2018-04-03T18:37:09.284840891Z",
                "created_by": "do something",
            },
        ],
    })

    # Legacy image the manifest refers to.
    find_create_or_link_image("somelegacyid", repo, "devtable", {},
                              "local_us")

    # One blob holding the config, one blob holding arbitrary layer data.
    _, config_digest = _populate_blob(config_json)
    layer_data = "hello world"
    _, layer_digest = _populate_blob(layer_data)

    # Build the manifest for the requested schema version.
    if schema_version == 1:
        v1_builder = DockerSchema1ManifestBuilder("devtable", "simple",
                                                  "anothertag")
        v1_builder.add_layer(layer_digest, config_json)
        built = v1_builder.build(docker_v2_signing_key)
    elif schema_version == 2:
        v2_builder = DockerSchema2ManifestBuilder()
        v2_builder.set_config_digest(config_digest,
                                     len(config_json.encode("utf-8")))
        v2_builder.add_layer(layer_digest, len(layer_data.encode("utf-8")))
        built = v2_builder.build()

    assert built.layers_compressed_size is not None

    # First call: the manifest must be newly created and mirror the built instance.
    first_result = get_or_create_manifest(repo, built, storage)
    row = first_result.manifest

    assert first_result.newly_created
    assert row is not None
    assert row.media_type.name == built.media_type
    assert row.digest == built.digest
    assert row.manifest_bytes == built.bytes.as_encoded_str()
    assert first_result.labels_to_apply == wanted_labels
    assert row.config_media_type == built.config_media_type
    assert row.layers_compressed_size == built.layers_compressed_size

    # Looking it up by digest returns the same data.
    looked_up = lookup_manifest(repo, row.digest, allow_dead=True)
    assert looked_up.digest == row.digest
    assert looked_up.config_media_type == row.config_media_type
    assert looked_up.layers_compressed_size == row.layers_compressed_size

    # A hidden, expiring tag must point at the manifest.
    assert Tag.get(manifest=row, hidden=True).lifetime_end_ms

    # The layer blob is always linked; the config blob only for schema 2.
    linked_digests = []
    for link in ManifestBlob.select().where(ManifestBlob.manifest == row):
        linked_digests.append(link.blob.content_checksum)

    assert layer_digest in linked_digests
    if schema_version == 2:
        assert config_digest in linked_digests

    # Second call: same manifest row, not newly created.
    second_result = get_or_create_manifest(repo, built, storage)
    row_again = second_result.manifest

    assert not second_result.newly_created
    assert row_again == row

    # It still has a temporary tag.
    assert Tag.get(manifest=row_again, hidden=True).lifetime_end_ms

    # Both labels from the config were stored on the manifest.
    stored_labels = list(list_manifest_labels(row))
    assert len(stored_labels) == 2

    stored_by_key = {entry.key: entry.value for entry in stored_labels}
    assert stored_by_key == wanted_labels
示例#23
0
    def test_create_placeholder_blobs_on_first_pull(self, test_name,
                                                    proxy_manifest_response):
        """
        Pulls a manifest through the (mocked) proxy and checks that a ManifestBlob row exists for
        every layer (plus the config blob for schema 2 manifests), that each layer blob records
        the size declared in the manifest, and that none of them has a storage placement yet.
        """
        test_params = storage_test_cases[test_name]
        # no blob placeholders are created for manifest lists - we don't have
        # the sub-manifests at manifest list creation time, so there's no way
        # to know which blobs the sub-manifest has.
        if test_params["manifest_type"] in [
                DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE,
                OCI_IMAGE_INDEX_CONTENT_TYPE,
        ]:
            pytest.skip(
                "manifest list detected - skipping blob placeholder test")

        repo = f"{self.orgname}/{test_params['image_name']}"
        params = {
            "repository": repo,
            "manifest_ref": test_params["manifest_ref"],
        }

        proxy_mock = proxy_manifest_response(test_params["manifest_ref"],
                                             test_params["manifest_json"],
                                             test_params["manifest_type"])
        with patch("data.registry_model.registry_proxy_model.Proxy",
                   MagicMock(return_value=proxy_mock)):
            headers = _get_auth_headers(self.sub, self.ctx, repo)
            headers["Accept"] = ", ".join(
                DOCKER_SCHEMA2_CONTENT_TYPES.union(OCI_CONTENT_TYPES).union(
                    DOCKER_SCHEMA1_CONTENT_TYPES))
            conduct_call(
                self.client,
                test_params["view_name"],
                url_for,
                "GET",
                params,
                expected_code=200,
                headers=headers,
            )

        parsed = parse_manifest_from_bytes(
            Bytes.for_string_or_unicode(test_params["manifest_json"]),
            test_params["manifest_type"],
            sparse_manifest_support=True,
        )
        manifest = Manifest.filter(Manifest.digest == parsed.digest).get()
        mdict = parsed.manifest_dict
        # schema 2 / OCI manifests list layers under "layers", schema 1 under "fsLayers".
        layers = mdict.get("layers", mdict.get("fsLayers"))
        mblobs = ManifestBlob.filter(ManifestBlob.manifest == manifest)

        expected_count = len(layers)

        # schema 2 manifests have an extra config blob which we need to take into
        # consideration in the total count
        config_digest = ""
        if parsed.schema_version == 2:
            config_digest = parsed.config.digest
            expected_count += 1

        assert mblobs.count() == expected_count

        for mblob in mblobs:
            blob = mblob.blob
            checksum = blob.content_checksum

            # don't assert if digest belongs to a config blob
            if checksum == config_digest:
                continue

            # Find the manifest layer this blob row corresponds to ("digest" for schema 2 / OCI
            # layers, "blobSum" for schema 1 layers).
            matched_layer = next(
                (layer for layer in layers
                 if layer.get("digest", layer.get("blobSum")) == checksum),
                None,
            )

            assert matched_layer is not None
            assert blob.image_size == matched_layer.get("size", None)

            # the absence of an image storage placement for a blob indicates that it's
            # a placeholder blob, not yet downloaded from the upstream registry.
            placements = ImageStoragePlacement.filter(
                ImageStoragePlacement.storage == blob)
            assert placements.count() == 0
示例#24
0
文件: gc.py 项目: kleesc/quay
def _purge_repository_contents(repo):
    """
    Purges all the contents of a repository, removing all of its tags, manifests and images.

    The purge runs in four stages, each repeated until nothing is left to process:

      1. OCI `Tag` rows, via `_purge_oci_tag`.
      2. `UploadedBlob` rows, via `_purge_uploaded_blob`.
      3. Pre-OCI `RepositoryTag` rows, via `_purge_pre_oci_tag`.
      4. Remaining `Image` rows, which are all handed to a single GC context per round.

    Stages 1-3 purge with `allow_non_expired=True`, i.e. regardless of expiration.

    Args:
        repo: the repository row whose contents are purged.

    Raises:
        AssertionError: if a row returned for the repository belongs to another repository, or
            if tags, manifests, manifest blobs or uploaded blobs remain after stages 1-3.
        Exception: if a GC round over the remaining images does not reduce their number.
    """
    logger.debug("Purging repository %s", repo)

    # Purge via all the tags.
    while True:
        found = False
        for tags in _chunk_iterate_for_deletion(
                Tag.select().where(Tag.repository == repo)):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Purge all uploaded blobs of the repository, whether or not they have expired.
    while True:
        found = False
        for uploaded_blobs in _chunk_iterate_for_deletion(
                UploadedBlob.select().where(UploadedBlob.repository == repo)):
            logger.debug("Found %s uploaded blobs to GC under repository %s",
                         len(uploaded_blobs), repo)
            found = True
            context = _GarbageCollectorContext(repo)
            for uploaded_blob in uploaded_blobs:
                logger.debug("Deleting uploaded blob %s under repository %s",
                             uploaded_blob, repo)
                assert uploaded_blob.repository_id == repo.id
                _purge_uploaded_blob(uploaded_blob,
                                     context,
                                     allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # TODO: remove this once we've removed the foreign key constraints from RepositoryTag
    # and Image.
    while True:
        found = False
        repo_tag_query = RepositoryTag.select().where(
            RepositoryTag.repository == repo)
        for tags in _chunk_iterate_for_deletion(repo_tag_query):
            logger.debug("Found %s tags to GC under repository %s", len(tags),
                         repo)
            found = True
            context = _GarbageCollectorContext(repo)

            for tag in tags:
                logger.debug("Deleting tag %s under repository %s", tag, repo)
                assert tag.repository_id == repo.id
                _purge_pre_oci_tag(tag, context, allow_non_expired=True)

            _run_garbage_collection(context)

        if not found:
            break

    # Sanity check: nothing tag- or manifest-related may remain at this point.
    assert Tag.select().where(Tag.repository == repo).count() == 0
    assert RepositoryTag.select().where(
        RepositoryTag.repository == repo).count() == 0
    assert Manifest.select().where(Manifest.repository == repo).count() == 0
    assert ManifestBlob.select().where(
        ManifestBlob.repository == repo).count() == 0
    assert UploadedBlob.select().where(
        UploadedBlob.repository == repo).count() == 0

    # Add all remaining images to a new context. We do this here to minimize the number of images
    # we need to load.
    while True:
        image_context = _GarbageCollectorContext(repo)

        existing_count = Image.select().where(Image.repository == repo).count()
        if not existing_count:
            break

        for image in Image.select().where(Image.repository == repo):
            logger.debug("Trying to delete image %s under repository %s",
                         image, repo)
            assert image.repository_id == repo.id
            image_context.add_legacy_image_id(image.id)

        _run_garbage_collection(image_context)

        # Each round must delete at least one image, otherwise this loop would never terminate.
        new_count = Image.select().where(Image.repository == repo).count()
        if new_count >= existing_count:
            raise Exception("GC purge bug! Please report this to support!")
示例#25
0
文件: gc.py 项目: kleesc/quay
def _garbage_collect_manifest(manifest_id, context):
    """
    Deletes the manifest with the given ID, and the rows that hang off it, if it is unused.

    Before deleting, the manifest's blobs, legacy image (if any), child manifests and labels are
    registered on `context` so they can be considered for GC as well.

    Args:
        manifest_id: ID of the manifest row to collect; must not be None.
        context: GC context for the manifest's repository. It receives the IDs of the objects
            referenced by the manifest and is told when the manifest has been removed.

    Returns:
        True if the manifest was deleted; False if `_check_manifest_used` reports it as still in
        use (checked both before and inside the transaction) or if its row no longer exists.
    """
    assert manifest_id is not None

    # Make sure the manifest isn't referenced.
    if _check_manifest_used(manifest_id):
        return False

    # Add the manifest's blobs to the context to be GCed.
    for manifest_blob in ManifestBlob.select().where(
            ManifestBlob.manifest == manifest_id):
        context.add_blob_id(manifest_blob.blob_id)

    # Retrieve the manifest's associated image, if any.
    try:
        legacy_image_id = ManifestLegacyImage.get(
            manifest=manifest_id).image_id
        context.add_legacy_image_id(legacy_image_id)
    except ManifestLegacyImage.DoesNotExist:
        legacy_image_id = None

    # Add child manifests to be GCed.
    for connector in ManifestChild.select().where(
            ManifestChild.manifest == manifest_id):
        context.add_manifest_id(connector.child_manifest_id)

    # Add the labels to be GCed.
    for manifest_label in ManifestLabel.select().where(
            ManifestLabel.manifest == manifest_id):
        context.add_label_id(manifest_label.label_id)

    # Delete the manifest.
    with db_transaction():
        try:
            manifest = Manifest.select().where(
                Manifest.id == manifest_id).get()
        except Manifest.DoesNotExist:
            return False

        assert manifest.id == manifest_id
        assert manifest.repository_id == context.repository.id
        # Re-check usage now that we are inside the transaction; the first check above ran
        # outside of it.
        if _check_manifest_used(manifest_id):
            return False

        # Delete any label mappings.
        deleted_tag_manifest_label_map = (TagManifestLabelMap.delete().where(
            TagManifestLabelMap.manifest == manifest_id).execute())

        # Delete any mapping rows for the manifest.
        deleted_tag_manifest_to_manifest = (
            TagManifestToManifest.delete().where(
                TagManifestToManifest.manifest == manifest_id).execute())

        # Delete any label rows.
        deleted_manifest_label = (ManifestLabel.delete().where(
            ManifestLabel.manifest == manifest_id,
            ManifestLabel.repository == context.repository,
        ).execute())

        # Delete any child manifest rows.
        deleted_manifest_child = (ManifestChild.delete().where(
            ManifestChild.manifest == manifest_id,
            ManifestChild.repository == context.repository,
        ).execute())

        # Delete the manifest blobs for the manifest.
        deleted_manifest_blob = (ManifestBlob.delete().where(
            ManifestBlob.manifest == manifest_id,
            ManifestBlob.repository == context.repository).execute())

        # Delete the security status for the manifest
        deleted_manifest_security = (ManifestSecurityStatus.delete().where(
            ManifestSecurityStatus.manifest == manifest_id,
            ManifestSecurityStatus.repository == context.repository,
        ).execute())

        # Delete the manifest legacy image row.
        # Only attempted when a legacy image link was found before the transaction started.
        deleted_manifest_legacy_image = 0
        if legacy_image_id:
            deleted_manifest_legacy_image = (
                ManifestLegacyImage.delete().where(
                    ManifestLegacyImage.manifest == manifest_id,
                    ManifestLegacyImage.repository == context.repository,
                ).execute())

        # Delete the manifest.
        manifest.delete_instance()

    context.mark_manifest_removed(manifest)

    # Record per-table deletion counts; this is only reached when the manifest was deleted.
    gc_table_rows_deleted.labels(
        table="TagManifestLabelMap").inc(deleted_tag_manifest_label_map)
    gc_table_rows_deleted.labels(
        table="TagManifestToManifest").inc(deleted_tag_manifest_to_manifest)
    gc_table_rows_deleted.labels(
        table="ManifestLabel").inc(deleted_manifest_label)
    gc_table_rows_deleted.labels(
        table="ManifestChild").inc(deleted_manifest_child)
    gc_table_rows_deleted.labels(
        table="ManifestBlob").inc(deleted_manifest_blob)
    gc_table_rows_deleted.labels(
        table="ManifestSecurityStatus").inc(deleted_manifest_security)
    if deleted_manifest_legacy_image:
        gc_table_rows_deleted.labels(
            table="ManifestLegacyImage").inc(deleted_manifest_legacy_image)

    gc_table_rows_deleted.labels(table="Manifest").inc()

    return True
示例#26
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """
    Validates the given manifest and creates its database rows (manifest, blob links, child
    links, labels and, unless `for_tagging` is set, a temporary tag).

    Args:
        repository_id: repository (or its ID) under which the manifest is created.
        manifest_interface_instance: parsed manifest to store.
        storage: storage engine, used to build the default retriever and the blob map.
        temp_tag_expiration_sec: expiration passed to `create_temporary_tag_if_necessary`.
        for_tagging: when True, no temporary tag is created for a newly created manifest.
        raise_on_error: when True, failures handled in this function raise
            `CreateManifestException` instead of returning None.
        retriever: content retriever; defaults to
            `RepositoryContentRetriever.for_repository(repository_id, storage)`.

    Returns:
        A `CreatedManifest`. `newly_created` is True with the computed `labels_to_apply` when
        this call created the row; it is False with `labels_to_apply=None` when the manifest
        already existed or was created concurrently. Returns None on failure when
        `raise_on_error` is False, and whenever `_build_blob_map` returns None.

    Raises:
        CreateManifestException: on validation, child manifest or integrity failures when
            `raise_on_error` is True.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            child_labels = child_manifest.get_manifest_labels(retriever)
            if child_labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # No exception is being handled here, so log with `error` rather than
                # `exception` (which would attach an empty traceback).
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(child_labels)

    # Build the map from required blob digests to the blob objects.
    blob_map = _build_blob_map(
        repository_id,
        manifest_interface_instance,
        retriever,
        storage,
        raise_on_error,
        require_empty_layer=False,
    )
    if blob_map is None:
        return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    storage_ids = {blob.id for blob in blob_map.values()}

    # Check for the manifest, in case it was created since we checked earlier.
    try:
        manifest = Manifest.get(repository=repository_id,
                                digest=manifest_interface_instance.digest)
        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)
    except Manifest.DoesNotExist:
        pass

    try:
        with db_transaction():
            # Create the manifest.
            try:
                manifest = Manifest.create(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest,
                    media_type=media_type,
                    manifest_bytes=manifest_interface_instance.bytes.
                    as_encoded_str(),
                    config_media_type=manifest_interface_instance.
                    config_media_type,
                    layers_compressed_size=manifest_interface_instance.
                    layers_compressed_size,
                )
            except IntegrityError as ie:
                # NOTE: An IntegrityError means (barring a bug) that the manifest was created by
                # another caller while we were attempting to create it. Since we need to return
                # the manifest, we raise a specialized exception here to break out of the
                # transaction so we can retrieve it.
                raise _ManifestAlreadyExists(ie)

            # Insert the blobs.
            blobs_to_insert = [
                dict(manifest=manifest,
                     repository=repository_id,
                     blob=storage_id) for storage_id in storage_ids
            ]
            if blobs_to_insert:
                try:
                    ManifestBlob.insert_many(blobs_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie)

            # Insert the manifest child rows (if applicable).
            if child_manifest_rows:
                children_to_insert = [
                    dict(manifest=manifest,
                         child_manifest=child_row,
                         repository=repository_id)
                    for child_row in child_manifest_rows.values()
                ]
                try:
                    ManifestChild.insert_many(children_to_insert).execute()
                except IntegrityError as ie:
                    raise _ManifestAlreadyExists(ie)

            # If this manifest is being created not for immediate tagging, add a temporary tag to the
            # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
            # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
            # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
            # in a repository for GC, then we will have to reevaluate this optimization at that time.
            if not for_tagging:
                create_temporary_tag_if_necessary(manifest,
                                                  temp_tag_expiration_sec)

        # Define the labels for the manifest (if any).
        # TODO: Once the old data model is gone, turn this into a batch operation and make the label
        # application to the manifest occur under the transaction.
        labels = manifest_interface_instance.get_manifest_labels(retriever)
        if labels:
            for key, value in labels.items():
                # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
                # such `labels`, as they don't really mean anything.
                if not key:
                    continue

                label_media_type = "application/json" if is_json(
                    value) else "text/plain"
                create_manifest_label(manifest, key, value, "manifest",
                                      label_media_type)

        # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
        # on the manifest or its resulting tags). We only return those labels either defined on
        # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
        # to ensure that any action performed is defined in all manifests.
        labels_to_apply = labels or {}
        if child_manifest_label_dicts:
            labels_to_apply = child_manifest_label_dicts[0].items()
            for child_manifest_label_dict in child_manifest_label_dicts[1:]:
                # Intersect the key+values of the labels to ensure we get the exact same result
                # for all the child manifests.
                labels_to_apply = labels_to_apply & child_manifest_label_dict.items(
                )

            labels_to_apply = dict(labels_to_apply)

        return CreatedManifest(manifest=manifest,
                               newly_created=True,
                               labels_to_apply=labels_to_apply)
    except _ManifestAlreadyExists as mae:
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
        except Manifest.DoesNotExist:
            # NOTE: If we've reached this point, then somehow we had an IntegrityError without it
            # being due to a duplicate manifest. We therefore log the error.
            logger.error(
                "Got integrity error when trying to create manifest: %s",
                mae.internal_exception)
            if raise_on_error:
                raise CreateManifestException(
                    "Attempt to create an invalid manifest. Please report this issue."
                )

            return None

        return CreatedManifest(manifest=manifest,
                               newly_created=False,
                               labels_to_apply=None)
示例#27
0
def test_get_or_create_manifest_with_remote_layers(initialized_db):
    """
    Stores a schema 2 manifest that has one remote (URL-backed) layer and one local layer, then
    checks that the row mirrors the manifest, that no legacy image is associated with it and
    that only the local blobs (config and local layer) are linked.
    """
    repo = create_repository("devtable", "newrepo", None)

    history_entry = {
        "created": "2018-04-03T18:37:09.284840891Z",
        "created_by": "do something",
    }
    config_json = json.dumps({
        "config": {},
        "rootfs": {"type": "layers", "diff_ids": []},
        "history": [history_entry, history_entry],
    })

    # One blob for the config, one for the local layer; the remote layer has no blob.
    _, config_digest = _populate_blob(config_json)

    local_data = "hello world"
    _, local_digest = _populate_blob(local_data)

    remote_digest = sha256_digest(b"something")

    schema2_builder = DockerSchema2ManifestBuilder()
    schema2_builder.set_config_digest(config_digest,
                                      len(config_json.encode("utf-8")))
    schema2_builder.add_layer(remote_digest, 1234, urls=["http://hello/world"])
    schema2_builder.add_layer(local_digest, len(local_data.encode("utf-8")))
    built = schema2_builder.build()

    # The remote layer counts as a blob of the manifest, but not as a local one.
    assert remote_digest in built.blob_digests
    assert remote_digest not in built.local_blob_digests

    assert built.has_remote_layer
    assert not built.has_legacy_image
    assert built.get_schema1_manifest("foo", "bar", "baz", None) is None

    # Write the manifest.
    result = get_or_create_manifest(repo, built, storage)
    assert result is not None

    row = result.manifest
    assert row
    assert row.media_type.name == built.media_type
    assert row.digest == built.digest
    assert row.config_media_type == built.config_media_type
    assert row.layers_compressed_size == built.layers_compressed_size

    # No legacy image may be associated with the manifest.
    assert get_legacy_image_for_manifest(row) is None

    # Only the local blobs are linked to the manifest.
    linked_digests = set()
    for link in ManifestBlob.select().where(ManifestBlob.manifest == row):
        linked_digests.add(link.blob.content_checksum)

    assert local_digest in linked_digests
    assert config_digest in linked_digests
    assert remote_digest not in linked_digests
示例#28
0
def test_retriever(initialized_db):
    """
    Writes a schema 2 manifest made of a config blob and two layer blobs, removes the Image
    rows for those blobs whose tags (if any) are all hidden, and then checks that a
    RepositoryContentRetriever still serves the manifest bytes and every linked blob.
    """
    repository = create_repository("devtable", "newrepo", None)

    # The two history entries are identical, so one dict is serialized twice.
    history_entry = {
        "created": "2018-04-03T18:37:09.284840891Z",
        "created_by": "do something",
    }
    layer_json = json.dumps({
        "config": {},
        "rootfs": {
            "type": "layers",
            "diff_ids": []
        },
        "history": [history_entry, history_entry],
    })

    # Store the config and two pieces of arbitrary data as blobs.
    _, config_digest = _populate_blob(layer_json)

    random_data = "hello world"
    _, random_digest = _populate_blob(random_data)

    other_random_data = "hi place"
    _, other_random_digest = _populate_blob(other_random_data)

    # Computed here but not referenced again by this test.
    remote_digest = sha256_digest(b"something")

    # Build the manifest: config first, then the two layers.
    builder = DockerSchema2ManifestBuilder()
    config_size = len(layer_json.encode("utf-8"))
    builder.set_config_digest(config_digest, config_size)
    builder.add_layer(other_random_digest,
                      len(other_random_data.encode("utf-8")))
    builder.add_layer(random_digest, len(random_data.encode("utf-8")))
    manifest = builder.build()

    # Every blob must be reported both as a blob and as a local blob.
    expected_digests = (config_digest, random_digest, other_random_digest)
    for expected in expected_digests:
        assert expected in manifest.blob_digests

    for expected in expected_digests:
        assert expected in manifest.local_blob_digests

    # Persist the manifest and check the resulting row.
    created_tuple = get_or_create_manifest(repository, manifest, storage)
    assert created_tuple is not None

    created_manifest = created_tuple.manifest
    assert created_manifest
    assert created_manifest.media_type.name == manifest.media_type
    assert created_manifest.digest == manifest.digest

    # Collect the checksums of the blobs linked to the manifest row.
    linked_rows = ManifestBlob.select().where(
        ManifestBlob.manifest == created_manifest)
    blob_digests = set(row.blob.content_checksum for row in linked_rows)

    for expected in (random_digest, other_random_digest, config_digest):
        assert expected in blob_digests

    # Drop the Image rows (and their tags) that point at the blobs only through hidden tags.
    for blob_digest in blob_digests:
        storage_row = ImageStorage.get(content_checksum=blob_digest)
        images = list(Image.select().where(Image.storage == storage_row))
        for image in images:
            tags_for_image = RepositoryTag.select().where(
                RepositoryTag.image == image)
            hidden_flags = [rt.hidden for rt in tags_for_image]
            if not all(hidden_flags):
                continue

            RepositoryTag.delete().where(
                RepositoryTag.image == image).execute()
            image.delete_instance(recursive=True)

    # The retriever must still return the manifest and each of its blobs.
    retriever = RepositoryContentRetriever(repository, storage)
    retrieved_bytes = retriever.get_manifest_bytes_with_digest(
        created_manifest.digest)
    assert retrieved_bytes == manifest.bytes.as_encoded_str()

    for blob_digest in blob_digests:
        assert retriever.get_blob_bytes_with_digest(blob_digest) is not None
示例#29
0
def _create_manifest(
    repository_id,
    manifest_interface_instance,
    storage,
    temp_tag_expiration_sec=TEMP_TAG_EXPIRATION_SEC,
    for_tagging=False,
    raise_on_error=False,
    retriever=None,
):
    """
    Validates the given parsed manifest and writes it, its blob links, its legacy image link
    (if any), its child manifest links (if any) and its labels to the database.

    Args:
        repository_id: Repository under which the manifest is looked up and created.
        manifest_interface_instance: Parsed manifest; validated and queried for child
            manifests, local blob digests, labels and media type.
        storage: Passed to the default retriever, to `get_or_create_manifest` for child
            manifests and to `get_or_create_shared_blob` for the empty layer blob.
        temp_tag_expiration_sec: Forwarded to `create_temporary_tag_if_necessary` when the
            manifest is not created for tagging.
        for_tagging: When true, no temporary tag is created for the new manifest.
        raise_on_error: When true, most failures raise `CreateManifestException` instead of
            returning None. A missing legacy image still returns None here; the flag is
            only forwarded to `_populate_legacy_image`.
        retriever: Content retriever to use; defaults to
            `RepositoryContentRetriever.for_repository(repository_id, storage)`.

    Returns:
        A `CreatedManifest`, or None on failure. If a row with the same digest already exists
        in the repository, it is returned with `newly_created=False` and
        `labels_to_apply=None`; otherwise `newly_created=True` and `labels_to_apply` is a dict.

    Raises:
        CreateManifestException: On failure when `raise_on_error` is true.
    """
    # Validate the manifest.
    retriever = retriever or RepositoryContentRetriever.for_repository(
        repository_id, storage)
    try:
        manifest_interface_instance.validate(retriever)
    except (ManifestException, MalformedSchema2ManifestList, BlobDoesNotExist,
            IOError) as ex:
        logger.exception("Could not validate manifest `%s`",
                         manifest_interface_instance.digest)
        if raise_on_error:
            raise CreateManifestException(str(ex))

        return None

    # Load, parse and get/create the child manifests, if any.
    child_manifest_refs = manifest_interface_instance.child_manifests(
        retriever)
    child_manifest_rows = {}
    child_manifest_label_dicts = []

    if child_manifest_refs is not None:
        for child_manifest_ref in child_manifest_refs:
            # Load and parse the child manifest.
            try:
                child_manifest = child_manifest_ref.manifest_obj
            except (
                    ManifestException,
                    MalformedSchema2ManifestList,
                    BlobDoesNotExist,
                    IOError,
            ) as ex:
                logger.exception(
                    "Could not load manifest list for manifest `%s`",
                    manifest_interface_instance.digest,
                )
                if raise_on_error:
                    raise CreateManifestException(str(ex))

                return None

            # Retrieve its labels.
            labels = child_manifest.get_manifest_labels(retriever)
            if labels is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve manifest labels")

                # No exception is being handled here, so log at error level rather than
                # with `logger.exception`, which would attach a meaningless traceback.
                logger.error(
                    "Could not load manifest labels for child manifest")
                return None

            # Get/create the child manifest in the database.
            child_manifest_info = get_or_create_manifest(
                repository_id,
                child_manifest,
                storage,
                raise_on_error=raise_on_error)
            if child_manifest_info is None:
                if raise_on_error:
                    raise CreateManifestException(
                        "Unable to retrieve child manifest")

                logger.error("Could not get/create child manifest")
                return None

            # Keyed by digest, so a child referenced more than once is linked only once.
            child_manifest_rows[child_manifest_info.manifest.
                                digest] = child_manifest_info.manifest
            child_manifest_label_dicts.append(labels)

    # Ensure all the blobs in the manifest exist.
    digests = set(manifest_interface_instance.local_blob_digests)
    blob_map = {}

    # If the special empty layer is required, simply load it directly. This is much faster
    # than trying to load it on a per repository basis, and that is unnecessary anyway since
    # this layer is predefined.
    if EMPTY_LAYER_BLOB_DIGEST in digests:
        digests.remove(EMPTY_LAYER_BLOB_DIGEST)
        blob_map[EMPTY_LAYER_BLOB_DIGEST] = get_shared_blob(
            EMPTY_LAYER_BLOB_DIGEST)
        if not blob_map[EMPTY_LAYER_BLOB_DIGEST]:
            if raise_on_error:
                raise CreateManifestException(
                    "Unable to retrieve specialized empty blob")

            logger.warning("Could not find the special empty blob in storage")
            return None

    if digests:
        query = lookup_repo_storages_by_content_checksum(
            repository_id, digests)
        blob_map.update({s.content_checksum: s for s in query})
        # Any digest the lookup did not return is unknown to this repository.
        for digest_str in digests:
            if digest_str not in blob_map:
                logger.warning(
                    "Unknown blob `%s` under manifest `%s` for repository `%s`",
                    digest_str,
                    manifest_interface_instance.digest,
                    repository_id,
                )

                if raise_on_error:
                    raise CreateManifestException("Unknown blob `%s`" %
                                                  digest_str)

                return None

    # Special check: If the empty layer blob is needed for this manifest, add it to the
    # blob map. This is necessary because Docker decided to elide sending of this special
    # empty layer in schema version 2, but we need to have it referenced for GC and schema version 1.
    if EMPTY_LAYER_BLOB_DIGEST not in blob_map:
        try:
            requires_empty_layer = manifest_interface_instance.get_requires_empty_layer_blob(
                retriever)
        except ManifestException as ex:
            if raise_on_error:
                raise CreateManifestException(str(ex))

            return None

        if requires_empty_layer is None:
            if raise_on_error:
                raise CreateManifestException(
                    "Could not load configuration blob")

            return None

        if requires_empty_layer:
            shared_blob = get_or_create_shared_blob(EMPTY_LAYER_BLOB_DIGEST,
                                                    EMPTY_LAYER_BYTES, storage)
            assert not shared_blob.uploading
            assert shared_blob.content_checksum == EMPTY_LAYER_BLOB_DIGEST
            blob_map[EMPTY_LAYER_BLOB_DIGEST] = shared_blob

    # Determine and populate the legacy image if necessary. Manifest lists will not have a legacy
    # image.
    legacy_image = None
    if manifest_interface_instance.has_legacy_image:
        legacy_image_id = _populate_legacy_image(repository_id,
                                                 manifest_interface_instance,
                                                 blob_map, retriever,
                                                 raise_on_error)
        if legacy_image_id is None:
            return None

        legacy_image = get_image(repository_id, legacy_image_id)
        if legacy_image is None:
            return None

    # Create the manifest and its blobs.
    media_type = Manifest.media_type.get_id(
        manifest_interface_instance.media_type)
    # A set, so a storage row referenced by several digests is inserted only once.
    storage_ids = {storage.id for storage in blob_map.values()}

    with db_transaction():
        # Check for the manifest. This is necessary because Postgres doesn't handle IntegrityErrors
        # well under transactions.
        try:
            manifest = Manifest.get(repository=repository_id,
                                    digest=manifest_interface_instance.digest)
            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)
        except Manifest.DoesNotExist:
            pass

        # Create the manifest.
        try:
            manifest = Manifest.create(
                repository=repository_id,
                digest=manifest_interface_instance.digest,
                media_type=media_type,
                manifest_bytes=manifest_interface_instance.bytes.
                as_encoded_str(),
            )
        except IntegrityError as ie:
            # The create failed; look the row up again in case it now exists.
            try:
                manifest = Manifest.get(
                    repository=repository_id,
                    digest=manifest_interface_instance.digest)
            except Manifest.DoesNotExist:
                logger.error(
                    "Got integrity error when trying to create manifest: %s",
                    ie)
                if raise_on_error:
                    raise CreateManifestException(
                        "Attempt to create an invalid manifest. Please report this issue."
                    )

                return None

            return CreatedManifest(manifest=manifest,
                                   newly_created=False,
                                   labels_to_apply=None)

        # Insert the blobs.
        blobs_to_insert = [
            dict(manifest=manifest, repository=repository_id, blob=storage_id)
            for storage_id in storage_ids
        ]
        if blobs_to_insert:
            ManifestBlob.insert_many(blobs_to_insert).execute()

        # Set the legacy image (if applicable).
        if legacy_image is not None:
            ManifestLegacyImage.create(repository=repository_id,
                                       image=legacy_image,
                                       manifest=manifest)

        # Insert the manifest child rows (if applicable).
        if child_manifest_rows:
            children_to_insert = [
                dict(manifest=manifest,
                     child_manifest=child_manifest,
                     repository=repository_id)
                for child_manifest in child_manifest_rows.values()
            ]
            ManifestChild.insert_many(children_to_insert).execute()

        # If this manifest is being created not for immediate tagging, add a temporary tag to the
        # manifest to ensure it isn't being GCed. If the manifest *is* for tagging, then since we're
        # creating a new one here, it cannot be GCed (since it isn't referenced by anything yet), so
        # its safe to elide the temp tag operation. If we ever change GC code to collect *all* manifests
        # in a repository for GC, then we will have to reevaluate this optimization at that time.
        if not for_tagging:
            create_temporary_tag_if_necessary(manifest,
                                              temp_tag_expiration_sec)

    # Define the labels for the manifest (if any).
    # TODO: Once the old data model is gone, turn this into a batch operation and make the label
    # application to the manifest occur under the transaction.
    labels = manifest_interface_instance.get_manifest_labels(retriever)
    if labels:
        for key, value in labels.iteritems():
            # NOTE: There can technically be empty label keys via Dockerfile's. We ignore any
            # such `labels`, as they don't really mean anything.
            if not key:
                continue

            media_type = "application/json" if is_json(value) else "text/plain"
            create_manifest_label(manifest, key, value, "manifest", media_type)

    # Return the dictionary of labels to apply (i.e. those labels that cause an action to be taken
    # on the manifest or its resulting tags). We only return those labels either defined on
    # the manifest or shared amongst all the child manifests. We intersect amongst all child manifests
    # to ensure that any action performed is defined in all manifests.
    labels_to_apply = labels or {}
    if child_manifest_label_dicts:
        # When child manifests exist, their shared labels replace the manifest's own labels.
        labels_to_apply = child_manifest_label_dicts[0].viewitems()
        for child_manifest_label_dict in child_manifest_label_dicts[1:]:
            # Intersect the key+values of the labels to ensure we get the exact same result
            # for all the child manifests.
            labels_to_apply = labels_to_apply & child_manifest_label_dict.viewitems(
            )

        labels_to_apply = dict(labels_to_apply)

    return CreatedManifest(manifest=manifest,
                           newly_created=True,
                           labels_to_apply=labels_to_apply)
示例#30
0
def test_tagbackfillworker(clear_all_rows, initialized_db):
    """
    Deletes the new-style tag rows (and, when `clear_all_rows` is set, the manifest rows too),
    backfills every RepositoryTag and checks that the resulting Tag, Manifest, ManifestBlob,
    ManifestLegacyImage and ManifestLabel rows agree with the old-style rows.
    """
    # Remove the new-style rows so we can backfill.
    TagToRepositoryTag.delete().execute()
    Tag.delete().execute()

    if clear_all_rows:
        # Deleted in this fixed order.
        models_to_clear = (
            TagManifestLabelMap,
            ManifestLabel,
            ManifestBlob,
            ManifestLegacyImage,
            TagManifestToManifest,
            Manifest,
        )
        for model in models_to_clear:
            model.delete().execute()

    found_dead_tag = False

    for repository_tag in list(RepositoryTag.select()):
        # The first backfill must do work; a second attempt must be a no-op.
        assert backfill_tag(repository_tag)
        assert not backfill_tag(repository_tag)

        # The new-style tag must mirror the old-style tag.
        tag = TagToRepositoryTag.get(repository_tag=repository_tag).tag
        assert tag.name == repository_tag.name
        assert tag.repository == repository_tag.repository
        assert not tag.hidden
        assert tag.reversion == repository_tag.reversion

        # Lifetimes are stored in seconds on the old row and milliseconds on the new one.
        start_ts = repository_tag.lifetime_start_ts
        if start_ts is not None:
            assert tag.lifetime_start_ms == (start_ts * 1000)
        else:
            assert tag.lifetime_start_ms is None

        end_ts = repository_tag.lifetime_end_ts
        if end_ts is not None:
            assert tag.lifetime_end_ms == (end_ts * 1000)
            found_dead_tag = True
        else:
            assert tag.lifetime_end_ms is None

        assert tag.manifest

        # Tags without an old-style manifest row have nothing further to compare.
        try:
            tag_manifest = TagManifest.get(tag=repository_tag)
        except TagManifest.DoesNotExist:
            continue

        map_row = TagManifestToManifest.get(tag_manifest=tag_manifest)
        assert not map_row.broken

        # The manifest row must carry the same data as the old-style manifest.
        manifest_row = map_row.manifest
        assert manifest_row.manifest_bytes == tag_manifest.json_data
        assert manifest_row.digest == tag_manifest.digest
        assert manifest_row.repository == tag_manifest.tag.repository

        assert tag.manifest == map_row.manifest

        legacy_image = ManifestLegacyImage.get(manifest=manifest_row).image
        assert tag_manifest.tag.image == legacy_image

        # Expected blobs: the storage of the tagged image plus that of each ancestor image.
        expected_storages = {tag_manifest.tag.image.storage.id}
        expected_storages.update(
            Image.get(id=parent_image_id).storage_id
            for parent_image_id in tag_manifest.tag.image.ancestor_id_list())

        manifest_blobs = ManifestBlob.select().where(
            ManifestBlob.manifest == manifest_row)
        found_storages = set(mb.blob_id for mb in manifest_blobs)
        assert expected_storages == found_storages

        # Ensure the labels were copied over.
        old_label_rows = list(TagManifestLabel.select().where(
            TagManifestLabel.annotated == tag_manifest))
        expected_labels = set(row.label_id for row in old_label_rows)

        new_label_rows = ManifestLabel.select().where(
            ManifestLabel.manifest == manifest_row)
        found_labels = set(row.label_id for row in new_label_rows)
        assert found_labels == expected_labels

    # Verify at the repository level.
    for repository in list(Repository.select()):
        # `== False` builds the ORM query expression; it is not a Python truth test.
        tags = RepositoryTag.select().where(
            RepositoryTag.repository == repository,
            RepositoryTag.hidden == False)
        oci_tags = Tag.select().where(Tag.repository == repository)
        assert len(tags) == len(oci_tags)
        assert {t.name for t in tags} == {t.name for t in oci_tags}

        for repo_tag in tags:
            tag_manifest = TagManifest.get(tag=repo_tag)
            ttr = TagToRepositoryTag.get(repository_tag=repo_tag)
            manifest = ttr.tag.manifest

            assert tag_manifest.json_data == manifest.manifest_bytes
            assert tag_manifest.digest == manifest.digest
            assert repo_tag.image == ManifestLegacyImage.get(
                manifest=manifest).image
            assert repo_tag.lifetime_start_ts == (
                ttr.tag.lifetime_start_ms / 1000)

            if not repo_tag.lifetime_end_ts:
                assert ttr.tag.lifetime_end_ms is None
            else:
                assert repo_tag.lifetime_end_ts == (
                    ttr.tag.lifetime_end_ms / 1000)

    # The fixture data must have included at least one tag with an end time.
    assert found_dead_tag