def test_known_issue_schema1(registry_model):
    """Regression test: a known-problematic schema1 manifest round-trips through the model."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(test_dir, "../../../image/docker/test/validate_manifest_known_issue.json")
    with open(path, "r") as f:
        manifest_bytes = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(manifest_bytes))

    # Register each referenced blob so the manifest can be stored.
    for blob_digest in manifest.local_blob_digests:
        _populate_blob(blob_digest)

    digest = manifest.digest
    assert digest == "sha256:44518f5a4d1cb5b7a6347763116fb6e10f6a8563b6c40bb389a0a982f0a9f47a"

    # Create the manifest in the database.
    repository_ref = registry_model.lookup_repository("devtable", "simple")
    created_manifest, _ = registry_model.create_manifest_and_retarget_tag(
        repository_ref, manifest, "latest", storage
    )
    assert created_manifest
    assert created_manifest.digest == manifest.digest
    assert (
        created_manifest.internal_manifest_bytes.as_encoded_str()
        == manifest.bytes.as_encoded_str()
    )

    # Look it up again and validate.
    found = registry_model.lookup_manifest_by_digest(
        repository_ref, manifest.digest, allow_dead=True
    )
    assert found
    assert found.digest == digest
    assert found.internal_manifest_bytes.as_encoded_str() == manifest.bytes.as_encoded_str()
    assert found.get_parsed_manifest().digest == digest
def test_validate_manifest_with_unicode():
    """A manifest containing unicode content parses under validation and keeps its digest."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_path = os.path.join(test_dir, "validated_manifest_with_unicode.json")
    with open(fixture_path, "r") as f:
        raw = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw), validate=True)
    assert (
        manifest.digest
        == "sha256:815ecf45716a96b19d54d911e6ace91f78bab26ca0dd299645d9995dacd9f1ef"
    )
    assert manifest.created_datetime
def test_validate_manifest_with_unicode_encoded():
    """A manifest with escaped-unicode content (stored as a DB row dump) parses correctly."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_path = os.path.join(test_dir, "manifest_unicode_row.json")
    with open(fixture_path, "r") as f:
        # The fixture is a JSON dump of DB rows; the manifest lives in the first row.
        raw = json.loads(f.read())[0]["json_data"]

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw), validate=True)
    assert (
        manifest.digest
        == "sha256:dde3714ce7e23edc6413aa85c0b42792e4f2f79e9ea36afc154d63ff3d04e86c"
    )
    assert manifest.created_datetime
def test_validate_manifest():
    """Baseline: a known-good manifest validates and produces the expected digest."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_path = os.path.join(test_dir, "validated_manifest.json")
    with open(fixture_path, "r") as f:
        raw = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw), validate=True)
    assert (
        manifest.digest
        == "sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571"
    )
    assert manifest.created_datetime
def test_validate_manifest_with_none_metadata_layer(with_key):
    """A layer whose v1 metadata is None must still build, validate, and reparse."""
    builder = DockerSchema1ManifestBuilder("somenamespace", "somerepo", "sometag")
    builder.add_layer("sha256:abcde", None)

    built = builder.build(with_key, ensure_ascii=False)
    built._validate()

    # Ensure the manifest can be reloaded.
    serialized = built.bytes.as_encoded_str()
    DockerSchema1Manifest(Bytes.for_string_or_unicode(serialized))
def test_validate_manifest_known_issue():
    """A historically-problematic manifest still parses; its last layer has no author."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_path = os.path.join(test_dir, "validate_manifest_known_issue.json")
    with open(fixture_path, "r") as f:
        raw = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw))
    assert (
        manifest.digest
        == "sha256:44518f5a4d1cb5b7a6347763116fb6e10f6a8563b6c40bb389a0a982f0a9f47a"
    )
    assert manifest.created_datetime

    layers = list(manifest.get_layers(None))
    assert layers[-1].author is None
def test_validate_manifest_with_emoji(with_key):
    """A layer whose author field is a non-ASCII emoji builds, validates, and reparses."""
    builder = DockerSchema1ManifestBuilder("somenamespace", "somerepo", "sometag")
    layer_json = json.dumps({"id": "someid", "author": "😱"}, ensure_ascii=False)
    builder.add_layer("sha256:abcde", layer_json)

    built = builder.build(with_key, ensure_ascii=False)
    built._validate()

    # Ensure the manifest can be reloaded.
    serialized = built.bytes.as_encoded_str()
    DockerSchema1Manifest(Bytes.for_string_or_unicode(serialized))
def test_validate_manifest_with_unencoded_unicode():
    """Raw (unescaped) unicode in a stored manifest parses and preserves the author string."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    fixture_path = os.path.join(test_dir, "manifest_unencoded_unicode.json")
    with open(fixture_path, "r") as f:
        raw = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw))
    assert (
        manifest.digest
        == "sha256:5d8a0f34744a39bf566ba430251adc0cc86587f86aed3ac2acfb897f349777bc"
    )
    assert manifest.created_datetime

    layers = list(manifest.get_layers(None))
    assert layers[-1].author == "Sômé guy"
def test_validate_manifest_with_emoji(with_key):
    """A layer whose author field is a non-ASCII emoji builds, validates, and reparses."""
    builder = DockerSchema1ManifestBuilder('somenamespace', 'somerepo', 'sometag')
    layer_json = json.dumps({'id': 'someid', 'author': u'😱'}, ensure_ascii=False)
    builder.add_layer('sha256:abcde', layer_json)

    built = builder.build(with_key, ensure_ascii=False)
    built._validate()

    # Ensure the manifest can be reloaded.
    serialized = built.bytes.as_encoded_str()
    DockerSchema1Manifest(Bytes.for_string_or_unicode(serialized))
def parse_manifest_from_bytes(manifest_bytes, media_type, validate=True):
    """
    Parses and returns a manifest from the given bytes, for the given media type.

    Raises a ManifestException if the parse fails for some reason.
    """
    assert isinstance(manifest_bytes, Bytes)

    if media_type == DOCKER_SCHEMA2_MANIFEST_CONTENT_TYPE:
        return DockerSchema2Manifest(manifest_bytes)

    if media_type == DOCKER_SCHEMA2_MANIFESTLIST_CONTENT_TYPE:
        return DockerSchema2ManifestList(manifest_bytes)

    # Schema1 has several content types (signed/unsigned); validation is optional.
    if media_type in DOCKER_SCHEMA1_CONTENT_TYPES:
        return DockerSchema1Manifest(manifest_bytes, validate=validate)

    raise ManifestException('Unknown or unsupported manifest media type `%s`' % media_type)
def test_get_or_create_manifest_invalid_image(initialized_db):
    """A manifest referencing a nonexistent parent image id must not be created."""
    repository = get_repository("devtable", "simple")
    latest_tag = get_tag(repository, "latest")
    parsed = DockerSchema1Manifest(
        Bytes.for_string_or_unicode(latest_tag.manifest.manifest_bytes), validate=False
    )

    # Reuse a real blob digest but point its layer at an invalid parent image.
    builder = DockerSchema1ManifestBuilder("devtable", "simple", "anothertag")
    builder.add_layer(parsed.blob_digests[0], '{"id": "foo", "parent": "someinvalidimageid"}')
    sample_manifest_instance = builder.build(docker_v2_signing_key)

    created_manifest = get_or_create_manifest(repository, sample_manifest_instance, storage)
    assert created_manifest is None
def test_valid_manifest():
    """Parses MANIFEST_BYTES without validation and checks every derived field."""
    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES), validate=False)

    assert len(manifest.signatures) == 1
    assert manifest.namespace == ""
    assert manifest.repo_name == "hello-world"
    assert manifest.tag == "latest"
    assert manifest.image_ids == {"sizedid", "someid", "anotherid"}
    assert manifest.parent_image_ids == {"someid", "anotherid"}
    assert manifest.layers_compressed_size == 1234
    assert manifest.config_media_type is None

    # Layers are ordered base-first: (image_id, parent_image_id, compressed_size).
    expected_layers = [
        ("anotherid", None, None),
        ("someid", "anotherid", None),
        ("sizedid", "someid", 1234),
    ]
    assert len(manifest.layers) == len(expected_layers)
    for layer, (image_id, parent_id, size) in zip(manifest.layers, expected_layers):
        assert layer.v1_metadata.image_id == image_id
        assert layer.v1_metadata.parent_image_id == parent_id
        assert layer.compressed_size == size

    assert manifest.leaf_layer == manifest.layers[2]
    assert manifest.created_datetime is None

    # The unsigned form keeps identity fields but changes the digest.
    unsigned = manifest.unsigned()
    assert unsigned.namespace == manifest.namespace
    assert unsigned.repo_name == manifest.repo_name
    assert unsigned.tag == manifest.tag
    assert unsigned.layers == manifest.layers
    assert unsigned.blob_digests == manifest.blob_digests
    assert unsigned.digest != manifest.digest

    image_layers = list(manifest.get_layers(None))
    assert len(image_layers) == 3
    for index, image_layer in enumerate(image_layers):
        assert image_layer.layer_id == manifest.layers[index].v1_metadata.image_id
        assert image_layer.blob_digest == manifest.layers[index].digest
        assert image_layer.command == manifest.layers[index].v1_metadata.command
def build_schema1(self, namespace, repo_name, tag_name, images, blobs, options, arch="amd64"):
    """
    Builds and signs a schema1 manifest for the given images.

    Each image's bytes are registered in `blobs`, keyed by sha256 checksum. Images are
    added base-first (the input is leaf-first, hence `reversed`). If
    `options.manifest_invalid_blob_references` is set, the layer checksum is replaced
    with a digest of a made-up payload so the manifest references a blob that does not
    exist. Returns the built (signed) manifest after a parse sanity-check.
    """
    builder = DockerSchema1ManifestBuilder(namespace, repo_name, tag_name, arch)

    for image in reversed(images):
        assert image.urls is None

        checksum = "sha256:" + hashlib.sha256(image.bytes).hexdigest()
        blobs[checksum] = image.bytes

        # If invalid blob references were requested, just make it up.
        # BUGFIX: hashlib.sha256 requires bytes; the original passed a str,
        # which raises TypeError on Python 3.
        if options.manifest_invalid_blob_references:
            checksum = "sha256:" + hashlib.sha256(b"notarealthing").hexdigest()

        layer_dict = {"id": image.id, "parent": image.parent_id}
        if image.config is not None:
            layer_dict["config"] = image.config

        if image.size is not None:
            layer_dict["Size"] = image.size

        if image.created is not None:
            layer_dict["created"] = image.created

        builder.add_layer(checksum, json.dumps(layer_dict, ensure_ascii=options.ensure_ascii))

    # Build the manifest.
    built = builder.build(self.jwk, ensure_ascii=options.ensure_ascii)

    # Validate it before we send it.
    DockerSchema1Manifest(built.bytes)
    return built
def test_legacy_layers(name, config_sha):
    """Schema1 and schema2 forms of the same image must yield identical legacy layers."""
    retriever = ContentRetrieverForTesting({
        config_sha: _get_test_file_contents(name, 'config').as_encoded_str(),
    })

    schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2'))
    schema1 = DockerSchema1Manifest(_get_test_file_contents(name, 'schema1'), validate=False)

    # Check legacy layers
    schema2_legacy_layers = list(schema2.generate_legacy_layers({}, retriever))
    schema1_legacy_layers = list(schema1.generate_legacy_layers({}, retriever))
    assert len(schema1_legacy_layers) == len(schema2_legacy_layers)

    for s1_layer, s2_layer in zip(schema1_legacy_layers, schema2_legacy_layers):
        assert s1_layer.content_checksum == s2_layer.content_checksum
        assert s1_layer.comment == s2_layer.comment
        assert s1_layer.command == s2_layer.command
def test_2to1_conversion(name, config_sha):
    """Converting a schema2 manifest down to schema1 must match the reference schema1 file."""
    retriever = ContentRetrieverForTesting({
        config_sha: _get_test_file_contents(name, 'config').as_encoded_str(),
    })

    schema2 = DockerSchema2Manifest(_get_test_file_contents(name, 'schema2'))
    schema1 = DockerSchema1Manifest(_get_test_file_contents(name, 'schema1'), validate=False)

    converted = schema2.get_schema1_manifest('devtable', 'somerepo', 'latest', retriever)
    assert len(converted.layers) == len(schema1.layers)

    # Image ids are regenerated during conversion, so compare parents via a mapping.
    image_id_map = {}
    for schema1_layer, converted_layer in zip(schema1.layers, converted.layers):
        image_id_map[schema1_layer.v1_metadata.image_id] = converted_layer.v1_metadata.image_id
        assert str(schema1_layer.digest) == str(converted_layer.digest)

        schema1_parent_id = schema1_layer.v1_metadata.parent_image_id
        converted_parent_id = converted_layer.v1_metadata.parent_image_id
        assert (schema1_parent_id is None) == (converted_parent_id is None)
        if schema1_parent_id is not None:
            assert image_id_map[schema1_parent_id] == converted_parent_id

        assert schema1_layer.v1_metadata.created == converted_layer.v1_metadata.created
        assert schema1_layer.v1_metadata.comment == converted_layer.v1_metadata.comment
        assert schema1_layer.v1_metadata.command == converted_layer.v1_metadata.command
        assert schema1_layer.v1_metadata.labels == converted_layer.v1_metadata.labels

        schema1_container_config = json.loads(schema1_layer.raw_v1_metadata)['container_config']
        converted_container_config = json.loads(converted_layer.raw_v1_metadata)['container_config']
        assert schema1_container_config == converted_container_config
def test_valid_manifest():
    """Parses MANIFEST_BYTES without validation and checks every derived field."""
    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(MANIFEST_BYTES), validate=False)

    assert len(manifest.signatures) == 1
    assert manifest.namespace == ''
    assert manifest.repo_name == 'hello-world'
    assert manifest.tag == 'latest'
    assert manifest.image_ids == {'someid', 'anotherid'}
    assert manifest.parent_image_ids == {'anotherid'}

    # Layers are ordered base-first: (image_id, parent_image_id).
    expected_layers = [('anotherid', None), ('someid', 'anotherid')]
    assert len(manifest.layers) == len(expected_layers)
    for layer, (image_id, parent_id) in zip(manifest.layers, expected_layers):
        assert layer.v1_metadata.image_id == image_id
        assert layer.v1_metadata.parent_image_id == parent_id
        assert layer.compressed_size is None

    assert manifest.leaf_layer == manifest.layers[1]
    assert manifest.created_datetime is None

    # The unsigned form keeps identity fields but changes the digest.
    unsigned = manifest.unsigned()
    assert unsigned.namespace == manifest.namespace
    assert unsigned.repo_name == manifest.repo_name
    assert unsigned.tag == manifest.tag
    assert unsigned.layers == manifest.layers
    assert unsigned.blob_digests == manifest.blob_digests
    assert unsigned.digest != manifest.digest

    image_layers = list(manifest.get_layers(None))
    assert len(image_layers) == 2
    for index, image_layer in enumerate(image_layers):
        assert image_layer.layer_id == manifest.layers[index].v1_metadata.image_id
        assert image_layer.blob_digest == manifest.layers[index].digest
        assert image_layer.command == manifest.layers[index].v1_metadata.command
def test_validate_manifest_without_metadata():
    """Stripping v1 metadata changes the digest but preserves layers and identity fields."""
    test_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(test_dir, "validated_manifest.json"), "r") as f:
        raw = f.read()

    manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(raw), validate=True)
    digest = manifest.digest
    assert digest == "sha256:b5dc4f63fdbd64f34f2314c0747ef81008f9fcddce4edfc3fd0e8ec8b358d571"
    assert manifest.created_datetime

    with_metadata_removed = manifest._unsigned_builder().with_metadata_removed().build()
    assert with_metadata_removed.leaf_layer_v1_image_id == manifest.leaf_layer_v1_image_id

    manifest_layers = list(manifest.get_layers(None))
    stripped_layers = list(with_metadata_removed.get_layers(None))
    assert len(manifest_layers) == len(stripped_layers)

    # Every per-layer attribute must survive metadata removal.
    for built_layer, stripped_layer in zip(manifest_layers, stripped_layers):
        assert built_layer.layer_id == stripped_layer.layer_id
        assert built_layer.compressed_size == stripped_layer.compressed_size
        assert built_layer.command == stripped_layer.command
        assert built_layer.comment == stripped_layer.comment
        assert built_layer.author == stripped_layer.author
        assert built_layer.blob_digest == stripped_layer.blob_digest
        assert built_layer.created_datetime == stripped_layer.created_datetime

    # Only the digest changes; identity fields are preserved.
    assert with_metadata_removed.digest != manifest.digest
    assert with_metadata_removed.namespace == manifest.namespace
    assert with_metadata_removed.repo_name == manifest.repo_name
    assert with_metadata_removed.tag == manifest.tag
    assert with_metadata_removed.created_datetime == manifest.created_datetime
    assert with_metadata_removed.checksums == manifest.checksums
    assert with_metadata_removed.image_ids == manifest.image_ids
    assert with_metadata_removed.parent_image_ids == manifest.parent_image_ids
def retarget_tag(
    tag_name,
    manifest_id,
    is_reversion=False,
    now_ms=None,
    raise_on_error=False,
):
    """
    Creates or updates a tag with the specified name to point to the given manifest under its
    repository.

    If this action is a reversion to a previous manifest, is_reversion should be set to True.
    Returns the newly created tag row or None on error.
    """
    # Load the manifest row (with its media type joined in a single query).
    try:
        manifest = (Manifest.select(
            Manifest, MediaType).join(MediaType).where(Manifest.id == manifest_id).get())
    except Manifest.DoesNotExist:
        if raise_on_error:
            raise RetargetTagException("Manifest requested no longer exists")

        return None

    # CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different
    # name.
    if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES:
        try:
            parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(
                manifest.manifest_bytes), validate=False)
            if parsed.tag != tag_name:
                logger.error(
                    "Tried to re-target schema1 manifest with tag `%s` to tag `%s",
                    parsed.tag,
                    tag_name,
                )
                return None
        except MalformedSchema1Manifest as msme:
            logger.exception("Could not parse schema1 manifest")
            if raise_on_error:
                raise RetargetTagException(msme)

            return None

    # NOTE(review): legacy_image and now_ts are computed but never used in this
    # function — candidates for removal; confirm no side effects are relied upon.
    legacy_image = get_legacy_image_for_manifest(manifest)
    now_ms = now_ms or get_epoch_timestamp_ms()
    now_ts = int(now_ms // 1000)

    with db_transaction():
        # Lookup an existing tag in the repository with the same name and, if present, mark it
        # as expired.
        existing_tag = get_tag(manifest.repository_id, tag_name)
        if existing_tag is not None:
            _, okay = set_tag_end_ms(existing_tag, now_ms)

            # TODO: should we retry here and/or use a for-update?
            if not okay:
                return None

        # Create a new tag pointing to the manifest with a lifetime start of now.
        created = Tag.create(
            name=tag_name,
            repository=manifest.repository_id,
            lifetime_start_ms=now_ms,
            reversion=is_reversion,
            manifest=manifest,
            tag_kind=Tag.tag_kind.get_id("tag"),
        )

        return created
def test_malformed_manifests(json_data):
    """Malformed schema1 payloads must raise MalformedSchema1Manifest on parse."""
    with pytest.raises(MalformedSchema1Manifest):
        DockerSchema1Manifest(Bytes.for_string_or_unicode(json_data))
def _backfill_manifest(tag_manifest):
    """
    Backfills new-style Manifest rows for the given TagManifest row.

    Returns True when the backfill completed (including when the tag manifest no
    longer exists), and False when preempted by a concurrent writer.
    """
    logger.info('Backfilling manifest for tag manifest %s', tag_manifest.id)

    # Ensure that a mapping row doesn't already exist. If it does, we've been preempted.
    if lookup_manifest_map_row(tag_manifest):
        return False

    # Parse the manifest. If we cannot parse, then we treat the manifest as broken and just emit it
    # without additional rows or data, as it will eventually not be useful.
    is_broken = False
    try:
        manifest = DockerSchema1Manifest(Bytes.for_string_or_unicode(
            tag_manifest.json_data), validate=False)
    except ManifestException:
        logger.exception('Exception when trying to parse manifest %s', tag_manifest.id)
        manifest = BrokenManifest(tag_manifest.digest, tag_manifest.json_data)
        is_broken = True

    # Lookup the storages for the digests.
    root_image = tag_manifest.tag.image
    repository = tag_manifest.tag.repository

    # Maps blob content checksum -> ImageStorage id, seeded with the root image.
    image_storage_id_map = {
        root_image.storage.content_checksum: root_image.storage.id
    }

    try:
        parent_images = get_parent_images(repository.namespace_user.username,
                                          repository.name, root_image)
    except DataModelException:
        logger.exception(
            'Exception when trying to load parent images for manifest `%s`',
            tag_manifest.id)
        parent_images = {}
        is_broken = True

    for parent_image in parent_images:
        image_storage_id_map[
            parent_image.storage.content_checksum] = parent_image.storage.id

    # Ensure that all the expected blobs have been found. If not, we lookup the blob under the repo
    # and add its storage ID. If the blob is not found, we mark the manifest as broken.
    storage_ids = set()
    try:
        for blob_digest in manifest.get_blob_digests_for_translation():
            if blob_digest in image_storage_id_map:
                storage_ids.add(image_storage_id_map[blob_digest])
            else:
                logger.debug(
                    'Blob `%s` not found in images for manifest `%s`; checking repo',
                    blob_digest, tag_manifest.id)
                try:
                    blob_storage = get_repo_blob_by_digest(
                        repository.namespace_user.username, repository.name,
                        blob_digest)
                    storage_ids.add(blob_storage.id)
                except BlobDoesNotExist:
                    logger.debug(
                        'Blob `%s` not found in repo for manifest `%s`',
                        blob_digest, tag_manifest.id)
                    is_broken = True
    except MalformedSchema1Manifest:
        logger.warning(
            'Found malformed schema 1 manifest during blob backfill')
        is_broken = True

    with db_transaction():
        # Re-retrieve the tag manifest to ensure it still exists and we're pointing at the correct tag.
        try:
            tag_manifest = TagManifest.get(id=tag_manifest.id)
        except TagManifest.DoesNotExist:
            # The tag manifest is gone; nothing left to backfill.
            return True

        # Ensure it wasn't already created.
        if lookup_manifest_map_row(tag_manifest):
            return False

        # Check for a pre-existing manifest matching the digest in the repository. This can happen
        # if we've already created the manifest row (typically for tag reverision).
        try:
            manifest_row = Manifest.get(digest=manifest.digest,
                                        repository=tag_manifest.tag.repository)
        except Manifest.DoesNotExist:
            # Create the new-style rows for the manifest.
            try:
                manifest_row = populate_manifest(tag_manifest.tag.repository,
                                                 manifest,
                                                 tag_manifest.tag.image,
                                                 storage_ids)
            except IntegrityError:
                # Pre-empted.
                return False

        # Create the mapping row. If we find another was created for this tag manifest in the
        # meantime, then we've been preempted.
        try:
            TagManifestToManifest.create(tag_manifest=tag_manifest,
                                         manifest=manifest_row,
                                         broken=is_broken)
        except IntegrityError:
            return False

        # Backfill any labels on the manifest.
        _backfill_labels(tag_manifest, manifest_row, repository)

    return True
def retarget_tag(tag_name, manifest_id, is_reversion=False, now_ms=None, adjust_old_model=True):
    """
    Creates or updates a tag with the specified name to point to the given manifest under its
    repository.

    If this action is a reversion to a previous manifest, is_reversion should be set to True.
    If adjust_old_model is True, schema1 manifests also get pre-OCI RepositoryTag/TagManifest
    linkage rows so they remain pullable via the old model.
    Returns the newly created tag row or None on error.
    """
    try:
        manifest = (Manifest.select(
            Manifest, MediaType).join(MediaType).where(Manifest.id == manifest_id).get())
    except Manifest.DoesNotExist:
        return None

    # CHECK: Make sure that we are not mistargeting a schema 1 manifest to a tag with a different
    # name.
    if manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES:
        try:
            parsed = DockerSchema1Manifest(Bytes.for_string_or_unicode(
                manifest.manifest_bytes), validate=False)
            if parsed.tag != tag_name:
                logger.error(
                    "Tried to re-target schema1 manifest with tag `%s` to tag `%s",
                    parsed.tag,
                    tag_name,
                )
                return None
        except MalformedSchema1Manifest:
            logger.exception("Could not parse schema1 manifest")
            return None

    legacy_image = get_legacy_image_for_manifest(manifest)
    now_ms = now_ms or get_epoch_timestamp_ms()
    # FIX: use integer floor division instead of int(now_ms / 1000); the float round-trip
    # loses precision for large values and diverges from the sibling implementation.
    now_ts = now_ms // 1000

    with db_transaction():
        # Lookup an existing tag in the repository with the same name and, if present, mark it
        # as expired.
        existing_tag = get_tag(manifest.repository_id, tag_name)
        if existing_tag is not None:
            _, okay = set_tag_end_ms(existing_tag, now_ms)

            # TODO: should we retry here and/or use a for-update?
            if not okay:
                return None

        # Create a new tag pointing to the manifest with a lifetime start of now.
        created = Tag.create(
            name=tag_name,
            repository=manifest.repository_id,
            lifetime_start_ms=now_ms,
            reversion=is_reversion,
            manifest=manifest,
            tag_kind=Tag.tag_kind.get_id("tag"),
        )

        # TODO: Remove the linkage code once RepositoryTag is gone.
        # If this is a schema 1 manifest, then add a TagManifest linkage to it. Otherwise, it will
        # only be pullable via the new OCI model.
        if adjust_old_model:
            if (manifest.media_type.name in DOCKER_SCHEMA1_CONTENT_TYPES
                    and legacy_image is not None):
                old_style_tag = RepositoryTag.create(
                    repository=manifest.repository_id,
                    image=legacy_image,
                    name=tag_name,
                    lifetime_start_ts=now_ts,
                    reversion=is_reversion,
                )
                TagToRepositoryTag.create(tag=created,
                                          repository_tag=old_style_tag,
                                          repository=manifest.repository_id)

                tag_manifest = TagManifest.create(
                    tag=old_style_tag,
                    digest=manifest.digest,
                    json_data=manifest.manifest_bytes)
                TagManifestToManifest.create(tag_manifest=tag_manifest,
                                             manifest=manifest,
                                             repository=manifest.repository_id)

        return created