def retrieve_manifest(
    image_reference: str,
    credentials_lookup: typing.Callable[[str, oa.Privileges, bool], oa.OciConfig],
    absent_ok: bool = False,
) -> typing.Optional[om.OciImageManifest]:
    '''
    retrieves the OCI Artifact manifest for the specified reference, and returns it in a
    deserialised form.

    If the manifest is absent, om.OciImageNotFoundException is raised, unless `absent_ok`
    is truthy - in which case None is returned instead.

    Note: the credentials_lookup annotation previously referenced the (undefined) name
    `image_reference` as its first argument type; corrected to `str`.
    '''
    client = oc.Client(credentials_lookup=credentials_lookup)
    try:
        return client.manifest(image_reference=image_reference)
    except om.OciImageNotFoundException:
        if absent_ok:
            return None
        # bare raise preserves the original exception and traceback
        raise
def replicate_artifact(
    src_image_reference: str,
    tgt_image_reference: str,
    credentials_lookup: typing.Callable[[str, oa.Privileges, bool], oa.OciConfig],
):
    '''
    verbatimly replicate the OCI Artifact from src -> tgt without taking any assumptions
    about the transported contents. This in particular allows contents to be replicated
    that are not e.g. "docker-compliant" OCI Images.

    If a blob is absent in src (observed for the cfg-blob of v2 / legacy images), this
    function falls back to a non-verbatim replication via
    retrieve_container_image / publish_container_image and returns early.
    '''
    src_image_reference = ou.normalise_image_reference(src_image_reference)
    tgt_image_reference = ou.normalise_image_reference(tgt_image_reference)

    client = oc.Client(credentials_lookup=credentials_lookup)

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    if schema_version == 1:
        manifest = dacite.from_dict(
            data_class=om.OciImageManifestV1,
            data=json.loads(raw_manifest),
        )
        manifest = client.manifest(src_image_reference)
    elif schema_version == 2:
        manifest = dacite.from_dict(
            data_class=om.OciImageManifest,
            data=json.loads(raw_manifest),
        )

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle manifest (may be absent for v2 / legacy images)
        is_manifest = idx == 0

        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_manifest,
        )
        if not blob_res:
            # fallback to non-verbatim replication
            # XXX we definitely should _not_ read entire blobs into memory
            # this is done by the used containerregistry lib, so we do not make things worse
            # here - however this must not remain so!
            logger.warning(
                'falling back to non-verbatim replication '
                # fixed: this was a plain string - the {..=} placeholders were logged verbatim
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            with tempfile.NamedTemporaryFile() as tmp_fh:
                retrieve_container_image(
                    image_reference=src_image_reference,
                    credentials_lookup=credentials_lookup,
                    outfileobj=tmp_fh,
                )
                publish_container_image(
                    image_reference=tgt_image_reference,
                    image_file_obj=tmp_fh,
                    credentials_lookup=credentials_lookup,
                )
            return

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )
def replicate_artifact(
    src_image_reference: str,
    tgt_image_reference: str,
    credentials_lookup: oa.credentials_lookup = None,
    routes: oc.OciRoutes = oc.OciRoutes(),
    oci_client: oc.Client = None,
):
    '''
    verbatimly replicate the OCI Artifact from src -> tgt without taking any assumptions
    about the transported contents. This in particular allows contents to be replicated
    that are not e.g. "docker-compliant" OCI Images.

    pass either `credentials_lookup` (optionally with `routes`), XOR `oci_client`.

    NOTE(review): the `routes` default is evaluated once at import time and thus shared
    across calls - presumably OciRoutes is effectively immutable; confirm.
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError(
            'either credentials-lookup + routes, xor client must be passed')

    src_image_reference = ou.normalise_image_reference(src_image_reference)
    tgt_image_reference = ou.normalise_image_reference(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    if schema_version == 1:
        manifest = dacite.from_dict(
            data_class=om.OciImageManifestV1,
            data=json.loads(raw_manifest),
        )
        manifest = client.manifest(src_image_reference)
    elif schema_version == 2:
        manifest = dacite.from_dict(
            data_class=om.OciImageManifest,
            data=json.loads(raw_manifest),
        )

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle manifest (may be absent for v2 / legacy images)
        is_manifest = idx == 0

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            logger.info(
                f'skipping blob download {layer.digest=} - already exists in tgt'
            )
            continue  # no need to download if blob already exists in tgt

        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_manifest,
        )
        if not blob_res and is_manifest:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                # fixed: this was a plain string - the {..=} placeholders were logged verbatim
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            fake_cfg = od.docker_cfg()  # TODO: check whether we need to pass-in cfg
            fake_cfg_dict = dataclasses.asdict(fake_cfg)
            fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

            client.put_blob(
                image_reference=tgt_image_reference,
                digest=f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}',
                octets_count=len(fake_cfg_raw),
                data=fake_cfg_raw,
            )
            continue

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )
routes = oc.OciRoutes(base_api_lookup) install_logging_handler &= ci.util._running_on_ci() if install_logging_handler: try: if oci_request_handler_requirements_fulfilled(): _add_oci_request_logging_handler_unless_already_registered() else: logger.warning('skipping oci request logger installation') except: # do not fail just because of logging-issue import traceback traceback.print_exc() return oc.Client( credentials_lookup=credentials_lookup, routes=routes, ) class _OciRequestHandler(logging.Handler): def __init__( self, level, es_client, *args, **kwargs, ) -> None: self.es_client = es_client super().__init__(level=level, *args, **kwargs) def emit(self, record: logging.LogRecord) -> None:
def oci_client(credentials_lookup=None):
    '''
    convenience factory for an OCI registry client.

    If no credentials_lookup is passed, a default one is obtained via `oci_cfg_lookup()`.
    Fixed: the original used `credentials_lookup=oci_cfg_lookup()` as the default, which
    runs the lookup (and any side effects) once at import time and freezes the result;
    the default is now resolved lazily per call.
    '''
    if credentials_lookup is None:
        credentials_lookup = oci_cfg_lookup()
    return oc.Client(credentials_lookup=credentials_lookup)
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    replicate the given OCI Artifact from src_image_reference to tgt_image_reference.

    try to be verbatim, if possible (i.e. target should reflect source as close as
    possible). Whether or not a fully identical replication is possible depends on the
    source artifact and chosen replication `mode`:

    If source artifact is a "legacy / v1" "docker image" (as it used to be created from
    older versions of docker) verbatim replication is not possible, because modern (v2)
    OCI Registries (such as GCR) will not accept those manifests. Therefore, conversion
    to "v2" is required (done transparently by this function).

    If source artifact is a "multiarch" image (oci.model.OciImageManifestList), OCI
    registries show different behaviour if ReplicationMode.REGISTRY_DEFAULTS is used.
    Some registries will in this case return a single-image manifest, instead of the
    multiarch-manifest (in this case, the replication result will only be a single-image).

    Use ReplicationMode.PREFER_MULTIARCH or ReplicationMode.NORMALISE_TO_MULTIARCH to
    prevent this.

    If platform_filter is specified (only applied for multi-arch images), the replication
    result will obviously also deviate from src, depending on the filter semantics.

    pass either `credentials_lookup`, `routes`, OR `oci_client`
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    # map replication mode to the accept-header used for manifest retrieval
    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
            f'''
            manifest {src_image_reference=} is in legacy-format (schemaVersion==1).
            Cannot verbatimly replicate
            '''
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        manifest = json.loads(raw_manifest)
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag
            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # cp manifests to tuple, because we _might_ modify if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        # fixed: was `oci_client`, which is None if credentials_lookup was passed
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )
                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            if manifest_dirty:
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )
            return res, tgt_image_reference, raw_manifest.encode('utf-8')
        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        # fixed: was `oci_client.to_digest_hash` (None if lookup was passed)
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )

                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,  # fixed: was `oci_client`
                )

                # force usage of digest-tag (symbolic tag required for manifest-list)
                tgt_image_ref = \
                    f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,  # fixed: was `oci_client`
                )

                # wrap the single-image manifest into a synthetic manifest-list
                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(  # fixed: was `oci_client.put_manifest`
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )
                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest),
            )

            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for v2 / legacy images)
        is_cfg_blob = idx == 0

        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise cfg-blob (because source-image contained a
            # v1-manifest) then there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replicatation')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue  # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )
                layer_hash = hashlib.sha256()
                # wbits=MAX_WBITS|16 selects gzip-wrapped deflate streams
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')

                continue  # we may still skip the upload, of course

        # todo: consider silencing warning if we do v1->v2-conversion (cfg-blob will never exist
        # in this case)
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            # tee the blob stream: hash the uncompressed content while uploading verbatim
            def intercept_chunks(blob_res):
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk

                uncompressed_layer_digests.append(f'sha256:{uncompressed_layer_hash.hexdigest()}')

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        # synthesise a v2 cfg-blob from the legacy manifest's first history entry
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])
        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }
        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch-on altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')