Exemplo n.º 1
0
def retrieve_manifest(
    image_reference: str,
    credentials_lookup: typing.Callable[[image_reference, oa.Privileges, bool],
                                        oa.OciConfig],
    absent_ok: bool = False,
) -> om.OciImageManifest:
    '''
    fetch the manifest of the OCI Artifact identified by `image_reference` and hand it
    back in deserialised form.

    If the lookup raises `om.OciImageNotFoundException`, `None` is returned if `absent_ok`
    is set; otherwise, the exception is propagated to the caller.
    '''
    oci_client = oc.Client(credentials_lookup=credentials_lookup)

    try:
        found_manifest = oci_client.manifest(image_reference=image_reference)
    except om.OciImageNotFoundException:
        # only swallow the "not found" case if the caller explicitly asked for it
        if not absent_ok:
            raise
        found_manifest = None

    return found_manifest
Exemplo n.º 2
0
def replicate_artifact(
    src_image_reference: str,
    tgt_image_reference: str,
    credentials_lookup: typing.Callable[[image_reference, oa.Privileges, bool],
                                        oa.OciConfig],
):
    '''
    verbatimly replicate the OCI Artifact from src -> tgt without taking any assumptions
    about the transported contents. This in particular allows contents to be replicated
    that are not e.g. "docker-compliant" OCI Images.

    All blobs referenced by the source manifest are copied to the target, followed by the
    unaltered (raw) source manifest. If the first blob cannot be retrieved from the source,
    replication falls back to a non-verbatim download + re-publish of the whole image.

    @param src_image_reference: image reference to replicate from
    @param tgt_image_reference: image reference to replicate to
    @param credentials_lookup:  callback passed to `oc.Client` (and to the fallback helpers)
    @raises NotImplementedError: if the manifest's schemaVersion is neither 1 nor 2
    '''
    src_image_reference = ou.normalise_image_reference(src_image_reference)
    tgt_image_reference = ou.normalise_image_reference(tgt_image_reference)

    client = oc.Client(credentials_lookup=credentials_lookup)

    # we need the unaltered - manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference, ).text
    manifest_dict = json.loads(raw_manifest)
    schema_version = int(manifest_dict['schemaVersion'])
    if schema_version == 1:
        # the client is asked for the manifest (a previously done local parsing into
        # om.OciImageManifestV1 was immediately discarded, and is thus omitted)
        manifest = client.manifest(src_image_reference)
    elif schema_version == 2:
        manifest = dacite.from_dict(data_class=om.OciImageManifest,
                                    data=manifest_dict)
    else:
        # fail early and explicitly, rather than with an AttributeError on `.blobs()` below
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle manifest (may be absent for v2 / legacy images)
        is_manifest = idx == 0

        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_manifest,
        )
        if not blob_res:
            # fallback to non-verbatim replication
            # XXX we definitely should _not_ read entire blobs into memory
            # this is done by the used containerregistry lib, so we do not make things worse
            # here - however this must not remain so!
            logger.warning('falling back to non-verbatim replication '
                           f'{src_image_reference=} {tgt_image_reference=}')
            with tempfile.NamedTemporaryFile() as tmp_fh:
                retrieve_container_image(
                    image_reference=src_image_reference,
                    credentials_lookup=credentials_lookup,
                    outfileobj=tmp_fh,
                )
                publish_container_image(
                    image_reference=tgt_image_reference,
                    image_file_obj=tmp_fh,
                    credentials_lookup=credentials_lookup,
                )
            # the whole image was replicated by the fallback - nothing left to do
            return

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    # finally, publish the unmodified manifest
    client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )
Exemplo n.º 3
0
def replicate_artifact(
        src_image_reference: str,
        tgt_image_reference: str,
        credentials_lookup: oa.credentials_lookup = None,
        routes: oc.OciRoutes = oc.OciRoutes(),
        oci_client: oc.Client = None,
):
    '''
    verbatimly replicate the OCI Artifact from src -> tgt without taking any assumptions
    about the transported contents. This in particular allows contents to be replicated
    that are not e.g. "docker-compliant" OCI Images.

    Blobs that already exist in the target are not downloaded again. If the first blob
    cannot be retrieved from the source, a synthesised cfg-blob is uploaded instead.

    @param src_image_reference: image reference to replicate from
    @param tgt_image_reference: image reference to replicate to
    @param credentials_lookup:  used (together with `routes`) to create a client, if no
                                `oci_client` is passed
    @param routes:              passed to `oc.Client` if a client is created
    @param oci_client:          client to use; mutually exclusive with `credentials_lookup`
    @raises ValueError: unless exactly one of `credentials_lookup`, `oci_client` is passed
    @raises NotImplementedError: if the manifest's schemaVersion is neither 1 nor 2
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError(
            'either credentials-lookup + routes, xor client must be passed')

    src_image_reference = ou.normalise_image_reference(src_image_reference)
    tgt_image_reference = ou.normalise_image_reference(tgt_image_reference)

    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    # we need the unaltered - manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference, ).text
    manifest_dict = json.loads(raw_manifest)
    schema_version = int(manifest_dict['schemaVersion'])
    if schema_version == 1:
        # the client is asked for the manifest (a previously done local parsing into
        # om.OciImageManifestV1 was immediately discarded, and is thus omitted)
        manifest = client.manifest(src_image_reference)
    elif schema_version == 2:
        manifest = dacite.from_dict(data_class=om.OciImageManifest,
                                    data=manifest_dict)
    else:
        # fail early and explicitly, rather than with an AttributeError on `.blobs()` below
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle manifest (may be absent for v2 / legacy images)
        is_manifest = idx == 0

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            logger.info(
                f'skipping blob download {layer.digest=} - already exists in tgt'
            )
            continue  # no need to download if blob already exists in tgt

        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_manifest,
        )
        if not blob_res and is_manifest:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning('falling back to non-verbatim replication '
                           f'{src_image_reference=} {tgt_image_reference=}')

            fake_cfg = od.docker_cfg(
            )  # TODO: check whether we need to pass-in cfg
            fake_cfg_dict = dataclasses.asdict(fake_cfg)
            fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

            # NOTE(review): the synthesised blob is uploaded using its own digest, whereas
            # the manifest pushed below is the unmodified raw_manifest (i.e. the new digest
            # is not patched into it) - confirm whether this is intended
            client.put_blob(
                image_reference=tgt_image_reference,
                digest=f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}',
                octets_count=len(fake_cfg_raw),
                data=fake_cfg_raw,
            )
            continue

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    # finally, publish the unmodified manifest
    client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )
Exemplo n.º 4
0
    routes = oc.OciRoutes(base_api_lookup)

    install_logging_handler &= ci.util._running_on_ci()
    if install_logging_handler:
        try:
            if oci_request_handler_requirements_fulfilled():
                _add_oci_request_logging_handler_unless_already_registered()
            else:
                logger.warning('skipping oci request logger installation')
        except:
            # do not fail just because of logging-issue
            import traceback
            traceback.print_exc()

    return oc.Client(
        credentials_lookup=credentials_lookup,
        routes=routes,
    )


class _OciRequestHandler(logging.Handler):
    def __init__(
        self,
        level,
        es_client,
        *args,
        **kwargs,
    ) -> None:
        '''
        initialise the handler and keep a reference to the passed `es_client`.

        `level`, as well as any additional positional and keyword arguments, are forwarded
        to the constructor of the base class (`logging.Handler`).
        '''
        # NOTE(review): `level` is forwarded by keyword alongside `*args`; presumably callers
        # never pass extra positional arguments (those would collide with `level`) - confirm
        self.es_client = es_client
        super().__init__(level=level, *args, **kwargs)

    def emit(self, record: logging.LogRecord) -> None:
Exemplo n.º 5
0
def oci_client(credentials_lookup=None):
    '''
    create an `oc.Client` using the given `credentials_lookup`.

    If no `credentials_lookup` is passed, the one returned by `oci_cfg_lookup()` is used.
    It is determined upon each call (rather than once, when this function is defined), so
    that merely importing this module does not trigger the lookup's creation.
    '''
    if credentials_lookup is None:
        credentials_lookup = oci_cfg_lookup()

    return oc.Client(credentials_lookup=credentials_lookup)
Exemplo n.º 6
0
def replicate_artifact(
    src_image_reference: typing.Union[str, om.OciImageReference],
    tgt_image_reference: typing.Union[str, om.OciImageReference],
    credentials_lookup: oa.credentials_lookup=None,
    routes: oc.OciRoutes=oc.OciRoutes(),
    oci_client: oc.Client=None,
    mode: ReplicationMode=ReplicationMode.REGISTRY_DEFAULTS,
    platform_filter: typing.Callable[[om.OciPlatform], bool]=None,
) -> typing.Tuple[requests.Response, str, bytes]:
    '''
    replicate the given OCI Artifact from src_image_reference to tgt_image_reference.

    try to be verbatim, if possible (i.e. target should reflect source as close as
    possible). Whether or not a fully identical replication is possible depends on
    the source artifact and chosen replication `mode`:

    If source artifact is a "legacy / v1" "docker image" (as it used to be
    created from older versions of docker) verbatim replication is not
    possible, because modern (v2) OCI Registries (such as GCR) will not accept
    those manifests. Therefore, conversion to "v2" is required (done
    transparently by this function).

    If source artifact is a "multiarch" image (oci.model.OciImageManifestList), OCI
    registries show different behaviour if ReplicationMode.REGISTRY_DEFAULTS is used.
    Some registries will in this case return a single-image manifest, instead of the
    multiarch-manifest (in this case, the replication result will only be a single-image).

    Use ReplicationMode.PREFER_MULTIARCH or ReplicationMode.NORMALISE_TO_MULTIARCH to
    prevent this.

    If platform_filter is specified (only applied for multi-arch images), the replication
    result will obviously also deviate from src, depending on the filter semantics.

    pass exactly one of `credentials_lookup` (optionally along with `routes`), or `oci_client`

    @param src_image_reference: image reference to replicate from
    @param tgt_image_reference: image reference to replicate to
    @param credentials_lookup:  used (together with `routes`) to create a client, if no
                                `oci_client` is passed
    @param routes:              passed to `oc.Client` if a client is created
    @param oci_client:          client to use; mutually exclusive with `credentials_lookup`
    @param mode:                controls the `accept` value sent when retrieving the
                                manifest, and whether single images are wrapped into a
                                manifest-list
    @param platform_filter:     callback; sub-images for which it returns a falsy value are
                                removed from replicated manifest-lists
    @returns: tuple of: response of the final manifest-upload, target image reference,
              uploaded manifest (as bytes)
    @raises ValueError: unless exactly one of `credentials_lookup`, `oci_client` is passed
    @raises NotImplementedError: for unknown modes, schema-versions, or manifest media types
    '''
    if not (bool(credentials_lookup) ^ bool(oci_client)):
        raise ValueError('either credentials-lookup + routes, xor client must be passed')

    src_image_reference = om.OciImageReference.to_image_ref(src_image_reference)
    tgt_image_reference = om.OciImageReference.to_image_ref(tgt_image_reference)

    # from here on, only `client` must be used (`oci_client` is None if the caller passed
    # `credentials_lookup`)
    if not oci_client:
        client = oc.Client(
            credentials_lookup=credentials_lookup,
            routes=routes,
        )
    else:
        client = oci_client

    if mode is ReplicationMode.REGISTRY_DEFAULTS:
        accept = None
    elif mode is ReplicationMode.PREFER_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    elif mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
        accept = om.MimeTypes.prefer_multiarch
    else:
        raise NotImplementedError(mode)

    # we need the unaltered - manifest for verbatim replication
    raw_manifest = client.manifest_raw(
        image_reference=src_image_reference,
        accept=accept,
    ).text
    manifest = json.loads(raw_manifest)
    schema_version = int(manifest['schemaVersion'])
    need_to_synthesise_cfg_blob = False

    if schema_version == 1:
        need_to_synthesise_cfg_blob = True
        manifest = client.manifest(image_reference=src_image_reference)

        logger.warning(
          f'''
          manifest {src_image_reference=} is in legacy-format
          (schemaVersion==1). Cannot verbatimly replicate
          '''
        )

        manifest, _ = oconv.v1_manifest_to_v2(
            manifest=manifest,
            oci_client=client,
            tgt_image_ref=str(tgt_image_reference),
        )

        # we must determine the uncompressed layer-digests to synthesise a valid
        # cfg-blob docker will accept (this means in particular we must download
        # all layers, even if we do not need to upload them)
        need_uncompressed_layer_digests = True
        uncompressed_layer_digests = []
    elif schema_version == 2:
        manifest = json.loads(raw_manifest)
        media_type = manifest.get('mediaType', om.DOCKER_MANIFEST_SCHEMA_V2_MIME)

        if media_type == om.DOCKER_MANIFEST_LIST_MIME:
            # multi-arch
            manifest = dacite.from_dict(
                data_class=om.OciImageManifestList,
                data=manifest,
            )

            src_ref = om.OciImageReference(image_reference=src_image_reference)
            src_name = src_ref.ref_without_tag
            tgt_ref = om.OciImageReference(image_reference=tgt_image_reference)
            tgt_name = tgt_ref.ref_without_tag

            # try to avoid modifications (from x-serialisation) - unless we have to
            manifest_dirty = False

            # cp manifests to tuple, because we _might_ modify if there is a platform_filter
            for sub_manifest in tuple(manifest.manifests):
                src_reference = f'{src_name}@{sub_manifest.digest}'
                tgt_reference = f'{tgt_name}@{sub_manifest.digest}'

                if platform_filter:
                    platform = op.from_single_image(
                        image_reference=src_reference,
                        oci_client=client,
                        base_platform=sub_manifest.platform,
                    )
                    if not platform_filter(platform):
                        logger.info(f'skipping {platform=} for {src_image_reference=}')
                        manifest_dirty = True
                        manifest.manifests.remove(sub_manifest)
                        continue

                logger.info(f'replicating to {tgt_reference=}')

                # replicate each sub-image (referenced by digest) individually
                replicate_artifact(
                    src_image_reference=src_reference,
                    tgt_image_reference=tgt_reference,
                    oci_client=client,
                )

            if manifest_dirty:
                # entries were filtered out -> manifest-list must be re-serialised
                raw_manifest = json.dumps(manifest.as_dict())

            res = client.put_manifest(
                image_reference=tgt_image_reference,
                manifest=raw_manifest,
            )

            return res, tgt_image_reference, raw_manifest.encode('utf-8')

        elif media_type in (
            om.OCI_MANIFEST_SCHEMA_V2_MIME,
            om.DOCKER_MANIFEST_SCHEMA_V2_MIME,
        ):
            if mode is ReplicationMode.NORMALISE_TO_MULTIARCH:
                # source is a single image: replicate it by digest, then publish a
                # manifest-list with a single entry referencing it
                if not src_image_reference.has_digest_tag:
                    src_image_reference = om.OciImageReference.to_image_ref(
                        client.to_digest_hash(
                            image_reference=src_image_reference,
                        )
                    )
                platform = op.from_single_image(
                    image_reference=src_image_reference,
                    oci_client=client,
                )
                # force usage of digest-tag (symbolic tag required for manifest-list
                tgt_image_ref = \
                    f'{tgt_image_reference.ref_without_tag}@{src_image_reference.tag}'

                res, ref, manifest_bytes = replicate_artifact(
                    src_image_reference=src_image_reference,
                    tgt_image_reference=tgt_image_ref,
                    oci_client=client,
                )

                manifest_list = om.OciImageManifestList(
                    manifests=[
                        om.OciImageManifestListEntry(
                            digest=f'sha256:{hashlib.sha256(manifest_bytes).hexdigest()}',
                            mediaType=media_type,
                            size=len(manifest_bytes),
                            platform=platform,
                        ),
                    ]
                )

                manifest_list_bytes = json.dumps(
                    manifest_list.as_dict(),
                ).encode('utf-8')

                res = client.put_manifest(
                    image_reference=tgt_image_reference,
                    manifest=manifest_list_bytes,
                )

                return res, tgt_image_reference, manifest_list_bytes

            manifest = dacite.from_dict(
                data_class=om.OciImageManifest,
                data=json.loads(raw_manifest)
            )
            need_uncompressed_layer_digests = False
            uncompressed_layer_digests = None
        else:
            raise NotImplementedError(f'{media_type=}')
    else:
        raise NotImplementedError(schema_version)

    for idx, layer in enumerate(manifest.blobs()):
        # need to specially handle cfg-blob (may be absent for v2 / legacy images)

        is_cfg_blob = idx == 0
        if is_cfg_blob and need_to_synthesise_cfg_blob:
            # if we need(ed) to synthesise cfg-blob (because source-image contained a v1-manifest)
            # then there will never be a cfg-blob in src.
            # -> silently skip to avoid emitting a confusing, but unhelpful warning
            logger.debug(f'{src_image_reference=} - synthesised cfg-blob - skipping replicatation')
            continue

        head_res = client.head_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
        )
        if head_res.ok:
            if not need_uncompressed_layer_digests:
                logger.info(f'skipping blob download {layer.digest=} - already exists in tgt')
                continue # no need to download if blob already exists in tgt
            elif not is_cfg_blob:
                # we will not need to re-upload, however we do need the uncompressed digest
                blob_res = client.blob(
                    image_reference=src_image_reference,
                    digest=layer.digest,
                    absent_ok=is_cfg_blob,
                )

                layer_hash = hashlib.sha256()
                # wbits=MAX_WBITS | 16: expect gzip-format (header + trailer)
                decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

                for chunk in blob_res.iter_content(chunk_size=4096):
                    layer_hash.update(decompressor.decompress(chunk))

                uncompressed_layer_digests.append(f'sha256:{layer_hash.hexdigest()}')
                continue # we may still skip the upload, of course

        # todo: consider silencing warning if we do v1->v2-conversion (cfg-blob will never exist
        #       in this case
        blob_res = client.blob(
            image_reference=src_image_reference,
            digest=layer.digest,
            absent_ok=is_cfg_blob,
        )
        if not blob_res and is_cfg_blob:
            # fallback to non-verbatim replication; synthesise cfg
            logger.warning(
                'falling back to non-verbatim replication '
                f'{src_image_reference=} {tgt_image_reference=}'
            )
            # NOTE(review): if we get here for a schemaVersion-2 manifest,
            # uncompressed_layer_digests is None, and the cfg-synthesis below reads
            # `history` from raw_manifest - confirm this path works for v2-manifests
            need_to_synthesise_cfg_blob = True
            continue

        if need_uncompressed_layer_digests:
            uncompressed_layer_hash = hashlib.sha256()
            # wbits=MAX_WBITS | 16: expect gzip-format (header + trailer)
            decompressor = zlib.decompressobj(wbits=zlib.MAX_WBITS | 16)

            def intercept_chunks(blob_res):
                # pass chunks through unchanged (for upload), while calculating the digest
                # of the decompressed contents on the fly
                for chunk in blob_res.iter_content(chunk_size=4096):
                    uncompressed_layer_hash.update(decompressor.decompress(chunk))
                    yield chunk

                # only reached once all chunks were consumed
                uncompressed_layer_digests.append(f'sha256:{uncompressed_layer_hash.hexdigest()}')

            blob_res = intercept_chunks(blob_res=blob_res)

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=layer.digest,
            octets_count=layer.size,
            data=blob_res,
        )

    if need_to_synthesise_cfg_blob:
        # use the first history-entry of the source-manifest as a base for the cfg-blob
        fake_cfg_dict = json.loads(json.loads(raw_manifest)['history'][0]['v1Compatibility'])

        # patch-in uncompressed layer-digests
        fake_cfg_dict['rootfs'] = {
            'diff_ids': uncompressed_layer_digests,
            'type': 'layers',
        }

        fake_cfg_raw = json.dumps(fake_cfg_dict).encode('utf-8')

        client.put_blob(
            image_reference=tgt_image_reference,
            digest=(cfg_digest := f'sha256:{hashlib.sha256(fake_cfg_raw).hexdigest()}'),
            octets_count=len(fake_cfg_raw),
            data=fake_cfg_raw,
        )

        manifest_dict = dataclasses.asdict(manifest)
        # patch-on altered cfg-digest
        manifest_dict['config']['digest'] = cfg_digest
        manifest_dict['config']['size'] = len(fake_cfg_raw)
        raw_manifest = json.dumps(manifest_dict)

    res = client.put_manifest(
        image_reference=tgt_image_reference,
        manifest=raw_manifest,
    )

    return res, tgt_image_reference, raw_manifest.encode('utf-8')