Example #1
    def test_create_artifact_for_s3(self):
        artifact_location = ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret={
                "name": "s3-secret",
                "key": "accesskey"
            },
            secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="secretkey"))
        artifact = ArtifactLocation.create_artifact_for_s3(artifact_location,
                                                           name="foo",
                                                           path="path/to",
                                                           key="key")

        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
        self.assertEqual(artifact.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact.s3.bucket, "foo")
        self.assertEqual(artifact.s3.key, "key")
        self.assertEqual(artifact.s3.access_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.access_key_secret.key, "accesskey")
        self.assertEqual(artifact.s3.secret_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.secret_key_secret.key, "secretkey")
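Example #1 passes access_key_secret as a plain dict and secret_key_secret as a V1SecretKeySelector, and the assertions show both end up exposing .name and .key. Below is a minimal sketch of the presumably equivalent call with both secrets given as V1SecretKeySelector objects; it is an illustration, not a test taken from the suite.

        # Sketch: both secret references as V1SecretKeySelector objects, which
        # the mixed usage above suggests is interchangeable with the dict form.
        artifact_location = ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="accesskey"),
            secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="secretkey"))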
Example #2
    def test_create_artifact_for_s3_with_dict(self):
        # use convert_k8s_obj_to_json to mimic the compiler
        artifact_location_dict = convert_k8s_obj_to_json(
            ArtifactLocation.s3(bucket="foo",
                                endpoint="s3.amazonaws.com",
                                insecure=False,
                                region="ap-southeast-1",
                                access_key_secret={
                                    "name": "s3-secret",
                                    "key": "accesskey"
                                },
                                secret_key_secret=V1SecretKeySelector(
                                    name="s3-secret", key="secretkey")))
        artifact = ArtifactLocation.create_artifact_for_s3(
            artifact_location_dict, name="foo", path="path/to", key="key")

        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
        self.assertEqual(artifact.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact.s3.bucket, "foo")
        self.assertEqual(artifact.s3.key, "key")
        self.assertEqual(artifact.s3.access_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.access_key_secret.key, "accesskey")
        self.assertEqual(artifact.s3.secret_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.secret_key_secret.key, "secretkey")
Example #3
    def test_create_artifact_for_s3_with_default(self):
        # should trigger a pending deprecation warning about the missing default
        # artifact_location when artifact_location is not provided.
        artifact = ArtifactLocation.create_artifact_for_s3(None,
                                                           name="foo",
                                                           path="path/to",
                                                           key="key")

        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
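The comment in Example #3 mentions a pending deprecation warning when no artifact_location is given. A minimal sketch of asserting that warning with the standard unittest API follows; the exact warning class is an assumption and may differ in the library.

    def test_create_artifact_for_s3_warns_without_location(self):
        # Sketch (assumption): the library raises PendingDeprecationWarning when
        # artifact_location is None; adjust the warning class if it differs.
        with self.assertWarns(PendingDeprecationWarning):
            artifact = ArtifactLocation.create_artifact_for_s3(None,
                                                               name="foo",
                                                               path="path/to",
                                                               key="key")
        self.assertEqual(artifact.name, "foo")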
Example #4
    def test_artifact_location_constructor(self):
        artifact_location = ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret={
                "name": "s3-secret",
                "key": "accesskey"
            },
            secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="secretkey"))

        expected = {
            "bucket": "foo",
            "endpoint": "s3.amazonaws.com",
            "insecure": False,
            "region": "ap-southeast-1",
            "access_key_secret": {
                "name": "s3-secret",
                "key": "accesskey"
            },
            "secret_key_secret": {
                "name": "s3-secret",
                "key": "secretkey"
            }
        }

        self.assertEqual(artifact_location.s3.bucket, "foo")
        self.assertEqual(artifact_location.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact_location.s3.insecure, False)
        self.assertEqual(artifact_location.s3.region, "ap-southeast-1")
        self.assertEqual(artifact_location.s3.access_key_secret.name,
                         "s3-secret")
        self.assertEqual(artifact_location.s3.access_key_secret.key,
                         "accesskey")
        self.assertEqual(artifact_location.s3.secret_key_secret.name,
                         "s3-secret")
        self.assertEqual(artifact_location.s3.secret_key_secret.key,
                         "secretkey")
Example #5
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This would be as simple as output_artifact_paths.update(op.file_outputs),
        # but _outputs_to_json renames the outputs, so the same renaming must be
        # applied here to keep the names consistent.
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Name the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        if enable_artifacts:
            for output_artifact in output_artifacts:
                output_annotation = pipelinerun_output_artifacts.get(
                    processed_op.name, [])
                output_annotation.append({
                    'name': output_artifact['name'],
                    'path': output_artifact['path']
                })
                pipelinerun_output_artifacts[
                    processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest to YAML so the Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only accept $() syntax for environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove YAML quotes so the shell variables can be resolved
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = \
            processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    return template
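A minimal usage sketch for the function above, assuming a trivial ContainerOp built with the KFP DSL; the image and command are placeholders, and in the real compiler _op_to_template is invoked internally during compilation rather than called directly.

# Sketch: convert a trivial ContainerOp into a Tekton Task template and
# print it as YAML. enable_artifacts is left at its default (False).
echo_op = dsl.ContainerOp(name='echo',
                          image='busybox',
                          command=['sh', '-c'],
                          arguments=['echo hello'])
task_template = _op_to_template(echo_op)
print(yaml.dump(task_template, default_flow_style=False))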
Example #6
def _op_to_template(op: BaseOp, enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This would be as simple as output_artifact_paths.update(op.file_outputs),
        # but _outputs_to_json renames the outputs, so the same renaming must be
        # applied here to keep the names consistent.
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PODNAME/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest to YAML so the Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only accept $() syntax for environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove YAML quotes so the shell variables can be resolved
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Input artifacts in KFP are not pulled from S3; they are always passed as raw inputs.
        # See https://github.com/kubeflow/pipelines/issues/336 for more details on the implementation.
        copy_inputs_step = _get_base_step('copy-inputs')
        for artifact in inputs['artifacts']:
            if 'raw' in artifact:
                copy_inputs_step['script'] += 'echo -n "%s" > %s\n' % (
                    artifact['raw']['data'], artifact['path'])
            mount_path = artifact['path'].rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(artifact['name'], artifact['path'], mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)
        template['spec']['steps'] = _prepend_steps([copy_inputs_step],
                                                   template['spec']['steps'])
        _update_volumes(template, volume_mount_step_template, volume_template)

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault(
            'annotations',
            {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault(
                'annotations',
                {})['pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
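For the raw-input branch above, the artifact entries produced by _inputs_to_json are expected to look roughly like the sketch below; the name, path, and data are hypothetical and only illustrate what the copy-inputs loop consumes.

# Illustration only: the shape the copy-inputs loop above expects for a raw
# input artifact (hypothetical values).
example_artifact = {
    'name': 'text',
    'path': '/tmp/inputs/text/data',
    'raw': {'data': 'hello world'},
}
# For such an entry the loop appends a shell line like
#   echo -n "hello world" > /tmp/inputs/text/data
# to the copy-inputs step and registers /tmp/inputs/text as a mount path.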