import unittest

from kubernetes.client import V1SecretKeySelector

# NOTE: the two imports below assume the usual KFP SDK module layout for the
# versions that still shipped ArtifactLocation; adjust the paths if your
# version differs.
from kfp.dsl import ArtifactLocation
from kfp.compiler._k8s_helper import convert_k8s_obj_to_json


class TestArtifactLocation(unittest.TestCase):
    # NOTE: the TestCase class name is an assumption; the original test
    # methods were shown without their enclosing class.

    def test_create_artifact_for_s3(self):
        artifact_location = ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret={"name": "s3-secret", "key": "accesskey"},
            secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="secretkey"))
        artifact = ArtifactLocation.create_artifact_for_s3(artifact_location,
                                                           name="foo",
                                                           path="path/to",
                                                           key="key")
        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
        self.assertEqual(artifact.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact.s3.bucket, "foo")
        self.assertEqual(artifact.s3.key, "key")
        self.assertEqual(artifact.s3.access_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.access_key_secret.key, "accesskey")
        self.assertEqual(artifact.s3.secret_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.secret_key_secret.key, "secretkey")
    def test_create_artifact_for_s3_with_dict(self):
        # use convert_k8s_obj_to_json to mimic the compiler
        artifact_location_dict = convert_k8s_obj_to_json(
            ArtifactLocation.s3(
                bucket="foo",
                endpoint="s3.amazonaws.com",
                insecure=False,
                region="ap-southeast-1",
                access_key_secret={"name": "s3-secret", "key": "accesskey"},
                secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                      key="secretkey")))
        artifact = ArtifactLocation.create_artifact_for_s3(
            artifact_location_dict, name="foo", path="path/to", key="key")
        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
        self.assertEqual(artifact.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact.s3.bucket, "foo")
        self.assertEqual(artifact.s3.key, "key")
        self.assertEqual(artifact.s3.access_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.access_key_secret.key, "accesskey")
        self.assertEqual(artifact.s3.secret_key_secret.name, "s3-secret")
        self.assertEqual(artifact.s3.secret_key_secret.key, "secretkey")
    def test_create_artifact_for_s3_with_default(self):
        # should trigger a pending deprecation warning about not having a
        # default artifact_location when artifact_location is not provided
        artifact = ArtifactLocation.create_artifact_for_s3(None,
                                                           name="foo",
                                                           path="path/to",
                                                           key="key")
        self.assertEqual(artifact.name, "foo")
        self.assertEqual(artifact.path, "path/to")
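    # A sketch of how the warning itself could be asserted with unittest's
    # assertWarns (PendingDeprecationWarning is inferred from the comment
    # above; verify the exact warning class before relying on this):
    #
    #   with self.assertWarns(PendingDeprecationWarning):
    #       ArtifactLocation.create_artifact_for_s3(
    #           None, name="foo", path="path/to", key="key")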
    def test_artifact_location_constructor(self):
        artifact_location = ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret={"name": "s3-secret", "key": "accesskey"},
            secret_key_secret=V1SecretKeySelector(name="s3-secret",
                                                  key="secretkey"))
        # snake_case representation of the location, kept for reference
        # (the original test never asserted against it)
        expected = {
            "bucket": "foo",
            "endpoint": "s3.amazonaws.com",
            "insecure": False,
            "region": "ap-southeast-1",
            "access_key_secret": {"name": "s3-secret", "key": "accesskey"},
            "secret_key_secret": {"name": "s3-secret", "key": "secretkey"}
        }
        self.assertEqual(artifact_location.s3.bucket, "foo")
        self.assertEqual(artifact_location.s3.endpoint, "s3.amazonaws.com")
        self.assertEqual(artifact_location.s3.insecure, False)
        self.assertEqual(artifact_location.s3.region, "ap-southeast-1")
        self.assertEqual(artifact_location.s3.access_key_secret.name, "s3-secret")
        self.assertEqual(artifact_location.s3.access_key_secret.key, "accesskey")
        self.assertEqual(artifact_location.s3.secret_key_secret.name, "s3-secret")
        self.assertEqual(artifact_location.s3.secret_key_secret.key, "secretkey")
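
if __name__ == '__main__':
    # standard unittest entry point so the test module can be run directly
    unittest.main()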
# ---------------------------------------------------------------------------
# Compiler functions (kfp-tekton _op_to_template, shown in two variants).
# ---------------------------------------------------------------------------
import json
import re
from collections import OrderedDict

import yaml
from kfp import dsl
from kfp.dsl._container_op import BaseOp

# NOTE: the helpers referenced below (_process_base_ops, _inputs_to_json,
# _outputs_to_json, _get_resourceOp_template, _prepend_steps, _get_base_step,
# _add_mount_path, _update_volumes, _process_parameters,
# _process_output_artifacts, sanitize_k8s_name, convert_k8s_obj_to_json,
# ArtifactLocation, tekton_api_version) live elsewhere in the compiler
# package; their exact import paths depend on the kfp-tekton version.


def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts=None,
                    enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""
    # avoid a shared mutable default argument; callers may still pass a dict
    # to collect the output artifact annotations
    if pipelinerun_output_artifacts is None:
        pipelinerun_output_artifacts = {}

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE: in-place update to BaseOp;
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # This should have been as easy as
        # output_artifact_paths.update(op.file_outputs), but the
        # _outputs_to_json function changes the output names, so we must do
        # the same here to keep the names consistent.
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        # call the containerOp step "main" to align with Argo
        step = {'name': 'main'}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {'name': processed_op.name},
            'spec': {'steps': [step]}
        }

        # create the output artifact tracking annotation
        if enable_artifacts:
            for output_artifact in output_artifacts:
                output_annotation = pipelinerun_output_artifacts.get(
                    processed_op.name, [])
                output_annotation.append({
                    'name': output_artifact['name'],
                    'path': output_artifact['path']
                })
                pipelinerun_output_artifacts[processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # flatten the manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', '$(PIPELINERUN)')
            # remove the yaml quotes in order to read bash variables
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op, processed_op.name,
                                            tekton_api_version, manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(processed_op.init_containers,
                                                   template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        # Tekton change: don't wipe out existing metadata
        template.setdefault('metadata', {})
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                    sanitize_k8s_name(value,
                                      allow_capital_underscore=True,
                                      allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        # the original read template['spec'].get('volume', []); 'volumes' is
        # the key written below, so the singular form looks like a typo
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})[
            'pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})[
            'pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})[
                'pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
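# For reference, a ContainerOp run through the variant above yields a Tekton
# Task of roughly this shape (an illustrative sketch only; "my-op" and the
# image/command values are made-up placeholders, and the apiVersion comes
# from the compiler's tekton_api_version constant):
#
#   apiVersion: tekton.dev/v1beta1
#   kind: Task
#   metadata:
#     name: my-op
#   spec:
#     steps:
#       - name: main
#         image: busybox
#         command: ['echo', 'hello']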
# A second variant of _op_to_template kept in this document: it has no
# pipelinerun_output_artifacts tracking, names the step after the op instead
# of "main", and materializes raw input artifacts through a copy-inputs step.
def _op_to_template(op: BaseOp, enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""
    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE: in-place update to BaseOp;
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # This should have been as easy as
        # output_artifact_paths.update(op.file_outputs), but the
        # _outputs_to_json function changes the output names, so we must do
        # the same here to keep the names consistent.
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PODNAME/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {'name': processed_op.name},
            'spec': {'steps': [step]}
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # flatten the manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', '$(PIPELINERUN)')
            # remove the yaml quotes in order to read bash variables
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op, processed_op.name,
                                            tekton_api_version, manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(processed_op.init_containers,
                                                   template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Input artifacts in KFP are not pulled from s3; they are always
        # passed as raw inputs. Visit
        # https://github.com/kubeflow/pipelines/issues/336 for more details
        # on the implementation.
        copy_inputs_step = _get_base_step('copy-inputs')
        for artifact in inputs['artifacts']:
            if 'raw' in artifact:
                copy_inputs_step['script'] += 'echo -n "%s" > %s\n' % (
                    artifact['raw']['data'], artifact['path'])
            mount_path = artifact['path'].rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(artifact['name'], artifact['path'], mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)
        template['spec']['steps'] = _prepend_steps([copy_inputs_step],
                                                   template['spec']['steps'])
        _update_volumes(template, volume_mount_step_template, volume_template)

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        # Tekton change: don't wipe out existing metadata
        template.setdefault('metadata', {})
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        # the original read template['spec'].get('volume', []); 'volumes' is
        # the key written below, so the singular form looks like a typo
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})[
            'pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})[
            'pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})[
                'pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
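# Minimal usage sketch (hypothetical, for illustration): convert a single
# ContainerOp into a Tekton Task dict and serialize it. The op name, image,
# and command are made-up placeholders.
#
#   import yaml
#   from kfp import dsl
#
#   op = dsl.ContainerOp(name='echo', image='busybox',
#                        command=['echo', 'hello'])
#   print(yaml.dump(_op_to_template(op, enable_artifacts=False),
#                   default_flow_style=False))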