def _get_copy_result_step_template(step_number: int, result_maps: list):
    """Build the Tekton step that shuffles result files for one result bucket.

    The generated shell script first moves every result of bucket
    ``step_number`` from the Tekton home result path into its
    ``$(results.<name>.path)`` location.  For every bucket after the first
    it then moves the results of the previous bucket back from their
    ``$(results.<name>.path)`` location into the Tekton home result path.

    Args:
        step_number {int}: index of the result bucket this step handles
        result_maps {list}: list of maps bucketed with the result groups

    Returns:
        Dict[Text, Any]: step definition with ``name``, ``args``,
        ``command`` and ``image`` entries.
    """
    commands = []

    # Publish the results of the current bucket.
    for key in result_maps[step_number].keys():
        result_name = sanitize_k8s_name(key)
        commands.append("mv %s%s $(results.%s.path);\n" %
                        (TEKTON_HOME_RESULT_PATH, result_name, result_name))

    # Move the results of the previous bucket back to the home result path.
    if step_number > 0:
        for key in result_maps[step_number - 1].keys():
            result_name = sanitize_k8s_name(key)
            commands.append("mv $(results.%s.path) %s%s;\n" %
                            (result_name, TEKTON_HOME_RESULT_PATH, result_name))

    return {
        "name": "copy-results-%s" % str(step_number),
        # The whole script is passed to `sh -c` as one argument.
        "args": ["".join(commands)],
        "command": ["sh", "-c"],
        "image": TEKTON_COPY_RESULTS_STEP_IMAGE
    }
def test_sanitize_k8s_labels(self):
    """Label keys and values are sanitized with their respective options."""
    labels = {
        "my.favorite/hobby": "Hobby? Passion! Football. Go to https://www.fifa.com/",
        "My other hobbies?": "eating; drinking. sleeping ;-)"
    }
    expected_labels = {
        "my.favorite/hobby": "Hobby-Passion-Football.-Go-to-https-www.fifa.com",
        "My-other-hobbies": "eating-drinking.-sleeping"
    }

    # Keys are sanitized with slashes allowed and a 253 character limit.
    sanitized_keys = [
        sanitize_k8s_name(key,
                          allow_capital_underscore=True,
                          allow_dot=True,
                          allow_slash=True,
                          max_length=253)
        for key in labels.keys()
    ]
    self.assertEqual(sanitized_keys, list(expected_labels.keys()))

    # Values are sanitized without slashes and a 63 character limit.
    sanitized_values = [
        sanitize_k8s_name(value,
                          allow_capital_underscore=True,
                          allow_dot=True,
                          allow_slash=False,
                          max_length=63)
        for value in labels.values()
    ]
    self.assertEqual(sanitized_values, list(expected_labels.values()))
def processOperand(operand) -> (str, str):
    """Convert a condition operand into its expression form.

    Args:
        operand: either a ``dsl.PipelineParam`` or a constant value.

    Returns:
        A 2-tuple ``(expression, op_name)``:
        * for a ``dsl.PipelineParam``: the string
          ``results_<sanitized op name>_<sanitized param name>`` and the
          (unsanitized) name of the op producing the parameter;
        * for a constant: the value converted with ``int()`` when that
          succeeds, otherwise its ``str()`` form wrapped in single quotes,
          and ``None`` as the op name.
    """
    if isinstance(operand, dsl.PipelineParam):
        result_name = "results_" + sanitize_k8s_name(operand.op_name) + "_" + sanitize_k8s_name(operand.name)
        return result_name, operand.op_name

    # Do the same as in _get_super_condition_template to check whether it's int
    try:
        operand = int(operand)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not an int".  A bare `except:` would
        # also swallow KeyboardInterrupt and SystemExit.
        operand = '\'' + str(operand) + '\''
    return operand, None
def _process_output_artifacts(outputs_dict: Dict[Text, Any], volume_mount_step_template: List[Dict[Text, Any]], volume_template: List[Dict[Text, Any]], replaced_param_list: List[Text], artifact_to_result_mapping: Dict[Text, Any], artifact_items: List[Any]): """Process output artifact dependencies to replicate the same behavior as Argo. For storing artifacts, we will need to provide the output artifact dependencies for the server to find and store the artifacts with the proper metadata. Args: outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes replaced_param_list {List[Text]}: List of parameters that already set up as results artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results Returns: Dict[Text, Any] """ if outputs_dict.get('artifacts'): mounted_artifact_paths = [] for artifact in outputs_dict['artifacts']: artifact_name = artifact_to_result_mapping.get( artifact['name'], artifact['name']) if artifact['name'] in replaced_param_list: artifact_items.append([ artifact_name, "$(results.%s.path)" % sanitize_k8s_name(artifact_name) ]) else: artifact_items.append([artifact_name, artifact['path']]) if artifact['path'].rsplit("/", 1)[0] not in mounted_artifact_paths: if artifact['path'].rsplit("/", 1)[0] == "": raise ValueError( 'Undefined volume path or "/" path artifacts are not allowed.' ) volume_mount_step_template.append({ 'name': sanitize_k8s_name(artifact['name']), 'mountPath': artifact['path'].rsplit("/", 1)[0] }) volume_template.append({ 'name': sanitize_k8s_name(artifact['name']), 'emptyDir': {} }) mounted_artifact_paths.append(artifact['path'].rsplit( "/", 1)[0])
def _add_mount_path(name: str,
                    path: str,
                    mount_path: str,
                    volume_mount_step_template: List[Dict[Text, Any]],
                    volume_template: List[Dict[Text, Any]],
                    mounted_param_paths: List[Text]):
    """Add an emptyDir volume so files persist within the same task.

    The volume is named after the sanitized ``name`` and mounted at the
    parent directory of ``path``.  ``mount_path`` is recorded in
    ``mounted_param_paths``.  All three list arguments are updated in place.
    """
    volume_name = sanitize_k8s_name(name)
    parent_dir = path.rsplit("/", 1)[0]
    volume_mount_step_template.append({
        'name': volume_name,
        'mountPath': parent_dir
    })
    volume_template.append({'name': volume_name, 'emptyDir': {}})
    mounted_param_paths.append(mount_path)
def generate_id(name: str = None, length: int = 36) -> str:
    """Return an identifier derived from ``name`` or a random one.

    Args:
        name: when truthy, the identifier is the sanitized form of this name.
        length: number of characters of the random identifier; only used
            when ``name`` is empty or ``None``.

    Returns:
        The sanitized name, or ``length`` random lowercase hexadecimal digits.
    """
    if not name:
        # hexdigits contains both cases, hence the final lower().
        return ''.join(choice(hexdigits) for _ in range(length)).lower()
    return sanitize_k8s_name(name)
def __init__(
    self,
    any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
    name: str = None,
):
    """Create the any-sequencer op from container ops and conditions.

    Args:
        any: container ops and/or condition operators to watch.  Container
            ops contribute their sanitized names to ``--taskList``;
            condition operators are converted by ``processConditionArgs``.
            Items of any other type are ignored.
        name: name of the resulting op.
    """
    task_names = []
    conditions = []
    # Single pass: `any` may be a one-shot iterable.
    for item in any:
        if isinstance(item, dsl.ContainerOp):
            task_names.append(sanitize_k8s_name(item.name))
            continue
        if isinstance(item, ConditionOperator):
            conditions.append(item)

    arguments = [
        "--namespace", "$(context.pipelineRun.namespace)",
        "--prName", "$(context.pipelineRun.name)"
    ]
    if task_names:
        arguments += ["--taskList", ",".join(task_names)]
    arguments += processConditionArgs(conditions)

    super().__init__(
        name=name,
        image=ANY_SEQUENCER_IMAGE,
        command="any-taskrun",
        arguments=arguments,
    )
def test_sanitize_k8s_name_max_length(self):
    """Sanitized names are cut to 63 characters and sanitizing twice changes nothing."""
    from string import ascii_lowercase, ascii_uppercase, digits, punctuation

    names = [
        "short-name with under_score and spaces",
        "very long name".replace("o", "o" * 300),
        digits + ascii_uppercase + punctuation + digits
    ]
    expected_names = [
        "short-name-with-under-score-and-spaces",
        "very-long-name".replace("o", "o" * 300),
        digits + ascii_lowercase + "-" + digits
    ]

    sanitized_once = [sanitize_k8s_name(name) for name in names]
    self.assertEqual(sanitized_once,
                     [expected[:63] for expected in expected_names])

    # Sanitizing an already sanitized name must be a no-op.
    sanitized_twice = [
        sanitize_k8s_name(sanitize_k8s_name(name)) for name in names
    ]
    self.assertEqual(sanitized_twice,
                     [expected[:63] for expected in expected_names])
def test_sanitize_k8s_annotations(self):
    """An annotation key with dots and a slash survives sanitizing unchanged."""
    annotation_keys = {
        "sidecar.istio.io/inject",
    }
    expected_k8s_annotation_keys = {
        "sidecar.istio.io/inject",
    }

    sanitized_keys = []
    for key in annotation_keys:
        sanitized_keys.append(
            sanitize_k8s_name(key,
                              allow_capital_underscore=True,
                              allow_dot=True,
                              allow_slash=True,
                              max_length=253))

    expected_keys = [key[:253] for key in expected_k8s_annotation_keys]
    self.assertEqual(sanitized_keys, expected_keys)
def _sanitize_and_inject_artifact(self, pipeline: dsl.Pipeline, pipeline_conf=None):
    """Sanitize operator/param names and inject pipeline artifact location.

    Every op in ``pipeline.ops`` is updated in place: its name, its output
    parameter names (and their ``op_name``), its dependent names and the
    keys of ``file_outputs`` / ``attribute_outputs`` are sanitized.
    ``pipeline.ops`` is then replaced by a dict keyed by the sanitized op
    names.

    Args:
        pipeline: the pipeline whose ops are sanitized.
        pipeline_conf: optional pipeline configuration.  When it carries an
            ``artifact_location``, that location is copied to every op that
            has an ``artifact_location`` attribute which is not set yet.
    """
    # Sanitize operator names and param names
    sanitized_ops = {}

    # pipeline level artifact location.  `pipeline_conf` defaults to None, so
    # it must not be dereferenced unconditionally.
    artifact_location = getattr(pipeline_conf, "artifact_location", None)

    for op in pipeline.ops.values():
        # inject pipeline level artifact location into the op if it does not
        # have an artifact location config already.
        if hasattr(op, "artifact_location"):
            if artifact_location and not op.artifact_location:
                op.artifact_location = artifact_location

        sanitized_name = sanitize_k8s_name(op.name)
        op.name = sanitized_name
        for param in op.outputs.values():
            param.name = sanitize_k8s_name(param.name, True)
            if param.op_name:
                param.op_name = sanitize_k8s_name(param.op_name)
        if op.output is not None and not isinstance(
                op.output, dsl._container_op._MultipleOutputsError):
            op.output.name = sanitize_k8s_name(op.output.name, True)
            op.output.op_name = sanitize_k8s_name(op.output.op_name)
        if op.dependent_names:
            op.dependent_names = [
                sanitize_k8s_name(name) for name in op.dependent_names
            ]
        if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
            sanitized_file_outputs = {}
            for key in op.file_outputs.keys():
                sanitized_file_outputs[sanitize_k8s_name(key, True)] = op.file_outputs[key]
            op.file_outputs = sanitized_file_outputs
        elif isinstance(op, dsl.ResourceOp) and op.attribute_outputs is not None:
            sanitized_attribute_outputs = {}
            for key in op.attribute_outputs.keys():
                sanitized_attribute_outputs[sanitize_k8s_name(key, True)] = \
                    op.attribute_outputs[key]
            op.attribute_outputs = sanitized_attribute_outputs
        sanitized_ops[sanitized_name] = op
    pipeline.ops = sanitized_ops
def after_any(container_ops: List[dsl.ContainerOp]):
    '''Return a function that adds the any-condition flag to an op.

    The returned function sets ``any_sequencer`` on the op it receives to
    ``{"tasks_list": <comma separated sanitized names of container_ops>}``
    and returns that same op.
    '''
    task_list_str = ",".join(
        sanitize_k8s_name(cop.name) for cop in container_ops)

    def _after_components(cop):
        cop.any_sequencer = {"tasks_list": task_list_str}
        return cop

    return _after_components
def __init__(self,
             any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
             name: str = None,
             statusPath: str = None,
             skippingPolicy: str = None,
             errorPolicy: str = None,
             image: str = ANY_SEQUENCER_IMAGE):
    """Create the any-sequencer op from container ops and conditions.

    Args:
        any: container ops and/or condition operators to watch.  Container
            ops contribute their sanitized names to ``--taskList``;
            condition operators are converted by ``processConditionArgs``.
            Items of any other type are ignored.
        name: name of the resulting op.
        statusPath: when given, passed as ``--statusPath`` and registered
            as the ``status`` file output.
        skippingPolicy: ``"skipOnNoMatch"`` or ``"errorOnNoMatch"``.
        errorPolicy: ``"continueOnError"`` or ``"failOnError"``.
        image: container image of the sequencer.

    Raises:
        AssertionError: if a policy has a value other than the ones above.
    """
    task_names = []
    conditions = []
    # Single pass: `any` may be a one-shot iterable.
    for item in any:
        if isinstance(item, dsl.ContainerOp):
            task_names.append(sanitize_k8s_name(item.name))
            continue
        if isinstance(item, ConditionOperator):
            conditions.append(item)

    arguments = [
        "--namespace", "$(context.pipelineRun.namespace)",
        "--prName", "$(context.pipelineRun.name)"
    ]
    if task_names:
        arguments += ["--taskList", ",".join(task_names)]

    file_outputs = None
    if statusPath is not None:
        file_outputs = {"status": statusPath}
        arguments += ["--statusPath", statusPath]

    if skippingPolicy is not None:
        assert skippingPolicy == "skipOnNoMatch" or skippingPolicy == "errorOnNoMatch"
        arguments += ["--skippingPolicy", skippingPolicy]

    if errorPolicy is not None:
        assert errorPolicy == "continueOnError" or errorPolicy == "failOnError"
        arguments += ["--errorPolicy", errorPolicy]

    arguments += processConditionArgs(conditions)

    super().__init__(
        name=name,
        image=image,
        file_outputs=file_outputs,
        command="any-task",
        arguments=arguments,
    )
def _sanitize_and_inject_artifact(self, pipeline: dsl.Pipeline, pipeline_conf=None):
    """Sanitize operator/param names and inject pipeline artifact location.

    Every op in ``pipeline.ops`` is updated in place: its name, its output
    parameter names (and their ``op_name``), its dependent names and the
    keys of ``file_outputs`` / ``attribute_outputs`` are sanitized, and the
    container of a ``dsl.ContainerOp`` is passed to ``sanitize_k8s_object``.
    ``pipeline.ops`` is then replaced by a dict keyed by the sanitized op
    names.  ``pipeline_conf`` is accepted but not used here.
    """
    renamed_ops = {}

    for op in pipeline.ops.values():
        new_name = sanitize_k8s_name(op.name)
        op.name = new_name

        # Output parameters and the op names they refer to.
        for output_param in op.outputs.values():
            output_param.name = sanitize_k8s_name(output_param.name, True)
            if output_param.op_name:
                output_param.op_name = sanitize_k8s_name(output_param.op_name)

        has_single_output = op.output is not None and not isinstance(
            op.output, dsl._container_op._MultipleOutputsError)
        if has_single_output:
            op.output.name = sanitize_k8s_name(op.output.name, True)
            op.output.op_name = sanitize_k8s_name(op.output.op_name)

        if op.dependent_names:
            op.dependent_names = [
                sanitize_k8s_name(dependent) for dependent in op.dependent_names
            ]

        # Keys of the declared outputs must match the sanitized param names.
        if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
            op.file_outputs = {
                sanitize_k8s_name(key, True): value
                for key, value in op.file_outputs.items()
            }
        elif isinstance(op, dsl.ResourceOp) and op.attribute_outputs is not None:
            op.attribute_outputs = {
                sanitize_k8s_name(key, True): value
                for key, value in op.attribute_outputs.items()
            }

        if isinstance(op, dsl.ContainerOp) and op.container is not None:
            sanitize_k8s_object(op.container)

        renamed_ops[new_name] = op

    pipeline.ops = renamed_ops
def __init__(
    self,
    any: List[dsl.ContainerOp],
    name: str = None,
):
    """Create the any-sequencer op watching the given container ops.

    Args:
        any: container ops whose sanitized names form the ``-taskList``
            argument (comma separated).
        name: name of the resulting op.
    """
    task_list_str = ",".join(sanitize_k8s_name(cop.name) for cop in any)

    super().__init__(
        name=name,
        image="dspipelines/any-sequencer:latest",
        command="any-taskrun",
        arguments=[
            "-namespace", "$(context.pipelineRun.namespace)",
            "-prName", "$(context.pipelineRun.name)",
            "-taskList", task_list_str
        ],
    )
def _group_to_dag_template(self, group, inputs, outputs, dependencies):
    """Generate template given an OpsGroup.

    inputs, outputs, dependencies are all helper dicts; only ``inputs`` is
    read here.

    For a ``dsl.OpsGroup`` of type ``'condition'`` the template becomes a
    ``Condition`` whose params carry the two resolved operands and the
    operator.  For any other group the bare template (apiVersion, sanitized
    name and an empty spec) is returned.
    """
    template = {
        'apiVersion': tekton_api_version,
        'metadata': {
            'name': sanitize_k8s_name(group.name),
        },
        'spec': {}
    }

    is_condition = isinstance(group, dsl.OpsGroup) and group.type == 'condition'
    if not is_condition:
        return template

    # Generates a pseudo-template unique to conditions due to the catalog condition approach
    # where every condition is an extension of one super-condition
    group_inputs = inputs.get(group.name, [])
    condition = group.condition
    first_operand = self._resolve_value_or_reference(condition.operand1, group_inputs)
    second_operand = self._resolve_value_or_reference(condition.operand2, group_inputs)

    template['kind'] = 'Condition'
    template['spec']['params'] = [
        {'name': 'operand1', 'value': first_operand},
        {'name': 'operand2', 'value': second_operand},
        {'name': 'operator', 'value': str(condition.operator)},
    ]
    return template
def _handle_tekton_custom_task(custom_task: dict, workflow: dict, recursive_tasks: list, group_names: list):
    """
    Separate the custom tasks' sub-pipelines from the main workflow and return
    the custom task CR definitions together with the updated workflow.

    Args:
      custom_task: dictionary keyed by custom task name; each value holds the
        custom task information in the format below:
        {
          'kind': '',
          'task_list': [],
          'spec': {},
          'depends': []
        }
        Entries whose 'kind' is 'loops' are additionally read for
        'loop_args' and 'loop_sub_args'.
      workflow: a workflow whose loop pipelines are not separated yet. Its
        workflow['spec']['pipelineSpec']['tasks'] list is read and replaced.
      recursive_tasks: List of recursive_tasks information.
      group_names: List of name constructions for creating custom loop crd names.

    Returns:
      A tuple (custom_task_crs, workflow).
      custom_task_crs is a list of custom task CR definitions, and workflow
      is the same dict with the tasks that belong to a custom task removed
      from its task list.

    Note:
      custom_task, workflow and the task dicts in recursive_tasks are
      modified in place.
    """
    custom_task_crs = []
    task_list = []
    tasks = workflow['spec']['pipelineSpec']['tasks']
    new_tasks = []
    dependencies = []
    # handle dependencies
    for key in custom_task.keys():
        dependencies.extend(custom_task[key]['depends'])
    # Tasks that depend on something inside a custom task must run after the
    # custom task itself instead.
    for task in tasks:
        for dependency in dependencies:
            if task['name'] == dependency['org']:
                task_dependencies = [dependency['runAfter']]
                for dep_task in task.get('runAfter', []):
                    # should exclude the custom task itself for cases like graph
                    dep_task_trim = copy.copy(dep_task)
                    if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                        dep_task_trim = sanitize_k8s_name(
                            dep_task,
                            max_length=LOOP_GROUP_NAME_LENGTH,
                            rev_truncate=True)
                    dep_task_with_prefix = '-'.join(group_names[:-1] + [dep_task_trim])
                    if dep_task_with_prefix == dependency['runAfter']:
                        continue
                    # Keep only the dependencies that stay in the main workflow.
                    if dep_task not in custom_task[
                            dependency['runAfter']]['task_list']:
                        task_dependencies.append(dep_task)
                task['runAfter'] = task_dependencies

    # process recursive tasks to match parameters
    for task in recursive_tasks:
        recursive_graph = custom_task.get(task['taskRef']['name'], {})
        if recursive_graph:
            if recursive_graph['spec']['params']:
                recursive_graph['spec']['params'] = sorted(
                    recursive_graph['spec']['params'],
                    key=lambda k: k['name'])
            # Pass through every graph parameter the recursive call does not set.
            for param in recursive_graph['spec']['params']:
                recursive_params = [param['name'] for param in task['params']]
                if param['name'] not in recursive_params:
                    task['params'].append({
                        'name': param['name'],
                        'value': "$(params.%s)" % param['name']
                    })

    # get custom tasks
    for custom_task_key in custom_task.keys():
        denpendency_list = custom_task[custom_task_key]['spec'].get(
            'runAfter', [])
        task_list.extend(custom_task[custom_task_key]['task_list'])
        # generate custom task cr
        custom_task_cr_tasks = []
        for task in tasks:
            if task['name'] in custom_task[custom_task_key]['task_list']:
                for param in task.get('taskSpec', {}).get('params', []):
                    param['type'] = 'string'
                run_after_task_list = []
                for run_after_task in task.get('runAfter', []):
                    for recursive_task in recursive_tasks:
                        # The subset of the loop group name should be LOOP_GROUP_NAME_LENGTH minus 4 because the
                        # numbers of loop cannot exceed 1000 due to ETCD limitation.
                        if sanitize_k8s_name(recursive_task['name'], max_length=(LOOP_GROUP_NAME_LENGTH - 4), rev_truncate=True) \
                                in run_after_task and '-'.join(group_names[:-1]) not in run_after_task:
                            if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                                run_after_task = sanitize_k8s_name(
                                    run_after_task,
                                    max_length=LOOP_GROUP_NAME_LENGTH,
                                    rev_truncate=True)
                            run_after_task = '-'.join(group_names[:-1] + [run_after_task])
                            break
                    # Dependencies of the custom task itself are dropped from
                    # the tasks inside it.
                    if run_after_task not in denpendency_list:
                        run_after_task_list.append(run_after_task)
                if task.get('runAfter', []):
                    task['runAfter'] = run_after_task_list
                custom_task_cr_tasks.append(task)
        # append recursive tasks
        for task in recursive_tasks:
            if task['name'] in custom_task[custom_task_key]['task_list']:
                custom_task_cr_tasks.append(task)
        # generate custom task cr; 'kind' is a placeholder that is overwritten
        # below for loops
        custom_task_cr = {
            "apiVersion": "custom.tekton.dev/v1alpha1",
            "kind": 'custom_task_kind',
            "metadata": {
                "name": custom_task_key
            },
            "spec": {
                "pipelineSpec": {
                    "params": [{
                        "name": parm['name'],
                        'type': 'string'
                    } for parm in sorted(custom_task[custom_task_key]['spec']
                                         ['params'],
                                         key=lambda k: k['name'])],
                    "tasks":
                    custom_task_cr_tasks
                }
            }
        }

        # handle loop special case
        if custom_task[custom_task_key]['kind'] == 'loops':
            # if subvar exist, this is dict loop parameters
            # remove the loop_arg and add subvar args to the cr params
            if custom_task[custom_task_key]['loop_sub_args'] != []:
                refesh_cr_params = []
                for param in custom_task_cr['spec']['pipelineSpec']['params']:
                    if param['name'] != custom_task[custom_task_key][
                            'loop_args']:
                        refesh_cr_params.append(param)
                custom_task_cr['spec']['pipelineSpec'][
                    'params'] = refesh_cr_params
                custom_task_cr['spec']['pipelineSpec']['params'].extend(
                    [{
                        "name": sub_param,
                        'type': 'string'
                    } for sub_param in custom_task[custom_task_key]
                     ['loop_sub_args']])

            # add loop special field
            custom_task_cr['kind'] = 'PipelineLoop'
            if custom_task[custom_task_key]['spec'].get(
                    'parallelism') is not None:
                custom_task_cr['spec']['parallelism'] = custom_task[
                    custom_task_key]['spec']['parallelism']
                # remove from pipeline run spec
                del custom_task[custom_task_key]['spec']['parallelism']
            custom_task_cr['spec']['iterateParam'] = custom_task[
                custom_task_key]['loop_args']
            for custom_task_param in custom_task[custom_task_key]['spec'][
                    'params']:
                if custom_task_param['name'] != custom_task[custom_task_key][
                        'loop_args'] and '$(tasks.' in custom_task_param[
                            'value']:
                    # Textual replacement over the serialized CR: every
                    # occurrence of the task-result reference becomes a
                    # reference to the equally named parameter.
                    custom_task_cr = json.loads(
                        json.dumps(custom_task_cr).replace(
                            custom_task_param['value'],
                            '$(params.%s)' % custom_task_param['name']))

        # need to process task parameters to replace out of scope results
        # because nested graph cannot refer to task results outside of the sub-pipeline.
        custom_task_cr_task_names = [
            custom_task_cr_task['name']
            for custom_task_cr_task in custom_task_cr_tasks
        ]
        for task in custom_task_cr_tasks:
            for task_param in task.get('params', []):
                if '$(tasks.' in task_param['value']:
                    # NOTE(review): this pattern is a plain (non-raw) string
                    # containing backslash escapes such as '\$'; consider a raw
                    # string literal.
                    param_results = re.findall(
                        '\$\(tasks.([^ \t\n.:,;\{\}]+).results.([^ \t\n.:,;\{\}]+)\)',
                        task_param['value'])
                    for param_result in param_results:
                        if param_result[0] not in custom_task_cr_task_names:
                            # param_result is a (task name, result name) pair.
                            task['params'] = json.loads(
                                json.dumps(task['params']).replace(
                                    task_param['value'],
                                    '$(params.%s-%s)' % param_result))
        custom_task_crs.append(custom_task_cr)
        custom_task[custom_task_key]['spec']['params'] = sorted(
            custom_task[custom_task_key]['spec']['params'],
            key=lambda k: k['name'])
        # The custom task spec takes the place of its inner tasks in the main
        # task list.
        tasks.append(custom_task[custom_task_key]['spec'])

    # handle the nested custom task case
    # Need to be verified: nested custom task with tasks result as parameters
    nested_custom_tasks = []
    custom_task_crs_namelist = []
    for custom_task_key in custom_task.keys():
        if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
            # NOTE(review): the return value of this call is discarded, so the
            # key is appended unchanged below - confirm whether the trimmed
            # name was intended.
            sanitize_k8s_name(custom_task_key,
                              max_length=LOOP_GROUP_NAME_LENGTH,
                              rev_truncate=True)
        custom_task_crs_namelist.append(custom_task_key)
    for custom_task_key in custom_task.keys():
        for inner_task_name in custom_task[custom_task_key]['task_list']:
            inner_task_name_trimmed = copy.copy(inner_task_name)
            if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                inner_task_name_trimmed = sanitize_k8s_name(
                    inner_task_name,
                    max_length=LOOP_GROUP_NAME_LENGTH,
                    rev_truncate=True)
            inner_task_cr_name = '-'.join(group_names[:-1] + [inner_task_name_trimmed])
            # An inner task whose prefixed name is itself a custom task is nested.
            if inner_task_cr_name in custom_task_crs_namelist:
                nested_custom_tasks.append({
                    "father_ct": custom_task_key,
                    "nested_custom_task": inner_task_cr_name
                })
    # Summarize all of the nested task relationships.
    for nested_custom_task in nested_custom_tasks:
        father_ct_name = nested_custom_task['father_ct']
        relationships = find_ancestors(nested_custom_tasks, father_ct_name, [],
                                       father_ct_name)
        nested_custom_task['ancestors'] = relationships['ancestors']
        nested_custom_task['root_ct'] = relationships['root_ct']

    for nested_custom_task in nested_custom_tasks:
        nested_custom_task_spec = custom_task[
            nested_custom_task['nested_custom_task']]['spec']
        for custom_task_cr in custom_task_crs:
            if custom_task_cr['metadata']['name'] == nested_custom_task[
                    'father_ct']:
                # handle parameters of nested custom task
                params_nested_custom_task = nested_custom_task_spec['params']
                # nested_custom_task_special_params = the global params that are not defined in the parent custom task
                nested_custom_task_special_params = [
                    param for param in params_nested_custom_task
                    if '$(params.' in param['value'] and not bool([
                        True for ct_param in custom_task_cr['spec']['pipelineSpec']
                        ['params'] if param['name'] in ct_param['name']
                    ])
                ]
                custom_task_cr['spec']['pipelineSpec']['params'].extend([{
                    'name': param['name'],
                    'type': 'string'
                } for param in nested_custom_task_special_params])

                # Ancestors and the root custom task must declare these
                # params as well.
                if nested_custom_task['ancestors']:
                    for custom_task_cr_again in custom_task_crs:
                        if custom_task_cr_again['metadata'][
                                'name'] in nested_custom_task[
                                    'ancestors'] or custom_task_cr_again[
                                        'metadata'][
                                            'name'] == nested_custom_task[
                                                'root_ct']:
                            custom_task_cr_again['spec']['pipelineSpec'][
                                'params'].extend(
                                    [{
                                        'name': param['name'],
                                        'type': 'string'
                                    } for param in
                                     nested_custom_task_special_params])
                            custom_task_cr_again['spec']['pipelineSpec'][
                                'params'] = sorted(custom_task_cr_again['spec']
                                                   ['pipelineSpec']['params'],
                                                   key=lambda k: k['name'])
                # add children params to the root tasks
                for task in tasks:
                    if task['name'] == nested_custom_task['root_ct']:
                        # The root task gets its own copies of the param dicts.
                        task['params'].extend(
                            copy.deepcopy(nested_custom_task_special_params))
                    elif task['name'] in nested_custom_task[
                            'ancestors'] or task['name'] == nested_custom_task[
                                'father_ct']:
                        task['params'].extend(
                            nested_custom_task_special_params)
                    if task.get('params') is not None:
                        task['params'] = sorted(task['params'],
                                                key=lambda k: k['name'])
                for special_param in nested_custom_task_special_params:
                    for nested_param in nested_custom_task_spec['params']:
                        if nested_param['name'] == special_param['name']:
                            nested_param[
                                'value'] = '$(params.%s)' % nested_param['name']
                # need to process parameters to replace results
                custom_task_cr_task_names = [
                    cr_task['name'] for cr_task in custom_task_cr['spec']
                    ['pipelineSpec']['tasks']
                ]
                for nested_custom_task_param in nested_custom_task_spec[
                        'params']:
                    if '$(tasks.' in nested_custom_task_param['value']:
                        param_results = re.findall(
                            '\$\(tasks.([^ \t\n.:,;\{\}]+).results.([^ \t\n.:,;\{\}]+)\)',
                            nested_custom_task_param['value'])
                        for param_result in param_results:
                            if param_result[
                                    0] not in custom_task_cr_task_names:
                                custom_task_cr_param_names = [
                                    p['name'] for p in custom_task_cr['spec']
                                    ['pipelineSpec']['params']
                                ]
                                if nested_custom_task_param[
                                        'name'] not in custom_task_cr_param_names:
                                    # The parent does not declare this param:
                                    # drop it from the nested task spec.
                                    for index, param in enumerate(
                                            nested_custom_task_spec['params']):
                                        if nested_custom_task_param[
                                                'name'] == param['name']:
                                            nested_custom_task_spec[
                                                'params'].pop(index)
                                            break
                                else:
                                    # The parent declares it: refer to the
                                    # parent's parameter instead of the
                                    # out-of-scope task result.
                                    nested_custom_task_spec = json.loads(
                                        json.dumps(nested_custom_task_spec).
                                        replace(
                                            nested_custom_task_param['value'],
                                            '$(params.%s)' %
                                            nested_custom_task_param['name']))
                # add nested custom task spec to main custom task
                custom_task_cr['spec']['pipelineSpec']['tasks'].append(
                    nested_custom_task_spec)
                custom_task_cr['spec']['pipelineSpec']['params'] = sorted(
                    custom_task_cr['spec']['pipelineSpec']['params'],
                    key=lambda k: k['name'])

    # remove the tasks that belong to a custom task from the main workflow
    task_name_prefix = '-'.join(group_names[:-1] + [""])
    for task in tasks:
        if task['name'].replace(task_name_prefix, "") not in task_list:
            # Also compare against the length-trimmed form of the names.
            task_list_trimmed = [
                sanitize_k8s_name(task,
                                  max_length=LOOP_GROUP_NAME_LENGTH,
                                  rev_truncate=True) for task in task_list
            ]
            if task['name'].replace(task_name_prefix,
                                    "") not in task_list_trimmed:
                new_tasks.append(task)
    workflow['spec']['pipelineSpec']['tasks'] = new_tasks
    return custom_task_crs, workflow
def _workflow_with_pipelinerun(self, task_refs, pipeline, pipeline_template, workflow):
    """Generate the PipelineRun template and append it to the workflow.

    Args:
        task_refs: task references of the pipeline; each ``task['name']`` is
            looked up in ``pipeline.ops`` to collect pod level settings.
        pipeline: the DSL pipeline providing the ops and ``conf`` (timeout,
            image pull secrets).
        pipeline_template: the generated Pipeline resource; its name and
            ``spec.params`` are used for the run.
        workflow: list of resources generated so far.

    Returns:
        A new list made of ``workflow`` followed by the PipelineRun.
    """
    pipelinerun = {
        'apiVersion': tekton_api_version,
        'kind': 'PipelineRun',
        'metadata': {
            # suffix_space keeps room for the 4 characters of '-run'.
            'name': sanitize_k8s_name(pipeline_template['metadata']['name'],
                                      suffix_space=4) + '-run',
            # Kubernetes object metadata names this field `annotations`
            # (plural); the singular form is not part of ObjectMeta.
            'annotations': {
                'tekton.dev/output_artifacts': json.dumps(self.output_artifacts),
                'tekton.dev/input_artifacts': json.dumps(self.input_artifacts)
            }
        },
        'spec': {
            'params': [{
                'name': p['name'],
                'value': p.get('default', '')
            } for p in pipeline_template['spec']['params']],
            'pipelineRef': {
                'name': pipeline_template['metadata']['name']
            }
        }
    }

    # Generate TaskRunSpecs PodTemplate:s
    task_run_spec = []
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        task_spec = {
            "pipelineTaskName": task['name'],
            "taskPodTemplate": {}
        }
        if op.affinity:
            task_spec["taskPodTemplate"]["affinity"] = convert_k8s_obj_to_json(op.affinity)
        if op.tolerations:
            task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
        if op.node_selector:
            task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
        # Only tasks with at least one pod level setting get an entry.
        if bool(task_spec["taskPodTemplate"]):
            task_run_spec.append(task_spec)
    if len(task_run_spec) > 0:
        pipelinerun['spec']['taskRunSpecs'] = task_run_spec

    # add workflow level timeout to pipeline run
    if pipeline.conf.timeout:
        pipelinerun['spec']['timeout'] = '%ds' % pipeline.conf.timeout

    # generate the Tekton podTemplate for image pull secret
    if len(pipeline.conf.image_pull_secrets) > 0:
        pipelinerun['spec']['podTemplate'] = pipelinerun['spec'].get('podTemplate', {})
        pipelinerun['spec']['podTemplate']['imagePullSecrets'] = [{
            "name": s.name
        } for s in pipeline.conf.image_pull_secrets]

    workflow = workflow + [pipelinerun]

    return workflow
def _create_workflow(
        self,
        pipeline_func: Callable,
        pipeline_name: Text = None,
        pipeline_description: Text = None,
        params_list: List[dsl.PipelineParam] = None,
        pipeline_conf: dsl.PipelineConf = None,
) -> List[Dict[Text, Any]]:  # Tekton change, signature
    """Internal implementation of create_workflow.

    Runs ``pipeline_func`` inside a ``dsl.Pipeline`` context with
    placeholder parameters, sanitizes the resulting pipeline, restores the
    parameter defaults and builds the workflow, which is then passed
    through ``fix_big_data_passing``.  The pipeline metadata is stored as
    the ``pipelines.kubeflow.org/pipeline_spec`` annotation of the
    generated ``Pipeline`` resource.

    Raises:
        ValueError: if pipeline params are given both in the pipeline
            function and in ``params_list``.
    """
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being to each component.
    saved_param_values = {}
    for param in params_list:
        saved_param_values[param.name] = param.value
        param.value = None

    # Currently only allow specifying pipeline params at one place.
    if params_list and pipeline_meta.inputs:
        raise ValueError(
            'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
        )

    declared_inputs = pipeline_meta.inputs or []
    args_list = []
    for arg_name in argspec.args:
        # Type of the first declared input with that name, if any.
        arg_type = next(
            (spec.type for spec in declared_inputs if arg_name == spec.name),
            None)
        args_list.append(
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                              param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
        pipeline_func(*args_list)

    # Configuration passed to the compiler is overriding. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
        args_list_with_defaults = [
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
            for arg_name in argspec.args
        ]
        if argspec.defaults:
            # Defaults belong to the trailing arguments, hence the reversed zip.
            for arg, default in zip(reversed(args_list_with_defaults),
                                    reversed(argspec.defaults)):
                if isinstance(default, dsl.PipelineParam):
                    arg.value = default.value
                else:
                    arg.value = default
    elif params_list:
        # Or, if args are provided by params_list, fill in pipeline_meta.
        for param in params_list:
            param.value = saved_param_values[param.name]

        args_list_with_defaults = params_list
        pipeline_meta.inputs = [
            InputSpec(name=param.name,
                      type=param.param_type,
                      default=param.value) for param in params_list
        ]

    op_transformers = [add_pod_env, *pipeline_conf.op_transformers]

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from kfp_tekton.compiler._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    import json
    pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
    pipeline_spec_json = json.dumps(pipeline_meta.to_dict(), sort_keys=True)
    annotations = pipeline.setdefault('metadata', {}).setdefault('annotations', {})
    annotations['pipelines.kubeflow.org/pipeline_spec'] = pipeline_spec_json

    return workflow
def _get_loop_task(self, task: Dict, op_name_to_for_loop_op):
    """Get the list of task references which will flatten the loop parameters defined in pipeline.

    One copy of ``task`` is produced for every combination of the loop
    values of its looped parameters.  Copy ``i`` is named
    ``<sanitized task name>-<LOOP_ITEM_NAME_BASE>-<i>``.  ``task`` itself is
    modified while doing so: on return it carries the last generated name
    and no ``params`` entry.

    Args:
      task: ops template in pipeline.
      op_name_to_for_loop_op: a dictionary of opsgroup

    Returns:
      The list of expanded task dicts.
    """
    # Get all the params in the task
    params = list(task.get('params', []))

    # Get the loop values for each param
    for param in params:
        for loop_group in op_name_to_for_loop_op.values():
            loop_args = loop_group.loop_args
            if loop_args.name not in param['name']:
                continue
            # What remains of the name after removing the loop argument name
            # and the delimiter is the sub variable, if any.
            sub_key = param['name'].replace(loop_args.name, '').replace(
                LoopArgumentVariable.SUBVAR_NAME_DELIMITER, '')
            if sub_key:
                param['loop-value'] = [
                    item[sub_key] for item in loop_args.items_or_pipeline_param
                ]
            else:
                param['loop-value'] = loop_args.items_or_pipeline_param

    # 1. Split the params into the ones without and with loop values
    static_params = [p for p in params if not p.get('loop-value')]
    looped_params = [p for p in params if p.get('loop-value')]

    # 2. Collect the loop values and strip the looped params of their
    #    temporary and to-be-replaced entries
    loop_values = []
    for param in looped_params:
        loop_values.append(param.pop('loop-value'))
        del param['value']

    # One list of {"value": ...} dicts per combination of loop values.
    combinations = [[{"value": str(value)} for value in combo]
                    for combo in itertools.product(*loop_values)]

    # 3. Complete every combination with the remaining param fields and
    #    append the params without loop
    for combo in combinations:
        for entry, param in zip(combo, looped_params):
            entry.update(param)
        combo.extend(static_params)

    # Get the task list based on params list
    del task['params']
    suffix_space = len(LoopArguments.LOOP_ITEM_NAME_BASE) + LoopArguments.NUM_CODE_CHARS + 2
    base_name = sanitize_k8s_name(task['name'], suffix_space=suffix_space)

    expanded_tasks = []
    for index, combo in enumerate(combinations):
        task['params'] = combo
        task['name'] = '%s-%s-%d' % (base_name, LoopArguments.LOOP_ITEM_NAME_BASE, index)
        expanded_tasks.append(copy.deepcopy(task))
        del task['params']
    return expanded_tasks
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts=None,
                    artifact_items=None):
    """Generate a Tekton Task template for an operator inherited from BaseOp.

    ``dsl.ContainerOp`` and ``dsl.ResourceOp`` operators are handled. When the
    op carries a ``tekton-result-sizes`` pod annotation, results are grouped
    into bins of at most 2048 bytes and extra copy steps are appended so the
    results are moved between the Tekton home and result directories.

    Args:
      op: operator to convert. It is updated in place (display-name annotation,
        PipelineParams replaced by template variable strings).
      pipelinerun_output_artifacts: dict updated in place; maps the processed
        op name to a list of output artifact annotations (name, path, key).
      artifact_items: dict updated in place; maps ``op.name`` to a list of
        ``[name, path]`` artifact items.

    Returns:
      The Task template as a dictionary.

    Raises:
      ValueError: if the ``tekton-result-sizes`` annotation is not valid JSON
        or one of its values cannot be converted to an integer.
    """
    # Fresh containers per call; a mutable default would be shared by all calls.
    if pipelinerun_output_artifacts is None:
        pipelinerun_output_artifacts = {}
    if artifact_items is None:
        artifact_items = {}

    # Display name
    if op.display_name:
        op.add_pod_annotation('pipelines.kubeflow.org/task_display_name',
                              op.display_name)

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [{
            'name': name,
            'path': path
        } for name, path in output_artifact_paths.items()]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Calling containerOp step as "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        for output_artifact in output_artifacts:
            output_annotation = pipelinerun_output_artifacts.get(
                processed_op.name, [])
            output_annotation.append({
                'name': output_artifact.get('name', ''),
                'path': output_artifact.get('path', ''),
                'key': "artifacts/$PIPELINERUN/%s/%s.tgz" %
                       (processed_op.name,
                        output_artifact.get('name', '').replace(
                            processed_op.name + '-', ''))
            })
            pipelinerun_output_artifacts[processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if '{{workflow.name}}' in manifest:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove yaml quote in order to read bash variables.
            # Raw strings: '\g' is an invalid escape in a normal string literal.
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    artifact_items[op.name] = artifact_items.get(op.name, [])
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        _process_output_artifacts(outputs_dict, volume_mount_step_template,
                                  volume_template, replaced_param_list,
                                  artifact_to_result_mapping,
                                  artifact_items[op.name])
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    if isinstance(op, dsl.ContainerOp
                  ) and op._metadata and GENERATE_COMPONENT_SPEC_ANNOTATIONS:
        component_spec_dict = op._metadata.to_dict()
        component_spec_digest = hashlib.sha256(
            json.dumps(component_spec_dict,
                       sort_keys=True).encode()).hexdigest()
        component_name = component_spec_dict.get('name', op.name)
        component_version = component_name + '@sha256=' + component_spec_digest
        digested_component_spec_dict = {
            'name': component_name,
            'outputs': component_spec_dict.get('outputs', []),
            'version': component_version
        }
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec_digest'] = \
            json.dumps(digested_component_spec_dict, sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    # Sort and arrange results based on provided estimate size and process results in multi-steps if the result sizes are too big.
    result_size_map = "{}"
    if processed_op.pod_annotations:
        result_size_map = processed_op.pod_annotations.get(
            "tekton-result-sizes", "{}")
    # Only sort and arrange results when the estimated sizes are given.
    if result_size_map and result_size_map != "{}":
        try:
            result_size_map = json.loads(result_size_map)
        except ValueError:
            # A real exception type is required: raising a plain string fails
            # with "TypeError: exceptions must derive from BaseException".
            raise ValueError(
                "tekton-result-sizes annotation is not a valid JSON")
        # Normalize estimated result size keys and convert the sizes to int
        # *before* sorting, so string sizes are ordered numerically and mixed
        # int/str values cannot break the sort.
        normalized_size_map = {}
        for raw_key, raw_value in result_size_map.items():
            key = sanitize_k8s_name(raw_key, allow_capital_underscore=True)
            try:
                normalized_size_map[key] = int(raw_value)
            except (TypeError, ValueError):
                raise ValueError(
                    "Estimated value for result %s is %s, but it needs to be an integer."
                    % (key, raw_value))
        # Sort key orders based on values
        result_size_map = dict(
            sorted(normalized_size_map.items(),
                   key=lambda item: item[1],
                   reverse=True))
        max_byte_size = 2048
        verified_result_size_map = {0: {}}
        # A task without output parameters has no 'results' entry.
        op_result_names = [
            result['name'] for result in template['spec'].get('results', [])
        ]
        step_bins = {0: 0}
        step_counter = 0
        # Group result files to not exceed max_byte_size as a bin packing problem
        # Results are sorted from large to small, each value will loop over each bin to determine can it fit in the existing bins.
        for key, value in result_size_map.items():
            if key in op_result_names:
                packed_index = -1
                # Look for bin that can fit the result value
                for i in range(len(step_bins)):
                    if step_bins[i] + value > max_byte_size:
                        continue
                    step_bins[i] = step_bins[i] + value
                    packed_index = i
                    break
                # If no bin can fit the value, create a new bin to store the value
                if packed_index < 0:
                    step_counter += 1
                    if value > max_byte_size:
                        logging.warning(
                            "The estimated size for parameter %s is %sB which is more than 2KB, "
                            "consider passing this value as artifact instead of output parameter.",
                            key, value)
                    step_bins[step_counter] = value
                    verified_result_size_map[step_counter] = {}
                    packed_index = step_counter
                verified_result_size_map[packed_index][key] = value
            else:
                logging.warning(
                    "The esitmated size for parameter %s does not exist in the task %s."
                    "Please correct the task annotations with the correct parameter key",
                    key, op.name)
        missing_param_estimation = [
            result_name for result_name in op_result_names
            if result_name not in result_size_map
        ]
        if missing_param_estimation:
            logging.warning(
                "The following output parameter estimations are missing in task %s: Missing params: %s.",
                op.name, missing_param_estimation)
        # Move results between the Tekton home and result directories if there are more than one step
        if step_counter > 0:
            for step in template['spec']['steps']:
                if step['name'] != 'main':
                    continue
                for key in result_size_map:
                    # Replace main step results that are not in the first bin to the Tekton home path
                    if key in verified_result_size_map[0]:
                        continue
                    sanitize_key = sanitize_k8s_name(key)
                    result_placeholder = '$(results.%s.path)' % sanitize_key
                    home_path = '%s%s' % (TEKTON_HOME_RESULT_PATH,
                                          sanitize_key)
                    # The main step may define only 'command' or only 'args'.
                    for field in ('args', 'command'):
                        entries = step.get(field) or []
                        for i, entry in enumerate(entries):
                            entries[i] = entry.replace(result_placeholder,
                                                       home_path)
            # Append new steps to move result files between each step, so Tekton controller can record all results without
            # exceeding the Kubernetes termination log limit.
            for i in range(1, step_counter + 1):
                copy_result_step = _get_copy_result_step_template(
                    i, verified_result_size_map)
                template['spec']['steps'].append(copy_result_step)
        # Update artifact item location to the latest stage in order to properly track and store all the artifacts.
        for i, artifact in enumerate(artifact_items[op.name]):
            if artifact[0] not in verified_result_size_map[step_counter]:
                artifact[1] = '%s%s' % (TEKTON_HOME_RESULT_PATH,
                                        sanitize_k8s_name(artifact[0]))
                artifact_items[op.name][i] = artifact
    return template
def _process_parameters(processed_op: BaseOp,
                        template: Dict[Text, Any],
                        outputs_dict: Dict[Text, Any],
                        volume_mount_step_template: List[Dict[Text, Any]],
                        volume_template: List[Dict[Text, Any]],
                        replaced_param_list: List[Text],
                        artifact_to_result_mapping: Dict[Text, Any],
                        mounted_param_paths: List[Text]):
    """Turn file outputs into Tekton results, mirroring the Argo behavior.

    Tekton results have to live under /tekton/results. Each file output path
    found in a step's ``command``/``args`` is rewritten to the matching
    ``$(results.<name>.path)`` expression. When a path cannot be found there,
    a ``cp`` line is added to a copy step so the file is copied to the Tekton
    destination, for example:

      - image: busybox
        name: copy-results
        script: |
          #!/bin/sh
          set -exo pipefail
          cp $LOCALPATH $(results.data.path);

    BusyBox is recommended for such small tasks because it is relatively
    lightweight compared to the ubuntu and bash images.

    Args:
      processed_op {BaseOp}: class that inherits from BaseOp
      template {Dict[Text, Any]}: Task template; its results and steps are updated in place
      outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task
      volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts
      volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes
      replaced_param_list {List[Text]}: List of parameters that already set up as results (appended to)
      artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results (updated)
      mounted_param_paths {List[Text]}: List of paths that already mounted to a volume.

    Returns:
      Dict[Text, Any]: the copy-results step, or an empty dict when the task
      has no output parameters.
    """
    if not outputs_dict.get('parameters'):
        return {}

    template['spec']['results'] = []
    copy_results_step = _get_base_step('copy-results')

    for name, path in processed_op.file_outputs.items():
        sanitized_name = sanitize_k8s_name(name)
        result_placeholder = '$(results.%s.path)' % sanitized_name
        template['spec']['results'].append({
            'name': name,
            'description': path
        })

        # replace all occurrences of the output file path with the Tekton output parameter expression
        need_copy_step = True
        for step in template['spec']['steps']:
            for field in ('command', 'args'):
                if field not in step:
                    continue
                rewritten = []
                for entry in step[field]:
                    if path in entry:
                        entry = entry.replace(path, result_placeholder)
                        need_copy_step = False
                    rewritten.append(entry)
                step[field] = rewritten
            # The file is already written to its Tekton result location.
            if path == '/tekton/results/' + sanitized_name:
                need_copy_step = False

        # If file output path cannot be found/replaced, use emptyDir to copy it to the tekton/results path
        if need_copy_step:
            copy_results_step['script'] = copy_results_step['script'] + (
                'cp %s %s;\n' % (path, result_placeholder))
            mount_path = path.rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(name, path, mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)

        # Record what artifacts are moved to result parameters.
        parameter_name = sanitize_k8s_name(processed_op.name + '-' + name,
                                           allow_capital_underscore=True,
                                           max_length=float('Inf'))
        replaced_param_list.append(parameter_name)
        artifact_to_result_mapping[parameter_name] = name

    return copy_results_step
def _create_pipeline_workflow(self, args, pipeline, op_transformers=None, pipeline_conf=None) \
        -> Dict[Text, Any]:
    """Build the Tekton PipelineRun dictionary for the pipeline.

    The templates returned by ``self._create_dag_templates`` are turned into
    pipeline tasks (conditions become separate condition tasks), then
    dependencies, ``when`` expressions, parameter references, retries,
    timeouts, exit handlers and loops are applied before the PipelineRun is
    assembled. ``self.input_artifacts`` is updated along the way.
    """

    def _operand_reference(operand):
        # Results of an upstream task vs. a pipeline-level parameter.
        if operand.op_name:
            return ''.join([
                '$(tasks.', operand.op_name, '.results.',
                sanitize_k8s_name(operand.name), ')'
            ])
        return ''.join(['$(params.', operand.name, ')'])

    # Input Parameters
    params = []
    for arg in args:
        param = {'name': arg.name}
        if arg.value is not None:
            if isinstance(arg.value, (list, tuple, dict)):
                param['default'] = json.dumps(arg.value, sort_keys=True)
            else:
                param['default'] = str(arg.value)
        params.append(param)

    # TODO: task templates?

    # generate Tekton tasks from pipeline ops
    raw_templates = self._create_dag_templates(pipeline, op_transformers,
                                               params)

    # generate task and condition reference list for the Tekton Pipeline
    condition_refs = {}

    # TODO
    task_refs = []
    templates = []
    condition_task_refs = {}
    for template in raw_templates:
        template_name = template['metadata']['name']
        template_params = template['spec'].get('params', [])
        # TODO Allow an opt-out for the condition_template
        if template['kind'] == 'Condition':
            condition_task_ref = [{
                'name': template_name,
                'params': [{
                    'name': p['name'],
                    'value': p.get('value', '')
                } for p in template_params],
                'taskSpec': _get_super_condition_template(),
            }]
            condition_refs[template_name] = [{
                'input': '$(tasks.%s.results.status)' % template_name,
                'operator': 'in',
                'values': ['true']
            }]
            condition_task_refs[template_name] = condition_task_ref
            continue

        templates.append(template)
        task_spec = template['spec']
        task_ref = {
            'name': template_name,
            'params': [{
                'name': p['name'],
                'value': p.get('default', '')
            } for p in template_params],
            'taskSpec': task_spec,
        }
        # Labels first, then annotations, to keep the key order.
        for metadata_key in ('labels', 'annotations'):
            if template['metadata'].get(metadata_key, None):
                task_spec.setdefault(
                    'metadata',
                    {})[metadata_key] = template['metadata'][metadata_key]
        task_refs.append(task_ref)

    # process input parameters from upstream tasks for conditions and pair conditions with their ancestor conditions
    opsgroup_stack = [pipeline.groups[0]]
    condition_stack = [None]
    while opsgroup_stack:
        cur_opsgroup = opsgroup_stack.pop()
        most_recent_condition = condition_stack.pop()

        if cur_opsgroup.type == 'condition':
            condition_task_ref = condition_task_refs[cur_opsgroup.name][0]
            condition = cur_opsgroup.condition

            # Process input parameters if needed
            input_params = [
                _operand_reference(operand)
                for operand in (condition.operand1, condition.operand2)
                if isinstance(operand, dsl.PipelineParam)
            ]
            for position, operand_value in enumerate(input_params):
                condition_task_ref['params'][position]['value'] = operand_value

        opsgroup_stack.extend(cur_opsgroup.groups)
        condition_stack.extend([most_recent_condition] *
                               len(cur_opsgroup.groups))

    # add task dependencies and add condition refs to the task ref that depends on the condition
    op_name_to_parent_groups = self._get_groups_for_ops(pipeline.groups[0])
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        parent_group = op_name_to_parent_groups.get(task['name'], [])
        if parent_group:
            when_expressions = condition_refs.get(parent_group[-2], [])
            if when_expressions:
                task['when'] = when_expressions
        if op.dependent_names:
            task['runAfter'] = op.dependent_names

    # process input parameters from upstream tasks
    pipeline_param_names = {p['name'] for p in params}
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        for tp in task.get('params', []):
            if tp['name'] in pipeline_param_names:
                tp['value'] = '$(params.%s)' % tp['name']
                continue
            for pp in op.inputs:
                if tp['name'] != pp.full_name:
                    continue
                tp['value'] = '$(tasks.%s.results.%s)' % (pp.op_name, pp.name)
                # Create input artifact tracking annotation
                input_annotation = self.input_artifacts.get(task['name'], [])
                input_annotation.append({
                    'name': tp['name'],
                    'parent_task': pp.op_name
                })
                self.input_artifacts[task['name']] = input_annotation
                break

    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        # add retries params
        if op.num_retries:
            task['retries'] = op.num_retries
        # add timeout params to task_refs, instead of task.
        if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or op.timeout:
            task['timeout'] = '%ds' % op.timeout

    # handle resourceOp cases in pipeline
    self._process_resourceOp(task_refs, pipeline)

    # handle exit handler in pipeline
    finally_tasks = []
    regular_tasks = []
    for task in task_refs:
        if pipeline.ops.get(task['name']).is_exit_handler:
            finally_tasks.append(task)
        else:
            regular_tasks.append(task)
    task_refs = regular_tasks

    # process loop parameters, keep this section in the behind of other processes, ahead of gen pipeline
    root_group = pipeline.groups[0]
    op_name_to_for_loop_op = self._get_for_loop_ops(root_group)
    if op_name_to_for_loop_op:
        if any(loop_param.items_is_pipeline_param is True
               for loop_param in op_name_to_for_loop_op.values()):
            raise NotImplementedError("dynamic params are not yet implemented")
        include_loop_task_refs = []
        for task in task_refs:
            include_loop_task_refs.extend(
                self._get_loop_task(task, op_name_to_for_loop_op))
        task_refs = include_loop_task_refs

    # Flatten condition task
    condition_task_refs = [
        ref for refs in condition_task_refs.values() for ref in refs
    ]

    # TODO: generate the PipelineRun template
    run_annotations = {
        'tekton.dev/output_artifacts':
            json.dumps(self.output_artifacts, sort_keys=True),
        'tekton.dev/input_artifacts':
            json.dumps(self.input_artifacts, sort_keys=True),
        'tekton.dev/artifact_bucket': DEFAULT_ARTIFACT_BUCKET,
        'tekton.dev/artifact_endpoint': DEFAULT_ARTIFACT_ENDPOINT,
        'tekton.dev/artifact_endpoint_scheme':
            DEFAULT_ARTIFACT_ENDPOINT_SCHEME,
        'tekton.dev/artifact_items':
            json.dumps(self.artifact_items, sort_keys=True),
        # disable Istio inject since Tekton cannot run with Istio sidecar
        'sidecar.istio.io/inject': 'false'
    }
    pipeline_run = {
        'apiVersion': tekton_api_version,
        'kind': 'PipelineRun',
        'metadata': {
            'name': sanitize_k8s_name(pipeline.name or 'Pipeline',
                                      suffix_space=4),
            # 'labels': get_default_telemetry_labels(),
            'annotations': run_annotations
        },
        'spec': {
            'params': [{
                'name': p['name'],
                'value': p.get('default', '')
            } for p in params],
            'pipelineSpec': {
                'params': params,
                'tasks': task_refs + condition_task_refs,
                'finally': finally_tasks
            }
        }
    }

    # TODO: pipelineRun additions

    # Generate TaskRunSpec PodTemplate:s
    task_run_spec = []
    for task in task_refs:
        # TODO: should loop-item tasks be included here?
        task_name = task['name']
        if LoopArguments.LOOP_ITEM_NAME_BASE in task_name:
            # Strip the loop-item suffix to look up the op by its own name.
            task_name = re.sub(
                r'-%s-.+$' % LoopArguments.LOOP_ITEM_NAME_BASE, '', task_name)
        op = pipeline.ops.get(task_name)
        if not op:
            raise RuntimeError("unable to find op with name '%s'" %
                               task["name"])
        pod_template = {}
        if op.affinity:
            pod_template["affinity"] = convert_k8s_obj_to_json(op.affinity)
        if op.tolerations:
            pod_template['tolerations'] = op.tolerations
        if op.node_selector:
            pod_template['nodeSelector'] = op.node_selector
        if pod_template:
            task_run_spec.append({
                "pipelineTaskName": task['name'],
                "taskPodTemplate": pod_template
            })
    if task_run_spec:
        pipeline_run['spec']['taskRunSpecs'] = task_run_spec

    # add workflow level timeout to pipeline run
    if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or pipeline.conf.timeout:
        pipeline_run['spec']['timeout'] = '%ds' % pipeline.conf.timeout

    # generate the Tekton podTemplate for image pull secret
    if len(pipeline.conf.image_pull_secrets) > 0:
        pod_template = pipeline_run['spec'].setdefault('podTemplate', {})
        pod_template['imagePullSecrets'] = [{
            "name": s.name
        } for s in pipeline.conf.image_pull_secrets]

    workflow = pipeline_run

    return workflow
def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Text = None,
    pipeline_description: Text = None,
    params_list: List[dsl.PipelineParam] = None,
    pipeline_conf: dsl.PipelineConf = None,
) -> Dict[Text, Any]:
    """Internal implementation of create_workflow.

    Runs ``pipeline_func`` inside a ``dsl.Pipeline`` context, compiles the
    result with ``self._create_pipeline_workflow``, passes it through
    ``fix_big_data_passing``, attaches the pipeline spec annotation and strips
    empty lists from the resulting structure.

    Args:
      pipeline_func: pipeline function; its argument names and defaults are
        read through ``inspect.getfullargspec``.
      pipeline_name: overrides the name extracted from ``pipeline_func``.
      pipeline_description: overrides the extracted description.
      params_list: pipeline parameters, as an alternative to declaring them on
        ``pipeline_func``. Their values are temporarily cleared while the
        pipeline function runs and restored afterwards.
      pipeline_conf: configuration that overrides the one of the dsl pipeline.

    Returns:
      The compiled workflow dictionary.

    Raises:
      ValueError: if pipeline params are given both on the pipeline function
        and in ``params_list``.
    """
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Currently only allow specifying pipeline params at one place.
    # Checked before the params are touched, so a rejected call does not leave
    # the caller's params with their default values cleared.
    if params_list and pipeline_meta.inputs:
        raise ValueError(
            'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
        )

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being to each component.
    default_param_values = {}
    for param in params_list:
        default_param_values[param.name] = param.value
        param.value = None

    declared_inputs = pipeline_meta.inputs or []
    args_list = []
    for arg_name in argspec.args:
        # Type of the first declared input with a matching name, if any.
        arg_type = next((input_spec.type for input_spec in declared_inputs
                         if input_spec.name == arg_name), None)
        args_list.append(
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                              param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
        pipeline_func(*args_list)

    # Configuration passed to the compiler is overriding. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
        args_list_with_defaults = [
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
            for arg_name in argspec.args
        ]
        if argspec.defaults:
            # Defaults belong to the trailing arguments, hence the reversed zip.
            for arg, default in zip(reversed(args_list_with_defaults),
                                    reversed(argspec.defaults)):
                arg.value = default.value if isinstance(
                    default, dsl.PipelineParam) else default
    elif params_list:
        # Or, if args are provided by params_list, fill in pipeline_meta.
        for param in params_list:
            param.value = default_param_values[param.name]

        args_list_with_defaults = params_list
        pipeline_meta.inputs = [
            InputSpec(name=param.name,
                      type=param.param_type,
                      default=param.value) for param in params_list
        ]

    op_transformers = [add_pod_env]

    # # By default adds telemetry instruments. Users can opt out toggling
    # # allow_telemetry.
    # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
    # if allow_telemetry:
    #   pod_labels = get_default_telemetry_labels()
    #   op_transformers.append(add_pod_labels(pod_labels))

    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    workflow = fix_big_data_passing(workflow)

    workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
        json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    # recursively strip empty structures, DANGER: this may remove necessary empty elements ?!
    def remove_empty_elements(obj):
        # Only empty lists are dropped; empty dicts and strings are kept.
        if not isinstance(obj, (dict, list)):
            return obj
        if isinstance(obj, list):
            return [remove_empty_elements(o) for o in obj if o != []]
        return {
            k: remove_empty_elements(v)
            for k, v in obj.items() if v != []
        }

    workflow = remove_empty_elements(workflow)

    return workflow
def fix_big_data_passing(
    workflow: List[Dict[Text, Any]]
) -> List[Dict[Text, Any]]:  # Tekton change signature
    """Rewrite a compiled workflow so artifact data is passed via workspaces.

    fix_big_data_passing takes a workflow where some artifact data is passed
    as parameters and converts it to a workflow where this data is passed as
    artifacts. The input list is deep-copied first; the copy is modified and
    returned.

    Args:
        workflow: The workflow to fix (list of Task / Pipeline / PipelineRun
            dictionaries; each item is indexed by its 'kind' key).

    Returns:
        The fixed workflow.

    Raises:
        AssertionError: on unexpected placeholder shapes, or when big data
            passing is detected but the workflow has no PipelineRun.

    Motivation:
        The DSL compiler only supports passing Tekton parameters. Due to the
        convoluted nature of the DSL compiler, artifact consumption and
        passing has been implemented on top of that using parameter passing.
        The artifact data is passed as parameters and the consumer template
        creates an artifact/file out of that data. Due to the limitations of
        Kubernetes and Tekton this scheme cannot pass data larger than a few
        kilobytes, preventing any serious use of artifacts.

        This function rewrites the compiled workflow so that the data
        consumed as artifact is passed as artifact. It also prunes the unused
        parameter outputs. This is important since if a big piece of data is
        ever returned through a file that is also output as parameter, the
        execution will fail. This makes it possible to pass large amounts of
        data.

    Implementation:
        1. Index the pipelines to understand how data is being passed and
           which inputs/outputs are connected to each other.
        2. Search for direct data consumers in container/resource templates
           and some pipeline task attributes (e.g. conditions and loops) to
           find out which inputs are directly consumed as
           parameters/artifacts.
        3. Propagate the consumption information upstream to all
           inputs/outputs all the way up to the data producers.
        4. Convert the inputs and outputs based on how they're consumed
           downstream.
        5. Use workspaces instead of results and params for big data passing.
        6. Add workspaces to tasks, pipelines and pipelineruns if a param is
           big data.
        7. A PVC definition (built by create_pvc) is appended for each
           pipelinerun workspace if big data is passed, as workspaces need
           to use it. The user needs to define a proper volume or enable
           dynamic volume provisioning, see:
           https://kubernetes.io/docs/concepts/storage/dynamic-provisioning
    """
    workflow = copy.deepcopy(workflow)

    # A "resource template" is any workflow item whose spec declares both an
    # 'action' and a 'success-condition' param.
    resource_templates = []
    for template in workflow:
        resource_params = [
            param.get('name')
            for param in template.get('spec', {}).get('params', [])
            if param.get('name') == 'action'
            or param.get('name') == 'success-condition'
        ]
        if 'action' in resource_params and 'success-condition' in resource_params:
            resource_templates.append(template)

    resource_template_names = set(
        template.get('metadata', {}).get('name')
        for template in resource_templates)

    # Container templates are the remaining Tasks (not resource templates).
    container_templates = [
        template for template in workflow if template['kind'] == 'Task' and
        template.get('metadata', {}).get('name') not in resource_template_names
    ]

    pipeline_templates = [
        template for template in workflow if template['kind'] == 'Pipeline'
    ]

    pipelinerun_templates = [
        template for template in workflow
        if template['kind'] == 'PipelineRun'
    ]

    # 1. Index the pipelines to understand how data is being passed and which
    #  inputs/outputs are connected to each other.
    template_input_to_parent_pipeline_inputs = {
    }  # (task_template_name, task_input_name) -> Set[(pipeline_template_name, pipeline_input_name)]
    template_input_to_parent_task_outputs = {
    }  # (task_template_name, task_input_name) -> Set[(upstream_template_name, upstream_output_name)]
    template_input_to_parent_constant_arguments = {
    }  # (task_template_name, task_input_name) -> Set[argument_value] # Unused
    # NOTE(review): nothing in this function ever populates the mapping below,
    # so mark_upstream_ios_of_output never recurses further upstream.
    pipeline_output_to_parent_template_outputs = {
    }  # (pipeline_template_name, output_name) -> Set[(upstream_template_name, upstream_output_name)]

    for template in pipeline_templates:
        pipeline_template_name = template.get('metadata', {}).get('name')

        # Indexing task arguments (regular tasks plus 'finally' tasks)
        pipeline_tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        task_name_to_template_name = {
            task['name']: task['taskRef']['name']
            for task in pipeline_tasks
        }
        for task in pipeline_tasks:
            task_template_name = task['taskRef']['name']
            parameter_arguments = task['params']
            for parameter_argument in parameter_arguments:
                task_input_name = parameter_argument['name']
                argument_value = parameter_argument['value']

                argument_placeholder_parts = deconstruct_tekton_single_placeholder(
                    argument_value)
                if not argument_placeholder_parts:  # Argument is considered to be constant string
                    template_input_to_parent_constant_arguments.setdefault(
                        (task_template_name, task_input_name),
                        set()).add(argument_value)

                else:
                    placeholder_type = argument_placeholder_parts[0]
                    if placeholder_type not in ('params', 'outputs', 'tasks',
                                                'steps', 'workflow', 'pod',
                                                'item'):
                        # Do not fail on Jinja or other double-curly-brace templates
                        continue
                    if placeholder_type == 'params':
                        # Argument comes from a pipeline-level parameter
                        pipeline_input_name = argument_placeholder_parts[1]
                        template_input_to_parent_pipeline_inputs.setdefault(
                            (task_template_name, task_input_name), set()).add(
                                (pipeline_template_name, pipeline_input_name))
                    elif placeholder_type == 'tasks':
                        # Argument comes from an upstream task result:
                        # parts are (tasks, <task name>, results, <result name>)
                        upstream_task_name = argument_placeholder_parts[1]
                        assert argument_placeholder_parts[2] == 'results'
                        upstream_output_name = argument_placeholder_parts[3]
                        upstream_template_name = task_name_to_template_name[
                            upstream_task_name]
                        template_input_to_parent_task_outputs.setdefault(
                            (task_template_name, task_input_name), set()).add(
                                (upstream_template_name, upstream_output_name))
                    elif placeholder_type == 'item' or placeholder_type == 'workflow' or placeholder_type == 'pod':
                        # workflow.parameters.* placeholders are not supported,
                        # but the DSL compiler does not produce those.
                        template_input_to_parent_constant_arguments.setdefault(
                            (task_template_name, task_input_name),
                            set()).add(argument_value)
                    else:
                        raise AssertionError

                    pipeline_input_name = extract_tekton_input_parameter_name(
                        argument_value)
                    if pipeline_input_name:
                        template_input_to_parent_pipeline_inputs.setdefault(
                            (task_template_name, task_input_name), set()).add(
                                (pipeline_template_name, pipeline_input_name))
                    else:
                        template_input_to_parent_constant_arguments.setdefault(
                            (task_template_name, task_input_name),
                            set()).add(argument_value)
    # Finished indexing the pipelines

    # 2. Search for direct data consumers in container/resource templates and some pipeline task attributes
    #  (e.g. conditions and loops) to find out which inputs are directly consumed as parameters/artifacts.
    # Each set below holds (template_name, input_or_output_name) tuples.
    inputs_directly_consumed_as_parameters = set()
    inputs_directly_consumed_as_artifacts = set()
    outputs_directly_consumed_as_parameters = set()

    # Searching for artifact input consumers in container template inputs
    for template in container_templates:
        template_name = template.get('metadata', {}).get('name')
        for input_artifact in template.get('spec', {}).get('artifacts', {}):
            raw_data = input_artifact['raw'][
                'data']  # The structure must exist
            # The raw data must be a single input parameter reference. Otherwise (e.g. it's a string
            #  or a string with multiple inputs) we should not do the conversion to artifact passing.
            input_name = extract_tekton_input_parameter_name(raw_data)
            if input_name:
                inputs_directly_consumed_as_artifacts.add(
                    (template_name, input_name))
                del input_artifact[
                    'raw']  # Deleting the "default value based" data passing hack
                # so that it's replaced by the "argument based" way of data passing.
                input_artifact[
                    'name'] = input_name  # The input artifact name should be the same
                # as the original input parameter name

    # Searching for parameter input consumers in pipeline templates
    # TODO: loop params are not supported for tekton yet, refer to https://github.com/kubeflow/kfp-tekton/issues/82
    for template in pipeline_templates:
        template_name = template.get('metadata', {}).get('name')
        pipeline_tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        task_name_to_template_name = {
            task['name']: task['taskRef']['name']
            for task in pipeline_tasks
        }
        for task in pipeline_tasks:
            # We do not care about the inputs mentioned in task arguments
            # since we will be free to switch them from parameters to artifacts
            task_without_arguments = task.copy()  # Shallow copy
            task_without_arguments.pop('params', None)
            placeholders = extract_all_tekton_placeholders(
                task_without_arguments)
            for placeholder in placeholders:
                parts = placeholder.split('.')
                placeholder_type = parts[0]
                if placeholder_type not in ('inputs', 'outputs', 'tasks',
                                            'steps', 'workflow', 'pod',
                                            'item'):
                    # Do not fail on Jinja or other double-curly-brace templates
                    continue
                if placeholder_type == 'inputs':
                    # NOTE(review): this branch checks 'parameters' while the
                    # container-template scan below checks 'params' - confirm
                    # which spelling pipeline tasks can actually contain.
                    if parts[1] == 'parameters':
                        input_name = parts[2]
                        inputs_directly_consumed_as_parameters.add(
                            (template_name, input_name))
                    else:
                        raise AssertionError
                elif placeholder_type == 'tasks':
                    upstream_task_name = parts[1]
                    assert parts[2] == 'results'
                    upstream_output_name = parts[3]
                    upstream_template_name = task_name_to_template_name[
                        upstream_task_name]
                    outputs_directly_consumed_as_parameters.add(
                        (upstream_template_name, upstream_output_name))
                elif placeholder_type == 'workflow' or placeholder_type == 'pod':
                    pass
                elif placeholder_type == 'item':
                    raise AssertionError(
                        'The "{{item}}" placeholder is not expected outside task arguments.'
                    )
                else:
                    raise AssertionError(
                        'Unexpected placeholder type "{}".'.format(
                            placeholder_type))

    # Searching for parameter input consumers in container and resource templates
    for template in container_templates + resource_templates:
        template_name = template.get('metadata', {}).get('name')
        placeholders = extract_all_tekton_placeholders(template)
        for placeholder in placeholders:
            parts = placeholder.split('.')
            placeholder_type = parts[0]
            if placeholder_type not in ('inputs', 'outputs', 'tasks', 'steps',
                                        'workflow', 'pod', 'item', 'results'):
                # Do not fail on Jinja or other double-curly-brace templates
                continue

            if placeholder_type == 'workflow' or placeholder_type == 'pod':
                pass
            elif placeholder_type == 'inputs':
                if parts[1] == 'params':
                    input_name = parts[2]
                    inputs_directly_consumed_as_parameters.add(
                        (template_name, input_name))
                elif parts[1] == 'artifacts':
                    raise AssertionError(
                        'Found unexpected Tekton input artifact placeholder in container template: {}'
                        .format(placeholder))
                else:
                    raise AssertionError(
                        'Found unexpected Tekton input placeholder in container template: {}'
                        .format(placeholder))
            elif placeholder_type == 'results':
                # A template referencing its own result counts as that output
                # being consumed as a parameter.
                input_name = parts[1]
                outputs_directly_consumed_as_parameters.add(
                    (template_name, input_name))
            else:
                raise AssertionError(
                    'Found unexpected Tekton placeholder in container template: {}'
                    .format(placeholder))

    # Finished indexing data consumers

    # 3. Propagate the consumption information upstream to all inputs/outputs all the way up to the data producers.
    inputs_consumed_as_parameters = set()
    inputs_consumed_as_artifacts = set()

    outputs_consumed_as_parameters = set()
    outputs_consumed_as_artifacts = set()

    def mark_upstream_ios_of_input(template_input, marked_inputs,
                                   marked_outputs):
        # Stopping if the input has already been visited to save time and handle recursive calls
        if template_input in marked_inputs:
            return
        marked_inputs.add(template_input)

        upstream_inputs = template_input_to_parent_pipeline_inputs.get(
            template_input, [])
        for upstream_input in upstream_inputs:
            mark_upstream_ios_of_input(upstream_input, marked_inputs,
                                       marked_outputs)

        upstream_outputs = template_input_to_parent_task_outputs.get(
            template_input, [])
        for upstream_output in upstream_outputs:
            mark_upstream_ios_of_output(upstream_output, marked_inputs,
                                        marked_outputs)

    def mark_upstream_ios_of_output(template_output, marked_inputs,
                                    marked_outputs):
        # Stopping if the output has already been visited to save time and handle recursive calls
        if template_output in marked_outputs:
            return
        marked_outputs.add(template_output)

        upstream_outputs = pipeline_output_to_parent_template_outputs.get(
            template_output, [])
        for upstream_output in upstream_outputs:
            mark_upstream_ios_of_output(upstream_output, marked_inputs,
                                        marked_outputs)

    # NOTE(review): the loop variable `input` shadows the builtin of the same name.
    for input in inputs_directly_consumed_as_parameters:
        mark_upstream_ios_of_input(input, inputs_consumed_as_parameters,
                                   outputs_consumed_as_parameters)
    for input in inputs_directly_consumed_as_artifacts:
        mark_upstream_ios_of_input(input, inputs_consumed_as_artifacts,
                                   outputs_consumed_as_artifacts)
    for output in outputs_directly_consumed_as_parameters:
        mark_upstream_ios_of_output(output, inputs_consumed_as_parameters,
                                    outputs_consumed_as_parameters)

    # 4. Convert the inputs, outputs and arguments based on how they're consumed downstream.
    # Add workspaces to pipeline and pipeline task_ref if big data passing
    pipeline_workspaces = set()
    pipelinerun_workspaces = set()
    # Names of the templates that produce at least one artifact-consumed output
    output_tasks_consumed_as_artifacts = {
        output[0]
        for output in outputs_consumed_as_artifacts
    }
    # task_workspaces = set()
    # NOTE(review): pipeline_workspaces is reassigned on every iteration, so
    # only the value returned for the last pipeline survives - confirm that a
    # workflow never holds more than one Pipeline.
    for pipeline in pipeline_templates:
        # Converting pipeline inputs
        pipeline, pipeline_workspaces = big_data_passing_pipeline(
            pipeline, inputs_consumed_as_artifacts,
            output_tasks_consumed_as_artifacts)

    # Add workspaces to pipelinerun if big data passing
    # Check whether a pipelinerun was generated, raise an error if not.
    if pipeline_workspaces:
        if not pipelinerun_templates:
            raise AssertionError(
                'Found big data passing, please enable generate_pipelinerun for your complier'
            )
        for pipelinerun in pipelinerun_templates:
            pipeline, pipelinerun_workspaces = big_data_passing_pipelinerun(
                pipelinerun, pipeline_workspaces)

    # Use workspaces to tasks if big data passing instead of 'results', 'copy-inputs'
    # (the return value is only bound to the loop variable and not stored).
    for task_template in container_templates:
        task_template = big_data_passing_tasks(task_template,
                                               inputs_consumed_as_artifacts,
                                               outputs_consumed_as_artifacts)

    # Create pvc for pipelinerun if big data passing.
    # As we used workspaces in tekton pipelines which depends on it.
    # User need to create PV manually, or enable dynamic volume provisioning, refer to the link of:
    # https://kubernetes.io/docs/concepts/storage/dynamic-provisioning
    # TODO: Remove PVC if Tekton version > = 0.12, use 'volumeClaimTemplate' instead
    if pipelinerun_workspaces:
        for pipelinerun in pipelinerun_workspaces:
            workflow.append(create_pvc(pipelinerun))

    # Remove input parameters unless they're used downstream.
    # This also removes unused container template inputs if any.
    for template in container_templates + pipeline_templates:
        spec = template.get('spec', {})
        spec['params'] = [
            input_parameter for input_parameter in spec.get('params', [])
            if (template.get('metadata', {}).get('name'),
                input_parameter['name']) in inputs_consumed_as_parameters
        ]

    # Remove output parameters unless they're used downstream
    for template in container_templates + pipeline_templates:
        spec = template.get('spec', {})
        spec['results'] = [
            output_parameter for output_parameter in spec.get('results', [])
            if (template.get('metadata', {}).get('name'),
                output_parameter['name']) in outputs_consumed_as_parameters
        ]
        # tekton results doesn't support underscore
        renamed_results_in_pipeline_task = set()  # (old_name, new_name) pairs
        for task_result in spec['results']:
            task_result_old_name = task_result.get('name')
            task_result_new_name = sanitize_k8s_name(task_result_old_name)
            if task_result_new_name != task_result_old_name:
                task_result['name'] = task_result_new_name
                renamed_results_in_pipeline_task.add(
                    (task_result_old_name, task_result_new_name))
        # NOTE(review): every call below starts from the same `spec` object;
        # if replace_big_data_placeholder returns a new object, only the last
        # rename is kept - confirm against the helper.
        for renamed_result in renamed_results_in_pipeline_task:
            # Change results.downloaded_resultOutput to results.downloaded-resultoutput
            template['spec'] = replace_big_data_placeholder(
                spec, 'results.%s' % renamed_result[0],
                'results.%s' % renamed_result[1])

    # Remove pipeline task parameters unless they're used downstream
    for template in pipeline_templates:
        tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        for task in tasks:
            # Keep an argument when it is consumed as a parameter and not as
            # an artifact, or when the task refers to a resource template.
            task['params'] = [
                parameter_argument
                for parameter_argument in task.get('params', [])
                if (task['taskRef']['name'], parameter_argument['name']
                    ) in inputs_consumed_as_parameters and
                (task['taskRef']['name'], parameter_argument['name']
                 ) not in inputs_consumed_as_artifacts
                or task['taskRef']['name'] in resource_template_names
            ]

            # tekton results doesn't support underscore
            # NOTE(review): len() below assumes deconstruct_tekton_single_placeholder
            # never returns None for a constant argument - confirm.
            for argument in task['params']:
                argument_value = argument.get('value')
                argument_placeholder_parts = deconstruct_tekton_single_placeholder(
                    argument_value)
                if len(argument_placeholder_parts) == 4 \
                        and argument_placeholder_parts[0] == 'tasks':
                    argument['value'] = '$(tasks.%s.%s.%s)' % (
                        argument_placeholder_parts[1],
                        argument_placeholder_parts[2],
                        sanitize_k8s_name(argument_placeholder_parts[3]))

    # Need to confirm:
    # I didn't find the use cases to support workflow parameter consumed as artifacts downstream in tekton.
    # Whether this case need to be supporting?

    clean_up_empty_workflow_structures(workflow)
    return workflow
def _process_output_artifacts(outputs_dict: Dict[Text, Any], volume_mount_step_template: List[Dict[Text, Any]], volume_template: List[Dict[Text, Any]], replaced_param_list: List[Text], artifact_to_result_mapping: Dict[Text, Any]): """Process output artifacts to replicate the same behavior as Argo. For storing artifacts, we will be using the minio/mc image because we need to upload artifacts to any type of object storage and endpoint. The minio/mc is the best image suited for this task because the default KFP is using minio and it also works well with other s3/gcs type of storage. - image: minio/mc name: copy-artifacts script: | #!/usr/bin/env sh mc config host add storage http://minio-service.$NAMESPACE:9000 $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY mc cp /tmp/file.txt storage/$(inputs.params.bucket)/runs/$PIPELINERUN/$TASKRUN/file.txt Args: outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes replaced_param_list {List[Text]}: List of parameters that already set up as results artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results Returns: Dict[Text, Any] """ if outputs_dict.get('artifacts'): # TODO: Pull default values from KFP configmap when integrated with KFP. 
storage_location = outputs_dict['artifacts'][0].get('s3', {}) insecure = storage_location.get("insecure", True) endpoint = storage_location.get("endpoint", "minio-service.$NAMESPACE:9000") # We want to use the insecure flag to figure out whether to use http or https scheme endpoint = re.sub(r"https?://", "", endpoint) endpoint = 'http://' + endpoint if insecure else 'https://' + endpoint access_key = storage_location.get("accessKeySecret", { "name": "mlpipeline-minio-artifact", "key": "accesskey" }) secret_access_key = storage_location.get("secretKeySecret", { "name": "mlpipeline-minio-artifact", "key": "secretkey" }) bucket = storage_location.get("bucket", "mlpipeline") copy_artifacts_step = { 'image': 'minio/mc', 'name': 'copy-artifacts', 'script': textwrap.dedent('''\ #!/usr/bin/env sh mc config host add storage %s $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY ''' % (endpoint)), 'env': [{ 'name': 'PIPELINERUN', 'valueFrom': { 'fieldRef': { 'fieldPath': "metadata.labels['tekton.dev/pipelineRun']" } } }, { 'name': 'PIPELINETASK', 'valueFrom': { 'fieldRef': { 'fieldPath': "metadata.labels['tekton.dev/pipelineTask']" } } }, { 'name': 'NAMESPACE', 'valueFrom': { 'fieldRef': { 'fieldPath': "metadata.namespace" } } }, { 'name': 'AWS_ACCESS_KEY_ID', 'valueFrom': { 'secretKeyRef': { 'name': access_key['name'], 'key': access_key['key'] } } }, { 'name': 'AWS_SECRET_ACCESS_KEY', 'valueFrom': { 'secretKeyRef': { 'name': secret_access_key['name'], 'key': secret_access_key['key'] } } }] } mounted_artifact_paths = [] for artifact in outputs_dict['artifacts']: if artifact['name'] in replaced_param_list: copy_artifacts_step['script'] = copy_artifacts_step['script'] + \ 'tar -cvzf %s.tgz $(results.%s.path)\n' % (artifact['name'], artifact_to_result_mapping[artifact['name']]) + \ 'mc cp %s.tgz storage/%s/runs/$PIPELINERUN/$PIPELINETASK/%s.tgz\n' % (artifact['name'], bucket, artifact['name']) else: copy_artifacts_step['script'] = copy_artifacts_step['script'] + \ 'tar -cvzf %s.tgz %s\n' 
% (artifact['name'], artifact['path']) + \ 'mc cp %s.tgz storage/%s/runs/$PIPELINERUN/$PIPELINETASK/%s.tgz\n' % (artifact['name'], bucket, artifact['name']) if artifact['path'].rsplit("/", 1)[0] not in mounted_artifact_paths: volume_mount_step_template.append({ 'name': sanitize_k8s_name(artifact['name']), 'mountPath': artifact['path'].rsplit("/", 1)[0] }) volume_template.append({ 'name': sanitize_k8s_name(artifact['name']), 'emptyDir': {} }) mounted_artifact_paths.append(artifact['path'].rsplit( "/", 1)[0]) return copy_artifacts_step else: return {}
def _validate_workflow(workflow: Dict[Text, Any]): # verify that all names and labels conform to kubernetes naming standards # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/ # https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ def _find_items(obj, search_key, current_path="", results_dict=dict()) -> dict: if isinstance(obj, dict): if search_key in obj: results_dict.update( {"%s.%s" % (current_path, search_key): obj[search_key]}) for k, v in obj.items(): _find_items(v, search_key, "%s.%s" % (current_path, k), results_dict) elif isinstance(obj, list): for i, list_item in enumerate(obj): _find_items(list_item, search_key, "%s[%i]" % (current_path, i), results_dict) return {k.lstrip("."): v for k, v in results_dict.items()} non_k8s_names = { path: name for path, name in _find_items(workflow, "name").items() if "metadata" in path and name != sanitize_k8s_name(name) or "param" in path and name != sanitize_k8s_name(name, allow_capital_underscore=True) } non_k8s_labels = { path: k_v_dict for path, k_v_dict in _find_items(workflow, "labels", "", {}).items() if "metadata" in path and any([ k != sanitize_k8s_name(k, allow_capital_underscore=True, allow_dot=True, allow_slash=True, max_length=253) or v != sanitize_k8s_name(v, allow_capital_underscore=True, allow_dot=True) for k, v in k_v_dict.items() ]) } non_k8s_annotations = { path: k_v_dict for path, k_v_dict in _find_items(workflow, "annotations", "", {}).items() if "metadata" in path and any([ k != sanitize_k8s_name(k, allow_capital_underscore=True, allow_dot=True, allow_slash=True, max_length=253) for k in k_v_dict.keys() ]) } error_msg_tmplt = textwrap.dedent("""\ Internal compiler error: Found non-compliant Kubernetes %s: %s Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues attaching the pipeline DSL code and the pipeline YAML.""") if non_k8s_names: raise RuntimeError( error_msg_tmplt % ("names", json.dumps(non_k8s_names, sort_keys=False, 
indent=2))) if non_k8s_labels: raise RuntimeError( error_msg_tmplt % ("labels", json.dumps(non_k8s_labels, sort_keys=False, indent=2))) if non_k8s_annotations: raise RuntimeError( error_msg_tmplt % ("annotations", json.dumps(non_k8s_annotations, sort_keys=False, indent=2))) # TODO: Tekton pipeline parameter validation # workflow = workflow.copy() # # Working around Argo lint issue # for argument in workflow['spec'].get('arguments', {}).get('parameters', []): # if 'value' not in argument: # argument['value'] = '' # yaml_text = dump_yaml(workflow) # if '{{pipelineparam' in yaml_text: # raise RuntimeError( # '''Internal compiler error: Found unresolved PipelineParam. # Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues # attaching the pipeline code and the pipeline package.''' # ) # TODO: Tekton lint, if a tool exists for it # # Running Argo lint if available # import shutil # import subprocess # argo_path = shutil.which('argo') # if argo_path: # result = subprocess.run([argo_path, 'lint', '/dev/stdin'], input=yaml_text.encode('utf-8'), # stdout=subprocess.PIPE, stderr=subprocess.PIPE) # if result.returncode: # raise RuntimeError( # '''Internal compiler error: Compiler has produced Argo-incompatible workflow. # Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues # attaching the pipeline code and the pipeline package. # Error: {}'''.format(result.stderr.decode('utf-8')) # ) pass
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts=None,
                    enable_artifacts=False):
    """Generate a Tekton Task template given an operator inherited from BaseOp.

    Args:
        op: The ContainerOp or ResourceOp to convert. It is updated in place
            by _process_base_ops.
        pipelinerun_output_artifacts: Optional dict, keyed by task name, that
            collects the output artifact name/path entries of each task. It is
            updated in place when enable_artifacts is set. A fresh dict is
            used when omitted.
        enable_artifacts: Whether output artifacts are generated and tracked.

    Returns:
        The Task template dictionary.
    """
    # A mutable default would be shared (and keep growing) across calls.
    if pipelinerun_output_artifacts is None:
        pipelinerun_output_artifacts = {}

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Calling containerOp step as "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        if enable_artifacts:
            for output_artifact in output_artifacts:
                pipelinerun_output_artifacts.setdefault(
                    processed_op.name, []).append({
                        'name': output_artifact['name'],
                        'path': output_artifact['path']
                    })

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if '{{workflow.name}}' in manifest:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove yaml quote in order to read bash variables
            # (raw strings: "\g" is not a valid escape in a normal literal)
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            # The resource template already carries params; add to them.
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        # Extend the volumes already present on the spec. The key must be
        # 'volumes': reading 'volume' always yielded [] and so discarded any
        # volumes registered earlier (presumably by _update_volumes - verify).
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = \
            processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    return template
def AnySequencer(any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
                 name: str = None,
                 statusPath: str = None,
                 skippingPolicy: str = None,
                 errorPolicy: str = None,
                 image: str = ANY_SEQUENCER_IMAGE):
    """Build a containerOp that proceeds once any dependent op has succeeded.

    Args:
        name: The name of the containerOp. It does not have to be unique within a pipeline
            because the pipeline will generate a unique new name in case of conflicts.
        any: ContainerOps whose completion is watched, and/or ConditionOperators
            describing the condition that must be met to continue.
        statusPath: The location to write the output status.
        skippingPolicy: How the Any Sequencer reacts when no dependency condition
            matches. One of `skipOnNoMatch` or `errorOnNoMatch`; with `skipOnNoMatch`
            a status with value "Skipped" is generated and the exit status is
            still succeeded.
        errorPolicy: Either `failOnError` or `continueOnError`. With
            `continueOnError` a status with value "Failed" is generated but the
            exit status is still succeeded; with `failOnError` the Any Sequencer
            fails in Tekton terms.
        image: The image implementing the any sequencer logic.

    Returns:
        The containerOp instantiated from the generated component definition.
    """
    arguments = [
        "--namespace", "$(context.pipelineRun.namespace)",
        "--prName", "$(context.pipelineRun.name)",
    ]

    # Split the dependencies into watched task names and conditions
    # (single pass, since `any` may be a one-shot iterable).
    watched_tasks = []
    conditions = []
    for dependency in any:
        if isinstance(dependency, dsl.ContainerOp):
            watched_tasks.append(sanitize_k8s_name(dependency.name))
        elif isinstance(dependency, ConditionOperator):
            conditions.append(dependency)

    if watched_tasks:
        arguments += ["--taskList", "'" + ",".join(watched_tasks) + "'"]

    if statusPath is not None:
        arguments += ["--statusPath", '{outputPath: %s}' % statusPath]

    if skippingPolicy is not None:
        assert skippingPolicy in ("skipOnNoMatch", "errorOnNoMatch")
        arguments += ["--skippingPolicy", skippingPolicy]

    if errorPolicy is not None:
        assert errorPolicy in ("continueOnError", "failOnError")
        arguments += ["--errorPolicy", errorPolicy]

    arguments += processConditionArgs(conditions)

    component_text = '''\
    name: %s
    description: 'Proceed when any of the dependents completed successfully'
    outputs:
    - {name: %s, description: 'The output file to create the status'}
    implementation:
        container:
            image: %s
            command: [any-task]
            args: [%s]
    ''' % (name, statusPath, image, ",".join(arguments))
    component_factory = components.load_component_from_text(component_text)
    return component_factory()
def big_data_passing_tasks(task: dict, inputs_tasks: set,
                           outputs_tasks: set) -> dict:
    """Switch a Task's big-data inputs/outputs from results/params to a workspace.

    For every result listed in outputs_tasks, the `$(results.<name>.path)`
    placeholder in the task spec is replaced by a path inside the task's
    workspace, and the result is dropped from `results`. For every param
    listed in inputs_tasks, the path of the matching input artifact (or
    `/tmp/inputs/text/data` when no artifact matches) is replaced by a
    workspace path, and the param is dropped from `params`. Input artifacts
    that are not in inputs_tasks are handed to input_artifacts_tasks.
    Finally the `artifacts` entry is removed from the spec.

    Args:
        task: The Task template; it is modified and returned.
        inputs_tasks: Set of (task_name, input_name) consumed as artifacts.
        outputs_tasks: Set of (task_name, output_name) consumed as artifacts.

    Returns:
        The updated Task template.
    """
    task_name = task.get('metadata', {}).get('name')
    task_spec = task.get('spec', {})

    # Data passing for the task outputs
    task_outputs = task_spec.get('results', [])
    for task_output in task_outputs:
        output_name = task_output.get('name')
        if (task_name, output_name) not in outputs_tasks:
            continue
        if not task_spec.setdefault('workspaces', []):
            task_spec['workspaces'].append({"name": task_name})
        # Replace the args for the outputs in the task_spec
        # $(results.task_output.get('name').path)  -->
        # $(workspaces.task_name.path)/task_name-task_output.get('name')
        placeholder = '$(results.%s.path)' % (sanitize_k8s_name(output_name))
        workspaces_parameter = '$(workspaces.%s.path)/%s-%s' % (
            task_name, task_name, output_name)
        # Rebind task_spec so later iterations build on this rewrite.
        task_spec = task['spec'] = replace_big_data_placeholder(
            task['spec'], placeholder, workspaces_parameter)

    # Remove artifacts outputs from results
    task['spec']['results'] = [
        result for result in task_outputs
        if (task_name, result.get('name')) not in outputs_tasks
    ]

    # Data passing for task inputs
    task_spec = task.get('spec', {})
    task_params = task_spec.get('params', [])
    # Snapshot taken before any rewrite, so the original artifact paths stay
    # available as search strings.
    task_artifacts = task_spec.get('artifacts', [])
    for task_param in task_params:
        param_name = task_param.get('name')
        if (task_name, param_name) not in inputs_tasks:
            continue
        if not task_spec.setdefault('workspaces', []):
            task_spec['workspaces'].append({"name": task_name})
        # Replace the args for the inputs in the task_spec
        # /tmp/inputs/text/data  ---->
        # $(workspaces.task_name.path)/task_param.get('name')
        placeholder = '/tmp/inputs/text/data'
        for task_artifact in task_artifacts:
            if task_artifact.get('name') == param_name:
                placeholder = task_artifact.get('path')
        workspaces_parameter = '$(workspaces.%s.path)/%s' % (task_name,
                                                             param_name)
        # Always rewrite the *current* spec. Feeding the original spec to the
        # helper on every iteration would drop the previous replacements
        # whenever the helper returns a new object (presumably it does -
        # verify against replace_big_data_placeholder).
        task_spec = task['spec'] = replace_big_data_placeholder(
            task_spec, placeholder, workspaces_parameter)

    # Handle the case of input artifact without dependent the output of other tasks
    for task_artifact in task_artifacts:
        if (task_name, task_artifact.get('name')) not in inputs_tasks:
            # add input artifact processes
            task = input_artifacts_tasks(task, task_artifact)

    # Remove artifacts parameter from params
    task['spec']['params'] = [
        param for param in task_params
        if (task_name, param.get('name')) not in inputs_tasks
    ]

    # Remove artifacts from task_spec
    task['spec'].pop('artifacts', None)

    return task
def big_data_passing_tasks(prname: str, task: dict, pipelinerun_template: dict,
                           inputs_tasks: set, outputs_tasks: set,
                           loops_pipeline: dict, loop_name_prefix: str) -> dict:
    """Rewrite a pipeline task so its big-data results/params use a workspace.

    For every result listed in ``outputs_tasks`` the ``$(results.<name>.path)``
    placeholder is replaced, in the task spec and in the task's entry of the
    ``tekton.dev/artifact_items`` annotation, by a path under the task's
    workspace that includes ``$(context.taskRun.name)``. For every param
    listed in ``inputs_tasks`` the input path is replaced by the matching
    workspace path. Afterwards input artifacts are processed, an optional
    ``copy-results-artifacts`` step is appended, and the rewritten params and
    the ``artifacts`` entry are removed from the task spec.

    Args:
        prname {str}: name that is paired with an artifact name and looked up
            in ``inputs_tasks`` to detect artifacts fed by a pipeline
            parameter (presumably the pipeline-run name; confirm with caller).
        task {dict}: pipeline task; ``task['name']`` and ``task['taskSpec']``
            are read, and ``task['params']`` is appended to when a
            ``<task>-trname`` param is added. Modified in place.
        pipelinerun_template {dict}: its
            ``metadata.annotations['tekton.dev/artifact_items']`` entry is
            read and updated, and ``spec.params`` is read.
        inputs_tasks {set}: ``(name, param_name)`` tuples for big-data inputs.
        outputs_tasks {set}: ``(task_name, result_name)`` tuples for big-data
            outputs.
        loops_pipeline {dict}: loop name -> loop definition; ``task_list`` is
            read and ``spec.params`` is appended to.
        loop_name_prefix {str}: text removed from a loop name to obtain the
            name used when walking up to the enclosing loop.

    Returns:
        dict: the rewritten task.
    """
    task_name = task.get('name')
    task_spec = task.get('taskSpec', {})
    # Data passing for the task outputs
    appended_taskrun_name = False
    # NOTE(review): on the first matching output the loop below appends to
    # task['taskSpec']['results'], which is the same list being iterated at
    # that point; the extra "taskrun-name" entry is then visited too.
    for task_output in task.get('taskSpec', {}).get('results', []):
        if (task_name, task_output.get('name')) in outputs_tasks:
            # Declare the task-named workspace only if none is declared yet.
            if not task.get('taskSpec', {}).setdefault('workspaces', []):
                task.get('taskSpec', {})['workspaces'].append({"name": task_name})
            # Replace the args for the outputs in the task_spec
            # $(results.<output name>.path)  -->
            # $(workspaces.<task name>.path)/<BIG_DATA_MIDPATH>/$(context.taskRun.name)/<output name>
            placeholder = '$(results.%s.path)' % (sanitize_k8s_name(
                task_output.get('name')))
            workspaces_parameter = '$(workspaces.%s.path)/%s/%s/%s' % (
                task_name, BIG_DATA_MIDPATH, "$(context.taskRun.name)",
                task_output.get('name'))
            # For child nodes to know the taskrun name, it has to pass to results via /tekton/results emptydir
            if not appended_taskrun_name:
                # Done once per task: add a step that writes the taskrun name
                # into a new "taskrun-name" result.
                copy_taskrun_name_step = _get_base_step('output-taskrun-name')
                copy_taskrun_name_step[
                    'script'] += 'echo -n "%s" > $(results.taskrun-name.path)\n' % (
                        "$(context.taskRun.name)")
                task['taskSpec']['results'].append({"name": "taskrun-name"})
                task['taskSpec']['steps'].append(copy_taskrun_name_step)
                _append_original_pr_name_env(task)
                appended_taskrun_name = True
            # The helper's return value becomes the new task spec.
            task['taskSpec'] = replace_big_data_placeholder(
                task.get("taskSpec", {}), placeholder, workspaces_parameter)
            # Apply the same replacement to this task's artifact_items entry
            # and store the annotation back.
            artifact_items = pipelinerun_template['metadata']['annotations'][
                'tekton.dev/artifact_items']
            artifact_items[task['name']] = replace_big_data_placeholder(
                artifact_items[task['name']], placeholder,
                workspaces_parameter)
            pipelinerun_template['metadata']['annotations']['tekton.dev/artifact_items'] = \
                artifact_items

    # Re-read the spec: task['taskSpec'] may have been reassigned above.
    task_spec = task.get('taskSpec', {})
    task_params = task_spec.get('params', [])
    task_artifacts = task_spec.get('artifacts', [])
    # Data passing for task inputs
    for task_param in task_params:
        if (task_name, task_param.get('name')) in inputs_tasks:
            if not task_spec.setdefault('workspaces', []):
                task_spec['workspaces'].append({"name": task_name})
            # Replace the args for the inputs in the task_spec
            # <artifact path, or /tmp/inputs/text/data when no artifact has
            # the param's name>  -->  a path under $(workspaces.<task name>.path)
            placeholder = '/tmp/inputs/text/data'
            for task_artifact in task_artifacts:
                if task_artifact.get('name') == task_param.get('name'):
                    placeholder = task_artifact.get('path')
            # Find the producing (task, output) pair whose names joined with
            # '-' equal this param's name; both stay "" when none matches.
            task_param_task_name = ""
            task_param_param_name = ""
            for o_task in outputs_tasks:
                if '-'.join(o_task) == task_param.get('name'):
                    task_param_task_name = o_task[0]
                    task_param_param_name = o_task[1]
                    break

            def append_taskrun_params(task_name_append: str):
                # Ensure the task has a "<producer>-trname" param bound to the
                # producer's taskrun-name result; when it is newly added, also
                # add a "<producer>-taskrun-name" param to every loop that
                # (transitively) contains this task.
                taskrun_param_name = task_name_append + "-trname"
                inserted_taskrun_param = False
                for param in task['taskSpec'].get('params', []):
                    if param.get('name', "") == taskrun_param_name:
                        inserted_taskrun_param = True
                        break
                if not inserted_taskrun_param:
                    task['taskSpec']['params'].append(
                        {"name": taskrun_param_name})
                    task['params'].append({
                        "name": taskrun_param_name,
                        "value": "$(tasks.%s.results.taskrun-name)" % task_name_append
                    })
                    # Walk upwards: start from this task, then continue from
                    # each loop whose task_list contains the current name.
                    parent_task_queue = [task['name']]
                    while parent_task_queue:
                        current_task = parent_task_queue.pop(0)
                        for loop_name, loop_spec in loops_pipeline.items():
                            if current_task in loop_spec.get('task_list', []):
                                parent_task_queue.append(
                                    loop_name.replace(loop_name_prefix, ""))
                                loop_param_names = [
                                    loop_param['name'] for loop_param in
                                    loops_pipeline[loop_name]['spec']['params']
                                ]
                                # Skip loops that already carry the param.
                                if task_name_append + '-taskrun-name' in loop_param_names:
                                    continue
                                loops_pipeline[loop_name]['spec'][
                                    'params'].append({
                                        'name': task_name_append + '-taskrun-name',
                                        'value': '$(tasks.%s.results.taskrun-name)' % task_name_append
                                    })

            if task_param_task_name:
                # Param produced by a known task output: the path uses the
                # producer's taskrun name, supplied via the "-trname" param.
                workspaces_parameter = '$(workspaces.%s.path)/%s/$(params.%s-trname)/%s' % (
                    task_name, BIG_DATA_MIDPATH, task_param_task_name,
                    task_param_param_name)
                if task_param_task_name != task_name:
                    append_taskrun_params(
                        task_param_task_name
                    )  # need to get taskrun name from parent path
            else:
                # No producer found: fall back to this task's own taskrun name.
                workspaces_parameter = '$(workspaces.%s.path)/%s/%s/%s' % (
                    task_name, BIG_DATA_MIDPATH, "$(context.taskRun.name)",
                    task_param.get('name'))
            _append_original_pr_name_env(task)
            task['taskSpec'] = replace_big_data_placeholder(
                task_spec, placeholder, workspaces_parameter)
            # Refresh the local reference so the next replacement starts from
            # the spec that was just stored.
            task_spec = task.get('taskSpec', {})
    # Handle input artifacts that do not depend on the output of other tasks
    for task_artifact in task_artifacts:
        if (task_name, task_artifact.get('name')) not in inputs_tasks:
            # add input artifact processes
            task = input_artifacts_tasks(task, task_artifact)
        if (prname, task_artifact.get('name')) in inputs_tasks:
            # add input artifact processes for pipeline parameter
            # Only when the artifact has no (non-empty) 'raw' entry yet.
            if not task_artifact.setdefault('raw', {}):
                for i in range(len(pipelinerun_template['spec']['params'])):
                    param_name = pipelinerun_template['spec']['params'][i][
                        'name']
                    param_value = pipelinerun_template['spec']['params'][i][
                        'value']
                    if (task_artifact.get('name') == param_name):
                        task_artifact['raw']['data'] = param_value
                        task = input_artifacts_tasks_pr_params(
                            task, task_artifact)

    # If a task produces a result and artifact, add a step to copy artifact to results.
    artifact_items = pipelinerun_template['metadata']['annotations'][
        'tekton.dev/artifact_items']
    add_copy_results_artifacts_step = False
    if task.get("taskSpec", {}):
        if task_spec.get('results', []):
            copy_results_artifact_step = _get_base_step(
                'copy-results-artifacts')
            copy_results_artifact_step[
                'onError'] = 'continue'  # supported by v0.27+ of tekton.
            copy_results_artifact_step['script'] += 'TOTAL_SIZE=0\n'
            for result in task_spec['results']:
                if task['name'] in artifact_items:
                    artifact_i = artifact_items[task['name']]
                    # Each entry is unpacked as an (artifact name, path) pair.
                    for index, artifact_tuple in enumerate(artifact_i):
                        artifact_name, artifact = artifact_tuple
                        src = artifact
                        dst = '$(results.%s.path)' % sanitize_k8s_name(
                            result['name'])
                        # Copy only when the artifact shares the result's name
                        # and lives somewhere other than the result path.
                        if artifact_name == result['name'] and src != dst:
                            add_copy_results_artifacts_step = True
                            # The generated script copies the artifact only
                            # while the running total stays below 3072 bytes.
                            copy_results_artifact_step['script'] += (
                                'ARTIFACT_SIZE=`wc -c %s | awk \'{print $1}\'`\n' % src +
                                'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n' +
                                'touch ' + dst + '\n' +  # create an empty file by default.
                                'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
                                '  cp ' + src + ' ' + dst + '\n' +
                                'fi\n')
            _append_original_pr_name_env_to_step(copy_results_artifact_step)
            if add_copy_results_artifacts_step:
                task['taskSpec']['steps'].append(copy_results_artifact_step)
    # Remove artifacts parameter from params, but keep the "-trname" params
    task.get("taskSpec", {})['params'] = [
        param for param in task_spec.get('params', [])
        if (task_name, param.get('name')) not in inputs_tasks or param.get('name').endswith("-trname")
    ]
    # Remove artifacts from task_spec
    if 'artifacts' in task_spec:
        del task['taskSpec']['artifacts']
    return task