def test_get_input_artifact_type_schema(self):
  """Tests get_input_artifact_type_schema (yaml schema string contract).

  NOTE(review): a later definition in this file declares a method with the
  same name (asserting on ``.schema_title``), which shadows this one at class
  creation time, so this version never runs. Keep only one of the two.
  """
  input_specs = [
      structures.InputSpec(name='input1', type='String'),
      structures.InputSpec(name='input2', type='Model'),
      structures.InputSpec(name='input3', type=None),
  ]

  # input not found.
  with self.assertRaises(AssertionError) as cm:
    type_utils.get_input_artifact_type_schema('input0', input_specs)
  # Bug fix: assert on the captured exception, not on the context-manager
  # object itself -- str(cm) is the repr of _AssertRaisesContext and can
  # never equal the expected message. The check also must sit outside the
  # `with` body, otherwise it is unreachable (the call above raises first).
  self.assertEqual('Input not found.', str(cm.exception))

  # input found, but it doesn't map to an artifact type.
  with self.assertRaises(AssertionError) as cm:
    type_utils.get_input_artifact_type_schema('input1', input_specs)
  self.assertEqual('Input is not an artifact type.', str(cm.exception))

  # input found, and a matching artifact type schema returned.
  self.assertEqual(
      'title: kfp.Model\ntype: object\nproperties:\n framework:\n type: string\n framework_version:\n type: string\n',
      type_utils.get_input_artifact_type_schema('input2', input_specs))

  # input found, and the default artifact type schema returned.
  self.assertEqual(
      'title: kfp.Artifact\ntype: object\n',
      type_utils.get_input_artifact_type_schema('input3', input_specs))
def test_get_input_artifact_type_schema(self):
  """Tests get_input_artifact_type_schema (ArtifactTypeSchema proto contract)."""
  input_specs = [
      structures.InputSpec(name='input1', type='String'),
      structures.InputSpec(name='input2', type='Model'),
      structures.InputSpec(name='input3', type=None),
  ]

  # input not found.
  with self.assertRaises(AssertionError) as cm:
    type_utils.get_input_artifact_type_schema('input0', input_specs)
  # Bug fix: assert on the captured exception, not on the context-manager
  # object itself -- str(cm) is the repr of _AssertRaisesContext and can
  # never equal the expected message. The check also must sit outside the
  # `with` body, otherwise it is unreachable (the call above raises first).
  self.assertEqual('Input not found.', str(cm.exception))

  # input found, but it doesn't map to an artifact type.
  with self.assertRaises(AssertionError) as cm:
    type_utils.get_input_artifact_type_schema('input1', input_specs)
  self.assertEqual('Input is not an artifact type.', str(cm.exception))

  # input found, and a matching artifact type schema returned.
  self.assertEqual(
      'system.Model',
      type_utils.get_input_artifact_type_schema('input2',
                                                input_specs).schema_title)

  # input found, and the default artifact type schema returned.
  self.assertEqual(
      'system.Artifact',
      type_utils.get_input_artifact_type_schema('input3',
                                                input_specs).schema_title)
def _group_to_dag_spec(
    self,
    group: dsl.OpsGroup,
    inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
    outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
    dependencies: Dict[str, List[_GroupOrOp]],
    pipeline_spec: pipeline_spec_pb2.PipelineSpec,
    deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
    rootgroup_name: str,
) -> None:
  """Generate IR spec given an OpsGroup.

  Args:
    group: The OpsGroup to generate spec for.
    inputs: The inputs dictionary. The keys are group/op names and values are
      lists of tuples (param, producing_op_name).
    outputs: The outputs dictionary. The keys are group/op names and values
      are lists of tuples (param, producing_op_name).
    dependencies: The group dependencies dictionary. The keys are group/op
      names, and the values are lists of dependent groups/ops.
    pipeline_spec: The pipeline_spec to update in-place.
    deployment_config: The deployment_config to hold all executors.
    rootgroup_name: The name of the group root. Used to determine whether the
      component spec for the current group should be the root dag.
  """
  group_component_name = dsl_utils.sanitize_component_name(group.name)

  # The root group maps onto pipeline_spec.root; every other group gets its
  # own entry in the components map.
  if group.name == rootgroup_name:
    group_component_spec = pipeline_spec.root
  else:
    group_component_spec = pipeline_spec.components[group_component_name]

  # Generate task specs and component specs for the dag.
  subgroups = group.groups + group.ops
  for subgroup in subgroups:
    # Ops carry pre-built task/component specs; plain OpsGroups do not, so
    # fall back to empty protos that are populated below.
    subgroup_task_spec = getattr(subgroup, 'task_spec',
                                 pipeline_spec_pb2.PipelineTaskSpec())
    subgroup_component_spec = getattr(subgroup, 'component_spec',
                                      pipeline_spec_pb2.ComponentSpec())
    is_loop_subgroup = (isinstance(group, dsl.ParallelFor))
    is_recursive_subgroup = (
        isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)
    # Special handling for recursive subgroup: use the existing opsgroup name
    if is_recursive_subgroup:
      subgroup_key = subgroup.recursive_ref.name
    else:
      subgroup_key = subgroup.name
    subgroup_task_spec.task_info.name = (
        subgroup_task_spec.task_info.name or
        dsl_utils.sanitize_task_name(subgroup_key))
    # human_name exists for ops only, and is used to de-dupe component spec.
    subgroup_component_name = (
        subgroup_task_spec.component_ref.name or
        dsl_utils.sanitize_component_name(
            getattr(subgroup, 'human_name', subgroup_key)))
    subgroup_task_spec.component_ref.name = subgroup_component_name

    if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
      raise NotImplementedError(
          'dsl.graph_component is not yet supported in KFP v2 compiler.')
    if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
      raise NotImplementedError(
          'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

    importer_tasks = []
    # Add importer node when applicable: an artifact input with no producer
    # task means the artifact must be imported from outside the pipeline.
    for input_name in subgroup_task_spec.inputs.artifacts:
      if not subgroup_task_spec.inputs.artifacts[
          input_name].task_output_artifact.producer_task:
        type_schema = type_utils.get_input_artifact_type_schema(
            input_name, subgroup._metadata.inputs)

        importer_name = importer_node.generate_importer_base_name(
            dependent_task_name=subgroup_task_spec.task_info.name,
            input_name=input_name)
        importer_task_spec = importer_node.build_importer_task_spec(
            importer_name)
        importer_comp_spec = importer_node.build_importer_component_spec(
            importer_base_name=importer_name,
            input_name=input_name,
            input_type_schema=type_schema)
        importer_task_name = importer_task_spec.task_info.name
        importer_comp_name = importer_task_spec.component_ref.name
        importer_exec_label = importer_comp_spec.executor_label
        group_component_spec.dag.tasks[importer_task_name].CopyFrom(
            importer_task_spec)
        pipeline_spec.components[importer_comp_name].CopyFrom(
            importer_comp_spec)

        # Rewire the input to consume the importer node's output.
        subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task = (
                importer_task_name)
        subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.output_artifact_key = (
                importer_node.OUTPUT_KEY)

        # Retrieve the pre-built importer spec
        importer_spec = subgroup.importer_specs[input_name]
        deployment_config.executors[importer_exec_label].importer.CopyFrom(
            importer_spec)

        importer_tasks.append(importer_task_name)

    # NOTE(review): group_inputs is computed but never read below -- confirm
    # whether it can be removed.
    group_inputs = inputs.get(group.name, [])
    subgroup_inputs = inputs.get(subgroup.name, [])
    subgroup_params = [param for param, _ in subgroup_inputs]

    tasks_in_current_dag = [
        dsl_utils.sanitize_task_name(subgroup.name) for subgroup in subgroups
    ] + importer_tasks

    is_parent_component_root = group_component_spec == pipeline_spec.root

    # Additional spec modifications for dsl.ParallelFor's subgroups.
    if is_loop_subgroup:
      self._update_loop_specs(group, subgroup, group_component_spec,
                              subgroup_component_spec, subgroup_task_spec)

    elif isinstance(subgroup, dsl.ContainerOp):
      dsl_component_spec.update_task_inputs_spec(
          subgroup_task_spec,
          group_component_spec.input_definitions,
          subgroup_params,
          tasks_in_current_dag,
      )

    if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':
      # "punch the hole", adding inputs needed by its subgroup or tasks.
      dsl_component_spec.build_component_inputs_spec(
          component_spec=subgroup_component_spec,
          pipeline_params=subgroup_params,
          is_root_component=False,
      )
      dsl_component_spec.build_task_inputs_spec(
          subgroup_task_spec,
          subgroup_params,
          tasks_in_current_dag,
          is_parent_component_root,
      )

      # Render the condition as "<lhs> <op> <rhs>" for the trigger policy.
      condition = subgroup.condition
      operand_values = []
      for operand in [condition.operand1, condition.operand2]:
        operand_values.append(self._resolve_value_or_reference(operand))
      condition_string = '{} {} {}'.format(operand_values[0],
                                           condition.operator,
                                           operand_values[1])
      subgroup_task_spec.trigger_policy.CopyFrom(
          pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
              condition=condition_string))

    # Generate dependencies section for this task.
    if dependencies.get(subgroup.name, None):
      group_dependencies = list(dependencies[subgroup.name])
      # Sort for deterministic output across compilations.
      group_dependencies.sort()
      subgroup_task_spec.dependent_tasks.extend(
          [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

    if isinstance(subgroup, dsl.ParallelFor):
      if subgroup.parallelism is not None:
        warnings.warn(
            'Setting parallelism in ParallelFor is not supported yet.'
            'The setting is ignored.')

      # Remove loop arguments related inputs from parent group component spec.
      input_names = [param.full_name for param, _ in inputs[subgroup.name]]
      for input_name in input_names:
        if _for_loop.LoopArguments.name_is_loop_argument(input_name):
          dsl_component_spec.pop_input_from_component_spec(
              group_component_spec, input_name)

      if subgroup.items_is_pipeline_param:
        # These loop args are a 'withParam' rather than 'withItems'.
        # i.e., rather than a static list, they are either the output of
        # another task or were input as global pipeline parameters.
        pipeline_param = subgroup.loop_args.items_or_pipeline_param
        input_parameter_name = pipeline_param.full_name

        if pipeline_param.op_name:
          # Loop items come from an upstream task's output parameter.
          subgroup_task_spec.inputs.parameters[
              input_parameter_name].task_output_parameter.producer_task = (
                  dsl_utils.sanitize_task_name(pipeline_param.op_name))
          subgroup_task_spec.inputs.parameters[
              input_parameter_name].task_output_parameter.output_parameter_key = (
                  pipeline_param.name)
        else:
          # Loop items come from a parameter of the enclosing component.
          subgroup_task_spec.inputs.parameters[
              input_parameter_name].component_input_parameter = (
                  input_parameter_name)

        if pipeline_param.op_name is None:
          # Input parameter is from pipeline func rather than component
          # output. Correct loop argument input type in the parent component
          # spec. The loop argument was categorized as an artifact due to its
          # missing or non-primitive type annotation. But it should always be
          # String typed, as its value is a serialized JSON string.
          dsl_component_spec.pop_input_from_component_spec(
              group_component_spec, input_parameter_name)
          group_component_spec.input_definitions.parameters[
              input_parameter_name].type = pipeline_spec_pb2.PrimitiveType.STRING

    # Add component spec if not exists
    if subgroup_component_name not in pipeline_spec.components:
      pipeline_spec.components[subgroup_component_name].CopyFrom(
          subgroup_component_spec)

    # Add task spec
    group_component_spec.dag.tasks[
        subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

    # Add executor spec, if applicable.
    container_spec = getattr(subgroup, 'container_spec', None)
    if container_spec:
      if compiler_utils.is_v2_component(subgroup):
        compiler_utils.refactor_v2_container_spec(container_spec)
      executor_label = subgroup_component_spec.executor_label

      if executor_label not in deployment_config.executors:
        deployment_config.executors[executor_label].container.CopyFrom(
            container_spec)

    # Add AIPlatformCustomJobSpec, if applicable.
    custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
    if custom_job_spec:
      executor_label = subgroup_component_spec.executor_label
      if executor_label not in deployment_config.executors:
        deployment_config.executors[
            executor_label].custom_job.custom_job.update(custom_job_spec)

  # Persist all collected executors into the pipeline-level deployment spec.
  pipeline_spec.deployment_spec.update(
      json_format.MessageToDict(deployment_config))
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
  """Attaches v2 specs to a ContainerOp object.

  Args:
    task: The ContainerOp object to attach IR specs.
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
  """

  # Attach v2_specs to the ContainerOp object regardless whether the pipeline
  # is being compiled to v1 (Argo yaml) or v2 (IR json).
  # However, there're different behaviors for the two cases. Namely, resolved
  # commands and arguments, error handling, etc.
  # Regarding the difference in error handling, v2 has a stricter requirement
  # on input type annotation. For instance, an input without any type
  # annotation is viewed as an artifact, and if it's paired with
  # InputValuePlaceholder, an error will be thrown at compile time. However,
  # we cannot raise such an error in v1, as it wouldn't break existing
  # pipelines.
  # NOTE(review): this relies on FrameInfo being a namedtuple, so
  # `'_create_pipeline_v2' in frame` matches when any field (notably the
  # function name) equals that string -- confirm this is the intent.
  is_compiling_for_v2 = False
  for frame in inspect.stack():
    if '_create_pipeline_v2' in frame:
      is_compiling_for_v2 = True
      break

  def _resolve_commands_and_args_v2(
      component_spec: _structures.ComponentSpec,
      arguments: Mapping[str, Any],
  ) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
      # v2 forbids pairing a parameter-typed input with a URI placeholder.
      if is_compiling_for_v2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError(
            'Input "{}" with type "{}" cannot be paired with '
            'InputUriPlaceholder.'.format(input_key,
                                          inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
      # v2 forbids parameter-typed inputs here, and also artifact inputs that
      # are auto-imported (importer nodes expose a URI, not a local path).
      if is_compiling_for_v2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError(
            'Input "{}" with type "{}" cannot be paired with '
            'InputPathPlaceholder.'.format(input_key,
                                           inputs_dict[input_key].type))
      elif is_compiling_for_v2 and input_key in importer_specs:
        raise TypeError(
            'Input "{}" with type "{}" is not connected to any upstream output. '
            'However it is used with InputPathPlaceholder. '
            'If you want to import an existing artifact using a system-connected'
            ' importer node, use InputUriPlaceholder instead. '
            'Or if you just want to pass a string parameter, use string type and'
            ' InputValuePlaceholder instead.'.format(
                input_key, inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
      # v2 forbids pairing an artifact-typed input with a value placeholder.
      if is_compiling_for_v2 and not type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError(
            'Input "{}" with type "{}" cannot be paired with '
            'InputValuePlaceholder.'.format(input_key,
                                            inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
      if is_compiling_for_v2 and type_utils.is_parameter_type(
          outputs_dict[output_key].type):
        raise TypeError(
            'Output "{}" with type "{}" cannot be paired with '
            'OutputUriPlaceholder.'.format(output_key,
                                           outputs_dict[output_key].type))
      else:
        return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
          output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
      # Dispatch on output type: parameters resolve to the output file path,
      # artifacts to the artifact path.
      if type_utils.is_parameter_type(outputs_dict[output_key].type):
        return _output_parameter_path_placeholder(output_key)
      else:
        return _output_artifact_path_placeholder(output_key)

    placeholder_resolver = ExtraPlaceholderResolver()

    def _resolve_ir_placeholders_v2(
        arg,
        component_spec: _structures.ComponentSpec,
        arguments: dict,
    ) -> str:
      # Maps a structure placeholder to its IR expression string. May return
      # None for an optional input with no value; anything not handled here
      # is delegated to placeholder_resolver.
      inputs_dict = {
          input_spec.name: input_spec
          for input_spec in component_spec.inputs or []
      }
      if isinstance(arg, _structures.InputValuePlaceholder):
        input_name = arg.input_name
        input_value = arguments.get(input_name, None)
        if input_value is not None:
          return _input_parameter_placeholder(input_name)
        else:
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError(
                'No value provided for input {}'.format(input_name))
      elif isinstance(arg, _structures.InputUriPlaceholder):
        input_name = arg.input_name
        if input_name in arguments:
          input_uri = _input_artifact_uri_placeholder(input_name)
          return input_uri
        else:
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError(
                'No value provided for input {}'.format(input_name))
      elif isinstance(arg, _structures.OutputUriPlaceholder):
        output_name = arg.output_name
        output_uri = _output_artifact_uri_placeholder(output_name)
        return output_uri

      return placeholder_resolver.resolve_placeholder(
          arg=arg,
          component_spec=component_spec,
          arguments=arguments,
      )

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
        placeholder_resolver=_resolve_ir_placeholders_v2,
    )
    return resolved_cmd

  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

  # Keep track of auto-injected importer spec.
  importer_specs = {}

  # Check types of the reference arguments and serialize PipelineParams.
  # Keep the caller's mapping untouched; mutate only a copy.
  original_arguments = arguments
  arguments = arguments.copy()

  # Preserve input params for ContainerOp.inputs (set() de-dupes).
  input_params = list(
      set([
          param for param in arguments.values()
          if isinstance(param, _pipeline_param.PipelineParam)
      ]))

  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      reference_type = argument_value.param_type
      types.verify_type_compatibility(
          reference_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      arguments[input_name] = str(argument_value)

      if type_utils.is_parameter_type(input_type):
        if argument_value.op_name:
          # Parameter produced by an upstream task.
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.output_parameter_key = (
                  argument_value.name)
        else:
          # Parameter fed from the enclosing component's inputs.
          pipeline_task_spec.inputs.parameters[
              input_name].component_input_parameter = argument_value.name
      else:
        if argument_value.op_name:
          # Artifact produced by an upstream task.
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  argument_value.name)
        elif is_compiling_for_v2:
          # argument_value.op_name could be none, in which case an importer
          # node will be inserted later.
          # Importer node is only applicable for v2 engine.
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = ''
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, component_spec.inputs)
          importer_specs[input_name] = importer_node.build_importer_spec(
              input_type_schema=type_schema,
              pipeline_param_name=argument_value.name)
    elif isinstance(argument_value, str):
      pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
          argument_value)
      if pipeline_params and is_compiling_for_v2:
        # argument_value contains PipelineParam placeholders which need to be
        # replaced. And the input needs to be added to the task spec.
        for param in pipeline_params:
          # Form the name for the compiler injected input, and make sure it
          # doesn't collide with any existing input names.
          additional_input_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(
                  param))
          for existing_input_name, _ in arguments.items():
            if existing_input_name == additional_input_name:
              raise ValueError(
                  'Name collision between existing input name '
                  '{} and compiler injected input name {}'.format(
                      existing_input_name, additional_input_name))

          additional_input_placeholder = (
              "{{{{$.inputs.parameters['{}']}}}}".format(
                  additional_input_name))
          argument_value = argument_value.replace(param.pattern,
                                                  additional_input_placeholder)

          # The output references are subject to change -- the producer task
          # may not be within the same DAG.
          if param.op_name:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(param.op_name))
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.output_parameter_key = param.name
          else:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].component_input_parameter = param.full_name

      input_type = component_spec._inputs_dict[input_name].type
      if type_utils.is_parameter_type(input_type):
        # Constant string argument becomes a runtime constant value.
        pipeline_task_spec.inputs.parameters[
            input_name].runtime_value.constant_value.string_value = (
                argument_value)
      elif is_compiling_for_v2:
        # An importer node with constant value artifact_uri will be inserted.
        # Importer node is only applicable for v2 engine.
        pipeline_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task = ''
        type_schema = type_utils.get_input_artifact_type_schema(
            input_name, component_spec.inputs)
        importer_specs[input_name] = importer_node.build_importer_spec(
            input_type_schema=type_schema, constant_value=argument_value)
    elif isinstance(argument_value, int):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.int_value = argument_value
    elif isinstance(argument_value, float):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.double_value = argument_value
    elif isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object {} was passed to component as an input argument. '
          'Pass a single output instead.'.format(input_name))
    else:
      # Unsupported argument types are a hard error only when compiling for
      # v2; v1 compilation tolerates them for backward compatibility.
      if is_compiling_for_v2:
        raise NotImplementedError(
            'Input argument supports only the following types: PipelineParam'
            ', str, int, float. Got: "{}".'.format(argument_value))

  if not component_spec.name:
    component_spec.name = _components._default_component_name

  # task.name is unique at this point.
  pipeline_task_spec.task_info.name = (
      dsl_utils.sanitize_task_name(task.name))

  # Resolve against the caller's original (unmodified) arguments.
  resolved_cmd = _resolve_commands_and_args_v2(
      component_spec=component_spec, arguments=original_arguments)

  task.container_spec = (
      pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
          image=component_spec.implementation.container.image,
          command=resolved_cmd.command,
          args=resolved_cmd.args))

  # TODO(chensun): dedupe IR component_spec and container_spec
  pipeline_task_spec.component_ref.name = (
      dsl_utils.sanitize_component_name(task.name))
  executor_label = dsl_utils.sanitize_executor_label(task.name)

  task.component_spec = dsl_component_spec.build_component_spec_from_structure(
      component_spec, executor_label, arguments.keys())

  task.task_spec = pipeline_task_spec
  task.importer_specs = importer_specs

  # Override command and arguments if compiling to v2.
  if is_compiling_for_v2:
    task.command = resolved_cmd.command
    task.arguments = resolved_cmd.args

    # limit this to v2 compiling only to avoid possible behavior change in v1.
    task.inputs = input_params