def test_get_parameter_type(self, given_type, expected_type):
    """Checks that get_parameter_type maps a given type to the expected enum.

    Args:
        given_type: The type passed to type_utils.get_parameter_type.
        expected_type: The parameter type the lookup is expected to return.
    """
    resolved_type = type_utils.get_parameter_type(given_type)
    self.assertEqual(expected_type, resolved_type)

    # The lookup also accepts a Python type object: int maps to NUMBER_INTEGER.
    resolved_from_python_type = type_utils.get_parameter_type(int)
    self.assertEqual(pb.ParameterType.NUMBER_INTEGER,
                     resolved_from_python_type)
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Creates the ComponentSpec that describes a pipeline task.

    Args:
        task: The pipeline task to translate into a ComponentSpec.
        is_exit_task: True when the task is used as the exit task of an Exit
            Handler.

    Returns:
        The ComponentSpec built for the task.

    Raises:
        ValueError: If the task declares a PipelineTaskFinalStatus input but
            is not an exit task.
    """
    spec = pipeline_spec_pb2.ComponentSpec()
    spec.executor_label = component_utils.sanitize_executor_label(task.name)

    declared_inputs = task.component_spec.inputs or {}
    for name, declared in declared_inputs.items():
        # PipelineTaskFinalStatus is handled before anything else: it is
        # always registered as a STRUCT parameter, whether or not the task
        # received an argument for it.
        if type_utils.is_task_final_status_type(declared.type):
            if not is_exit_task:
                raise ValueError(
                    'PipelineTaskFinalStatus can only be used in an exit task.'
                )
            spec.input_definitions.parameters[name].parameter_type = (
                pipeline_spec_pb2.ParameterType.STRUCT)
            continue

        # Inputs the task did not receive are left out; this is the
        # workaround that makes optional inputs possible.
        if name not in task.inputs:
            continue

        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.input_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = spec.input_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    declared_outputs = task.component_spec.outputs or {}
    for name, declared in declared_outputs.items():
        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.output_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = spec.output_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    return spec
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    is_root_component: bool,
) -> None:
    """Fills in the input definitions of a component spec, in place.

    Args:
        component_spec: The component spec whose input definitions are
            populated.
        pipeline_params: The pipeline params to register as inputs.
        is_root_component: Whether the component is the root. The root uses
            each param's full name as the input name; any other component
            uses the name returned by
            additional_input_name_for_pipelineparam.
    """
    for pipeline_param in pipeline_params:
        full_name = pipeline_param.full_name

        # A loop argument with no type set is treated as a 'String'. The
        # param object itself is updated.
        if _for_loop.LoopArguments.name_is_loop_argument(full_name):
            pipeline_param.param_type = pipeline_param.param_type or 'String'

        if is_root_component:
            input_name = full_name
        else:
            input_name = additional_input_name_for_pipelineparam(full_name)

        if type_utils.is_parameter_type(pipeline_param.param_type):
            parameter_type = type_utils.get_parameter_type(
                pipeline_param.param_type)
            component_spec.input_definitions.parameters[
                input_name].parameter_type = parameter_type
            continue

        # A name already registered as a parameter is not registered again
        # as an artifact.
        registered_parameters = getattr(component_spec.input_definitions,
                                        'parameters', [])
        if input_name in registered_parameters:
            continue

        artifact_type = component_spec.input_definitions.artifacts[
            input_name].artifact_type
        artifact_type.CopyFrom(
            type_utils.get_artifact_type_schema(pipeline_param.param_type))
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Converts a structures.ComponentSpec into an IR ComponentSpec.

    Args:
        component_spec: The structure component spec to convert.
        executor_label: The executor label to set on the result.
        actual_inputs: The names of the arguments actually passed to the
            task. Declared inputs missing from this list are left out, as a
            short term workaround to support optional inputs in component
            spec IR.

    Returns:
        An instance of IR ComponentSpec.
    """
    ir_spec = pipeline_spec_pb2.ComponentSpec()
    ir_spec.executor_label = executor_label

    for declared_input in component_spec.inputs or []:
        name = declared_input.name
        # Only inputs that were actually supplied are described.
        if name not in actual_inputs:
            continue

        if type_utils.is_parameter_type(declared_input.type):
            parameter_type = type_utils.get_parameter_type(
                declared_input.type)
            ir_spec.input_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = ir_spec.input_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared_input.type))

    for declared_output in component_spec.outputs or []:
        name = declared_output.name
        if type_utils.is_parameter_type(declared_output.type):
            parameter_type = type_utils.get_parameter_type(
                declared_output.type)
            ir_spec.output_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = ir_spec.output_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared_output.type))

    return ir_spec
def build_component_spec_for_task(
        task: pipeline_task.PipelineTask) -> pipeline_spec_pb2.ComponentSpec:
    """Creates the ComponentSpec that describes a pipeline task.

    Args:
        task: The pipeline task to translate into a ComponentSpec.

    Returns:
        The ComponentSpec built for the task.
    """
    spec = pipeline_spec_pb2.ComponentSpec()
    spec.executor_label = component_utils.sanitize_executor_label(task.name)

    declared_inputs = task.component_spec.inputs or {}
    for name, declared in declared_inputs.items():
        # Inputs the task did not receive are left out; this is the
        # workaround that makes optional inputs possible.
        if name not in task.inputs:
            continue

        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.input_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = spec.input_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    declared_outputs = task.component_spec.outputs or {}
    for name, declared in declared_outputs.items():
        if type_utils.is_parameter_type(declared.type):
            parameter_type = type_utils.get_parameter_type(declared.type)
            spec.output_definitions.parameters[
                name].parameter_type = parameter_type
        else:
            artifact_type = spec.output_definitions.artifacts[
                name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(declared.type))

    return spec
def build_component_spec_for_group(
    pipeline_channels: List[pipeline_channel.PipelineChannel],
    is_root_group: bool,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a TasksGroup.

    Args:
        pipeline_channels: The list of pipeline channels referenced by the
            group. Each one becomes an input of the returned spec.
        is_root_group: Whether the group is the root group. The root group
            uses each channel's own name as the input name and also gets
            default values filled in for its parameter inputs.

    Returns:
        A ComponentSpec whose input definitions describe the given channels.
    """
    group_spec = pipeline_spec_pb2.ComponentSpec()

    for channel in pipeline_channels:
        if is_root_group:
            input_name = channel.name
        else:
            input_name = _additional_input_name_for_pipeline_channel(channel)

        if isinstance(channel, pipeline_channel.PipelineArtifactChannel):
            artifact_type = group_spec.input_definitions.artifacts[
                input_name].artifact_type
            artifact_type.CopyFrom(
                type_utils.get_artifact_type_schema(channel.channel_type))
            continue

        # channel is one of PipelineParameterChannel, LoopArgument, or
        # LoopArgumentVariable.
        parameter_type = type_utils.get_parameter_type(channel.channel_type)
        group_spec.input_definitions.parameters[
            input_name].parameter_type = parameter_type

        # TODO: should we fill in default value for all groups and tasks?
        if is_root_group:
            _fill_in_component_input_default_value(
                component_spec=group_spec,
                input_name=input_name,
                default_value=channel.value,
            )

    return group_spec
def build_component_outputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
    """Fills in the output definitions of a component spec, in place.

    Args:
        component_spec: The component spec whose output definitions are
            populated.
        pipeline_params: The pipeline params to register as outputs. A falsy
            value (for example None) is treated as an empty list.
    """
    for pipeline_param in pipeline_params or []:
        output_name = pipeline_param.full_name

        if type_utils.is_parameter_type(pipeline_param.param_type):
            parameter_type = type_utils.get_parameter_type(
                pipeline_param.param_type)
            component_spec.output_definitions.parameters[
                output_name].parameter_type = parameter_type
            continue

        # A name already registered as a parameter is not registered again
        # as an artifact.
        registered_parameters = getattr(component_spec.output_definitions,
                                        'parameters', [])
        if output_name in registered_parameters:
            continue

        artifact_type = component_spec.output_definitions.artifacts[
            output_name].artifact_type
        artifact_type.CopyFrom(
            type_utils.get_artifact_type_schema(pipeline_param.param_type))
def _get_value(param: _pipeline_param.PipelineParam) -> struct_pb2.Value:
    """Converts the value of a PipelineParam into a struct_pb2.Value.

    Args:
        param: The pipeline param to convert. Its value must not be None.

    Returns:
        A struct_pb2.Value with the field matching the param's parameter type
        set from param.value.

    Raises:
        ValueError: If the param's parameter type is not one of the handled
            types.
    """
    assert param.value is not None, 'None values should be filtered out.'

    # TODO(chensun): remove defaulting to 'String' for None param_type once we
    # fix importer behavior.
    parameter_type = type_utils.get_parameter_type(param.param_type or 'String')
    numeric_types = (
        pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
        pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
    )

    value = struct_pb2.Value()
    if parameter_type in numeric_types:
        # Integers and doubles both go into number_value as a float.
        value.number_value = float(param.value)
    elif parameter_type == pipeline_spec_pb2.ParameterType.STRING:
        value.string_value = param.value
    elif parameter_type == pipeline_spec_pb2.ParameterType.BOOLEAN:
        value.bool_value = param.value
    elif parameter_type == pipeline_spec_pb2.ParameterType.LIST:
        value.list_value.extend(param.value)
    elif parameter_type == pipeline_spec_pb2.ParameterType.STRUCT:
        value.struct_value.update(param.value)
    else:
        raise ValueError(f'Unknown type for PipelineParam {param}')

    return value
def _resolve_condition_operands(
    left_operand: Union[str, pipeline_channel.PipelineChannel],
    right_operand: Union[str, pipeline_channel.PipelineChannel],
) -> Tuple[str, str]:
    """Resolves values and PipelineChannels for condition operands.

    Each operand is either a constant (the code handles str, bool, int and
    float) or a PipelineChannel. A constant is rendered as a literal and a
    channel as an ``inputs.parameter_values['<input name>']`` lookup. When the
    two operands have different parameter types, the operand that does not
    have the canonical type is converted to it. The canonical type is chosen
    with the precedence String > Boolean > Double > Integer.

    Args:
        left_operand: The left operand of a condition expression.
        right_operand: The right operand of a condition expression.

    Returns:
        A tuple of the resolved operands values:
        (left_operand_value, right_operand_value).

    Raises:
        ValueError: If a channel operand has a struct, list or unspecified
            parameter type, if the operand types cannot be determined, or if
            a numeric channel would have to be converted to a boolean.
    """
    operands = [left_operand, right_operand]

    # Only scalar channels can be compared; reject everything else up front.
    for value_or_reference in operands:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
            if parameter_type in [
                    pipeline_spec_pb2.ParameterType.STRUCT,
                    pipeline_spec_pb2.ParameterType.LIST,
                    pipeline_spec_pb2.ParameterType
                    .PARAMETER_TYPE_ENUM_UNSPECIFIED,
            ]:
                input_name = _additional_input_name_for_pipeline_channel(
                    value_or_reference)
                raise ValueError('Conditional requires scalar parameter values'
                                 ' for comparison. Found input "{}" of type {}'
                                 ' in pipeline definition instead.'.format(
                                     input_name,
                                     value_or_reference.channel_type))

    # Collect the parameter type of both operands. Constants are looked up by
    # the name of their Python type.
    parameter_types = set()
    for value_or_reference in operands:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
        else:
            parameter_type = type_utils.get_parameter_type(
                type(value_or_reference).__name__)

        parameter_types.add(parameter_type)

    if len(parameter_types) == 2:
        # Two different types being compared. The only possible types are
        # String, Boolean, Double and Integer. We'll promote the other type
        # using the following precedence:
        # String > Boolean > Double > Integer
        if pipeline_spec_pb2.ParameterType.STRING in parameter_types:
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.STRING
        elif pipeline_spec_pb2.ParameterType.BOOLEAN in parameter_types:
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
        else:
            # Must be a double and int, promote to double.
            assert pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE in parameter_types, \
                'Types: {} [{} {}]'.format(
                    parameter_types, left_operand, right_operand)
            assert pipeline_spec_pb2.ParameterType.NUMBER_INTEGER in parameter_types, \
                'Types: {} [{} {}]'.format(
                    parameter_types, left_operand, right_operand)
            canonical_parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
    elif len(parameter_types) == 1:  # Both operands are the same type.
        canonical_parameter_type = parameter_types.pop()
    else:
        # Probably shouldn't happen.
        raise ValueError('Unable to determine operand types for'
                         ' "{}" and "{}"'.format(left_operand, right_operand))

    operand_values = []
    for value_or_reference in operands:
        if isinstance(value_or_reference, pipeline_channel.PipelineChannel):
            input_name = _additional_input_name_for_pipeline_channel(
                value_or_reference)
            operand_value = "inputs.parameter_values['{input_name}']".format(
                input_name=input_name)
            parameter_type = type_utils.get_parameter_type(
                value_or_reference.channel_type)
            if parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER:
                operand_value = 'int({})'.format(operand_value)
        elif isinstance(value_or_reference, str):
            operand_value = "'{}'".format(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.STRING
        elif isinstance(value_or_reference, bool):
            # Booleans need to be compared as 'true' or 'false' in CEL.
            # This check must come before the int check: bool is a subclass
            # of int.
            operand_value = str(value_or_reference).lower()
            parameter_type = pipeline_spec_pb2.ParameterType.BOOLEAN
        elif isinstance(value_or_reference, int):
            operand_value = str(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
        else:
            assert isinstance(value_or_reference, float), value_or_reference
            operand_value = str(value_or_reference)
            parameter_type = pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE

        if parameter_type != canonical_parameter_type:
            # Type-cast to so CEL does not complain.
            if canonical_parameter_type == pipeline_spec_pb2.ParameterType.STRING:
                assert parameter_type in [
                    pipeline_spec_pb2.ParameterType.BOOLEAN,
                    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
                    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
                ]
                # NOTE(review): for a channel operand this wraps the text of
                # the lookup expression in quotes instead of converting the
                # runtime value to a string - confirm this is intended.
                operand_value = "'{}'".format(operand_value)
            elif canonical_parameter_type == pipeline_spec_pb2.ParameterType.BOOLEAN:
                assert parameter_type in [
                    pipeline_spec_pb2.ParameterType.NUMBER_INTEGER,
                    pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE,
                ]
                if isinstance(value_or_reference,
                              pipeline_channel.PipelineChannel):
                    # A channel has no value at this point, so it cannot be
                    # folded into a boolean literal. This path already ended
                    # in a ValueError (from int() on the expression text);
                    # the error is now explicit.
                    raise ValueError(
                        'Unable to convert numeric input "{}" to a boolean'
                        ' for comparison.'.format(input_name))
                # Zero is false, any other number is true. Working on the
                # constant itself also covers floats such as 1.5, which
                # int() cannot parse from their string form.
                operand_value = 'true' if value_or_reference else 'false'
            else:
                assert canonical_parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_DOUBLE
                assert parameter_type == pipeline_spec_pb2.ParameterType.NUMBER_INTEGER
                operand_value = 'double({})'.format(operand_value)

        operand_values.append(operand_value)

    return tuple(operand_values)
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
    """Instantiates ContainerOp object.

    The steps are, in order: fill in defaults for inputs that were not
    passed, type-check and serialize PipelineParam arguments, resolve the
    command line, construct the ContainerOp, then attach metadata, legacy
    output names, environment variables, pod annotations/labels and the v2
    specs.

    Args:
        component_spec: The component spec object.
        arguments: The dictionary of component arguments. It is copied, not
            modified.
        component_ref: (only for v1) The component references.

    Returns:
        A ContainerOp instance.

    Raises:
        TypeError: If a ContainerOp object is passed as an argument value.
    """
    # Add component inputs with default value to the arguments dict if they are
    # not in the arguments dict already.
    arguments = arguments.copy()
    for input_spec in component_spec.inputs or []:
        if input_spec.name not in arguments and input_spec.default is not None:
            default_value = input_spec.default
            if input_spec.type == 'Integer':
                default_value = int(default_value)
            elif input_spec.type == 'Float':
                default_value = float(default_value)
            elif (type_utils.is_parameter_type(input_spec.type) and
                  kfp.COMPILING_FOR_V2):
                parameter_type = type_utils.get_parameter_type(input_spec.type)
                default_value = type_utils.deserialize_parameter_value(
                    value=default_value, parameter_type=parameter_type)

            arguments[input_spec.name] = default_value

    # Check types of the reference arguments and serialize PipelineParams.
    # `original_arguments` keeps the unserialized values. The loop reads from
    # it and writes into the copy, so the dict being iterated is never the
    # one being modified.
    original_arguments = arguments
    arguments = arguments.copy()
    for input_name, argument_value in original_arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            argument_type = argument_value.param_type
            types.verify_type_compatibility(
                argument_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)
        if isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object was passed to component as an input argument. '
                'Pass a single output instead.')

    placeholder_resolver = ExtraPlaceholderResolver()
    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        placeholder_resolver=placeholder_resolver.resolve_placeholder,
    )

    container_spec = component_spec.implementation.container

    # The warning flag lives on the ContainerOp class, so it is shared state.
    # try/finally restores it even if building the op raises.
    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    try:
        output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
        output_paths.update(placeholder_resolver.output_paths)
        input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
        input_paths.update(placeholder_resolver.input_paths)

        artifact_argument_paths = [
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in input_paths.items()
        ]

        task = _container_op.ContainerOp(
            name=component_spec.name or _components._default_component_name,
            image=container_spec.image,
            command=resolved_cmd.command,
            arguments=resolved_cmd.args,
            file_outputs=output_paths,
            artifact_argument_paths=artifact_argument_paths,
        )
    finally:
        _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta, original_arguments)
    if component_ref:
        # Store a shallow copy of the reference with its spec cleared.
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    task._parameter_arguments = resolved_cmd.inputs_consumed_by_value
    name_to_spec_type = {}
    if component_meta.inputs:
        name_to_spec_type = {
            declared_input.name: {
                'type': declared_input.type,
                'default': declared_input.default,
            } for declared_input in component_meta.inputs
        }

    if kfp.COMPILING_FOR_V2:
        # Every passed argument whose declared type is a parameter type is
        # recorded as a parameter argument: lists and dicts as JSON, anything
        # else via str().
        for name, spec_type in name_to_spec_type.items():
            if (name in original_arguments and
                    type_utils.is_parameter_type(spec_type['type'])):
                if isinstance(original_arguments[name], (list, dict)):
                    task._parameter_arguments[name] = json.dumps(
                        original_arguments[name])
                else:
                    task._parameter_arguments[name] = str(
                        original_arguments[name])

    # An input recorded as a parameter argument is removed from the artifact
    # bookkeeping. list() snapshots the keys because entries are deleted
    # during the loop.
    for name in list(task.artifact_arguments.keys()):
        if name in task._parameter_arguments:
            del task.artifact_arguments[name]

    for name in list(task.input_artifact_paths.keys()):
        if name in task._parameter_arguments:
            del task.input_artifact_paths[name]

    # Previously, ContainerOp had strict requirements for the output names, so
    # we had to convert all the names before passing them to the ContainerOp
    # constructor.
    # Outputs with non-pythonic names could not be accessed using their
    # original names. Now ContainerOp supports any output names, so we're now
    # using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = _naming.generate_unique_name_conversion_table(
        output_names, _naming._sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if container_spec.env:
        # Imported here so kubernetes is only needed when env vars are set.
        from kubernetes import client as k8s_client
        for name, value in container_spec.env.items():
            task.container.add_env_variable(
                k8s_client.V1EnvVar(name=name, value=value))

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    _attach_v2_specs(task, component_spec, original_arguments)

    return task