def test_command_if_input_value_then(self):
    """Checks that an `if` placeholder conditioned on an input value emits
    its `then` arguments only when that input is actually supplied."""
    component_text = '''\
inputs:
- {name: Do test, type: Boolean, optional: true}
- {name: Test data, type: Integer, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {inputValue: Do test}
        then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
    factory = comp.load_component(text=component_text)

    def resolve(task):
        # Resolve command-line placeholders the same way the compiler would.
        return _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)

    # Condition input provided -> the `then` arguments are emitted.
    with_condition = factory(True, 'test_data.txt', '42')
    self.assertEqual(
        resolve(with_condition).args,
        ['--test-data', 'test_data.txt', '--test-param1', '42'])

    # All inputs omitted -> nothing is emitted (there is no `else` branch).
    without_condition = factory()
    self.assertEqual(resolve(without_condition).args, [])
def helper_test_component_using_local_call(self, component_task_factory: Callable, arguments: dict = None, expected_output_values: dict = None):
    """Instantiates a component task, executes its resolved command line
    locally, and compares the produced output files with the expected values.

    Args:
        component_task_factory: Factory that creates the task being tested.
        arguments: Input name -> value mapping passed to the factory.
        expected_output_values: Output name -> expected file content mapping.
    """
    arguments = arguments or {}
    expected_output_values = expected_output_values or {}

    with tempfile.TemporaryDirectory() as temp_dir_name:
        # Creating task from the component.
        # We do it in a special context that allows us to control the output file locations.
        inputs_root = Path(temp_dir_name) / 'inputs'
        outputs_root = Path(temp_dir_name) / 'outputs'
        with components_override_input_output_dirs_context(str(inputs_root), str(outputs_root)):
            task = component_task_factory(**arguments)
            resolved_cmd = _resolve_command_line_and_paths(
                task.component_ref.spec,
                task.arguments,
            )

        # Materializing the input files the resolved command line expects.
        for input_name, input_file_path in (resolved_cmd.input_paths or {}).items():
            input_file = Path(input_file_path)
            input_file.parent.mkdir(parents=True, exist_ok=True)
            input_file.write_text(str(arguments[input_name]))

        # Executing the fully-resolved command line locally.
        subprocess.run(resolved_cmd.command + resolved_cmd.args, check=True)

        actual_output_values_dict = {
            output_name: Path(output_path).read_text()
            for output_name, output_path in resolved_cmd.output_paths.items()
        }

    self.assertDictEqual(actual_output_values_dict, expected_output_values)
def test_conflicting_name_renaming_stability(self):
    """Conflicting input names must be renamed deterministically, while a
    name that is already pythonic must never be renamed."""
    component_text = textwrap.dedent('''\
        inputs:
        - {name: Input 1}
        - {name: Input_1}
        - {name: Input-1}
        - {name: input_1}  # Last in the list, but is pythonic, so it should not be renamed
        implementation:
          container:
            image: busybox
            command:
            - inputValue: Input 1
            - inputValue: Input_1
            - inputValue: Input-1
            - inputValue: input_1
        ''')
    factory = comp.load_component(text=component_text)

    task = factory(
        input_1_2='value_1_2',
        input_1_3='value_1_3',
        input_1_4='value_1_4',
        input_1='value_1',  # Expecting this input not to be renamed
    )
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    self.assertEqual(resolved.command,
                     ['value_1_2', 'value_1_3', 'value_1_4', 'value_1'])
def helper_test_2_in_2_out_component_using_local_call(
        self, func, op, output_names):
    """Runs a two-input/two-output component locally and checks both outputs
    against the reference Python function.

    Args:
        func: Reference implementation returning a 2-tuple.
        op: Component task factory under test.
        output_names: Names of the two outputs, in order.
    """
    arg1, arg2 = float(3), float(5)
    expected_first, expected_second = func(arg1, arg2)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        with components_local_output_dir_context(temp_dir_name):
            task = op(arg1, arg2)
            resolved_cmd = _resolve_command_line_and_paths(
                task.component_ref.spec,
                task.arguments,
            )

        # Execute the resolved command line locally.
        subprocess.run(resolved_cmd.command + resolved_cmd.args, check=True)

        actual_first = Path(
            resolved_cmd.output_paths[output_names[0]]).read_text()
        actual_second = Path(
            resolved_cmd.output_paths[output_names[1]]).read_text()

    self.assertEqual(float(actual_first), float(str(expected_first)))
    self.assertEqual(float(actual_second), float(str(expected_second)))
def test_handle_default_values_in_task_factory(self):
    """The task factory must fall back to the declared default value and
    still accept an explicit override."""
    component_text = '''\
inputs:
- {name: Data, default: '123'}
implementation:
  container:
    image: busybox
    args:
    - {inputValue: Data}
'''
    factory = comp.load_component_from_text(text=component_text)

    def resolved_args(task):
        return _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments).args

    # No argument -> the declared default is used.
    self.assertEqual(resolved_args(factory()), ['123'])
    # Explicit argument -> the default is overridden.
    self.assertEqual(resolved_args(factory('456')), ['456'])
def test_command_if_is_present_then_else(self):
    """An `if: {cond: {isPresent: ...}}` placeholder must pick the `then`
    branch when the input is given and the `else` branch otherwise."""
    component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {isPresent: In}
        then: [--in, {inputValue: In}]
        else: --no-in
'''
    factory = comp.load_component(text=component_text)

    def resolve(task):
        return _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)

    # Input present -> `then` branch.
    self.assertEqual(resolve(factory('data')).args, ['--in', 'data'])
    # Input absent -> `else` branch.
    self.assertEqual(resolve(factory()).args, ['--no-in'])
def test_handling_list_arguments_containing_pipelineparam(self):
    """A list argument holding a PipelineParam must serialize cleanly, with
    no PipelineParam object repr leaking into the command line."""

    def consume_list(list_param: list) -> int:
        pass

    import kfp
    factory = comp.func_to_container_op(consume_list)
    task = factory([1, 2, 3, kfp.dsl.PipelineParam("aaa"), 4, 5, 6])
    resolved = _resolve_command_line_and_paths(
        task.component_ref.spec,
        task.arguments,
    )
    # Scan every token of the full command line for a leaked repr.
    for token in resolved.command + resolved.args:
        self.assertNotIn('PipelineParam', token)
def test_command_if_true_string_then_else(self):
    """A literal 'true' condition must always select the `then` branch."""
    component_text = '''\
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: 'true'
        then: --true-arg
        else: --false-arg
'''
    factory = comp.load_component(text=component_text)
    task = factory()
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    self.assertEqual(resolved.args, ['--true-arg'])
def test_command_concat(self):
    """A `concat` placeholder must join its resolved pieces into one token."""
    component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  container:
    image: busybox
    args:
    - concat: [{inputValue: In1}, {inputValue: In2}]
'''
    factory = comp.load_component(text=component_text)
    task = factory('some', 'data')
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    # Both input values end up concatenated in a single argument.
    self.assertEqual(resolved.args, ['somedata'])
def test_input_value_resolving(self):
    """An `inputValue` placeholder must resolve to the argument's value."""
    component_text = '''\
inputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
    - --data
    - inputValue: Data
'''
    factory = comp.load_component(text=component_text)
    task = factory('some-data')
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    self.assertEqual(resolved.args, ['--data', 'some-data'])
def test_input_path_placeholder_with_constant_argument(self):
    """An `inputPath` placeholder must resolve to a generated file path while
    the original constant argument stays recorded on the task."""
    component_text = '''\
inputs:
- {name: input 1}
implementation:
  container:
    image: busybox
    command:
    - --input-data
    - {inputPath: input 1}
'''
    factory = comp.load_component_from_text(component_text)
    task = factory('Text')
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    # The task keeps the raw constant argument.
    self.assertEqual(task.arguments, {'input 1': 'Text'})
    # The command line receives the generated input file path instead.
    self.assertEqual(resolved.command,
                     ['--input-data', resolved.input_paths['input 1']])
def test_automatic_output_resolving(self):
    """An `outputPath` placeholder must be auto-filled with a generated
    absolute path even when the caller passes no arguments."""
    component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
    - --output-data
    - {outputPath: Data}
'''
    factory = comp.load_component(text=component_text)
    task = factory()
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    args = resolved.args
    self.assertEqual(len(args), 2)
    self.assertEqual(args[0], '--output-data')
    # The generated output path is absolute.
    self.assertTrue(args[1].startswith('/'))
def _test_load_component_from_file(self, component_path: str):
    """Shared check for the 'Add' component loaded from a file path."""
    factory = comp.load_component_from_file(component_path)

    self.assertEqual(factory.__name__, 'Add')
    self.assertEqual(factory.__doc__.strip(),
                     'Add\nReturns sum of two arguments')

    arg1, arg2 = 3, 5
    task = factory(arg1, arg2)
    self.assertEqual(task.component_ref.spec.implementation.container.image,
                     'python:3.5')

    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    # Numeric arguments are serialized to strings on the command line.
    self.assertEqual(resolved.args[0], str(arg1))
    self.assertEqual(resolved.args[1], str(arg2))
def test_missing_optional_input_file_argument(self):
    """An omitted optional `inputPath` placeholder must resolve to nothing,
    disappearing from the command line entirely."""
    component_text = '''\
inputs:
- {name: input 1, optional: true}
implementation:
  container:
    image: busybox
    command:
    - a
    - {inputPath: input 1}
    - z
'''
    factory = comp.load_component_from_text(component_text)
    task = factory()
    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    # Only the constant tokens around the placeholder remain.
    self.assertEqual(resolved.command, ['a', 'z'])
def helper_test_2_in_1_out_component_using_local_call(self, func, op, arguments=(3., 5.)):
    """Runs a two-input/one-output component locally and checks its output
    against the reference Python function.

    Args:
        func: Reference implementation; its (first) return value is the
            expected output.
        op: Component task factory under test.
        arguments: Pair of input values. The default was changed from a
            mutable list literal to a tuple to avoid the shared mutable
            default-argument pitfall; it is only ever indexed, so behavior
            is unchanged.
    """
    expected = func(arguments[0], arguments[1])
    if isinstance(expected, tuple):
        # Multi-output reference functions: only the first output is checked.
        expected = expected[0]
    expected_str = str(expected)

    with tempfile.TemporaryDirectory() as temp_dir_name:
        with components_local_output_dir_context(temp_dir_name):
            task = op(arguments[0], arguments[1])
            resolved_cmd = _resolve_command_line_and_paths(
                task.component_ref.spec,
                task.arguments,
            )

        full_command = resolved_cmd.command + resolved_cmd.args
        subprocess.run(full_command, check=True)

        # The component has a single output; take its only path.
        output_path = list(resolved_cmd.output_paths.values())[0]
        actual_str = Path(output_path).read_text()

    self.assertEqual(float(actual_str), float(expected_str))
def test_load_component_from_url(self):
    """A component loaded from a URL must match the YAML fetched from it."""
    url = 'https://raw.githubusercontent.com/kubeflow/pipelines/e54fe675432cfef1d115a7a2909f08ed95ea8933/sdk/python/tests/components/test_data/python_add.component.yaml'

    import requests
    component_text = requests.get(url).content
    component_dict = load_yaml(component_text)

    factory = comp.load_component_from_url(url)
    self.assertEqual(
        factory.__doc__,
        component_dict['name'] + '\n' + component_dict['description'])

    arg1, arg2 = 3, 5
    task = factory(arg1, arg2)
    self.assertEqual(
        task.component_ref.spec.implementation.container.image,
        component_dict['implementation']['container']['image'])

    resolved = _resolve_command_line_and_paths(task.component_ref.spec,
                                               task.arguments)
    self.assertEqual(resolved.args[0], str(arg1))
    self.assertEqual(resolved.args[1], str(arg2))
import kfp.components as comp
from kfp.components._components import _resolve_command_line_and_paths

# Load the component definition from its YAML spec.
# Bug fix: this originally called `kfp.components.load_component_from_file`,
# but `import kfp.components as comp` binds only the name `comp` (not `kfp`),
# so the bare name `kfp` raised NameError. Use the `comp` alias instead.
my_op = comp.load_component_from_file('tarsan_gen_images.yaml')
print("**** LOADED COMPONENT:")
print(my_op.component_spec)

sample_count = 20
cmd_args = {
    "samples_count": sample_count,
}

# Resolve the component's command-line placeholders against the arguments.
cmd = _resolve_command_line_and_paths(
    component_spec=my_op.component_spec,
    arguments=cmd_args,
)

print("\nIMAGE:\n")
print(my_op.component_spec.implementation.container.image)
print("\nCOMMAND\n")
print(cmd.command)
print("\nARGS\n")
print(cmd.args)

import docker

docker_client = docker.from_env()
print("\nCOMMAND:")
def _resolve_commands_and_args_v2(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Each placeholder kind (value/uri/path, input/output) is mapped to the
    corresponding v2 IR runtime expression (e.g.
    ``{{$.inputs.parameters['name']}}``) via the generator callbacks passed to
    ``_components._resolve_command_line_and_paths``.

    NOTE(review): this function closes over `is_compiling_for_v2` and
    `importer_specs`, which are defined outside this block — presumably at
    module or enclosing scope; confirm against the full file.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
    # Index input/output specs by name for the placeholder generators below.
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        # Parameter-typed inputs have no artifact URI; reject when compiling v2.
        if is_compiling_for_v2 and type_utils.is_parameter_type(
            inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with '
                'InputUriPlaceholder.'.format(input_key,
                                              inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        # Parameter-typed inputs have no artifact path; reject when compiling v2.
        if is_compiling_for_v2 and type_utils.is_parameter_type(
            inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with '
                'InputPathPlaceholder.'.format(
                    input_key, inputs_dict[input_key].type))
        # Inputs fed by an auto-injected importer node cannot use a local path.
        elif is_compiling_for_v2 and input_key in importer_specs:
            raise TypeError(
                'Input "{}" with type "{}" is not connected to any upstream output. '
                'However it is used with InputPathPlaceholder. '
                'If you want to import an existing artifact using a system-connected'
                ' importer node, use InputUriPlaceholder instead. '
                'Or if you just want to pass a string parameter, use string type and'
                ' InputValuePlaceholder instead.'.format(
                    input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        # Only parameter-typed inputs may be consumed by value in v2.
        if is_compiling_for_v2 and not type_utils.is_parameter_type(
            inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with '
                'InputValuePlaceholder.'.format(
                    input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        # Parameter-typed outputs have no artifact URI; reject when compiling v2.
        if is_compiling_for_v2 and type_utils.is_parameter_type(
            outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with '
                'OutputUriPlaceholder.'.format(
                    output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(
                output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        # Dispatch on the declared output type: parameter file vs artifact path.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    return resolved_cmd
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[structures.ComponentReference] = None,
) -> container_op.ContainerOp:
    """Instantiates ContainerOp object.

    Builds the v2 PipelineTaskSpec / PipelineContainerSpec protos alongside a
    legacy ContainerOp, wiring component arguments (PipelineParams and
    constants) into task inputs and auto-injecting importer specs for
    artifact inputs that have no upstream producer.

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
      component_ref: (not used in v2)

    Returns:
      A ContainerOp instance.
    """
    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_spec = {}

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            # ContainerOp consumes the serialized placeholder string.
            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    # Parameter produced by an upstream task.
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    # Parameter fed directly from a pipeline parameter.
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    # Artifact produced by an upstream task.
                    pipeline_task_spec.inputs.artifacts[input_name].producer_task = (
                        argument_value.op_name)
                    pipeline_task_spec.inputs.artifacts[
                        input_name].output_artifact_key = (
                            argument_value.name)
                else:
                    # argument_value.op_name could be none, in which case an importer node
                    # will be inserted later.
                    pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_spec[input_name] = importer_node.build_importer_spec(
                        input_type_schema=type_schema,
                        pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            else:
                # An importer node with constant value artifact_uri will be inserted.
                pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_spec[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema, constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    # Declare the task outputs: parameter-typed outputs vs artifact outputs.
    for output in component_spec.outputs or []:
        if type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))

    # Index input/output specs by name for the placeholder generators below.
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        # Parameter-typed inputs have no artifact URI.
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        # Parameter-typed inputs have no artifact path.
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
                .format(input_key, inputs_dict[input_key].type))
        # Inputs fed by an auto-injected importer node cannot use a local path.
        elif input_key in importer_spec:
            raise TypeError(
                'Input "{}" with type "{}" is not connected to any upstream output. '
                'However it is used with InputPathPlaceholder. '
                'If you want to import an existing artifact using a system-connected '
                'importer node, use InputUriPlaceholder instead. '
                'Or if you just want to pass a string parameter, use string type and '
                'InputValuePlaceholder instead.'
                .format(input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        # Only parameter-typed inputs may be consumed by value.
        if type_utils.is_parameter_type(inputs_dict[input_key].type):
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
        else:
            raise TypeError(
                'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
                .format(input_key, inputs_dict[input_key].type))

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        # Parameter-typed outputs have no artifact URI.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            raise TypeError(
                'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
                .format(output_key, outputs_dict[output_key].type))
        else:
            return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        # Dispatch on the declared output type: parameter file vs artifact path.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_value_generator=_input_parameter_placeholder,
        input_uri_generator=_input_artifact_uri_placeholder,
        output_uri_generator=_output_artifact_uri_placeholder,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd.command)
    pipeline_container_spec.args.extend(resolved_cmd.args)

    # Merge URI and path mappings for the legacy ContainerOp constructor.
    output_uris_and_paths = resolved_cmd.output_uris.copy()
    output_uris_and_paths.update(resolved_cmd.output_paths)
    input_uris_and_paths = resolved_cmd.input_uris.copy()
    input_uris_and_paths.update(resolved_cmd.input_paths)

    # Temporarily silence the reusable-component warning while constructing
    # ContainerOp; the previous value is restored below.
    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = container_op.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=output_uris_and_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in input_uris_and_paths.items()
        ],
    )

    # task.name is unique at this point.
    pipeline_task_spec.task_info.name = task.name
    pipeline_task_spec.executor_label = task.name

    # Attach the v2 specs to the legacy task object for the compiler to pick up.
    task.task_spec = pipeline_task_spec
    task.importer_spec = importer_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    # Propagate component metadata (annotations/labels) onto the pod.
    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: structures.ComponentReference = None,
) -> dsl.ContainerOp:
    """Instantiates ContainerOp object.

    Builds both the v2 IR task/container specs (using IR placeholder
    arguments) and a legacy ContainerOp (using the concrete arguments).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
      component_ref: The component reference. Optional.

    Returns:
      A ContainerOp instance.
    """
    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = component_spec.name
    # might need to append suffix to exuector_label to ensure its uniqueness?
    pipeline_task_spec.executor_label = component_spec.name

    # Check types of the reference arguments and serialize PipelineParams
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            # ContainerOp consumes the serialized placeholder string.
            arguments[input_name] = str(argument_value)

            if type_utils.is_artifact_type(input_type):
                # argument_value.op_name could be none, in which case an importer node
                # will be inserted later. Use output_artifact_key to preserve the name
                # of pipeline parameter which is needed by importer.
                pipeline_task_spec.inputs.artifacts[
                    input_name].producer_task = (argument_value.op_name or '')
                pipeline_task_spec.inputs.artifacts[
                    input_name].output_artifact_key = (argument_value.name)
            elif type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    # Parameter produced by an upstream task.
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    # Parameter fed directly from a pipeline parameter.
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                raise NotImplementedError(
                    'Unsupported input type: "{}". The type must be one of the following: {}.'
                    .format(input_type, type_utils.all_types()))
        elif isinstance(argument_value, str):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.string_value = argument_value
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    # Declare the task outputs: artifact outputs vs parameter-typed outputs.
    for output in component_spec.outputs or []:
        if type_utils.is_artifact_type(output.type):
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))
        elif type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            raise NotImplementedError(
                'Unsupported output type: "{}". The type must be one of the following: {}.'
                .format(output.type, type_utils.all_types()))

    # Index output specs by name for the placeholder resolver below.
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_placeholder(input_key: str) -> str:
        return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_parameter_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        # Dispatch on the declared output type: parameter file vs artifact URI.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_placeholder(output_key)
        else:
            return _output_artifact_placeholder(output_key)

    # IR placeholders are decided merely based on the declared type of the input.
    # It doesn't matter whether it's InputValuePlaceholder or InputPathPlaceholder
    # from component_spec.
    placeholder_arguments = {
        input_spec.name: _input_artifact_placeholder(input_spec.name)
        if type_utils.is_artifact_type(input_spec.type)
        else _input_parameter_placeholder(input_spec.name)
        for input_spec in component_spec.inputs or []
    }

    # Resolve twice: once with IR placeholders (for the v2 container spec) and
    # once with the concrete arguments (for the legacy ContainerOp).
    resolved_cmd_ir = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=placeholder_arguments,
        input_path_generator=_input_artifact_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd_ir.command)
    pipeline_container_spec.args.extend(resolved_cmd_ir.args)

    # Temporarily silence the reusable-component warning while constructing
    # ContainerOp; the previous value is restored below.
    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = dsl.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=resolved_cmd.output_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in resolved_cmd.input_paths.items()
        ],
    )

    # Attach the v2 specs to the legacy task object for the compiler to pick up.
    task.task_spec = pipeline_task_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    # NOTE(review): `component_ref` defaults to None but is copied and
    # dereferenced unconditionally here — calling with the default would raise
    # AttributeError on `.spec`. Confirm all callers pass a component_ref.
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    # Propagate component metadata (annotations/labels) onto the pod.
    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
    """Instantiates ContainerOp object.

    Args:
        component_spec: The component spec object.
        arguments: The dictionary of component arguments.
        component_ref: (only for v1) The component references.

    Returns:
        A ContainerOp instance.

    Raises:
        TypeError: If a ContainerOp object was passed as an input argument, or
            (from the type checker) if a PipelineParam argument is incompatible
            with the declared input type.
        ValueError: If a required input has no argument and no default.
    """
    # Add component inputs with default value to the arguments dict if they
    # are not in the arguments dict already. Defaults are stored serialized in
    # the component spec, so Integer/Float defaults must be deserialized here.
    arguments = arguments.copy()
    for input_spec in component_spec.inputs or []:
        if input_spec.name not in arguments and input_spec.default is not None:
            default_value = input_spec.default
            if input_spec.type == 'Integer':
                default_value = int(default_value)
            elif input_spec.type == 'Float':
                default_value = float(default_value)
            arguments[input_spec.name] = default_value

    # Check types of the reference arguments and serialize PipelineParams.
    # `original_arguments` keeps the pre-serialization values because
    # _attach_v2_specs (called at the end) needs the raw argument objects.
    original_arguments = arguments
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            argument_type = argument_value.param_type
            types.verify_type_compatibility(
                argument_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)
        if isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object was passed to component as an input argument. '
                'Pass a single output instead.')

    placeholder_resolver = ExtraPlaceholderResolver()
    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        placeholder_resolver=placeholder_resolver.resolve_placeholder,
    )

    container_spec = component_spec.implementation.container

    # The extra placeholder resolver may have discovered additional
    # input/output paths beyond the ones found by the standard resolver;
    # merge them in (resolver entries win on key collision).
    output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
    output_paths.update(placeholder_resolver.output_paths)

    input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
    input_paths.update(placeholder_resolver.input_paths)

    artifact_argument_paths = [
        dsl.InputArgumentPath(
            argument=arguments[input_name],
            input=input_name,
            path=path,
        ) for input_name, path in input_paths.items()
    ]

    # Temporarily disable the "reusable component" warning while constructing
    # the ContainerOp. Fix: the previous value is restored in a `finally`
    # clause so the class-level flag is not left disabled for the rest of the
    # process if the constructor raises.
    old_warn_value = (
        _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING)
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    try:
        task = _container_op.ContainerOp(
            name=component_spec.name or _components._default_component_name,
            image=container_spec.image,
            command=resolved_cmd.command,
            arguments=resolved_cmd.args,
            file_outputs=output_paths,
            artifact_argument_paths=artifact_argument_paths,
        )
    finally:
        _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = (
            old_warn_value)

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    if component_ref:
        # Drop the inlined spec from the stored reference to keep the task
        # object light.
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

    # Previously, ContainerOp had strict requirements for the output names, so
    # we had to convert all the names before passing them to the ContainerOp
    # constructor.
    # Outputs with non-pythonic names could not be accessed using their
    # original names. Now ContainerOp supports any output names, so we're now
    # using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = _naming.generate_unique_name_conversion_table(
        output_names, _naming._sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if (pythonic_output_name not in task.outputs and
                output_name in task.outputs):
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if container_spec.env:
        # Imported lazily so the kubernetes client is only required when the
        # component actually declares environment variables.
        from kubernetes import client as k8s_client
        for name, value in container_spec.env.items():
            task.container.add_env_variable(
                k8s_client.V1EnvVar(name=name, value=value))

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    _attach_v2_specs(task, component_spec, original_arguments)

    return task
def _resolve_commands_and_args_v2(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Each placeholder in the component's container command/args is replaced
    with a v2 IR runtime-expression string such as
    ``{{$.inputs.parameters['name']}}``; the literal double braces in the
    output come from the quadruple braces in the format strings below.

    Args:
        component_spec: The component spec object.
        arguments: The dictionary of component arguments.

    Returns:
        A named tuple: _components._ResolvedCommandLineAndPaths.

    Raises:
        TypeError: When compiling for v2 and a placeholder kind is paired
            with an input/output of an incompatible type (e.g. an
            InputValuePlaceholder on a non-parameter-typed input).
        ValueError: When a non-optional input has no argument provided.
    """
    # Index inputs/outputs by name for the placeholder closures below.
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
        # URI placeholders are only valid for artifact (non-parameter) inputs;
        # the check is enforced only when compiling for v2.
        if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                inputs_dict[input_key].type):
            raise TypeError('Input "{}" with type "{}" cannot be paired with '
                            'InputUriPlaceholder.'.format(
                                input_key, inputs_dict[input_key].type))
        else:
            return _generate_input_uri_placeholder(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
        # Path placeholders are only valid for artifact inputs under v2.
        if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                inputs_dict[input_key].type):
            raise TypeError('Input "{}" with type "{}" cannot be paired with '
                            'InputPathPlaceholder.'.format(
                                input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        # Value placeholders are the inverse: only valid for parameter inputs.
        if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
                inputs_dict[input_key].type):
            raise TypeError('Input "{}" with type "{}" cannot be paired with '
                            'InputValuePlaceholder.'.format(
                                input_key, inputs_dict[input_key].type))
        else:
            return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
        # Output URI placeholders are only valid for artifact outputs under v2.
        if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
                outputs_dict[output_key].type):
            raise TypeError('Output "{}" with type "{}" cannot be paired with '
                            'OutputUriPlaceholder.'.format(
                                output_key, outputs_dict[output_key].type))
        else:
            return _generate_output_uri_placeholder(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        # Parameter outputs resolve to a parameter output-file expression;
        # artifact outputs resolve to an artifact path expression.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_path_placeholder(output_key)
        else:
            return _output_artifact_path_placeholder(output_key)

    placeholder_resolver = ExtraPlaceholderResolver()

    def _resolve_ir_placeholders_v2(
        arg,
        component_spec: _structures.ComponentSpec,
        arguments: dict,
    ) -> str:
        # Custom resolver handed to _resolve_command_line_and_paths; it
        # handles the three IR-specific placeholder kinds and delegates
        # everything else to ExtraPlaceholderResolver. Note: it rebuilds
        # inputs_dict from its own component_spec parameter (shadowing the
        # outer dict), since the framework may call it with a different spec.
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        if isinstance(arg, _structures.InputValuePlaceholder):
            input_name = arg.input_name
            input_value = arguments.get(input_name, None)
            # NOTE(review): presence is tested with `is not None` here but
            # with `in arguments` for InputUriPlaceholder below, so an
            # explicit None argument is treated as "missing" only for value
            # placeholders — confirm this asymmetry is intended.
            if input_value is not None:
                return _input_parameter_placeholder(input_name)
            else:
                input_spec = inputs_dict[input_name]
                if input_spec.optional:
                    # Optional input without a value: drop the placeholder.
                    return None
                else:
                    raise ValueError(
                        'No value provided for input {}'.format(input_name))

        elif isinstance(arg, _structures.InputUriPlaceholder):
            input_name = arg.input_name
            if input_name in arguments:
                input_uri = _input_artifact_uri_placeholder(input_name)
                return input_uri
            else:
                input_spec = inputs_dict[input_name]
                if input_spec.optional:
                    return None
                else:
                    raise ValueError(
                        'No value provided for input {}'.format(input_name))

        elif isinstance(arg, _structures.OutputUriPlaceholder):
            output_name = arg.output_name
            output_uri = _output_artifact_uri_placeholder(output_name)
            return output_uri

        # Not an IR-specific placeholder: fall through to the extra resolver.
        return placeholder_resolver.resolve_placeholder(
            arg=arg,
            component_spec=component_spec,
            arguments=arguments,
        )

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
        placeholder_resolver=_resolve_ir_placeholders_v2,
    )
    return resolved_cmd