def test_decorator_metadata(self):
    """Test @pipeline decorator with metadata."""

    @pipeline(name='p1', description='description1')
    def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good',
                     b: Integer() = 12):
        pass

    golden_meta = ComponentSpec(name='p1', description='description1', inputs=[])
    golden_meta.inputs.append(
        InputSpec(name='a',
                  type={'Schema': {'file_type': 'csv'}},
                  default='good',
                  optional=True))
    golden_meta.inputs.append(
        InputSpec(name='b',
                  type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
                  default="12",
                  optional=True))

    pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
    self.assertEqual(pipeline_meta, golden_meta)
def test_component_metadata_standard_type_annotation(self):
    """Test component decorator metadata."""

    class MockContainerOp:

        def _set_metadata(self, component_meta):
            self._metadata = component_meta

    @component
    def componentA(a: float, b: List[int], c: Optional[str] = None) -> None:
        return MockContainerOp()

    containerOp = componentA('str_value', '[1,2,3]')

    golden_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=None)
    golden_meta.inputs.append(InputSpec(name='a', type='Float'))
    golden_meta.inputs.append(
        InputSpec(
            name='b',
            type='typing.List[int]' if sys.version_info >= (3, 7) else 'List'))
    golden_meta.inputs.append(
        InputSpec(name='c', type='String', default=None, optional=True))

    self.assertEqual(containerOp._metadata, golden_meta)
def test_component_metadata(self):
    """Test component decorator metadata."""

    class MockContainerOp:

        def _set_metadata(self, component_meta):
            self._metadata = component_meta

    @component
    def componentA(
            a: {'ArtifactA': {'file_type': 'csv'}},
            b: Integer() = 12,
            c: {'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}} = 'gs://hello/world'
    ) -> {'model': Integer()}:
        return MockContainerOp()

    containerOp = componentA(1, 2, c=3)

    golden_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=[])
    golden_meta.inputs.append(
        InputSpec(name='a', type={'ArtifactA': {'file_type': 'csv'}}))
    golden_meta.inputs.append(
        InputSpec(name='b',
                  type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
                  default="12",
                  optional=True))
    golden_meta.inputs.append(
        InputSpec(name='c',
                  type={'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}},
                  default='gs://hello/world',
                  optional=True))
    golden_meta.outputs.append(
        OutputSpec(name='model',
                   type={'Integer': {'openapi_schema_validator': {"type": "integer"}}}))

    self.assertEqual(containerOp._metadata, golden_meta)
def _create_io_from_component_spec(spec: Type[SageMakerComponentSpec]) -> IOArgs:
    """Parses the set of inputs and outputs from a component spec into the YAML
    spec form.

    Args:
        spec: A component specification definition.

    Returns:
        IOArgs: The IO arguments object filled with the fields from the
            component spec definition.
    """
    inputs = []
    outputs = []
    args = []

    # Iterate through all inputs adding them to the argument list
    for key, _input in spec.INPUTS.__dict__.items():
        # We know all of these values are validators as we have validated the spec
        input_validator: SageMakerComponentInputValidator = cast(
            SageMakerComponentInputValidator, _input)

        # Map from argparse to KFP component input
        input_spec = InputSpec(
            name=key,
            description=input_validator.description,
            type=SageMakerComponentCompiler.KFP_TYPE_FROM_ARGS.get(
                input_validator.input_type, "String"),
        )

        # Add optional fields
        if input_validator.default is not None:
            input_spec.__dict__["default"] = str(input_validator.default)
        elif not input_validator.required:
            # If not required and has no default, add empty string
            input_spec.__dict__["default"] = ""

        inputs.append(input_spec)

        # Add arguments to the argument list
        args.append(f"--{key}")
        args.append(InputValuePlaceholder(input_name=key))

    for key, _output in spec.OUTPUTS.__dict__.items():
        output_validator: SageMakerComponentOutputValidator = cast(
            SageMakerComponentOutputValidator, _output)
        outputs.append(
            OutputSpec(name=key, description=output_validator.description))

        # Add output arguments to the argument list
        args.append(f"--{key}{SageMakerComponentSpec.OUTPUT_ARGUMENT_SUFFIX}")
        args.append(OutputPathPlaceholder(output_name=key))

    return IOArgs(inputs=inputs, outputs=outputs, args=args)
def test_handle_constructing_graph_component(self):
    task1 = TaskSpec(component_ref=ComponentReference(name='comp 1'),
                     arguments={'in1 1': 11})
    task2 = TaskSpec(component_ref=ComponentReference(name='comp 2'),
                     arguments={
                         'in2 1': 21,
                         'in2 2': TaskOutputArgument.construct(task_id='task 1',
                                                               output_name='out1 1')
                     })
    task3 = TaskSpec(
        component_ref=ComponentReference(name='comp 3'),
        arguments={
            'in3 1': TaskOutputArgument.construct(task_id='task 2',
                                                  output_name='out2 1'),
            'in3 2': GraphInputReference(input_name='graph in 1').as_argument()
        })

    graph_component1 = ComponentSpec(
        inputs=[
            InputSpec(name='graph in 1'),
            InputSpec(name='graph in 2'),
        ],
        outputs=[
            OutputSpec(name='graph out 1'),
            OutputSpec(name='graph out 2'),
        ],
        implementation=GraphImplementation(graph=GraphSpec(
            tasks={
                'task 1': task1,
                'task 2': task2,
                'task 3': task3,
            },
            output_values={
                'graph out 1': TaskOutputArgument.construct(task_id='task 3',
                                                            output_name='out3 1'),
                'graph out 2': TaskOutputArgument.construct(task_id='task 1',
                                                            output_name='out1 2'),
            })))
def _create_workflow(self,
                     pipeline_func: Callable,
                     pipeline_name: Text = None,
                     pipeline_description: Text = None,
                     params_list: List[dsl.PipelineParam] = None,
                     pipeline_conf: dsl.PipelineConf = None,
                     ) -> List[Dict[Text, Any]]:  # Tekton change, signature
    """Internal implementation of create_workflow."""
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = {}
    for param in params_list:
        default_param_values[param.name] = param.value
        param.value = None

    # Currently only allow specifying pipeline params at one place.
    if params_list and pipeline_meta.inputs:
        raise ValueError(
            'Either specify pipeline params in the pipeline function, or in "params_list", but not both.')

    args_list = []
    for arg_name in argspec.args:
        arg_type = None
        for input in pipeline_meta.inputs or []:
            if arg_name == input.name:
                arg_type = input.type
                break
        args_list.append(
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
        pipeline_func(*args_list)

    # Configuration passed to the compiler is overriding. Unfortunately, it's not
    # trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
        args_list_with_defaults = [
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
            for arg_name in argspec.args
        ]
        if argspec.defaults:
            for arg, default in zip(reversed(args_list_with_defaults),
                                    reversed(argspec.defaults)):
                arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
    elif params_list:
        # Or, if args are provided by params_list, fill in pipeline_meta.
        for param in params_list:
            param.value = default_param_values[param.name]

        args_list_with_defaults = params_list
        pipeline_meta.inputs = [
            InputSpec(
                name=param.name,
                type=param.param_type,
                default=param.value) for param in params_list
        ]

    op_transformers = [add_pod_env]
    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    import json
    pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
    pipeline.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
        json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    return workflow
def component_yaml_generator(**kwargs):
    input_specs = []
    input_args = []
    input_kwargs = {}

    serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

    init_kwargs = {}
    method_kwargs = {}

    for key, value in kwargs.items():
        if key in init_arg_names:
            prefix_key = INIT_KEY
            init_kwargs[key] = value
            signature = init_signature
        else:
            prefix_key = METHOD_KEY
            method_kwargs[key] = value
            signature = method_signature

        # no need to add this argument because it's optional
        # this param is validated against the signature because
        # of init_kwargs, method_kwargs
        if value is None:
            continue

        param_type = signature.parameters[key].annotation
        param_type = resolve_annotation(param_type)
        serializer = get_serializer(param_type)
        if serializer:
            param_type = str
            value = serializer(value)

        # TODO remove PipelineParam check when Metadata Importer component available
        # if we serialize we need to include the argument as input
        # perhaps, another option is to embed in yaml as json serialized list
        component_param_name = component_param_name_to_mb_sdk_param_name.get(key, key)
        if isinstance(value, kfp.dsl._pipeline_param.PipelineParam) or serializer:
            if is_mb_sdk_resource_noun_type(param_type):
                metadata_type = map_resource_to_metadata_type(param_type)[1]
                component_param_type = metadata_type
            else:
                component_param_type = 'String'

            input_specs.append(
                InputSpec(
                    name=key,
                    type=component_param_type,
                ))
            input_args.append(f'--{prefix_key}.{component_param_name}')
            if is_mb_sdk_resource_noun_type(param_type):
                input_args.append(InputUriPlaceholder(input_name=key))
            else:
                input_args.append(InputValuePlaceholder(input_name=key))

            input_kwargs[key] = value
        else:
            # Serialized arguments must always be strings
            value = str(value)
            serialized_args[prefix_key][component_param_name] = value

    # validate parameters
    if should_serialize_init:
        init_signature.bind(**init_kwargs)
    method_signature.bind(**method_kwargs)

    component_spec = ComponentSpec(
        name=f'{cls_name}-{method_name}',
        inputs=input_specs,
        outputs=output_specs,
        implementation=ContainerImplementation(
            container=ContainerSpec(
                image=DEFAULT_CONTAINER_IMAGE,
                command=[
                    'python3', '-m',
                    'google_cloud_pipeline_components.aiplatform.remote_runner',
                    '--cls_name', cls_name,
                    '--method_name', method_name,
                ],
                args=make_args(serialized_args) + output_args + input_args,
            )))
    component_path = tempfile.mktemp()
    component_spec.save(component_path)

    return components.load_component_from_file(component_path)(**input_kwargs)
class ComponentCompilerTestCase(unittest.TestCase):
    # These should always match the dummy spec
    DUMMY_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(
                name="input1",
                description="The first input.",
                type="String",
                default="input1-default",
            ),
            InputSpec(name="input2", description="The second input.", type="Integer"),
        ],
        outputs=[
            OutputSpec(name="output1", description="The first output."),
            OutputSpec(name="output2", description="The second output."),
        ],
        args=[
            "--input1",
            InputValuePlaceholder(input_name="input1"),
            "--input2",
            InputValuePlaceholder(input_name="input2"),
            "--output1_output_path",
            OutputPathPlaceholder(output_name="output1"),
            "--output2_output_path",
            OutputPathPlaceholder(output_name="output2"),
        ],
    )

    DUMMY_COMPONENT_SPEC = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=DUMMY_IO_ARGS.inputs,
        outputs=DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=ContainerSpec(
            image="my-image:my-tag",
            command=["python3"],
            args=[
                "fake-path",
                "--input1",
                InputValuePlaceholder(input_name="input1"),
                "--input2",
                InputValuePlaceholder(input_name="input2"),
                "--output1_output_path",
                OutputPathPlaceholder(output_name="output1"),
                "--output2_output_path",
                OutputPathPlaceholder(output_name="output2"),
            ],
        )),
    )

    EXTRA_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(name="inputStr", description="str", type="String"),
            InputSpec(name="inputInt", description="int", type="Integer"),
            InputSpec(name="inputBool", description="bool", type="Bool"),
            InputSpec(name="inputDict", description="dict", type="JsonObject"),
            InputSpec(name="inputList", description="list", type="JsonArray"),
            InputSpec(
                name="inputOptional",
                description="optional",
                type="String",
                default="default-string",
            ),
            InputSpec(
                name="inputOptionalNoDefault",
                description="optional",
                type="String",
                default="",
            ),
        ],
        outputs=[],
        args=[
            "--inputStr",
            InputValuePlaceholder(input_name="inputStr"),
            "--inputInt",
            InputValuePlaceholder(input_name="inputInt"),
            "--inputBool",
            InputValuePlaceholder(input_name="inputBool"),
            "--inputDict",
            InputValuePlaceholder(input_name="inputDict"),
            "--inputList",
            InputValuePlaceholder(input_name="inputList"),
            "--inputOptional",
            InputValuePlaceholder(input_name="inputOptional"),
            "--inputOptionalNoDefault",
            InputValuePlaceholder(input_name="inputOptionalNoDefault"),
        ],
    )

    @classmethod
    def setUpClass(cls):
        cls.compiler = SageMakerComponentCompiler()

    def test_create_io_from_component_spec(self):
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            DummySpec)  # type: ignore

        self.assertEqual(self.DUMMY_IO_ARGS, response)

    def test_create_io_from_component_spec_extra_types(self):
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            ExtraSpec)  # type: ignore

        self.assertEqual(self.EXTRA_IO_ARGS, response)

    def test_create_component_spec_composes_correctly(self):
        image_uri = "my-image"
        image_tag = "my-tag"
        file_path = "fake-path"

        expected = ComponentSpec(
            name="Dummy component",
            description="Dummy description",
            inputs=self.DUMMY_IO_ARGS.inputs,
            outputs=self.DUMMY_IO_ARGS.outputs,
            implementation=ContainerImplementation(container=ContainerSpec(
                image="my-image:my-tag",
                command=["python3"],
                args=[
                    "fake-path",
                    "--input1",
                    InputValuePlaceholder(input_name="input1"),
                    "--input2",
                    InputValuePlaceholder(input_name="input2"),
                    "--output1_output_path",
                    OutputPathPlaceholder(output_name="output1"),
                    "--output2_output_path",
                    OutputPathPlaceholder(output_name="output2"),
                ],
            )),
        )

        with patch(
            "common.component_compiler.SageMakerComponentCompiler._create_io_from_component_spec",
            MagicMock(return_value=self.DUMMY_IO_ARGS),
        ):
            response = SageMakerComponentCompiler._create_component_spec(
                DummyComponent, file_path, image_uri, image_tag)

        self.assertEqual(expected, response)

    def test_write_component(self):
        DummyComponent.save = MagicMock()

        SageMakerComponentCompiler._write_component(DummyComponent, "/tmp/fake-path")

        DummyComponent.save.assert_called_once_with("/tmp/fake-path")
def test_extract_component_interface(self):
    from typing import NamedTuple

    def my_func(  # noqa: F722
        required_param,
        int_param: int = 42,
        float_param: float = 3.14,
        str_param: str = 'string',
        bool_param: bool = True,
        none_param=None,
        custom_type_param: 'Custom type' = None,
        custom_struct_type_param: {
            'CustomType': {
                'param1': 'value1',
                'param2': 'value2'
            }
        } = None,
    ) -> NamedTuple(
        'DummyName',
        [
            #('required_param',), # All typing.NamedTuple fields must have types
            ('int_param', int),
            ('float_param', float),
            ('str_param', str),
            ('bool_param', bool),
            #('custom_type_param', 'Custom type'), #SyntaxError: Forward reference must be an expression -- got 'Custom type'
            ('custom_type_param', 'CustomType'),
            #('custom_struct_type_param', {'CustomType': {'param1': 'value1', 'param2': 'value2'}}), # TypeError: NamedTuple('Name', [(f0, t0), (f1, t1), ...]); each t must be a type  Got {'CustomType': {'param1': 'value1', 'param2': 'value2'}}
        ]):
        '''Function docstring'''
        pass

    component_spec = comp._python_op._extract_component_interface(my_func)

    from kfp.components.structures import InputSpec, OutputSpec
    self.assertEqual(
        component_spec.inputs,
        [
            InputSpec(name='required_param'),
            InputSpec(name='int_param', type='Integer', default='42', optional=True),
            InputSpec(name='float_param', type='Float', default='3.14', optional=True),
            InputSpec(name='str_param', type='String', default='string', optional=True),
            InputSpec(name='bool_param', type='Boolean', default='True', optional=True),
            InputSpec(name='none_param', optional=True),  # No default='None'
            InputSpec(name='custom_type_param', type='Custom type', optional=True),
            InputSpec(name='custom_struct_type_param',
                      type={'CustomType': {'param1': 'value1', 'param2': 'value2'}},
                      optional=True),
        ])
    self.assertEqual(
        component_spec.outputs,
        [
            OutputSpec(name='int_param', type='Integer'),
            OutputSpec(name='float_param', type='Float'),
            OutputSpec(name='str_param', type='String'),
            OutputSpec(name='bool_param', type='Boolean'),
            #OutputSpec(name='custom_type_param', type='Custom type', default='None'),
            OutputSpec(name='custom_type_param', type='CustomType'),
            #OutputSpec(name='custom_struct_type_param', type={'CustomType': {'param1': 'value1', 'param2': 'value2'}}, optional=True),
        ])

    self.maxDiff = None
    self.assertDictEqual(
        component_spec.to_dict(),
        {
            'name': 'My func',
            'description': 'Function docstring',
            'inputs': [
                {'name': 'required_param'},
                {'name': 'int_param', 'type': 'Integer', 'default': '42', 'optional': True},
                {'name': 'float_param', 'type': 'Float', 'default': '3.14', 'optional': True},
                {'name': 'str_param', 'type': 'String', 'default': 'string', 'optional': True},
                {'name': 'bool_param', 'type': 'Boolean', 'default': 'True', 'optional': True},
                {'name': 'none_param', 'optional': True},  # No default='None'
                {'name': 'custom_type_param', 'type': 'Custom type', 'optional': True},
                {'name': 'custom_struct_type_param',
                 'type': {'CustomType': {'param1': 'value1', 'param2': 'value2'}},
                 'optional': True},
            ],
            'outputs': [
                {'name': 'int_param', 'type': 'Integer'},
                {'name': 'float_param', 'type': 'Float'},
                {'name': 'str_param', 'type': 'String'},
                {'name': 'bool_param', 'type': 'Boolean'},
                {'name': 'custom_type_param', 'type': 'CustomType'},
                #{'name': 'custom_struct_type_param', 'type': {'CustomType': {'param1': 'value1', 'param2': 'value2'}}, 'optional': True},
            ]
        })
def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Text = None,
    pipeline_description: Text = None,
    params_list: List[dsl.PipelineParam] = None,
    pipeline_conf: dsl.PipelineConf = None,
) -> Dict[Text, Any]:
    """Internal implementation of create_workflow."""
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = {}
    for param in params_list:
        default_param_values[param.name] = param.value
        param.value = None

    # Currently only allow specifying pipeline params at one place.
    if params_list and pipeline_meta.inputs:
        raise ValueError(
            'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
        )

    args_list = []
    for arg_name in argspec.args:
        arg_type = None
        for input in pipeline_meta.inputs or []:
            if arg_name == input.name:
                arg_type = input.type
                break
        args_list.append(
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                              param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
        pipeline_func(*args_list)

    # Configuration passed to the compiler is overriding. Unfortunately, it is
    # not trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
        args_list_with_defaults = [
            dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
            for arg_name in argspec.args
        ]
        if argspec.defaults:
            for arg, default in zip(reversed(args_list_with_defaults),
                                    reversed(argspec.defaults)):
                arg.value = default.value if isinstance(
                    default, dsl.PipelineParam) else default
    elif params_list:
        # Or, if args are provided by params_list, fill in pipeline_meta.
        for param in params_list:
            param.value = default_param_values[param.name]

        args_list_with_defaults = params_list
        pipeline_meta.inputs = [
            InputSpec(name=param.name,
                      type=param.param_type,
                      default=param.value) for param in params_list
        ]

    op_transformers = [add_pod_env]
    # # By default adds telemetry instruments. Users can opt out toggling
    # # allow_telemetry.
    # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
    # if allow_telemetry:
    #     pod_labels = get_default_telemetry_labels()
    #     op_transformers.append(add_pod_labels(pod_labels))
    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    workflow = fix_big_data_passing(workflow)

    workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
        json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    # recursively strip empty structures, DANGER: this may remove necessary empty elements ?!
    def remove_empty_elements(obj) -> dict:
        if not isinstance(obj, (dict, list)):
            return obj
        if isinstance(obj, list):
            return [remove_empty_elements(o) for o in obj if o != []]
        return {
            k: remove_empty_elements(v) for k, v in obj.items() if v != []
        }

    workflow = remove_empty_elements(workflow)

    return workflow
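# --- Illustrative only: a minimal, self-contained sketch of the pruning behaviour of the
# --- remove_empty_elements helper above. The sample workflow fragment is hypothetical and
# --- is not taken from the compiler's actual output; the helper is copied locally so the
# --- sketch runs on its own.
def _demo_remove_empty_elements():
    def remove_empty_elements(obj):
        if not isinstance(obj, (dict, list)):
            return obj
        if isinstance(obj, list):
            return [remove_empty_elements(o) for o in obj if o != []]
        return {k: remove_empty_elements(v) for k, v in obj.items() if v != []}

    sample = {
        'metadata': {'annotations': []},  # empty-list value: the key is dropped
        'spec': {'params': [], 'tasks': [{'name': 't1', 'results': []}]},
    }
    pruned = remove_empty_elements(sample)
    # Empty lists are removed at every level; dicts left empty by the pruning are kept as-is.
    assert pruned == {'metadata': {}, 'spec': {'tasks': [{'name': 't1'}]}}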
def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Optional[Text] = None,
    pipeline_description: Optional[Text] = None,
    params_list: Optional[List[dsl.PipelineParam]] = None,
    pipeline_conf: Optional[dsl.PipelineConf] = None,
) -> Dict[Text, Any]:
    """Internal implementation of create_workflow."""
    params_list = params_list or []

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
    # will be resolved immediately in place when being passed to each component.
    default_param_values = OrderedDict()

    if self._pipeline_root_param:
        params_list.append(self._pipeline_root_param)
    if self._pipeline_name_param:
        params_list.append(self._pipeline_name_param)

    for param in params_list:
        default_param_values[param.name] = param.value
        param.value = None

    args_list = []
    kwargs_dict = dict()
    signature = inspect.signature(pipeline_func)
    for arg_name, arg in signature.parameters.items():
        arg_type = None
        for input in pipeline_meta.inputs or []:
            if arg_name == input.name:
                arg_type = input.type
                break
        param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type)
        if arg.kind == inspect.Parameter.KEYWORD_ONLY:
            kwargs_dict[arg_name] = param
        else:
            args_list.append(param)

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
        pipeline_func(*args_list, **kwargs_dict)

    # Configuration passed to the compiler is overriding. Unfortunately, it's not
    # trivial to detect whether the dsl_pipeline.conf was ever modified.
    pipeline_conf = pipeline_conf or dsl_pipeline.conf

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values by merging two param lists.
    args_list_with_defaults = OrderedDict()
    if pipeline_meta.inputs:
        args_list_with_defaults = OrderedDict([
            (sanitize_k8s_name(input_spec.name, True), input_spec.default)
            for input_spec in pipeline_meta.inputs
        ])

    if params_list:
        # Or, if args are provided by params_list, fill in pipeline_meta.
        for k, v in default_param_values.items():
            args_list_with_defaults[k] = v

        pipeline_meta.inputs = pipeline_meta.inputs or []
        for param in params_list:
            pipeline_meta.inputs.append(
                InputSpec(
                    name=param.name,
                    type=param.param_type,
                    default=default_param_values[param.name]))

    op_transformers = [add_pod_env]
    pod_labels = {
        _SDK_VERSION_LABEL: kfp.__version__,
        _SDK_ENV_LABEL: _SDK_ENV_DEFAULT
    }
    op_transformers.append(add_pod_labels(pod_labels))
    op_transformers.extend(pipeline_conf.op_transformers)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
        # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
        # if they don't exist already.
        for op in dsl_pipeline.ops.values():
            insert_pipeline_name_param = True
            insert_pipeline_root_param = True
            for param in op.inputs:
                if param.name == self._pipeline_name_param.name:
                    insert_pipeline_name_param = False
                elif param.name == self._pipeline_root_param.name:
                    insert_pipeline_root_param = False

            if insert_pipeline_name_param:
                op.inputs.append(self._pipeline_name_param)
            if insert_pipeline_root_param:
                op.inputs.append(self._pipeline_root_param)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    workflow = _data_passing_rewriter.add_pod_name_passing(
        workflow, str(self._pipeline_root_param or None))

    if pipeline_conf and pipeline_conf.data_passing_method is not None:
        workflow = pipeline_conf.data_passing_method(workflow)

    metadata = workflow.setdefault('metadata', {})
    annotations = metadata.setdefault('annotations', {})
    labels = metadata.setdefault('labels', {})

    annotations[_SDK_VERSION_LABEL] = kfp.__version__
    annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = \
        datetime.datetime.now().isoformat()
    annotations['pipelines.kubeflow.org/pipeline_spec'] = \
        json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
        annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
        labels['pipelines.kubeflow.org/v2_pipeline'] = "true"

    # Labels might be logged better than annotations so adding some information here as well
    labels[_SDK_VERSION_LABEL] = kfp.__version__

    return workflow
def test_to_dict(self):
    component_meta = ComponentSpec(
        name='foobar',
        description='foobar example',
        inputs=[
            InputSpec(name='input1',
                      description='input1 desc',
                      type={
                          'GCSPath': {
                              'bucket_type': 'directory',
                              'file_type': 'csv'
                          }
                      },
                      default='default1'),
            InputSpec(name='input2',
                      description='input2 desc',
                      type={
                          'TFModel': {
                              'input_data': 'tensor',
                              'version': '1.8.0'
                          }
                      },
                      default='default2'),
            InputSpec(name='input3',
                      description='input3 desc',
                      type='Integer',
                      default='default3'),
        ],
        outputs=[
            OutputSpec(
                name='output1',
                description='output1 desc',
                type={'Schema': {'file_type': 'tsv'}},
            )
        ])

    golden_meta = {
        'name': 'foobar',
        'description': 'foobar example',
        'inputs': [{
            'name': 'input1',
            'description': 'input1 desc',
            'type': {
                'GCSPath': {
                    'bucket_type': 'directory',
                    'file_type': 'csv'
                }
            },
            'default': 'default1'
        }, {
            'name': 'input2',
            'description': 'input2 desc',
            'type': {
                'TFModel': {
                    'input_data': 'tensor',
                    'version': '1.8.0'
                }
            },
            'default': 'default2'
        }, {
            'name': 'input3',
            'description': 'input3 desc',
            'type': 'Integer',
            'default': 'default3'
        }],
        'outputs': [{
            'name': 'output1',
            'description': 'output1 desc',
            'type': {
                'Schema': {
                    'file_type': 'tsv'
                }
            },
        }]
    }

    self.assertEqual(component_meta.to_dict(), golden_meta)
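# --- Hedged follow-up sketch, not part of the original test suite: it assumes the kfp
# --- structures classes expose a ModelBase-style from_dict() alongside to_dict(), so the
# --- golden dict above can be loaded back into an equivalent ComponentSpec. Treat
# --- from_dict here as an assumption about the installed kfp version, not a guarantee.
def test_to_dict_round_trip_sketch(self):
    component_meta = ComponentSpec(
        name='foobar',
        description='foobar example',
        inputs=[
            InputSpec(name='input3',
                      description='input3 desc',
                      type='Integer',
                      default='default3'),
        ])
    golden_meta = component_meta.to_dict()
    # Assumed API: ComponentSpec.from_dict reconstructs an equal spec from its dict form.
    self.assertEqual(ComponentSpec.from_dict(golden_meta), component_meta)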