def test_pop_input_from_component_spec(self):
    component_spec = pipeline_spec_pb2.ComponentSpec(
        executor_label='exec-component1')
    component_spec.input_definitions.artifacts[
        'input1'].artifact_type.schema_title = 'system.Dataset'
    component_spec.input_definitions.parameters[
        'input2'].type = pipeline_spec_pb2.PrimitiveType.STRING
    component_spec.input_definitions.parameters[
        'input3'].type = pipeline_spec_pb2.PrimitiveType.DOUBLE

    # Pop an artifact; other inputs are left.
    dsl_component_spec.pop_input_from_component_spec(component_spec, 'input1')
    expected_dict = {
        'inputDefinitions': {
            'parameters': {
                'input2': {
                    'type': 'STRING'
                },
                'input3': {
                    'type': 'DOUBLE'
                }
            }
        },
        'executorLabel': 'exec-component1'
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)
    self.assertEqual(expected_spec, component_spec)

    # Pop a parameter; other inputs are left.
    dsl_component_spec.pop_input_from_component_spec(component_spec, 'input2')
    expected_dict = {
        'inputDefinitions': {
            'parameters': {
                'input3': {
                    'type': 'DOUBLE'
                }
            }
        },
        'executorLabel': 'exec-component1'
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)
    self.assertEqual(expected_spec, component_spec)

    # Pop the last input; expect no inputDefinitions.
    dsl_component_spec.pop_input_from_component_spec(component_spec, 'input3')
    expected_dict = {'executorLabel': 'exec-component1'}
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)
    self.assertEqual(expected_spec, component_spec)

    # Pop an input that doesn't exist; expect a no-op.
    dsl_component_spec.pop_input_from_component_spec(component_spec, 'input4')
    self.assertEqual(expected_spec, component_spec)
def testBuildLatestBlessedModelStrategySucceed(self):
    latest_blessed_resolver = resolver.Resolver(
        strategy_class=latest_blessed_model_strategy
        .LatestBlessedModelStrategy,
        model=channel.Channel(type=standard_artifacts.Model),
        model_blessing=channel.Channel(
            type=standard_artifacts.ModelBlessing)).with_id('my_resolver2')
    test_pipeline_info = data_types.PipelineInfo(
        pipeline_name='test-pipeline', pipeline_root='gs://path/to/my/root')
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=latest_blessed_resolver,
        deployment_config=deployment_config,
        pipeline_info=test_pipeline_info,
        component_defs=component_defs)
    actual_step_specs = my_builder.build()

    model_blessing_resolver_id = 'my_resolver2-model-blessing-resolver'
    model_resolver_id = 'my_resolver2-model-resolver'
    self.assertSameElements(
        actual_step_specs.keys(),
        [model_blessing_resolver_id, model_resolver_id])

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_component_1.pbtxt',
            pipeline_pb2.ComponentSpec()),
        component_defs[model_blessing_resolver_id])
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_task_1.pbtxt',
            pipeline_pb2.PipelineTaskSpec()),
        actual_step_specs[model_blessing_resolver_id])
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_component_2.pbtxt',
            pipeline_pb2.ComponentSpec()),
        component_defs[model_resolver_id])
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_task_2.pbtxt',
            pipeline_pb2.PipelineTaskSpec()),
        actual_step_specs[model_resolver_id])
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_blessed_model_resolver_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def test_build_importer_component_spec(self):
    expected_importer_component = {
        'inputDefinitions': {
            'parameters': {
                'uri': {
                    'type': 'STRING'
                }
            }
        },
        'outputDefinitions': {
            'artifacts': {
                'artifact': {
                    'artifactType': {
                        'schemaTitle': 'system.Artifact'
                    }
                }
            }
        },
        'executorLabel': 'exec-importer-1'
    }
    expected_importer_comp_spec = pb.ComponentSpec()
    json_format.ParseDict(expected_importer_component,
                          expected_importer_comp_spec)
    importer_comp_spec = importer_node._build_importer_component_spec(
        importer_base_name='importer-1',
        artifact_type_schema=pb.ArtifactTypeSchema(
            schema_title='system.Artifact'))

    self.maxDiff = None
    self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
def _populate_metrics_in_dag_outputs(
    self,
    ops: List[dsl.ContainerOp],
    op_to_parent_groups: Dict[str, List[str]],
    pipeline_spec: pipeline_spec_pb2.PipelineSpec,
) -> None:
    """Populates metrics artifacts in DAG outputs.

    Args:
        ops: The list of ops that may produce metrics outputs.
        op_to_parent_groups: The dict of op name to parent groups. Key is
            the op's name. Value is a list of ancestor groups including the
            op itself. The list of a given op is sorted so that the farthest
            group is first and the op itself is last.
        pipeline_spec: The pipeline_spec to update in place.
    """
    for op in ops:
        op_task_spec = getattr(op, 'task_spec',
                               pipeline_spec_pb2.PipelineTaskSpec())
        op_component_spec = getattr(op, 'component_spec',
                                    pipeline_spec_pb2.ComponentSpec())

        # Get the tuple of (component_name, task_name) of all its parent
        # groups.
        parent_components_and_tasks = [('_root', '')]
        # Skip the op itself and the root group, which cannot be retrieved
        # via name.
        for group_name in op_to_parent_groups[op.name][1:-1]:
            parent_components_and_tasks.append(
                (dsl_utils.sanitize_component_name(group_name),
                 dsl_utils.sanitize_task_name(group_name)))
        # Reverse the order so that the farthest group comes last.
        parent_components_and_tasks.reverse()

        for output_name, artifact_spec in \
            op_component_spec.output_definitions.artifacts.items():

            if artifact_spec.artifact_type.WhichOneof(
                    'kind'
            ) == 'schema_title' and artifact_spec.artifact_type.schema_title in [
                    io_types.Metrics.TYPE_NAME,
                    io_types.ClassificationMetrics.TYPE_NAME,
            ]:
                unique_output_name = '{}-{}'.format(
                    op_task_spec.task_info.name, output_name)

                sub_task_name = op_task_spec.task_info.name
                sub_task_output = output_name
                for component_name, task_name in parent_components_and_tasks:
                    group_component_spec = (
                        pipeline_spec.root if component_name == '_root' else
                        pipeline_spec.components[component_name])
                    group_component_spec.output_definitions.artifacts[
                        unique_output_name].CopyFrom(artifact_spec)
                    group_component_spec.dag.outputs.artifacts[
                        unique_output_name].artifact_selectors.append(
                            pipeline_spec_pb2.DagOutputsSpec
                            .ArtifactSelectorSpec(
                                producer_subtask=sub_task_name,
                                output_artifact_key=sub_task_output,
                            ))
                    sub_task_name = task_name
                    sub_task_output = unique_output_name
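# Behavior sketch (hypothetical names, for illustration only): a task 'train'
# emitting a Metrics output 'metrics' inside one nested group 'cond-1' gets
# surfaced level by level under the unique name 'train-metrics'. At the group
# level the selector points at the producing subtask; at the root it points
# at the group's task under the renamed key:
#
#   components['comp-cond-1'].dag.outputs.artifacts['train-metrics']
#       <- selector(producer_subtask='train', output_artifact_key='metrics')
#   root.dag.outputs.artifacts['train-metrics']
#       <- selector(producer_subtask='cond-1',
#                   output_artifact_key='train-metrics')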
def test_build_importer_component_spec(self):
    expected_importer_component = {
        'inputDefinitions': {
            'parameters': {
                'input1': {
                    'type': 'STRING'
                }
            }
        },
        'outputDefinitions': {
            'artifacts': {
                'result': {
                    'artifactType': {
                        'instanceSchema': 'title: kfp.Artifact'
                    }
                }
            }
        },
        'executorLabel': 'exec-importer-task0-input1'
    }
    expected_importer_comp_spec = pb.ComponentSpec()
    json_format.ParseDict(expected_importer_component,
                          expected_importer_comp_spec)
    importer_comp_spec = importer_node.build_importer_component_spec(
        importer_base_name='importer-task0-input1',
        input_name='input1',
        input_type_schema='title: kfp.Artifact')

    self.maxDiff = None
    self.assertEqual(expected_importer_comp_spec, importer_comp_spec)
def testBuildFileBasedExampleGen(self):
    example_gen = components.CsvExampleGen(
        input_base='path/to/data/root').with_beam_pipeline_args(
            ['--runner=DataflowRunner'])
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=example_gen,
        image='gcr.io/tensorflow/tfx:latest',
        image_cmds=_TEST_CMDS,
        deployment_config=deployment_config,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_csv_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def testBuildImporterWithRuntimeParam(self):
    param = data_types.RuntimeParameter(name='runtime_flag', ptype=str)
    impt = importer.Importer(
        source_uri=param,
        artifact_type=standard_artifacts.Examples).with_id('my_importer')
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    with parameter_utils.ParameterContext() as pc:
        my_builder = step_builder.StepBuilder(
            node=impt,
            deployment_config=deployment_config,
            component_defs=component_defs)
        actual_step_spec = self._sole(my_builder.build())
        actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_component_with_runtime_param.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_task_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_executor_with_runtime_param.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
    self.assertListEqual([param], pc.parameters)
def build_root_spec_from_pipeline_params(
    pipeline_params: List[dsl.PipelineParam],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds the root component spec instance from pipeline params.

    This is useful when building the component spec for a pipeline (aka
    pipeline root). Such a component spec doesn't need output_definitions,
    and its implementation field will be filled in later.

    Args:
        pipeline_params: The list of pipeline params.

    Returns:
        An instance of IR ComponentSpec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    for param in pipeline_params or []:
        if type_utils.is_parameter_type(param.param_type):
            result.input_definitions.parameters[
                param.name].type = type_utils.get_parameter_type(
                    param.param_type)
        else:
            result.input_definitions.artifacts[
                param.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(param.param_type))

    return result
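# A minimal usage sketch (mirrors test_build_root_spec_from_pipeline_params
# in this section; the example function name is illustrative): parameter-typed
# params land in input_definitions.parameters, artifact-typed params in
# input_definitions.artifacts.
def _example_root_spec_usage():
    params = [
        dsl.PipelineParam(name='input1', param_type='Dataset'),
        dsl.PipelineParam(name='input2', param_type='Integer'),
    ]
    root_spec = build_root_spec_from_pipeline_params(params)
    # 'Dataset' is not a parameter type, so it becomes an artifact input.
    assert 'input1' in root_spec.input_definitions.artifacts
    assert 'input2' in root_spec.input_definitions.parameters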
def testBuildExitHandler(self):
    task = test_utils.dummy_producer_component(
        param1=decorators.FinalStatusStr('value1'),
    )
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=task,
        image='gcr.io/tensorflow/tfx:latest',
        deployment_config=deployment_config,
        component_defs=component_defs,
        is_exit_handler=True)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_exit_handler_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def testBuildLatestArtifactResolverSucceed(self):
    latest_model_resolver = resolver.Resolver(
        strategy_class=latest_artifact_strategy.LatestArtifactStrategy,
        model=channel.Channel(type=standard_artifacts.Model),
        examples=channel.Channel(
            type=standard_artifacts.Examples)).with_id('my_resolver')
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    test_pipeline_info = data_types.PipelineInfo(
        pipeline_name='test-pipeline', pipeline_root='gs://path/to/my/root')
    my_builder = step_builder.StepBuilder(
        node=latest_model_resolver,
        deployment_config=deployment_config,
        pipeline_info=test_pipeline_info,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_latest_artifact_resolver_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def testBuildTask(self):
    query = 'SELECT * FROM TABLE'
    bq_example_gen = big_query_example_gen_component.BigQueryExampleGen(
        query=query)
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=bq_example_gen,
        image='gcr.io/tensorflow/tfx:latest',
        deployment_config=deployment_config,
        component_defs=component_defs,
        enable_cache=True)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_bq_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def build_importer_component_spec(
    importer_base_name: str,
    input_name: str,
    input_type_schema: str,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an importer component spec.

    Args:
        importer_base_name: The base name of the importer node.
        input_name: The name of the input artifact to be imported.
        input_type_schema: The type schema of the input artifact.

    Returns:
        An importer node component spec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    result.executor_label = dsl_utils.sanitize_executor_label(
        importer_base_name)
    result.input_definitions.parameters[
        input_name].type = pipeline_spec_pb2.PrimitiveType.STRING
    result.output_definitions.artifacts[
        OUTPUT_KEY].artifact_type.instance_schema = input_type_schema

    return result
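# Usage sketch (mirrors test_build_importer_component_spec in this section;
# the example function name is illustrative): the base name is sanitized into
# the executor label, the given input becomes a STRING parameter, and the
# output artifact carries the provided instance schema.
def _example_importer_component_spec():
    spec = build_importer_component_spec(
        importer_base_name='importer-task0-input1',
        input_name='input1',
        input_type_schema='title: kfp.Artifact')
    assert spec.executor_label == 'exec-importer-task0-input1'
    assert spec.input_definitions.parameters[
        'input1'].type == pipeline_spec_pb2.PrimitiveType.STRING
    return spec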
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an IR ComponentSpec instance from structures.ComponentSpec.

    Args:
        component_spec: The structure component spec.

    Returns:
        An instance of IR ComponentSpec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    result.executor_label = dsl_utils.sanitize_executor_label(
        component_spec.name)

    for input_spec in component_spec.inputs or []:
        if type_utils.is_parameter_type(input_spec.type):
            result.input_definitions.parameters[
                input_spec.name].type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            result.input_definitions.artifacts[
                input_spec.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_spec in component_spec.outputs or []:
        if type_utils.is_parameter_type(output_spec.type):
            result.output_definitions.parameters[
                output_spec.name].type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            result.output_definitions.artifacts[
                output_spec.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output_spec.type))

    return result
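# Usage sketch (mirrors test_build_component_spec_from_structure in this
# section; the example function name is illustrative): a structure spec named
# 'component1' yields executor label 'exec-component1'; 'Dataset' becomes an
# artifact input and 'String' a STRING parameter input.
def _example_component_spec_from_structure():
    structure_spec = structures.ComponentSpec(
        name='component1',
        description='component1 desc',
        inputs=[
            structures.InputSpec(
                name='input1', description='input1 desc', type='Dataset'),
            structures.InputSpec(
                name='input2', description='input2 desc', type='String'),
        ])
    ir_spec = build_component_spec_from_structure(structure_spec)
    assert ir_spec.executor_label == 'exec-component1'
    assert 'input1' in ir_spec.input_definitions.artifacts
    assert ir_spec.input_definitions.parameters[
        'input2'].type == pipeline_spec_pb2.PrimitiveType.STRING
    return ir_spec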
def testBuildContainerTask(self):
    task = test_utils.DummyProducerComponent(
        output1=channel_utils.as_channel([standard_artifacts.Model()]),
        param1='value1',
    )
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=task,
        image='gcr.io/tensorflow/tfx:latest',  # Note this has no effect here.
        deployment_config=deployment_config,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_container_spec_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def testBuildImporter(self):
    impt = importer.Importer(
        source_uri='m/y/u/r/i',
        properties={
            'split_names': '["train", "eval"]',
        },
        custom_properties={
            'str_custom_property': 'abc',
            'int_custom_property': 123,
        },
        artifact_type=standard_artifacts.Examples).with_id('my_importer')
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=impt,
        deployment_config=deployment_config,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_importer_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def testBuildFileBasedExampleGenWithInputConfig(self):
    input_config = example_gen_pb2.Input(splits=[
        example_gen_pb2.Input.Split(name='train', pattern='*train.tfr'),
        example_gen_pb2.Input.Split(name='eval', pattern='*test.tfr')
    ])
    example_gen = components.ImportExampleGen(
        input_base='path/to/data/root', input_config=input_config)
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=example_gen,
        image='gcr.io/tensorflow/tfx:latest',
        deployment_config=deployment_config,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_import_example_gen_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_import_example_gen_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_import_example_gen_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def test_build_component_spec_from_structure(self):
    structure_component_spec = structures.ComponentSpec(
        name='component1',
        description='component1 desc',
        inputs=[
            structures.InputSpec(
                name='input1', description='input1 desc', type='Dataset'),
            structures.InputSpec(
                name='input2', description='input2 desc', type='String'),
            structures.InputSpec(
                name='input3', description='input3 desc', type='Integer'),
        ],
        outputs=[
            structures.OutputSpec(
                name='output1', description='output1 desc', type='Model')
        ])
    expected_dict = {
        'inputDefinitions': {
            'artifacts': {
                'input1': {
                    'artifactType': {
                        'instanceSchema':
                            'properties:\ntitle: kfp.Dataset\ntype: object\n'
                    }
                }
            },
            'parameters': {
                'input2': {
                    'type': 'STRING'
                },
                'input3': {
                    'type': 'INT'
                }
            }
        },
        'outputDefinitions': {
            'artifacts': {
                'output1': {
                    'artifactType': {
                        'instanceSchema':
                            'properties:\ntitle: kfp.Model\ntype: object\n'
                    }
                }
            }
        },
        'executorLabel': 'exec-component1'
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)

    component_spec = (
        dsl_component_spec.build_component_spec_from_structure(
            structure_component_spec))

    self.assertEqual(expected_spec, component_spec)
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask,
    is_exit_task: bool = False,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.
        is_exit_task: Whether the task is used as exit task in Exit Handler.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():
        # Special handling for PipelineTaskFinalStatus first.
        if type_utils.is_task_final_status_type(input_spec.type):
            if not is_exit_task:
                raise ValueError(
                    'PipelineTaskFinalStatus can only be used in an exit task.'
                )
            component_spec.input_definitions.parameters[
                input_name].parameter_type = (
                    pipeline_spec_pb2.ParameterType.STRUCT)
            continue

        # Skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs and input_spec.default is None:
            continue

        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
            if input_spec.default is not None:
                component_spec.input_definitions.parameters[
                    input_name].default_value.CopyFrom(
                        _to_protobuf_value(input_spec.default))
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs or
                                     {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
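# Shape sketch (illustrative; the task object itself comes from the DSL and
# is not constructed here, and the task/input names are hypothetical): for an
# exit task named 'exit-op' whose only input 'status' is typed
# PipelineTaskFinalStatus, the resulting spec is equivalent to parsing this
# dict, in the JSON format used by the tests in this section:
#
#   {
#       'executorLabel': 'exec-exit-op',
#       'inputDefinitions': {
#           'parameters': {
#               'status': {
#                   'parameterType': 'STRUCT'
#               }
#           }
#       }
#   }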
def test_build_component_inputs_spec(self, is_root_component,
                                     expected_result):
    pipeline_params = [
        _pipeline_param.PipelineParam(name='input1', param_type='Dataset'),
        _pipeline_param.PipelineParam(name='input2', param_type='Integer'),
        _pipeline_param.PipelineParam(name='input3', param_type='String'),
        _pipeline_param.PipelineParam(name='input4', param_type='Float'),
    ]
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_result, expected_spec)

    component_spec = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_inputs_spec(component_spec,
                                                   pipeline_params,
                                                   is_root_component)

    self.assertEqual(expected_spec, component_spec)
def test_build_component_spec_from_structure(self):
    structure_component_spec = structures.ComponentSpec(
        name='component1',
        description='component1 desc',
        inputs=[
            structures.InputSpec(
                name='input1', description='input1 desc', type='Dataset'),
            structures.InputSpec(
                name='input2', description='input2 desc', type='String'),
            structures.InputSpec(
                name='input3', description='input3 desc', type='Integer'),
            structures.InputSpec(
                name='input4', description='optional inputs', optional=True),
        ],
        outputs=[
            structures.OutputSpec(
                name='output1', description='output1 desc', type='Model')
        ])
    expected_dict = {
        'inputDefinitions': {
            'artifacts': {
                'input1': {
                    'artifactType': {
                        'schemaTitle': 'system.Dataset'
                    }
                }
            },
            'parameters': {
                'input2': {
                    'type': 'STRING'
                },
                'input3': {
                    'type': 'INT'
                }
            }
        },
        'outputDefinitions': {
            'artifacts': {
                'output1': {
                    'artifactType': {
                        'schemaTitle': 'system.Model'
                    }
                }
            }
        },
        'executorLabel': 'exec-component1'
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)

    component_spec = (
        dsl_component_spec.build_component_spec_from_structure(
            component_spec=structure_component_spec,
            executor_label='exec-component1',
            actual_inputs=['input1', 'input2', 'input3'],
        ))

    self.assertEqual(expected_spec, component_spec)
def test_build_component_outputs_spec(self):
    pipeline_params = [
        _pipeline_param.PipelineParam(name='output1', param_type='Dataset'),
        _pipeline_param.PipelineParam(name='output2', param_type='Integer'),
        _pipeline_param.PipelineParam(name='output3', param_type='String'),
        _pipeline_param.PipelineParam(name='output4', param_type='Float'),
    ]
    expected_dict = {
        'outputDefinitions': {
            'artifacts': {
                'output1': {
                    'artifactType': {
                        'instanceSchema':
                            'title: kfp.Dataset\ntype: object\nproperties:\n '
                            'payload_format:\n type: string\n '
                            'container_format:\n type: string\n'
                    }
                }
            },
            'parameters': {
                'output2': {
                    'type': 'INT'
                },
                'output3': {
                    'type': 'STRING'
                },
                'output4': {
                    'type': 'DOUBLE'
                }
            }
        }
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)

    component_spec = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_outputs_spec(component_spec,
                                                    pipeline_params)

    self.assertEqual(expected_spec, component_spec)
def test_build_component_outputs_spec(self):
    pipeline_params = [
        _pipeline_param.PipelineParam(name='output1', param_type='Dataset'),
        _pipeline_param.PipelineParam(name='output2', param_type='Integer'),
        _pipeline_param.PipelineParam(name='output3', param_type='String'),
        _pipeline_param.PipelineParam(name='output4', param_type='Float'),
    ]
    expected_dict = {
        'outputDefinitions': {
            'artifacts': {
                'output1': {
                    'artifactType': {
                        'schemaTitle': 'system.Dataset',
                        'schemaVersion': '0.0.1'
                    }
                }
            },
            'parameters': {
                'output2': {
                    'parameterType': 'NUMBER_INTEGER'
                },
                'output3': {
                    'parameterType': 'STRING'
                },
                'output4': {
                    'parameterType': 'NUMBER_DOUBLE'
                }
            }
        }
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)

    component_spec = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_outputs_spec(component_spec,
                                                    pipeline_params)

    self.assertEqual(expected_spec, component_spec)
def testBuildDummyConsumerWithCondition(self):
    producer_task_1 = test_utils.dummy_producer_component(
        output1=channel_utils.as_channel([standard_artifacts.Model()]),
        param1='value1',
    ).with_id('producer_task_1')
    producer_task_2 = test_utils.dummy_producer_component_2(
        output1=channel_utils.as_channel([standard_artifacts.Model()]),
        param1='value2',
    ).with_id('producer_task_2')
    # This test checks two things:
    # 1. Nested conditions. The condition string of consumer_task should
    #    contain both predicates.
    # 2. Implicit channels. consumer_task only takes producer_task_1's
    #    output, but producer_task_2 is used in the condition, so
    #    producer_task_2 should be added to the dependencies of
    #    consumer_task.
    # See testdata for details.
    with conditional.Cond(
        producer_task_1.outputs['output1'].future()[0].uri != 'uri'):
        with conditional.Cond(
            producer_task_2.outputs['output1'].future()
            [0].property('property') == 'value1'):
            consumer_task = test_utils.dummy_consumer_component(
                input1=producer_task_1.outputs['output1'],
                param1=1,
            )
    # Need to construct a pipeline to set producer_component_id.
    unused_pipeline = tfx.dsl.Pipeline(
        pipeline_name='pipeline-with-condition',
        pipeline_root='',
        components=[producer_task_1, producer_task_2, consumer_task],
    )
    deployment_config = pipeline_pb2.PipelineDeploymentConfig()
    component_defs = {}
    my_builder = step_builder.StepBuilder(
        node=consumer_task,
        image='gcr.io/tensorflow/tfx:latest',
        deployment_config=deployment_config,
        component_defs=component_defs)
    actual_step_spec = self._sole(my_builder.build())
    actual_component_def = self._sole(component_defs)

    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_consumer_with_condition_component.pbtxt',
            pipeline_pb2.ComponentSpec()), actual_component_def)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_consumer_with_condition_task.pbtxt',
            pipeline_pb2.PipelineTaskSpec()), actual_step_spec)
    self.assertProtoEquals(
        test_utils.get_proto_from_test_data(
            'expected_dummy_consumer_with_condition_executor.pbtxt',
            pipeline_pb2.PipelineDeploymentConfig()), deployment_config)
def _build_resolver_for_latest_blessed_model(
    self, model_channel_key: str, model_blessing_resolver_name: str,
    model_blessing_channel_key: str) -> pipeline_pb2.PipelineTaskSpec:
    """Builds the resolver spec for latest blessed Model artifact."""
    name = '{}{}'.format(self._name, _MODEL_RESOLVER_SUFFIX)

    # Component def.
    component_def = pipeline_pb2.ComponentSpec()
    executor_label = _EXECUTOR_LABEL_PATTERN.format(name)
    component_def.executor_label = executor_label
    input_artifact_spec = compiler_utils.build_input_artifact_spec(
        self._outputs[model_blessing_channel_key])
    component_def.input_definitions.artifacts[
        _MODEL_RESOLVER_INPUT_KEY].CopyFrom(input_artifact_spec)
    output_artifact_spec = compiler_utils.build_output_artifact_spec(
        self._outputs[model_channel_key])
    component_def.output_definitions.artifacts[model_channel_key].CopyFrom(
        output_artifact_spec)
    self._component_defs[name] = component_def

    # Task spec.
    task_spec = pipeline_pb2.PipelineTaskSpec()
    task_spec.task_info.name = name
    task_spec.component_ref.name = name
    input_artifact_spec = pipeline_pb2.TaskInputsSpec.InputArtifactSpec()
    input_artifact_spec.task_output_artifact.producer_task = (
        model_blessing_resolver_name)
    input_artifact_spec.task_output_artifact.output_artifact_key = (
        model_blessing_channel_key)
    task_spec.inputs.artifacts[_MODEL_RESOLVER_INPUT_KEY].CopyFrom(
        input_artifact_spec)

    # Resolver executor spec.
    executor = pipeline_pb2.PipelineDeploymentConfig.ExecutorSpec()
    artifact_queries = {}
    query_filter = (
        'schema_title="{type}" AND '
        'state={state} AND '
        'name="{{{{$.inputs.artifacts[\'{input_key}\']'
        '.metadata[\'{property_key}\']}}}}"').format(
            type=compiler_utils.get_artifact_title(standard_artifacts.Model),
            state=metadata_store_pb2.Artifact.State.Name(
                metadata_store_pb2.Artifact.LIVE),
            input_key=_MODEL_RESOLVER_INPUT_KEY,
            property_key=constants.ARTIFACT_PROPERTY_CURRENT_MODEL_ID_KEY)
    artifact_queries[model_channel_key] = ResolverSpec.ArtifactQuerySpec(
        filter=query_filter)
    executor.resolver.CopyFrom(
        ResolverSpec(output_artifact_queries=artifact_queries))
    self._deployment_config.executors[executor_label].CopyFrom(executor)
    return task_spec
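# For illustration only (placeholder values in angle brackets; the real ones
# come from compiler_utils and constants above): the query_filter template
# renders to a string of this shape, where the name clause is a placeholder
# resolved at runtime from the input ModelBlessing artifact's metadata:
#
#   schema_title="<model-type-title>" AND state=LIVE AND
#   name="{{$.inputs.artifacts['<input-key>'].metadata['<property-key>']}}"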
def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds an IR ComponentSpec instance from structures.ComponentSpec.

    Args:
        component_spec: The structure component spec.
        executor_label: The executor label.
        actual_inputs: The actual arguments passed to the task. This is used
            as a short-term workaround to support optional inputs in the
            component spec IR.

    Returns:
        An instance of IR ComponentSpec.
    """
    result = pipeline_spec_pb2.ComponentSpec()
    result.executor_label = executor_label

    for input_spec in component_spec.inputs or []:
        # Skip inputs not present.
        if input_spec.name not in actual_inputs:
            continue
        if type_utils.is_parameter_type(input_spec.type):
            result.input_definitions.parameters[
                input_spec.name].type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            result.input_definitions.artifacts[
                input_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema_message(
                        input_spec.type))

    for output_spec in component_spec.outputs or []:
        if type_utils.is_parameter_type(output_spec.type):
            result.output_definitions.parameters[
                output_spec.name].type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            result.output_definitions.artifacts[
                output_spec.name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema_message(
                        output_spec.type))

    return result
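# Usage sketch (mirrors test_build_component_spec_from_structure for this
# version of the function; the example function name is illustrative): inputs
# absent from `actual_inputs` are dropped, which is how optional inputs are
# approximated.
def _example_component_spec_with_actual_inputs():
    structure_spec = structures.ComponentSpec(
        name='component1',
        description='component1 desc',
        inputs=[
            structures.InputSpec(
                name='input1', description='input1 desc', type='Dataset'),
            structures.InputSpec(
                name='input4', description='optional input', optional=True),
        ])
    ir_spec = build_component_spec_from_structure(
        component_spec=structure_spec,
        executor_label='exec-component1',
        actual_inputs=['input1'],
    )
    # 'input4' was not actually passed, so it is omitted from the IR spec.
    assert 'input1' in ir_spec.input_definitions.artifacts
    assert 'input4' not in ir_spec.input_definitions.parameters
    return ir_spec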
def test_fill_in_component_input_default_value(self, parameter_type,
                                               default_value, expected):
    component_spec = pipeline_spec_pb2.ComponentSpec(
        input_definitions=pipeline_spec_pb2.ComponentInputsSpec(
            parameters={
                'input1':
                    pipeline_spec_pb2.ComponentInputsSpec.ParameterSpec(
                        parameter_type=parameter_type)
            }))
    pipeline_spec_builder._fill_in_component_input_default_value(
        component_spec=component_spec,
        input_name='input1',
        default_value=default_value)

    self.assertEqual(
        expected,
        component_spec.input_definitions.parameters['input1'].default_value,
    )
def build_component_spec_for_task(
    task: pipeline_task.PipelineTask) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a pipeline task.

    Args:
        task: The task to build a ComponentSpec for.

    Returns:
        A ComponentSpec object for the task.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()
    component_spec.executor_label = component_utils.sanitize_executor_label(
        task.name)

    for input_name, input_spec in (task.component_spec.inputs or {}).items():
        # Skip inputs not present, as a workaround to support optional inputs.
        if input_name not in task.inputs:
            continue
        if type_utils.is_parameter_type(input_spec.type):
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    input_spec.type)
        else:
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(input_spec.type))

    for output_name, output_spec in (task.component_spec.outputs or
                                     {}).items():
        if type_utils.is_parameter_type(output_spec.type):
            component_spec.output_definitions.parameters[
                output_name].parameter_type = type_utils.get_parameter_type(
                    output_spec.type)
        else:
            component_spec.output_definitions.artifacts[
                output_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(output_spec.type))

    return component_spec
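# Shape sketch (illustrative; the task object comes from the DSL and the
# task/input/output names are hypothetical): for a task named 'trainer' with
# one 'String' parameter input 'message' and one 'Dataset' artifact output
# 'dataset', the resulting spec is equivalent to parsing this dict, in the
# JSON format used by the tests in this section:
#
#   {
#       'executorLabel': 'exec-trainer',
#       'inputDefinitions': {
#           'parameters': {
#               'message': {
#                   'parameterType': 'STRING'
#               }
#           }
#       },
#       'outputDefinitions': {
#           'artifacts': {
#               'dataset': {
#                   'artifactType': {
#                       'schemaTitle': 'system.Dataset',
#                       'schemaVersion': '0.0.1'
#                   }
#               }
#           }
#       }
#   }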
def build_component_spec_for_group(
    pipeline_channels: List[pipeline_channel.PipelineChannel],
    is_root_group: bool,
) -> pipeline_spec_pb2.ComponentSpec:
    """Builds ComponentSpec for a TasksGroup.

    Args:
        pipeline_channels: The list of pipeline channels referenced by the
            group.
        is_root_group: Whether the group is the root group of the pipeline.

    Returns:
        A ComponentSpec object for the group.
    """
    component_spec = pipeline_spec_pb2.ComponentSpec()

    for channel in pipeline_channels:
        input_name = (
            channel.name if is_root_group else
            _additional_input_name_for_pipeline_channel(channel))

        if isinstance(channel, pipeline_channel.PipelineArtifactChannel):
            component_spec.input_definitions.artifacts[
                input_name].artifact_type.CopyFrom(
                    type_utils.get_artifact_type_schema(channel.channel_type))
        else:
            # channel is one of PipelineParameterChannel, LoopArgument, or
            # LoopArgumentVariable.
            component_spec.input_definitions.parameters[
                input_name].parameter_type = type_utils.get_parameter_type(
                    channel.channel_type)

        # TODO: should we fill in default value for all groups and tasks?
        if is_root_group:
            _fill_in_component_input_default_value(
                component_spec=component_spec,
                input_name=input_name,
                default_value=channel.value,
            )

    return component_spec
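# Behavior sketch (illustrative): for the root group, input names are the
# channel names themselves and parameter defaults are filled in from
# channel.value; for nested groups (conditions, loops), inputs are renamed
# via _additional_input_name_for_pipeline_channel so that channels passed
# down from the parent DAG do not collide with the group's own names.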
def _build_resolver_for_latest_model_blessing(
    self, model_blessing_channel_key: str) -> pipeline_pb2.PipelineTaskSpec:
    """Builds the resolver spec for latest valid ModelBlessing artifact."""
    name = '{}{}'.format(self._name, _MODEL_BLESSING_RESOLVER_SUFFIX)

    # Component def.
    component_def = pipeline_pb2.ComponentSpec()
    executor_label = _EXECUTOR_LABEL_PATTERN.format(name)
    component_def.executor_label = executor_label
    output_artifact_spec = compiler_utils.build_output_artifact_spec(
        self._outputs[model_blessing_channel_key])
    component_def.output_definitions.artifacts[
        model_blessing_channel_key].CopyFrom(output_artifact_spec)
    self._component_defs[name] = component_def

    # Task spec.
    task_spec = pipeline_pb2.PipelineTaskSpec()
    task_spec.task_info.name = name
    task_spec.component_ref.name = name

    # Builds the resolver executor spec for latest valid ModelBlessing.
    executor = pipeline_pb2.PipelineDeploymentConfig.ExecutorSpec()
    artifact_queries = {}
    query_filter = ('artifact_type="{type}" and state={state}'
                    ' and metadata.{key}.number_value={value}').format(
                        type=compiler_utils.get_artifact_title(
                            standard_artifacts.ModelBlessing),
                        state=metadata_store_pb2.Artifact.State.Name(
                            metadata_store_pb2.Artifact.LIVE),
                        key=constants.ARTIFACT_PROPERTY_BLESSED_KEY,
                        value=constants.BLESSED_VALUE)
    artifact_queries[
        model_blessing_channel_key] = ResolverSpec.ArtifactQuerySpec(
            filter=query_filter)
    executor.resolver.CopyFrom(
        ResolverSpec(output_artifact_queries=artifact_queries))
    self._deployment_config.executors[executor_label].CopyFrom(executor)
    return task_spec
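# For illustration only (placeholder values in angle brackets; the real ones
# come from compiler_utils and the constants module above): the query_filter
# template renders to a string of this shape:
#
#   artifact_type="<blessing-type-title>" and state=LIVE
#   and metadata.<blessed-key>.number_value=<blessed-value>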
def test_build_root_spec_from_pipeline_params(self):
    pipeline_params = [
        dsl.PipelineParam(name='input1', param_type='Dataset'),
        dsl.PipelineParam(name='input2', param_type='Integer'),
        dsl.PipelineParam(name='input3', param_type='String'),
        dsl.PipelineParam(name='input4', param_type='Float'),
    ]
    expected_dict = {
        'inputDefinitions': {
            'artifacts': {
                'input1': {
                    'artifactType': {
                        'instanceSchema':
                            'properties:\ntitle: kfp.Dataset\ntype: object\n'
                    }
                }
            },
            'parameters': {
                'input2': {
                    'type': 'INT'
                },
                'input3': {
                    'type': 'STRING'
                },
                'input4': {
                    'type': 'DOUBLE'
                }
            }
        }
    }
    expected_spec = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_dict, expected_spec)

    component_spec = (
        dsl_component_spec.build_root_spec_from_pipeline_params(
            pipeline_params))

    self.assertEqual(expected_spec, component_spec)