def test_get_input_artifact_type_schema(self):
    input_specs = [
        structures.InputSpec(name='input1', type='String'),
        structures.InputSpec(name='input2', type='Model'),
        structures.InputSpec(name='input3', type=None),
    ]
    # input not found.
    with self.assertRaises(AssertionError) as cm:
        type_utils.get_input_artifact_type_schema('input0', input_specs)
    self.assertEqual('Input not found.', str(cm.exception))

    # input found, but it doesn't map to an artifact type.
    with self.assertRaises(AssertionError) as cm:
        type_utils.get_input_artifact_type_schema('input1', input_specs)
    self.assertEqual('Input is not an artifact type.', str(cm.exception))

    # input found, and a matching artifact type schema returned.
    self.assertEqual(
        'properties:\ntitle: kfp.Model\ntype: object\n',
        type_utils.get_input_artifact_type_schema('input2', input_specs))

    # input found, and the default artifact type schema returned.
    self.assertEqual(
        'properties:\ntitle: kfp.Artifact\ntype: object\n',
        type_utils.get_input_artifact_type_schema('input3', input_specs))
def test_optional(self):
    input_spec = structures.InputSpec(type='str', default='test')
    self.assertEqual(input_spec.default, 'test')
    self.assertEqual(input_spec._optional, True)

    input_spec = structures.InputSpec(type='str', default=None)
    self.assertEqual(input_spec.default, None)
    self.assertEqual(input_spec._optional, True)

    input_spec = structures.InputSpec(type='str')
    self.assertEqual(input_spec.default, None)
    self.assertEqual(input_spec._optional, False)
def test_equality(self):
    self.assertEqual(
        structures.InputSpec(type='str', default=None),
        structures.InputSpec(type='str', default=None))
    self.assertNotEqual(
        structures.InputSpec(type='str', default=None),
        structures.InputSpec(type='str', default='test'))
    self.assertEqual(
        structures.InputSpec(type='List', default=None),
        structures.InputSpec(type='typing.List', default=None))
    self.assertEqual(
        structures.InputSpec(type='List', default=None),
        structures.InputSpec(type='typing.List[int]', default=None))
    self.assertEqual(
        structures.InputSpec(type='List'),
        structures.InputSpec(type='typing.List[typing.Dict[str, str]]'))
def test_get_input_artifact_type_schema(self):
    input_specs = [
        structures.InputSpec(name='input1', type='String'),
        structures.InputSpec(name='input2', type='GCSPath'),
    ]
    # input not found.
    self.assertEqual(
        None,
        type_utils.get_input_artifact_type_schema('input0', input_specs))

    # input found, but it doesn't map to an artifact type.
    self.assertEqual(
        None,
        type_utils.get_input_artifact_type_schema('input1', input_specs))

    # input found, and a matching artifact type schema returned.
    self.assertEqual(
        'title: Artifact\ntype: object\nproperties:\n',
        type_utils.get_input_artifact_type_schema('input2', input_specs))
def test_simple_component_spec_save_to_component_yaml(self):
    # Tests writing old style (less verbose) and reading in new style (more
    # verbose).
    original_component_spec = structures.ComponentSpec(
        name='component_1',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='alpine',
                command=[
                    'sh',
                    '-c',
                    'set -ex\necho "$0" > "$1"',
                    structures.InputValuePlaceholder(input_name='input1'),
                    structures.OutputParameterPlaceholder(
                        output_name='output1'),
                ],
            )),
        inputs={'input1': structures.InputSpec(type='String')},
        outputs={'output1': structures.OutputSpec(type='String')},
    )

    from kfp.components import yaml_component
    yaml_component = yaml_component.YamlComponent(
        component_spec=original_component_spec)
    with tempfile.TemporaryDirectory() as tempdir:
        output_path = os.path.join(tempdir, 'component.yaml')
        compiler.Compiler().compile(yaml_component, output_path)

        # Test that it can be read back correctly.
        with open(output_path, 'r') as f:
            contents = f.read()
        new_component_spec = structures.ComponentSpec.load_from_component_yaml(
            contents)

        self.assertEqual(original_component_spec, new_component_spec)
def test_component_spec_with_placeholder_referencing_nonexisting_input_output(
        self):
    with self.assertRaisesRegex(
            ValueError,
            r'^Argument \"InputValuePlaceholder[\s\S]*\'input000\'[\s\S]*references non-existing input.'
    ):
        structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(
                            input_name='input000'),
                        structures.OutputPathPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')},
        )

    with self.assertRaisesRegex(
            ValueError,
            r'^Argument \"OutputPathPlaceholder[\s\S]*\'output000\'[\s\S]*references non-existing output.'
    ):
        structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(
                            output_name='output000'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')},
        )
def test_from_container_dict_no_placeholders(self):
    component_spec = structures.ComponentSpec(
        name='test',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='python:3.7',
                command=[
                    'sh',
                    '-c',
                    '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
                    'sh',
                    '-ec',
                    'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"\n',
                    '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef concat_message(first: str, second: str) -> str:\n return first + second\n\n',
                ],
                args=[
                    '--executor_input', '{{$}}', '--function_to_execute',
                    'concat_message'
                ],
                env=None,
                resources=None),
            graph=None,
            importer=None),
        description=None,
        inputs={
            'first': structures.InputSpec(type='String', default=None),
            'second': structures.InputSpec(type='String', default=None)
        },
        outputs={'Output': structures.OutputSpec(type='String')})

    container_dict = {
        'args': [
            '--executor_input', '{{$}}', '--function_to_execute', 'fail_op'
        ],
        'command': [
            'sh',
            '-c',
            '\nif ! [ -x "$(command -v pip)" ]; then\n python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location \'kfp==2.0.0-alpha.2\' && "$0" "$@"\n',
            'sh',
            '-ec',
            'program_path=$(mktemp -d)\nprintf "%s" "$0" > "$program_path/ephemeral_component.py"\npython3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"\n',
            '\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef fail_op(message: str):\n """Fails."""\n import sys\n print(message)\n sys.exit(1)\n\n',
        ],
        'image': 'python:3.7'
    }

    loaded_container_spec = structures.ContainerSpec.from_container_dict(
        container_dict)
def test_simple_component_spec_save_to_component_yaml(self):
    open_mock = mock.mock_open()
    expected_yaml = textwrap.dedent("""\
        implementation:
          container:
            command:
            - sh
            - -c
            - 'set -ex

              echo "$0" > "$1"'
            - {inputValue: input1}
            - {outputPath: output1}
            image: alpine
        inputs:
          input1: {type: String}
        name: component_1
        outputs:
          output1: {type: String}
        """)

    with mock.patch(
            "builtins.open", open_mock, create=True), self.assertWarnsRegex(
                DeprecationWarning, r"Compiling to JSON is deprecated"):
        structures.ComponentSpec(
            name='component_1',
            implementation=structures.Implementation(
                container=structures.ContainerSpec(
                    image='alpine',
                    command=[
                        'sh',
                        '-c',
                        'set -ex\necho "$0" > "$1"',
                        structures.InputValuePlaceholder(input_name='input1'),
                        structures.OutputPathPlaceholder(
                            output_name='output1'),
                    ],
                )),
            inputs={'input1': structures.InputSpec(type='String')},
            outputs={'output1': structures.OutputSpec(type='String')},
        ).save_to_component_yaml('test_save_file.json')

    open_mock.assert_called_once_with('test_save_file.json', 'w')
def test_create_pipeline_task_valid(self):
    expected_component_spec = structures.ComponentSpec(
        name='component1',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='alpine',
                command=['sh', '-c', 'echo "$0" >> "$1"'],
                args=[
                    structures.InputValuePlaceholder(input_name='input1'),
                    structures.OutputPathPlaceholder(output_name='output1'),
                ],
            )),
        inputs={
            'input1': structures.InputSpec(type='String'),
        },
        outputs={
            'output1': structures.OutputSpec(type='Artifact'),
        },
    )
    expected_task_spec = structures.TaskSpec(
        name='component1',
        inputs={'input1': 'value'},
        dependent_tasks=[],
        component_ref='component1',
    )
    expected_container_spec = structures.ContainerSpec(
        image='alpine',
        command=['sh', '-c', 'echo "$0" >> "$1"'],
        args=[
            "{{$.inputs.parameters['input1']}}",
            "{{$.outputs.artifacts['output1'].path}}",
        ],
    )

    task = pipeline_task.PipelineTask(
        component_spec=structures.ComponentSpec.load_from_component_yaml(
            V2_YAML),
        args={'input1': 'value'},
    )
    self.assertEqual(task.task_spec, expected_task_spec)
    self.assertEqual(task.component_spec, expected_component_spec)
    self.assertEqual(task.container_spec, expected_container_spec)
def test_simple_component_spec_load_from_v2_component_yaml(self): component_yaml_v2 = textwrap.dedent("""\ name: component_1 inputs: input1: type: String outputs: output1: type: String implementation: container: image: alpine command: - sh - -c - 'set -ex echo "$0" > "$1"' - inputValue: input1 - outputPath: output1 """) generated_spec = structures.ComponentSpec.load_from_component_yaml( component_yaml_v2) expected_spec = structures.ComponentSpec( name='component_1', implementation=structures.Implementation( container=structures.ContainerSpec( image='alpine', command=[ 'sh', '-c', 'set -ex\necho "$0" > "$1"', structures.InputValuePlaceholder(input_name='input1'), structures.OutputPathPlaceholder( output_name='output1'), ], )), inputs={'input1': structures.InputSpec(type='String')}, outputs={'output1': structures.OutputSpec(type='String')}) self.assertEqual(generated_spec, expected_spec)
def importer(
    artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str],
    artifact_class: Type[artifact_types.Artifact],
    reimport: bool = False,
    metadata: Optional[Mapping[str, Any]] = None,
) -> pipeline_task.PipelineTask:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

    Args:
      artifact_uri: The artifact uri to import from.
      artifact_class: The artifact class of the artifact to be imported.
      reimport: Whether to reimport the artifact. Defaults to False.
      metadata: Properties of the artifact.

    Returns:
      A PipelineTask instance.

    Raises:
      ValueError: If the passed in artifact_uri is neither a PipelineParam nor a
        constant string value.
    """
    component_spec = structures.ComponentSpec(
        name='importer',
        implementation=structures.Implementation(
            importer=structures.ImporterSpec(
                artifact_uri=structures.InputValuePlaceholder(
                    INPUT_KEY).to_placeholder(),
                type_schema=artifact_class.TYPE_NAME,
                reimport=reimport,
                metadata=metadata)),
        inputs={INPUT_KEY: structures.InputSpec(type='String')},
        outputs={
            OUTPUT_KEY: structures.OutputSpec(type=artifact_class.__name__)
        },
    )

    importer = importer_component.ImporterComponent(
        component_spec=component_spec)
    return importer(uri=artifact_uri)
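# Illustrative usage sketch (not part of the module): calling dsl.importer
# inside a pipeline, based on the signature above. The pipeline name, bucket
# URI, and metadata values are hypothetical.
#
#   from kfp import dsl
#
#   @dsl.pipeline(name='importer-example')
#   def my_pipeline():
#       importer_task = dsl.importer(
#           artifact_uri='gs://my-bucket/datasets/train.csv',  # hypothetical
#           artifact_class=dsl.Dataset,
#           reimport=False,
#           metadata={'source': 'example'})
#       # importer_task.output is the imported Dataset artifact.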
def test_simple_component_spec_load_from_v2_component_yaml(self): component_yaml_v2 = textwrap.dedent("""\ components: comp-component-1: executorLabel: exec-component-1 inputDefinitions: parameters: input1: parameterType: STRING outputDefinitions: parameters: output1: parameterType: STRING deploymentSpec: executors: exec-component-1: container: command: - sh - -c - 'set -ex echo "$0" > "$1"' - '{{$.inputs.parameters[''input1'']}}' - '{{$.outputs.parameters[''output1''].output_file}}' image: alpine pipelineInfo: name: component-1 root: dag: tasks: component-1: cachingOptions: enableCache: true componentRef: name: comp-component-1 inputs: parameters: input1: componentInputParameter: input1 taskInfo: name: component-1 inputDefinitions: parameters: input1: parameterType: STRING schemaVersion: 2.1.0 sdkVersion: kfp-2.0.0-alpha.2 """) generated_spec = structures.ComponentSpec.load_from_component_yaml( component_yaml_v2) expected_spec = structures.ComponentSpec( name='component-1', implementation=structures.Implementation( container=structures.ContainerSpec( image='alpine', command=[ 'sh', '-c', 'set -ex\necho "$0" > "$1"', structures.InputValuePlaceholder(input_name='input1'), structures.OutputParameterPlaceholder( output_name='output1'), ], )), inputs={'input1': structures.InputSpec(type='String')}, outputs={'output1': structures.OutputSpec(type='String')}) self.assertEqual(generated_spec, expected_spec)
def to_v1_component_spec(self) -> v1_structures.ComponentSpec:
    """Converts to a v1 ComponentSpec.

    Returns:
      Component spec in the form of a v1 ComponentSpec.

    Needed until downstream consumers accept the new ComponentSpec.
    """

    def _transform_arg(arg: ValidCommandArgs) -> Any:
        if isinstance(arg, str):
            return arg
        if isinstance(arg, InputValuePlaceholder):
            return v1_structures.InputValuePlaceholder(arg.input_name)
        if isinstance(arg, InputPathPlaceholder):
            return v1_structures.InputPathPlaceholder(arg.input_name)
        if isinstance(arg, InputUriPlaceholder):
            return v1_structures.InputUriPlaceholder(arg.input_name)
        if isinstance(arg, OutputPathPlaceholder):
            return v1_structures.OutputPathPlaceholder(arg.output_name)
        if isinstance(arg, OutputUriPlaceholder):
            return v1_structures.OutputUriPlaceholder(arg.output_name)
        if isinstance(arg, IfPresentPlaceholder):
            return v1_structures.IfPlaceholder(arg.if_structure)
        if isinstance(arg, ConcatPlaceholder):
            return v1_structures.ConcatPlaceholder(arg.concat)
        raise ValueError(
            f'Unexpected command/argument type: "{arg}" of type "{type(arg)}".'
        )

    return v1_structures.ComponentSpec(
        name=self.name,
        inputs=[
            v1_structures.InputSpec(
                name=name,
                type=input_spec.type,
                default=input_spec.default,
            ) for name, input_spec in self.inputs.items()
        ],
        outputs=[
            v1_structures.OutputSpec(
                name=name,
                type=output_spec.type,
            ) for name, output_spec in self.outputs.items()
        ],
        implementation=v1_structures.ContainerImplementation(
            container=v1_structures.ContainerSpec(
                image=self.implementation.container.image,
                command=[
                    _transform_arg(cmd)
                    for cmd in self.implementation.container.commands or []
                ],
                args=[
                    _transform_arg(arg)
                    for arg in self.implementation.container.arguments or []
                ],
                env={
                    name: _transform_arg(value) for name, value in
                    (self.implementation.container.env or {}).items()
                },
            )),
    )
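# Minimal conversion sketch (not part of the module), assuming `spec` is a
# new-style structures.ComponentSpec like the ones built elsewhere in these
# snippets: the dict-keyed inputs/outputs become v1-style lists of
# InputSpec/OutputSpec keyed by name.
#
#   v1_spec = spec.to_v1_component_spec()
#   assert v1_spec.name == spec.name
#   assert [i.name for i in v1_spec.inputs] == list(spec.inputs.keys())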
def create_custom_training_job_op_from_component(
    component_spec: Callable,  # pylint: disable=g-bare-generic
    display_name: Optional[str] = '',
    replica_count: Optional[int] = 1,
    machine_type: Optional[str] = 'n1-standard-4',
    accelerator_type: Optional[str] = '',
    accelerator_count: Optional[int] = 1,
    boot_disk_type: Optional[str] = 'pd-ssd',
    boot_disk_size_gb: Optional[int] = 100,
    timeout: Optional[str] = '',
    restart_job_on_worker_restart: Optional[bool] = False,
    service_account: Optional[str] = '',
    network: Optional[str] = '',
    encryption_spec_key_name: Optional[str] = '',
    tensorboard: Optional[str] = '',
    enable_web_access: Optional[bool] = False,
    base_output_directory: Optional[str] = '',
    labels: Optional[Dict[str, str]] = None,
) -> Callable:  # pylint: disable=g-bare-generic
    """Creates a component spec that runs a custom training job in Vertex AI.

    This utility converts a given component to a CustomTrainingJobOp that runs a
    custom training job in Vertex AI. This simplifies the creation of custom
    training jobs. All inputs and outputs of the supplied component will be
    copied over to the constructed training job.

    Note that this utility constructs a ClusterSpec where the master and all the
    workers use the same spec, meaning all disk/machine spec related parameters
    will apply to all replicas. This is suitable for use cases such as training
    with MultiWorkerMirroredStrategy or MirroredStrategy.

    This component does not support the Vertex AI Python training application.

    For more details on the Vertex AI Training service, please refer to
    https://cloud.google.com/vertex-ai/docs/training/create-custom-job

    Args:
      component_spec: The task (ContainerOp) object to run as a Vertex AI custom
        job.
      display_name (Optional[str]): The name of the custom job. If not provided,
        component_spec.name will be used instead.
      replica_count (Optional[int]): The count of instances in the cluster. One
        replica always counts towards the master in worker_pool_spec[0] and the
        remaining replicas will be allocated in worker_pool_spec[1]. For more
        details see
        https://cloud.google.com/vertex-ai/docs/training/distributed-training#configure_a_distributed_training_job.
      machine_type (Optional[str]): The type of the machine to run the custom
        job. The default value is "n1-standard-4". For more details about this
        input config, see
        https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types.
      accelerator_type (Optional[str]): The type of accelerator(s) that may be
        attached to the machine as per accelerator_count. For more details about
        this input config, see
        https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec#acceleratortype.
      accelerator_count (Optional[int]): The number of accelerators to attach to
        the machine. Defaults to 1 if accelerator_type is set.
      boot_disk_type (Optional[str]): Type of the boot disk (default is
        "pd-ssd"). Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
        "pd-standard" (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb (Optional[int]): Size in GB of the boot disk (default is
        100GB).
      timeout (Optional[str]): The maximum job running time. The default is 7
        days. A duration in seconds with up to nine fractional digits,
        terminated by 's', for example: "3.5s".
      restart_job_on_worker_restart (Optional[bool]): Restarts the entire
        CustomJob if a worker gets restarted. This feature can be used by
        distributed training jobs that are not resilient to workers leaving and
        joining a job.
      service_account (Optional[str]): Sets the default service account for the
        workload run-as account. The service account running the pipeline
        (https://cloud.google.com/vertex-ai/docs/pipelines/configure-project#service-account)
        submitting jobs must have act-as permission on this run-as account. If
        unspecified, the Vertex AI Custom Code Service Agent
        (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
        for the CustomJob's project is used.
      network (Optional[str]): The full name of the Compute Engine network to
        which the job should be peered. For example,
        projects/12345/global/networks/myVPC. Format is of the form
        projects/{project}/global/networks/{network}, where {project} is a
        project number, as in 12345, and {network} is a network name. Private
        services access must already be configured for the network. If left
        unspecified, the job is not peered with any network.
      encryption_spec_key_name (Optional[str]): Customer-managed encryption key
        options for the CustomJob. If this is set, then all resources created by
        the CustomJob will be encrypted with the provided encryption key.
      tensorboard (Optional[str]): The name of a Vertex AI Tensorboard resource
        to which this CustomJob will upload Tensorboard logs.
      enable_web_access (Optional[bool]): Whether you want Vertex AI to enable
        [interactive shell access](https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell)
        to training containers. If set to `true`, you can access interactive
        shells at the URIs given by [CustomJob.web_access_uris][].
      base_output_directory (Optional[str]): The Cloud Storage location to store
        the output of this CustomJob or HyperparameterTuningJob. For more
        details, see
        https://cloud.google.com/vertex-ai/docs/reference/rest/v1/GcsDestination.
      labels (Optional[Dict[str, str]]): The labels with user-defined metadata
        to organize CustomJobs. See https://goo.gl/xmQnxf for more information.

    Returns:
      A Custom Job component operator corresponding to the input component
      operator.
    """
    job_spec = {}
    input_specs = []
    output_specs = []

    # pytype: disable=attribute-error
    if component_spec.component_spec.inputs:
        input_specs = component_spec.component_spec.inputs
    if component_spec.component_spec.outputs:
        output_specs = component_spec.component_spec.outputs

    def _is_output_parameter(output_key: str) -> bool:
        for output in component_spec.component_spec.outputs:
            if output.name == output_key:
                return type_utils.is_parameter_type(output.type)
        return False

    worker_pool_spec = {
        'machine_spec': {
            'machine_type': machine_type
        },
        'replica_count': 1,
        'container_spec': {
            'image_uri':
                component_spec.component_spec.implementation.container.image,
        }
    }
    if component_spec.component_spec.implementation.container.command:
        container_command_copy = component_spec.component_spec.implementation.container.command.copy(
        )
        dsl_utils.resolve_cmd_lines(container_command_copy,
                                    _is_output_parameter)
        # Replace the executor placeholder with the JSON-escaped placeholder.
        for idx, val in enumerate(container_command_copy):
            if val == '{{{{$}}}}':
                container_command_copy[idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT
        worker_pool_spec['container_spec']['command'] = container_command_copy

    if component_spec.component_spec.implementation.container.args:
        container_args_copy = component_spec.component_spec.implementation.container.args.copy(
        )
        dsl_utils.resolve_cmd_lines(container_args_copy, _is_output_parameter)
        # Replace the executor placeholder with the JSON-escaped placeholder.
        for idx, val in enumerate(container_args_copy):
            if val == '{{{{$}}}}':
                container_args_copy[idx] = _EXECUTOR_PLACE_HOLDER_REPLACEMENT
        worker_pool_spec['container_spec']['args'] = container_args_copy

    if accelerator_type:
        worker_pool_spec['machine_spec']['accelerator_type'] = accelerator_type
        worker_pool_spec['machine_spec'][
            'accelerator_count'] = accelerator_count

    if boot_disk_type:
        if 'disk_spec' not in worker_pool_spec:
            worker_pool_spec['disk_spec'] = {}
        worker_pool_spec['disk_spec']['boot_disk_type'] = boot_disk_type
    if 'disk_spec' not in worker_pool_spec:
        worker_pool_spec['disk_spec'] = {}
    worker_pool_spec['disk_spec']['boot_disk_size_gb'] = boot_disk_size_gb

    job_spec['worker_pool_specs'] = [worker_pool_spec]
    if int(replica_count) > 1:
        additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
        additional_worker_pool_spec['replica_count'] = str(replica_count - 1)
        job_spec['worker_pool_specs'].append(additional_worker_pool_spec)

    # TODO(chavoshi): Use input parameter instead of hard coded string label.
    # This requires Dictionary input type to be supported in V2.
    if labels is not None:
        job_spec['labels'] = labels

    if timeout:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling']['timeout'] = timeout
    if restart_job_on_worker_restart:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling'][
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if enable_web_access:
        job_spec['enable_web_access'] = enable_web_access

    if encryption_spec_key_name:
        job_spec['encryption_spec'] = {}
        job_spec['encryption_spec'][
            'kms_key_name'] = "{{$.inputs.parameters['encryption_spec_key_name']}}"
        input_specs.append(
            structures.InputSpec(
                name='encryption_spec_key_name',
                type='String',
                optional=True,
                default=encryption_spec_key_name),)

    # Remove any existing service_account/network/tensorboard/
    # base_output_directory from the component input list.
    input_specs[:] = [
        input_spec for input_spec in input_specs if input_spec.name not in
        ('service_account', 'network', 'tensorboard', 'base_output_directory')
    ]
    job_spec['service_account'] = "{{$.inputs.parameters['service_account']}}"
    job_spec['network'] = "{{$.inputs.parameters['network']}}"
    job_spec['tensorboard'] = "{{$.inputs.parameters['tensorboard']}}"
    job_spec['base_output_directory'] = {}
    job_spec['base_output_directory'][
        'output_uri_prefix'] = "{{$.inputs.parameters['base_output_directory']}}"

    custom_job_payload = {
        'display_name': display_name or component_spec.component_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=component_spec.component_spec.name,
        inputs=input_specs + [
            structures.InputSpec(
                name='base_output_directory',
                type='String',
                optional=True,
                default=base_output_directory),
            structures.InputSpec(
                name='tensorboard',
                type='String',
                optional=True,
                default=tensorboard),
            structures.InputSpec(
                name='network', type='String', optional=True, default=network),
            structures.InputSpec(
                name='service_account',
                type='String',
                optional=True,
                default=service_account),
            structures.InputSpec(name='project', type='String'),
            structures.InputSpec(name='location', type='String')
        ],
        outputs=output_specs +
        [structures.OutputSpec(name='gcp_resources', type='String')],
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
                command=[
                    'python3', '-u', '-m',
                    'google_cloud_pipeline_components.container.v1.gcp_launcher.launcher'
                ],
                args=[
                    '--type',
                    'CustomJob',
                    '--payload',
                    json.dumps(custom_job_payload),
                    '--project',
                    structures.InputValuePlaceholder(input_name='project'),
                    '--location',
                    structures.InputValuePlaceholder(input_name='location'),
                    '--gcp_resources',
                    structures.OutputPathPlaceholder(
                        output_name='gcp_resources'),
                ],
            )))
    # pytype: enable=attribute-error

    component_path = tempfile.mktemp()
    custom_job_component_spec.save(component_path)

    return components.load_component_from_file(component_path)
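# Hedged usage sketch (not part of the module): wrapping an existing component
# so it runs as a Vertex AI CustomJob. `train_op` is a hypothetical component;
# per the spec built above, the returned op additionally takes 'project' and
# 'location' (plus the other job-level inputs).
#
#   custom_train_op = create_custom_training_job_op_from_component(
#       train_op,
#       replica_count=2,
#       machine_type='n1-standard-8',
#   )
#
#   # Inside a pipeline:
#   #   custom_train_op(
#   #       project='my-project',       # hypothetical
#   #       location='us-central1',     # hypothetical
#   #       ...original train_op inputs...)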
def component_yaml_generator(**kwargs):
    input_specs = []
    input_args = []
    input_kwargs = {}

    serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

    init_kwargs = {}
    method_kwargs = {}

    for key, value in kwargs.items():
        if key in init_arg_names:
            prefix_key = INIT_KEY
            init_kwargs[key] = value
            signature = init_signature
        else:
            prefix_key = METHOD_KEY
            method_kwargs[key] = value
            signature = method_signature

        # No need to add this argument because it's optional.
        # This param is validated against the signature because
        # of init_kwargs, method_kwargs.
        if value is None:
            continue

        param_type = signature.parameters[key].annotation
        param_type = resolve_annotation(param_type)
        serializer = get_serializer(param_type)
        if serializer:
            param_type = str
            if not isinstance(value, kfp.dsl._pipeline_param.PipelineParam):
                value = serializer(value)

        # TODO remove PipelineParam check when Metadata Importer component available
        # If we serialize, we need to include the argument as input.
        # Perhaps another option is to embed in yaml as json serialized list.
        component_param_name = component_param_name_to_mb_sdk_param_name.get(
            key, key)
        component_param_type = None
        if isinstance(value,
                      kfp.dsl._pipeline_param.PipelineParam) or serializer:
            if is_mb_sdk_resource_noun_type(param_type):
                metadata_type = map_resource_to_metadata_type(param_type)[1]
                component_param_type = metadata_type
            else:
                if param_type == int:
                    component_param_type = 'Integer'
                elif param_type == float:
                    component_param_type = 'Float'
                elif param_type == bool:
                    component_param_type = 'Bool'
                elif param_type in (list, collections.abc.Sequence, Sequence):
                    component_param_type = 'List'
                elif param_type in (dict, Dict):
                    component_param_type = 'Dict'
                elif param_type in PROTO_PLUS_CLASS_TYPES:
                    component_param_type = 'String'
                else:
                    component_param_type = 'String'

            input_specs.append(
                structures.InputSpec(
                    name=key,
                    type=component_param_type,
                ))
            input_args.append(f'--{prefix_key}.{component_param_name}')
            if is_mb_sdk_resource_noun_type(param_type):
                input_args.append(
                    f'{{{{$.inputs.artifacts[\'{key}\'].metadata[\'resourceName\']}}}}'
                )
            else:
                input_args.append(
                    structures.InputValuePlaceholder(input_name=key))

            input_kwargs[key] = value
        else:
            # Serialized arguments must always be strings.
            value = str(value)
            serialized_args[prefix_key][component_param_name] = value

    # Validate parameters.
    if should_serialize_init:
        init_signature.bind(**init_kwargs)
    method_signature.bind(**method_kwargs)

    component_spec = structures.ComponentSpec(
        name=f'{cls_name}-{method_name}',
        inputs=input_specs,
        outputs=output_specs,
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=DEFAULT_CONTAINER_IMAGE,
                command=[
                    'python3',
                    '-m',
                    'google_cloud_pipeline_components.container.aiplatform.remote_runner',
                    '--cls_name',
                    cls_name,
                    '--method_name',
                    method_name,
                ],
                args=make_args(serialized_args) + output_args + input_args,
            )))
    component_path = tempfile.mktemp()
    component_spec.save(component_path)

    return components.load_component_from_file(component_path)(**input_kwargs)
        name='component_1',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='alpine',
                command=[
                    'sh',
                    '-c',
                    'set -ex\necho "$0" "$1" "$2" > "$3"',
                    structures.InputValuePlaceholder(input_name='input1'),
                    structures.InputValuePlaceholder(input_name='input2'),
                    structures.InputValuePlaceholder(input_name='input3'),
                    structures.OutputPathPlaceholder(output_name='output1'),
                ],
            )),
        inputs={
            'input1': structures.InputSpec(type='String'),
            'input2': structures.InputSpec(type='Integer'),
            'input3': structures.InputSpec(type='Float', default=3.14),
            'input4': structures.InputSpec(
                type='Optional[Float]', default=None),
        },
        outputs={
            'output1': structures.OutputSpec(type='String'),
        },
    ))


class BaseComponentTest(unittest.TestCase):

    @patch.object(pipeline_task, 'create_pipeline_task', autospec=True)
    def test_instantiate_component_with_keyword_arguments(
            self, mock_create_pipeline_task):
def test_if_placeholder(self):
    compiled_yaml = textwrap.dedent("""
        components:
          comp-if:
            executorLabel: exec-if
            inputDefinitions:
              parameters:
                optional_input_1:
                  parameterType: STRING
        deploymentSpec:
          executors:
            exec-if:
              container:
                args:
                - 'input: '
                - '{{$.inputs.parameters[''optional_input_1'']}}'
                command:
                - sh
                - -c
                - echo "$0" "$1"
                image: alpine
        pipelineInfo:
          name: if
        root:
          dag:
            tasks:
              if:
                cachingOptions:
                  enableCache: true
                componentRef:
                  name: comp-if
                inputs:
                  parameters:
                    optional_input_1:
                      componentInputParameter: optional_input_1
                taskInfo:
                  name: if
          inputDefinitions:
            parameters:
              optional_input_1:
                parameterType: STRING
        schemaVersion: 2.1.0
        sdkVersion: kfp-2.0.0-alpha.2""")

    loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
        compiled_yaml)

    component_spec = structures.ComponentSpec(
        name='if',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='alpine',
                command=['sh', '-c', 'echo "$0" "$1"'],
                args=[
                    'input: ',
                    structures.InputValuePlaceholder(
                        input_name='optional_input_1')
                ],
                env=None,
                resources=None),
            graph=None,
            importer=None),
        description=None,
        inputs={
            'optional_input_1': structures.InputSpec(
                type='String', default=None)
        },
        outputs=None)
    self.assertEqual(loaded_component_spec, component_spec)
def test_simple_placeholder(self):
    compiled_yaml = textwrap.dedent("""
        components:
          comp-component1:
            executorLabel: exec-component1
            inputDefinitions:
              parameters:
                input1:
                  parameterType: STRING
            outputDefinitions:
              artifacts:
                output1:
                  artifactType:
                    schemaTitle: system.Artifact
                    schemaVersion: 0.0.1
        deploymentSpec:
          executors:
            exec-component1:
              container:
                args:
                - '{{$.inputs.parameters[''input1'']}}'
                - '{{$.outputs.artifacts[''output1''].path}}'
                command:
                - sh
                - -c
                - echo "$0" >> "$1"
                image: alpine
        pipelineInfo:
          name: component1
        root:
          dag:
            tasks:
              component1:
                cachingOptions:
                  enableCache: true
                componentRef:
                  name: comp-component1
                inputs:
                  parameters:
                    input1:
                      componentInputParameter: input1
                taskInfo:
                  name: component1
          inputDefinitions:
            parameters:
              input1:
                parameterType: STRING
        schemaVersion: 2.1.0
        sdkVersion: kfp-2.0.0-alpha.2""")

    loaded_component_spec = structures.ComponentSpec.load_from_component_yaml(
        compiled_yaml)

    component_spec = structures.ComponentSpec(
        name='component1',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='alpine',
                command=['sh', '-c', 'echo "$0" >> "$1"'],
                args=[
                    structures.InputValuePlaceholder(input_name='input1'),
                    structures.OutputPathPlaceholder(output_name='output1')
                ],
                env=None,
                resources=None),
            graph=None,
            importer=None),
        description=None,
        inputs={
            'input1': structures.InputSpec(type='String', default=None)
        },
        outputs={'output1': structures.OutputSpec(type='Artifact')})
    self.assertEqual(loaded_component_spec, component_spec)
def extract_component_interface(func: Callable) -> structures.ComponentSpec:
    single_output_name_const = 'Output'

    signature = inspect.signature(func)
    parameters = list(signature.parameters.values())

    parsed_docstring = docstring_parser.parse(inspect.getdoc(func))
    doc_dict = {p.arg_name: p.description for p in parsed_docstring.params}

    inputs = {}
    outputs = {}

    input_names = set()
    output_names = set()
    for parameter in parameters:
        parameter_type = type_annotations.maybe_strip_optional_from_annotation(
            parameter.annotation)
        passing_style = None
        io_name = parameter.name

        if type_annotations.is_artifact_annotation(parameter_type):
            # passing_style is either type_annotations.InputAnnotation or
            # type_annotations.OutputAnnotation.
            passing_style = type_annotations.get_io_artifact_annotation(
                parameter_type)

            # parameter_type is type_annotations.Artifact or one of its
            # subclasses.
            parameter_type = type_annotations.get_io_artifact_class(
                parameter_type)
            if not issubclass(parameter_type, artifact_types.Artifact):
                raise ValueError(
                    'Input[T] and Output[T] are only supported when T is a '
                    'subclass of Artifact. Found `{} with type {}`'.format(
                        io_name, parameter_type))

            if parameter.default is not inspect.Parameter.empty:
                raise ValueError(
                    'Default values for Input/Output artifacts are not supported.'
                )
        elif isinstance(
                parameter_type,
            (type_annotations.InputPath, type_annotations.OutputPath)):
            passing_style = type(parameter_type)
            parameter_type = parameter_type.type
            if parameter.default is not inspect.Parameter.empty and not (
                    passing_style == type_annotations.InputPath and
                    parameter.default is None):
                raise ValueError(
                    'Path inputs only support default values of None. Default'
                    ' values for outputs are not supported.')

        type_struct = _annotation_to_type_struct(parameter_type)
        if type_struct is None:
            raise TypeError(
                'Missing type annotation for argument: {}'.format(
                    parameter.name))

        if passing_style in [
                type_annotations.OutputAnnotation, type_annotations.OutputPath
        ]:
            io_name = _maybe_make_unique(io_name, output_names)
            output_names.add(io_name)
            output_spec = structures.OutputSpec(
                type=type_struct, description=doc_dict.get(parameter.name))
            outputs[io_name] = output_spec
        else:
            io_name = _maybe_make_unique(io_name, input_names)
            input_names.add(io_name)
            if parameter.default is not inspect.Parameter.empty:
                input_spec = structures.InputSpec(
                    type=type_struct,
                    description=doc_dict.get(parameter.name),
                    default=parameter.default,
                )
            else:
                input_spec = structures.InputSpec(
                    type=type_struct,
                    description=doc_dict.get(parameter.name),
                )
            inputs[io_name] = input_spec

    # Analyzing the return type annotations.
    return_ann = signature.return_annotation
    if hasattr(return_ann, '_fields'):  # NamedTuple
        # Getting field type annotations.
        # __annotations__ does not exist in python 3.5 and earlier.
        # _field_types does not exist in python 3.9 and later.
        field_annotations = getattr(return_ann, '__annotations__',
                                    None) or getattr(return_ann,
                                                     '_field_types', None)
        for field_name in return_ann._fields:
            type_struct = None
            if field_annotations:
                type_struct = _annotation_to_type_struct(
                    field_annotations.get(field_name, None))
            output_name = _maybe_make_unique(field_name, output_names)
            output_names.add(output_name)
            output_spec = structures.OutputSpec(type=type_struct)
            outputs[output_name] = output_spec
    # Deprecated dict-based way of declaring multiple outputs. Was only used by
    # the @component decorator.
    elif isinstance(return_ann, dict):
        warnings.warn(
            'The ability to specify multiple outputs using the dict syntax'
            ' has been deprecated. It will be removed soon after release'
            ' 0.1.32. Please use typing.NamedTuple to declare multiple'
            ' outputs.')
        for output_name, output_type_annotation in return_ann.items():
            output_type_struct = _annotation_to_type_struct(
                output_type_annotation)
            output_spec = structures.OutputSpec(type=output_type_struct)
            outputs[output_name] = output_spec
    elif signature.return_annotation is not None and signature.return_annotation != inspect.Parameter.empty:
        output_name = _maybe_make_unique(single_output_name_const,
                                         output_names)
        # Fixes exotic, but possible collision:
        #   `def func(output_path: OutputPath()) -> str: ...`
        output_names.add(output_name)
        type_struct = _annotation_to_type_struct(signature.return_annotation)
        output_spec = structures.OutputSpec(type=type_struct)
        outputs[output_name] = output_spec

    # Component name and description are derived from the function's name and
    # docstring. The name can be overridden by setting the func.__name__
    # attribute (or the legacy func._component_human_name attribute). The
    # description can be overridden by setting the func.__doc__ attribute (or
    # the legacy func._component_description attribute).
    component_name = getattr(
        func, '_component_human_name',
        None) or _python_function_name_to_component_name(func.__name__)
    description = getattr(func, '_component_description',
                          None) or parsed_docstring.short_description
    if description:
        description = description.strip()

    component_spec = structures.ComponentSpec(
        name=component_name,
        description=description,
        inputs=inputs if inputs else None,
        outputs=outputs if outputs else None,
        # Dummy implementation to bypass model validation.
        implementation=structures.Implementation(),
    )
    return component_spec
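# Illustrative sketch (not part of the module) of what
# extract_component_interface produces for a plain Python function. The
# function and the expected mapping below are assumptions for illustration,
# based on the signature/annotation handling above.
#
#   def add(a: int, b: int = 1) -> int:
#       return a + b
#
#   spec = extract_component_interface(add)
#   # spec.inputs  -> {'a': InputSpec(type='Integer'),
#   #                  'b': InputSpec(type='Integer', default=1)}
#   # spec.outputs -> {'Output': OutputSpec(type='Integer')}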
def test_component_spec_load_from_v1_component_yaml(self):
    component_yaml_v1 = textwrap.dedent("""\
        name: Component with 2 inputs and 2 outputs
        inputs:
        - {name: Input parameter, type: String}
        - {name: Input artifact}
        outputs:
        - {name: Output 1}
        - {name: Output 2}
        implementation:
          container:
            image: busybox
            command: [sh, -c, '
                mkdir -p $(dirname "$2")
                mkdir -p $(dirname "$3")
                echo "$0" > "$2"
                cp "$1" "$3"
                '
            ]
            args:
            - {inputValue: Input parameter}
            - {inputPath: Input artifact}
            - {outputPath: Output 1}
            - {outputPath: Output 2}
        """)

    generated_spec = structures.ComponentSpec.load_from_component_yaml(
        component_yaml_v1)

    expected_spec = structures.ComponentSpec(
        name='Component with 2 inputs and 2 outputs',
        implementation=structures.Implementation(
            container=structures.ContainerSpec(
                image='busybox',
                command=[
                    'sh',
                    '-c',
                    (' mkdir -p $(dirname "$2") mkdir -p $(dirname "$3") '
                     'echo "$0" > "$2" cp "$1" "$3" '),
                ],
                args=[
                    structures.InputValuePlaceholder(
                        input_name='input_parameter'),
                    structures.InputPathPlaceholder(
                        input_name='input_artifact'),
                    structures.OutputPathPlaceholder(output_name='output_1'),
                    structures.OutputPathPlaceholder(output_name='output_2'),
                ],
                env={},
            )),
        inputs={
            'input_parameter': structures.InputSpec(type='String'),
            'input_artifact': structures.InputSpec(type='Artifact')
        },
        outputs={
            'output_1': structures.OutputSpec(type='Artifact'),
            'output_2': structures.OutputSpec(type='Artifact'),
        })
    self.assertEqual(generated_spec, expected_spec)
def run_as_vertex_ai_custom_job(
    component_spec: Callable,
    display_name: Optional[str] = None,
    replica_count: Optional[int] = None,
    machine_type: Optional[str] = None,
    accelerator_type: Optional[str] = None,
    accelerator_count: Optional[int] = None,
    boot_disk_type: Optional[str] = None,
    boot_disk_size_gb: Optional[int] = None,
    timeout: Optional[str] = None,
    restart_job_on_worker_restart: Optional[bool] = None,
    service_account: Optional[str] = None,
    network: Optional[str] = None,
    worker_pool_specs: Optional[List[Mapping[str, Any]]] = None,
) -> Callable:
    """Runs a pipeline task using an AI Platform (Unified) custom training job.

    For a detailed doc of the service, please refer to
    https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job

    Args:
      component_spec: The task (ContainerOp) object to run as an aiplatform
        custom job.
      display_name: Optional. The name of the custom job. If not provided,
        component_spec.name will be used instead.
      replica_count: Optional. The number of replicas to be split between the
        master workerPoolSpec and worker workerPoolSpec (the master always has
        1 replica).
      machine_type: Optional. The type of the machine to run the custom job.
        The default value is "n1-standard-4".
      accelerator_type: Optional. The type of accelerator(s) that may be
        attached to the machine as per accelerator_count.
      accelerator_count: Optional. The number of accelerators to attach to the
        machine.
      boot_disk_type: Optional. Type of the boot disk (default is "pd-ssd").
        Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
        "pd-standard" (Persistent Disk Hard Disk Drive).
      boot_disk_size_gb: Optional. Size in GB of the boot disk (default is
        100GB).
      timeout: Optional. The maximum job running time. The default is 7 days.
        A duration in seconds with up to nine fractional digits, terminated by
        's'. Example: "3.5s".
      restart_job_on_worker_restart: Optional. Restarts the entire CustomJob if
        a worker gets restarted. This feature can be used by distributed
        training jobs that are not resilient to workers leaving and joining a
        job.
      service_account: Optional. Specifies the service account for the workload
        run-as account.
      network: Optional. The full name of the Compute Engine network to which
        the job should be peered. For example,
        projects/12345/global/networks/myVPC.
      worker_pool_specs: Optional. worker_pool_specs for distributed training.
        This will overwrite all other cluster configurations. For details,
        please see:
        https://cloud.google.com/ai-platform-unified/docs/training/distributed-training

    Returns:
      A Custom Job component op corresponding to the input component op.
    """
    job_spec = {}
    # As a temporary workaround for an issue with the kfp v2 based compiler,
    # where the compiler expects placeholders in their original form in args
    # instead of using fields from outputs, we add back the args from the
    # original component to the custom job component. These args will be
    # ignored by the remote launcher.
    copy_of_original_args = []

    if worker_pool_specs is not None:
        worker_pool_specs = copy.deepcopy(worker_pool_specs)

        def _is_output_parameter(output_key: str) -> bool:
            return output_key in (
                component_spec.component_spec.output_definitions.parameters
                .keys())

        for worker_pool_spec in worker_pool_specs:
            if 'container_spec' in worker_pool_spec:
                container_spec = worker_pool_spec['container_spec']
                if 'command' in container_spec:
                    dsl_utils.resolve_cmd_lines(container_spec['command'],
                                                _is_output_parameter)
                if 'args' in container_spec:
                    copy_of_original_args = container_spec['args'].copy()
                    dsl_utils.resolve_cmd_lines(container_spec['args'],
                                                _is_output_parameter)

            elif 'python_package_spec' in worker_pool_spec:
                # For custom Python training, resolve placeholders in args only.
                python_spec = worker_pool_spec['python_package_spec']
                if 'args' in python_spec:
                    dsl_utils.resolve_cmd_lines(python_spec['args'],
                                                _is_output_parameter)

            else:
                raise ValueError(
                    'Expect either "container_spec" or "python_package_spec" in each '
                    'workerPoolSpec. Got: {}'.format(worker_pool_spec))

        job_spec['worker_pool_specs'] = worker_pool_specs

    else:

        def _is_output_parameter(output_key: str) -> bool:
            for output in component_spec.component_spec.outputs:
                if output.name == output_key:
                    return type_utils.is_parameter_type(output.type)
            return False

        worker_pool_spec = {
            'machine_spec': {
                'machine_type':
                    machine_type or _DEFAULT_CUSTOM_JOB_MACHINE_TYPE
            },
            'replica_count': 1,
            'container_spec': {
                'image_uri':
                    component_spec.component_spec.implementation.container
                    .image,
            }
        }
        if component_spec.component_spec.implementation.container.command:
            container_command_copy = component_spec.component_spec.implementation.container.command.copy(
            )
            dsl_utils.resolve_cmd_lines(container_command_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec'][
                'command'] = container_command_copy

        if component_spec.component_spec.implementation.container.args:
            container_args_copy = component_spec.component_spec.implementation.container.args.copy(
            )
            copy_of_original_args = component_spec.component_spec.implementation.container.args.copy(
            )
            dsl_utils.resolve_cmd_lines(container_args_copy,
                                        _is_output_parameter)
            worker_pool_spec['container_spec']['args'] = container_args_copy
        if accelerator_type is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_type'] = accelerator_type
        if accelerator_count is not None:
            worker_pool_spec['machine_spec'][
                'accelerator_count'] = accelerator_count
        if boot_disk_type is not None:
            if 'disk_spec' not in worker_pool_spec:
                worker_pool_spec['disk_spec'] = {}
            worker_pool_spec['disk_spec']['boot_disk_type'] = boot_disk_type
        if boot_disk_size_gb is not None:
            if 'disk_spec' not in worker_pool_spec:
                worker_pool_spec['disk_spec'] = {}
            worker_pool_spec['disk_spec'][
                'boot_disk_size_gb'] = boot_disk_size_gb

        job_spec['worker_pool_specs'] = [worker_pool_spec]
        if replica_count is not None and replica_count > 1:
            additional_worker_pool_spec = copy.deepcopy(worker_pool_spec)
            additional_worker_pool_spec['replica_count'] = str(replica_count -
                                                               1)
            job_spec['worker_pool_specs'].append(additional_worker_pool_spec)

    if timeout is not None:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling']['timeout'] = timeout
    if restart_job_on_worker_restart is not None:
        if 'scheduling' not in job_spec:
            job_spec['scheduling'] = {}
        job_spec['scheduling'][
            'restart_job_on_worker_restart'] = restart_job_on_worker_restart
    if service_account is not None:
        job_spec['service_account'] = service_account
    if network is not None:
        job_spec['network'] = network

    custom_job_payload = {
        'display_name': display_name or component_spec.component_spec.name,
        'job_spec': job_spec
    }

    custom_job_component_spec = structures.ComponentSpec(
        name=component_spec.component_spec.name,
        inputs=component_spec.component_spec.inputs + [
            structures.InputSpec(name='gcp_project', type='String'),
            structures.InputSpec(name='gcp_region', type='String')
        ],
        outputs=component_spec.component_spec.outputs +
        [structures.OutputSpec(name='GCP_RESOURCES', type='String')],
        implementation=structures.ContainerImplementation(
            container=structures.ContainerSpec(
                image=_DEFAULT_CUSTOM_JOB_CONTAINER_IMAGE,
                command=["python", "-u", "-m", "launcher"],
                args=[
                    '--type',
                    'CustomJob',
                    '--gcp_project',
                    structures.InputValuePlaceholder(input_name='gcp_project'),
                    '--gcp_region',
                    structures.InputValuePlaceholder(input_name='gcp_region'),
                    '--payload',
                    json.dumps(custom_job_payload),
                    '--gcp_resources',
                    structures.OutputPathPlaceholder(
                        output_name='GCP_RESOURCES'),
                ] + copy_of_original_args,
            )))

    component_path = tempfile.mktemp()
    custom_job_component_spec.save(component_path)

    return components.load_component_from_file(component_path)
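# Hedged usage sketch (not part of the module): `train_op` is a hypothetical
# component op; per the spec built above, the returned custom-job op
# additionally expects the 'gcp_project' and 'gcp_region' inputs.
#
#   custom_train_op = run_as_vertex_ai_custom_job(
#       train_op,
#       display_name='my-custom-job',   # hypothetical
#       machine_type='n1-standard-8',
#       replica_count=2,
#   )
#   # In a pipeline:
#   #   custom_train_op(gcp_project='my-project',
#   #                   gcp_region='us-central1',
#   #                   ...original train_op inputs...)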
            args=[
                structures.IfPresentPlaceholder(
                    if_structure=structures.IfPresentPlaceholderStructure(
                        input_name='optional_input_1',
                        then=[
                            '--arg1',
                            structures.InputUriPlaceholder(
                                input_name='optional_input_1'),
                        ],
                        otherwise=[
                            '--arg2',
                            'default',
                        ]))
            ])),
    inputs={
        'optional_input_1': structures.InputSpec(type='String', default=None)
    },
)

V1_YAML_CONCAT_PLACEHOLDER = textwrap.dedent("""\
    name: component_concat
    implementation:
      container:
        args:
        - concat: ['--arg1', {inputValue: input_prefix}]
        image: alpine
    inputs:
    - {name: input_prefix, type: String}
    """)

COMPONENT_SPEC_CONCAT_PLACEHOLDER = structures.ComponentSpec(