Example #1
    def test_get_input_artifact_type_schema(self):
        input_specs = [
            structures.InputSpec(name='input1', type='String'),
            structures.InputSpec(name='input2', type='Model'),
            structures.InputSpec(name='input3', type=None),
        ]
        # input not found.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input0', input_specs)
        self.assertEqual('Input not found.', str(cm.exception))

        # input found, but it doesn't map to an artifact type.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input1', input_specs)
        self.assertEqual('Input is not an artifact type.', str(cm.exception))

        # input found, and a matching artifact type schema returned.
        self.assertEqual(
            'title: kfp.Model\n'
            'type: object\n'
            'properties:\n'
            '  framework:\n'
            '    type: string\n'
            '  framework_version:\n'
            '    type: string\n',
            type_utils.get_input_artifact_type_schema('input2', input_specs))

        # input found, and the default artifact type schema returned.
        self.assertEqual(
            'title: kfp.Artifact\ntype: object\n',
            type_utils.get_input_artifact_type_schema('input3', input_specs))
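For reference, a minimal sketch of the helper under test. The parameter-type set and the artifact-schema mapping below are illustrative stand-ins, not the actual KFP tables:

def get_input_artifact_type_schema(input_name, input_specs):
    """Returns the YAML type schema for an artifact input (sketch)."""
    # Illustrative stand-ins; the real KFP mappings are more complete.
    parameter_types = {'String', 'Integer', 'Float', 'Boolean'}
    artifact_schemas = {
        'Model': ('title: kfp.Model\ntype: object\nproperties:\n'
                  '  framework:\n    type: string\n'
                  '  framework_version:\n    type: string\n'),
    }
    for input_spec in input_specs:
        if input_spec.name == input_name:
            # A parameter-typed input (e.g. 'String') is not an artifact.
            assert input_spec.type not in parameter_types, \
                'Input is not an artifact type.'
            # Untyped inputs fall back to the generic Artifact schema.
            return artifact_schemas.get(
                input_spec.type, 'title: kfp.Artifact\ntype: object\n')
    raise AssertionError('Input not found.')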
Example #2
    def test_get_input_artifact_type_schema(self):
        input_specs = [
            structures.InputSpec(name='input1', type='String'),
            structures.InputSpec(name='input2', type='Model'),
            structures.InputSpec(name='input3', type=None),
        ]
        # input not found.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input0', input_specs)
        self.assertEqual('Input not found.', str(cm.exception))

        # input found, but it doesn't map to an artifact type.
        with self.assertRaises(AssertionError) as cm:
            type_utils.get_input_artifact_type_schema('input1', input_specs)
        self.assertEqual('Input is not an artifact type.', str(cm.exception))

        # input found, and a matching artifact type schema returned.
        self.assertEqual(
            'system.Model',
            type_utils.get_input_artifact_type_schema(
                'input2', input_specs).schema_title)

        # input found, and the default artifact type schema returned.
        self.assertEqual(
            'system.Artifact',
            type_utils.get_input_artifact_type_schema(
                'input3', input_specs).schema_title)
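In this later version the helper returns a pipeline_spec_pb2.ArtifactTypeSchema message instead of a raw YAML string, so callers read schema_title off the result, exactly as the assertions above do:

schema = type_utils.get_input_artifact_type_schema('input2', input_specs)
print(schema.schema_title)  # system.Model
print(type_utils.get_input_artifact_type_schema(
    'input3', input_specs).schema_title)  # system.Artifact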
Example #3
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())
      is_loop_subgroup = isinstance(group, dsl.ParallelFor)
      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = (
          subgroup_task_spec.task_info.name or
          dsl_utils.sanitize_task_name(subgroup_key))
      # human_name exists for ops only, and is used to de-dupe component specs.
      subgroup_component_name = (
          subgroup_task_spec.component_ref.name or
          dsl_utils.sanitize_component_name(
              getattr(subgroup, 'human_name', subgroup_key)))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
        raise NotImplementedError(
            'dsl.graph_component is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
        raise NotImplementedError(
            'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

      importer_tasks = []
      # Add importer node when applicable
      for input_name in subgroup_task_spec.inputs.artifacts:
        if not subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, subgroup._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=subgroup_task_spec.task_info.name,
              input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec
          importer_spec = subgroup.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

          importer_tasks.append(importer_task_name)

      group_inputs = inputs.get(group.name, [])
      subgroup_inputs = inputs.get(subgroup.name, [])
      subgroup_params = [param for param, _ in subgroup_inputs]
      tasks_in_current_dag = [
          dsl_utils.sanitize_task_name(subgroup.name) for subgroup in subgroups
      ] + importer_tasks

      is_parent_component_root = group_component_spec == pipeline_spec.root

      # Additional spec modifications for dsl.ParallelFor's subgroups.
      if is_loop_subgroup:
        self._update_loop_specs(group, subgroup, group_component_spec,
                                subgroup_component_spec, subgroup_task_spec)

      elif isinstance(subgroup, dsl.ContainerOp):
        dsl_component_spec.update_task_inputs_spec(
            subgroup_task_spec,
            group_component_spec.input_definitions,
            subgroup_params,
            tasks_in_current_dag,
        )

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':

        # "punch the hole", adding inputs needed by its subgroup or tasks.
        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        condition = subgroup.condition
        operand_values = []

        for operand in [condition.operand1, condition.operand2]:
          operand_values.append(self._resolve_value_or_reference(operand))

        condition_string = '{} {} {}'.format(operand_values[0],
                                             condition.operator,
                                             operand_values[1])

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      if isinstance(subgroup, dsl.ParallelFor):
        if subgroup.parallelism is not None:
          warnings.warn(
              'Setting parallelism in ParallelFor is not supported yet. '
              'The setting is ignored.')

        # Remove loop-argument-related inputs from the parent group's component spec.
        input_names = [param.full_name for param, _ in inputs[subgroup.name]]
        for input_name in input_names:
          if _for_loop.LoopArguments.name_is_loop_argument(input_name):
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_name)

        if subgroup.items_is_pipeline_param:
          # These loop args are a 'withParam' rather than 'withItems'.
          # i.e., rather than a static list, they are either the output of
          # another task or were input as global pipeline parameters.

          pipeline_param = subgroup.loop_args.items_or_pipeline_param
          input_parameter_name = pipeline_param.full_name

          if pipeline_param.op_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(pipeline_param.op_name))
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.output_parameter_key = (
                    pipeline_param.name)
          else:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].component_input_parameter = (
                    input_parameter_name)

          if pipeline_param.op_name is None:
            # Input parameter is from pipeline func rather than component output.
            # Correct loop argument input type in the parent component spec.
            # The loop argument was categorized as an artifact due to its missing
            # or non-primitive type annotation. But it should always be String
            # typed, as its value is a serialized JSON string.
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_parameter_name)
            group_component_spec.input_definitions.parameters[
                input_parameter_name].type = pipeline_spec_pb2.PrimitiveType.STRING

      # Add the component spec if it doesn't already exist.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add task spec
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      if container_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

      # Add AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
              executor_label].custom_job.custom_job.update(custom_job_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))
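For context, a tiny pipeline exercising the 'condition' and ParallelFor branches handled above might look like the following sketch (train_op, deploy_op, and process_op are hypothetical components, not part of the source):

from kfp import dsl

@dsl.pipeline(name='control-flow-demo')
def control_flow_demo(threshold: float = 0.9):
    # Hypothetical op producing 'accuracy' and 'model' outputs.
    train = train_op()
    # Compiles to a sub-DAG whose task spec carries a trigger_policy; the
    # condition string is built from the two resolved operands.
    with dsl.Condition(train.outputs['accuracy'] > threshold):
        deploy_op(train.outputs['model'])
    # Compiles to a loop sub-DAG; a static list is a 'withItems' loop,
    # while a PipelineParam would be a 'withParam' loop.
    with dsl.ParallelFor([1, 2, 3]) as item:
        process_op(item)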
Example #4
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
    """Attaches v2 specs to a ContainerOp object.

    Args:
      task: The ContainerOp object to attach IR specs.
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
  """

    # Attach v2_specs to the ContainerOp object regardless of whether the
    # pipeline is being compiled to v1 (Argo YAML) or v2 (IR JSON).
    # However, the two cases behave differently, e.g., in how commands and
    # arguments are resolved and in error handling.
    # Regarding the difference in error handling, v2 has a stricter requirement
    # on input type annotations. For instance, an input without any type
    # annotation is viewed as an artifact, and if it's paired with
    # InputValuePlaceholder, an error will be thrown at compile time. However,
    # we cannot raise such an error in v1, as that would break existing
    # pipelines.
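    # For example, an input declared without a type annotation is treated as
    # an artifact; pairing it with InputValuePlaceholder passes under v1 but
    # raises a TypeError below when compiling for v2.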
    is_compiling_for_v2 = False
    for frame in inspect.stack():
        if frame.function == '_create_pipeline_v2':
            is_compiling_for_v2 = True
            break

    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            elif is_compiling_for_v2 and input_key in importer_specs:
                raise TypeError(
                    'Input "{}" with type "{}" is not connected to any upstream output. '
                    'However it is used with InputPathPlaceholder. '
                    'If you want to import an existing artifact using a system-connected'
                    ' importer node, use InputUriPlaceholder instead. '
                    'Or if you just want to pass a string parameter, use string type and'
                    ' InputValuePlaceholder instead.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                    input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            else:
                return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(
                    output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            else:
                return _output_artifact_path_placeholder(output_key)

        placeholder_resolver = ExtraPlaceholderResolver()

        def _resolve_ir_placeholders_v2(
            arg,
            component_spec: _structures.ComponentSpec,
            arguments: dict,
        ) -> str:
            inputs_dict = {
                input_spec.name: input_spec
                for input_spec in component_spec.inputs or []
            }
            if isinstance(arg, _structures.InputValuePlaceholder):
                input_name = arg.input_name
                input_value = arguments.get(input_name, None)
                if input_value is not None:
                    return _input_parameter_placeholder(input_name)
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.InputUriPlaceholder):
                input_name = arg.input_name
                if input_name in arguments:
                    input_uri = _input_artifact_uri_placeholder(input_name)
                    return input_uri
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.OutputUriPlaceholder):
                output_name = arg.output_name
                output_uri = _output_artifact_uri_placeholder(output_name)
                return output_uri

            return placeholder_resolver.resolve_placeholder(
                arg=arg,
                component_spec=component_spec,
                arguments=arguments,
            )

        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
            placeholder_resolver=_resolve_ir_placeholders_v2,
        )
        return resolved_cmd

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_specs = {}

    # Check types of the reference arguments and serialize PipelineParams
    original_arguments = arguments
    arguments = arguments.copy()

    # Preserve input params for ContainerOp.inputs
    input_params = list(
        set([
            param for param in arguments.values()
            if isinstance(param, _pipeline_param.PipelineParam)
        ]))

    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
                elif is_compiling_for_v2:
                    # argument_value.op_name could be None, in which case an
                    # importer node will be inserted later.
                    # Importer nodes are only applicable to the v2 engine.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_specs[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params and is_compiling_for_v2:
                # argument_value contains PipelineParam placeholders which need
                # to be replaced, and the input needs to be added to the task
                # spec.
                for param in pipeline_params:
                    # Form the name for the compiler injected input, and make sure it
                    # doesn't collide with any existing input names.
                    additional_input_name = (
                        dsl_component_spec.
                        additional_input_name_for_pipelineparam(param))
                    for existing_input_name, _ in arguments.items():
                        if existing_input_name == additional_input_name:
                            raise ValueError(
                                'Name collision between existing input name '
                                '{} and compiler injected input name {}'.
                                format(existing_input_name,
                                       additional_input_name))

                    additional_input_placeholder = (
                        "{{{{$.inputs.parameters['{}']}}}}".format(
                            additional_input_name))
                    argument_value = argument_value.replace(
                        param.pattern, additional_input_placeholder)

                    # The output references are subject to change -- the
                    # producer task may not be within the same DAG.
                    if param.op_name:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                dsl_utils.sanitize_task_name(param.op_name))
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = param.name
                    else:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = param.full_name

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            elif is_compiling_for_v2:
                # An importer node with a constant artifact_uri value will be
                # inserted. Importer nodes are only applicable to the v2 engine.
                pipeline_task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_specs[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            if is_compiling_for_v2:
                raise NotImplementedError(
                    'Input argument supports only the following types: PipelineParam'
                    ', str, int, float. Got: "{}".'.format(argument_value))

    if not component_spec.name:
        component_spec.name = _components._default_component_name

    # task.name is unique at this point.
    pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(
        task.name))

    resolved_cmd = _resolve_commands_and_args_v2(component_spec=component_spec,
                                                 arguments=original_arguments)

    task.container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
            image=component_spec.implementation.container.image,
            command=resolved_cmd.command,
            args=resolved_cmd.args))

    # TODO(chensun): dedupe IR component_spec and container_spec
    pipeline_task_spec.component_ref.name = (dsl_utils.sanitize_component_name(
        task.name))
    executor_label = dsl_utils.sanitize_executor_label(task.name)

    task.component_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec, executor_label, arguments.keys())

    task.task_spec = pipeline_task_spec
    task.importer_specs = importer_specs

    # Override command and arguments if compiling to v2.
    if is_compiling_for_v2:
        task.command = resolved_cmd.command
        task.arguments = resolved_cmd.args

        # Limit this to v2 compilation only, to avoid possible behavior changes in v1.
        task.inputs = input_params
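As a quick sanity check of the placeholder format strings used throughout: the quadruple braces escape down to literal double braces after str.format(), yielding the IR placeholder syntax:

key = 'text'
print("{{{{$.inputs.parameters['{}']}}}}".format(key))
# prints: {{$.inputs.parameters['text']}}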