Example #1
def build_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    is_parent_component_root: bool,
) -> None:
    """Builds task inputs spec from pipeline params.

    Args:
      task_spec: The task spec to fill in its inputs spec.
      pipeline_params: The list of pipeline params.
      tasks_in_current_dag: The list of task names for tasks in the same dag.
      is_parent_component_root: Whether the task is in the root component.
    """
    for param in pipeline_params or []:

        param_full_name, subvar_name = _exclude_loop_arguments_variables(param)
        input_name = additional_input_name_for_pipelineparam(param.full_name)

        param_name = param.name
        if subvar_name:
            task_spec.inputs.parameters[
                input_name].parameter_expression_selector = (
                    'parseJson(string_value)["{}"]'.format(subvar_name))
            param_name = _for_loop.LoopArguments.remove_loop_item_base_name(
                _exclude_loop_arguments_variables(param_name)[0])

        if type_utils.is_parameter_type(param.param_type):
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.output_parameter_key = (
                        param_name)
            else:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        param_full_name if is_parent_component_root else
                        additional_input_name_for_pipelineparam(param_full_name)
                    )
        else:
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.output_artifact_key = (
                        param_name)
            else:
                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = (
                        param_full_name
                        if is_parent_component_root else input_name)
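A minimal usage sketch for the function above, assuming the kfp SDK internals it relies on (PipelineParam and the pipeline_spec_pb2 protos) are importable; the import paths and the parameter/task names below are illustrative assumptions, not part of the original code.

from kfp.dsl import _pipeline_param  # assumed module path for PipelineParam
from kfp.pipeline_spec import pipeline_spec_pb2  # assumed module path for the IR protos

task_spec = pipeline_spec_pb2.PipelineTaskSpec()
params = [
    # A parameter produced by an upstream task in the same DAG (hypothetical names).
    _pipeline_param.PipelineParam(name='accuracy', op_name='train-op', param_type='Float'),
    # A parameter coming from the parent component's inputs (hypothetical name).
    _pipeline_param.PipelineParam(name='learning_rate', param_type='Float'),
]
build_task_inputs_spec(
    task_spec=task_spec,
    pipeline_params=params,
    tasks_in_current_dag=['task-train-op'],  # matches dsl_utils.sanitize_task_name('train-op')
    is_parent_component_root=True,
)
# task_spec.inputs.parameters now contains one compiler-generated input per param:
# one wired to the upstream task's output, one to the parent component's input.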
Example #2
def _build_importer_task_spec(
    importer_base_name: str,
    artifact_uri: Union[_pipeline_param.PipelineParam, str],
) -> pipeline_spec_pb2.PipelineTaskSpec:
    """Builds an importer task spec.

  Args:
    importer_base_name: The base name of the importer node.
    artifact_uri: The artifact uri to import from.

  Returns:
    An importer node task spec.
  """
    result = pipeline_spec_pb2.PipelineTaskSpec()
    result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
    result.component_ref.name = dsl_utils.sanitize_component_name(
        importer_base_name)

    if isinstance(artifact_uri, _pipeline_param.PipelineParam):
        result.inputs.parameters[
            INPUT_KEY].component_input_parameter = artifact_uri.full_name
    elif isinstance(artifact_uri, str):
        result.inputs.parameters[
            INPUT_KEY].runtime_value.constant_value.string_value = artifact_uri

    return result
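A brief usage sketch, assuming this function and INPUT_KEY live in the importer_node module shown above; the node name and URI are hypothetical.

from kfp.dsl import _pipeline_param  # assumed module path for PipelineParam

# Importing from a constant URI: the value lands under runtime_value.constant_value.
constant_task_spec = _build_importer_task_spec(
    importer_base_name='importer-model',   # hypothetical node name
    artifact_uri='gs://my-bucket/model',   # hypothetical URI
)

# Importing from a pipeline input: the URI is wired as a component input parameter.
param_task_spec = _build_importer_task_spec(
    importer_base_name='importer-model',
    artifact_uri=_pipeline_param.PipelineParam(name='model_uri'),
)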
Example #3
  def _populate_metrics_in_dag_outputs(
      self,
      ops: List[dsl.ContainerOp],
      op_to_parent_groups: Dict[str, List[str]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
  ) -> None:
    """Populates metrics artifacts in dag outputs.

    Args:
      ops: The list of ops that may produce metrics outputs.
      op_to_parent_groups: The dict of op name to parent groups. Key is the op's
        name. Value is a list of ancestor groups including the op itself. The
        list of a given op is sorted in a way that the farthest group is the
        first and the op itself is the last.
      pipeline_spec: The pipeline_spec to update in-place.
    """
    for op in ops:
      op_task_spec = getattr(op, 'task_spec',
                             pipeline_spec_pb2.PipelineTaskSpec())
      op_component_spec = getattr(op, 'component_spec',
                                  pipeline_spec_pb2.ComponentSpec())

      # Get the tuple of (component_name, task_name) of all its parent groups.
      parent_components_and_tasks = [('_root', '')]
      # Skip the op itself and the root group, which cannot be retrieved via name.
      for group_name in op_to_parent_groups[op.name][1:-1]:
        parent_components_and_tasks.append(
            (dsl_utils.sanitize_component_name(group_name),
             dsl_utils.sanitize_task_name(group_name)))
      # Reverse the order so that the farthest group comes last.
      parent_components_and_tasks.reverse()

      for output_name, artifact_spec in \
          op_component_spec.output_definitions.artifacts.items():

        if artifact_spec.artifact_type.WhichOneof(
            'kind'
        ) == 'schema_title' and artifact_spec.artifact_type.schema_title in [
            io_types.Metrics.TYPE_NAME,
            io_types.ClassificationMetrics.TYPE_NAME,
        ]:
          unique_output_name = '{}-{}'.format(op_task_spec.task_info.name,
                                              output_name)

          sub_task_name = op_task_spec.task_info.name
          sub_task_output = output_name
          for component_name, task_name in parent_components_and_tasks:
            group_component_spec = (
                pipeline_spec.root if component_name == '_root' else
                pipeline_spec.components[component_name])
            group_component_spec.output_definitions.artifacts[
                unique_output_name].CopyFrom(artifact_spec)
            group_component_spec.dag.outputs.artifacts[
                unique_output_name].artifact_selectors.append(
                    pipeline_spec_pb2.DagOutputsSpec.ArtifactSelectorSpec(
                        producer_subtask=sub_task_name,
                        output_artifact_key=sub_task_output,
                    ))
            sub_task_name = task_name
            sub_task_output = unique_output_name
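The parent-chain construction above is easiest to see on a concrete nesting; the sketch below uses a hypothetical group hierarchy and plain string stand-ins for the dsl_utils sanitizers.

# Hypothetical op nested as root > condition-1 > for-loop-2 > my-op.
op_to_parent_groups = {'my-op': ['root', 'condition-1', 'for-loop-2', 'my-op']}

parent_components_and_tasks = [('_root', '')]
# Skip the op itself and the root group.
for group_name in op_to_parent_groups['my-op'][1:-1]:
    parent_components_and_tasks.append(
        ('comp-' + group_name, 'task-' + group_name))  # stand-ins for the sanitizers
parent_components_and_tasks.reverse()
# -> [('comp-for-loop-2', 'task-for-loop-2'),
#     ('comp-condition-1', 'task-condition-1'),
#     ('_root', '')]
# The metrics artifact is then surfaced group by group, innermost first,
# ending at the pipeline root.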
Example #4
def build_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    is_parent_component_root: bool,
) -> None:
    """Builds task inputs spec from pipeline params.

  Args:
    task_spec: The task spec to fill in its inputs spec.
    pipeline_params: The list of pipeline params.
    tasks_in_current_dag: The list of task names for tasks in the same dag.
    is_parent_component_root: Whether the task is in the root component.
  """
    for param in pipeline_params or []:

        input_name = additional_input_name_for_pipelineparam(param)
        if type_utils.is_parameter_type(param.param_type):
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.parameters[
                    input_name].task_output_parameter.output_parameter_key = (
                        param.name)
            else:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        param.full_name
                        if is_parent_component_root else input_name)
        else:
            if param.op_name and dsl_utils.sanitize_task_name(
                    param.op_name) in tasks_in_current_dag:
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = (
                        dsl_utils.sanitize_task_name(param.op_name))
                task_spec.inputs.artifacts[
                    input_name].task_output_artifact.output_artifact_key = (
                        param.name)
            else:
                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = (
                        param.full_name
                        if is_parent_component_root else input_name)
Example #5
def build_importer_task_spec(
    importer_base_name: str, ) -> pipeline_spec_pb2.PipelineTaskSpec:
    """Builds an importer task spec.

  Args:
    importer_base_name: The base name of the importer node.

  Returns:
    An importer node task spec.
  """
    result = pipeline_spec_pb2.PipelineTaskSpec()
    result.task_info.name = dsl_utils.sanitize_task_name(importer_base_name)
    result.component_ref.name = dsl_utils.sanitize_component_name(
        importer_base_name)

    return result
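A minimal sketch of how the compiler pairs this task spec with an importer component spec (see Example #8); generate_importer_base_name and build_importer_component_spec are assumed to be the sibling helpers used there, and the consumer task name and schema string are placeholders.

importer_name = importer_node.generate_importer_base_name(
    dependent_task_name='task-train-op',  # hypothetical consumer task
    input_name='model')
importer_task_spec = build_importer_task_spec(importer_name)
importer_component_spec = importer_node.build_importer_component_spec(
    importer_base_name=importer_name,
    input_name='model',
    input_type_schema='<artifact type schema>',  # placeholder value
)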
Example #6
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
  """Attaches v2 specs to a ContainerOp object.

  Attaches v2 specs to the ContainerOp object regardless of whether the pipeline
  is being compiled to v1 (Argo YAML) or v2 (IR JSON). However, the two cases
  behave differently, e.g. in how commands and arguments are resolved and in
  error handling. Regarding error handling, v2 has a stricter requirement on
  input type annotations. For instance, an input without any type annotation is
  treated as an artifact, and if it is paired with InputValuePlaceholder, an
  error is raised at compile time. We cannot raise such an error in v1, as it
  would break existing pipelines.

  Args:
    task: The ContainerOp object to attach IR specs.
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
  """

  def _resolve_commands_and_args_v2(
      component_spec: _structures.ComponentSpec,
      arguments: Mapping[str, Any],
  ) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputUriPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return _generate_input_uri_placeholder(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputPathPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputValuePlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          outputs_dict[output_key].type):
        raise TypeError('Output "{}" with type "{}" cannot be paired with '
                        'OutputUriPlaceholder.'.format(
                            output_key, outputs_dict[output_key].type))
      else:
        return _generate_output_uri_placeholder(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
      if type_utils.is_parameter_type(outputs_dict[output_key].type):
        return _output_parameter_path_placeholder(output_key)
      else:
        return _output_artifact_path_placeholder(output_key)

    placeholder_resolver = ExtraPlaceholderResolver()
    def _resolve_ir_placeholders_v2(
        arg,
        component_spec: _structures.ComponentSpec,
        arguments: dict,
    ) -> str:
      inputs_dict = {input_spec.name: input_spec for input_spec in component_spec.inputs or []}
      if isinstance(arg, _structures.InputValuePlaceholder):
        input_name = arg.input_name
        input_value = arguments.get(input_name, None)
        if input_value is not None:
          return _input_parameter_placeholder(input_name)
        else:
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.InputUriPlaceholder):
        input_name = arg.input_name
        if input_name in arguments:
          input_uri = _input_artifact_uri_placeholder(input_name)
          return input_uri
        else:
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.OutputUriPlaceholder):
        output_name = arg.output_name
        output_uri = _output_artifact_uri_placeholder(output_name)
        return output_uri

      return placeholder_resolver.resolve_placeholder(
        arg=arg,
        component_spec=component_spec,
        arguments=arguments,
      )

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
        placeholder_resolver=_resolve_ir_placeholders_v2,
    )
    return resolved_cmd

  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

  # Check types of the reference arguments and serialize PipelineParams
  arguments = arguments.copy()

  # Preserve input params for ContainerOp.inputs
  input_params_set = set([
      param for param in arguments.values()
      if isinstance(param, _pipeline_param.PipelineParam)
  ])

  for input_name, argument_value in arguments.items():
    input_type = component_spec._inputs_dict[input_name].type
    argument_type = None

    if isinstance(argument_value, _pipeline_param.PipelineParam):
      argument_type = argument_value.param_type

      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      # Loop arguments default to 'String' type if the type is unknown.
      # This has to be done after the type compatibility check.
      if argument_type is None and isinstance(
          argument_value, (_for_loop.LoopArguments,
                           _for_loop.LoopArgumentVariable)):
        argument_type = 'String'

      arguments[input_name] = str(argument_value)

      if type_utils.is_parameter_type(input_type):
        if argument_value.op_name:
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.output_parameter_key = (
                  argument_value.name)
        else:
          pipeline_task_spec.inputs.parameters[
              input_name].component_input_parameter = argument_value.name
      else:
        if argument_value.op_name:
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  dsl_utils.sanitize_task_name(argument_value.op_name))
          pipeline_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  argument_value.name)
    elif isinstance(argument_value, str):
      argument_type = 'String'
      pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
          argument_value)
      if pipeline_params and kfp.COMPILING_FOR_V2:
        # argument_value contains PipelineParam placeholders which need to be
        # replaced, and the corresponding inputs need to be added to the task spec.
        for param in pipeline_params:
          # Form the name for the compiler injected input, and make sure it
          # doesn't collide with any existing input names.
          additional_input_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(param))
          for existing_input_name, _ in arguments.items():
            if existing_input_name == additional_input_name:
              raise ValueError('Name collision between existing input name '
                               '{} and compiler injected input name {}'.format(
                                   existing_input_name, additional_input_name))

          # Add the additional param to the input params set. Otherwise, it will
          # not be included when the params set is not empty.
          input_params_set.add(param)
          additional_input_placeholder = (
              "{{{{$.inputs.parameters['{}']}}}}".format(additional_input_name))
          argument_value = argument_value.replace(param.pattern,
                                                  additional_input_placeholder)

          # The output references are subject to change -- the producer task may
          # not be within the same DAG.
          if param.op_name:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(param.op_name))
            pipeline_task_spec.inputs.parameters[
                additional_input_name].task_output_parameter.output_parameter_key = param.name
          else:
            pipeline_task_spec.inputs.parameters[
                additional_input_name].component_input_parameter = param.full_name

      input_type = component_spec._inputs_dict[input_name].type
      if type_utils.is_parameter_type(input_type):
        pipeline_task_spec.inputs.parameters[
            input_name].runtime_value.constant_value.string_value = (
                argument_value)
    elif isinstance(argument_value, int):
      argument_type = 'Integer'
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.int_value = argument_value
    elif isinstance(argument_value, float):
      argument_type = 'Float'
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.double_value = argument_value
    elif isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object {} was passed to component as an input argument. '
          'Pass a single output instead.'.format(input_name))
    else:
      if kfp.COMPILING_FOR_V2:
        raise NotImplementedError(
            'Input argument supports only the following types: PipelineParam'
            ', str, int, float. Got: "{}".'.format(argument_value))

    argument_is_parameter_type = type_utils.is_parameter_type(argument_type)
    input_is_parameter_type = type_utils.is_parameter_type(input_type)
    if kfp.COMPILING_FOR_V2 and (argument_is_parameter_type !=
                                input_is_parameter_type):
      if isinstance(argument_value, dsl.PipelineParam):
        param_or_value_msg = 'PipelineParam "{}"'.format(
            argument_value.full_name)
      else:
        param_or_value_msg = 'value "{}"'.format(argument_value)

      raise TypeError(
          'Passing '
          '{param_or_value} with type "{arg_type}" (as "{arg_category}") to '
          'component input '
          '"{input_name}" with type "{input_type}" (as "{input_category}") is '
          'incompatible. Please fix the type of the component input.'.format(
              param_or_value=param_or_value_msg,
              arg_type=argument_type,
              arg_category='Parameter'
              if argument_is_parameter_type else 'Artifact',
              input_name=input_name,
              input_type=input_type,
              input_category='Parameter'
              if input_is_parameter_type else 'Artifact',
          ))

  if not component_spec.name:
    component_spec.name = _components._default_component_name

  # task.name is unique at this point.
  pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(task.name))

  resolved_cmd = _resolve_commands_and_args_v2(
      component_spec=component_spec, arguments=arguments)

  task.container_spec = (
      pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
          image=component_spec.implementation.container.image,
          command=resolved_cmd.command,
          args=resolved_cmd.args))

  # TODO(chensun): dedupe IR component_spec and container_spec
  pipeline_task_spec.component_ref.name = (
      dsl_utils.sanitize_component_name(task.name))
  executor_label = dsl_utils.sanitize_executor_label(task.name)

  task.component_spec = dsl_component_spec.build_component_spec_from_structure(
      component_spec, executor_label, arguments.keys())

  task.task_spec = pipeline_task_spec

  # Override command and arguments if compiling to v2.
  if kfp.COMPILING_FOR_V2:
    task.command = resolved_cmd.command
    task.arguments = resolved_cmd.args

    # Limit this to v2 compilation only to avoid possible behavior changes in v1.
    task.inputs = list(input_params_set)
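One small but easy-to-miss detail in the placeholder resolvers above is the brace escaping; the sketch below shows what one placeholder template expands to (the input name is hypothetical).

# str.format turns '{{' into a literal '{', so the quadruple braces yield the
# double braces used by the KFP v2 runtime placeholder syntax.
input_key = 'text'  # hypothetical input name
placeholder = "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
assert placeholder == "{{$.inputs.parameters['text']}}"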
Example #7
 def test_sanitize_task_name(self):
     self.assertEqual('task-my-component-1',
                      dsl_utils.sanitize_task_name('My component 1'))
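The test pins down the observable behavior; below is a minimal re-implementation sketch of that behavior (lower-casing, collapsing non-alphanumerics to '-', and a 'task-' prefix). It is an illustration only, not the actual dsl_utils implementation.

import re

def sanitize_task_name_sketch(name: str) -> str:
    # Lower-case, replace runs of characters outside [a-z0-9] with '-', trim
    # leading/trailing dashes, and prefix with 'task-'.
    return 'task-' + re.sub('[^0-9a-z]+', '-', name.lower()).strip('-')

assert sanitize_task_name_sketch('My component 1') == 'task-my-component-1'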
Example #8
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the root group. Used to determine whether the
        component spec for the current group should be the root dag.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())
      is_loop_subgroup = (isinstance(group, dsl.ParallelFor))
      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = (
          subgroup_task_spec.task_info.name or
          dsl_utils.sanitize_task_name(subgroup_key))
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = (
          subgroup_task_spec.component_ref.name or
          dsl_utils.sanitize_component_name(
              getattr(subgroup, 'human_name', subgroup_key)))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
        raise NotImplementedError(
            'dsl.graph_component is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
        raise NotImplementedError(
            'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

      importer_tasks = []
      # Add importer node when applicable
      for input_name in subgroup_task_spec.inputs.artifacts:
        if not subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, subgroup._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=subgroup_task_spec.task_info.name,
              input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec
          importer_spec = subgroup.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

          importer_tasks.append(importer_task_name)

      group_inputs = inputs.get(group.name, [])
      subgroup_inputs = inputs.get(subgroup.name, [])
      subgroup_params = [param for param, _ in subgroup_inputs]
      tasks_in_current_dag = [
          dsl_utils.sanitize_task_name(subgroup.name) for subgroup in subgroups
      ] + importer_tasks

      is_parent_component_root = group_component_spec == pipeline_spec.root

      # Additional spec modifications for dsl.ParallelFor's subgroups.
      if is_loop_subgroup:
        self._update_loop_specs(group, subgroup, group_component_spec,
                                subgroup_component_spec, subgroup_task_spec)

      elif isinstance(subgroup, dsl.ContainerOp):
        dsl_component_spec.update_task_inputs_spec(
            subgroup_task_spec,
            group_component_spec.input_definitions,
            subgroup_params,
            tasks_in_current_dag,
        )

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':

        # "punch the hole", adding inputs needed by its subgroup or tasks.
        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        condition = subgroup.condition
        operand_values = []

        for operand in [condition.operand1, condition.operand2]:
          operand_values.append(self._resolve_value_or_reference(operand))

        condition_string = '{} {} {}'.format(operand_values[0],
                                             condition.operator,
                                             operand_values[1])

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      if isinstance(subgroup, dsl.ParallelFor):
        if subgroup.parallelism is not None:
          warnings.warn(
              'Setting parallelism in ParallelFor is not supported yet. '
              'The setting is ignored.')

        # Remove loop arguments related inputs from parent group component spec.
        input_names = [param.full_name for param, _ in inputs[subgroup.name]]
        for input_name in input_names:
          if _for_loop.LoopArguments.name_is_loop_argument(input_name):
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_name)

        if subgroup.items_is_pipeline_param:
          # These loop args are a 'withParam' rather than 'withItems'.
          # i.e., rather than a static list, they are either the output of
          # another task or were input as global pipeline parameters.

          pipeline_param = subgroup.loop_args.items_or_pipeline_param
          input_parameter_name = pipeline_param.full_name

          if pipeline_param.op_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(pipeline_param.op_name))
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.output_parameter_key = (
                    pipeline_param.name)
          else:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].component_input_parameter = (
                    input_parameter_name)

          if pipeline_param.op_name is None:
            # Input parameter is from pipeline func rather than component output.
            # Correct loop argument input type in the parent component spec.
            # The loop argument was categorized as an artifact due to its missing
            # or non-primitive type annotation. But it should always be String
            # typed, as its value is a serialized JSON string.
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_parameter_name)
            group_component_spec.input_definitions.parameters[
                input_parameter_name].type = pipeline_spec_pb2.PrimitiveType.STRING

      # Add the component spec if it does not already exist.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add task spec
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      if container_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

      # Add AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
              executor_label].custom_job.custom_job.update(custom_job_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))
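For the condition branch above, the trigger string is simply the two resolved operands joined by the operator; the operand strings below are hypothetical stand-ins for whatever _resolve_value_or_reference returns, used only to show the assembly.

operand_values = ["inputs.parameters['threshold'].double_value", '0.9']  # hypothetical operands
condition_string = '{} {} {}'.format(operand_values[0], '>', operand_values[1])
# -> "inputs.parameters['threshold'].double_value > 0.9"
# This string is then set as the task's trigger_policy condition.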
Example #9
def _get_custom_job_op(
    task_name: str,
    job_spec: Dict[str, Any],
    input_artifacts: Optional[Dict[str, dsl.PipelineParam]] = None,
    input_parameters: Optional[Dict[str, _ValueOrPipelineParam]] = None,
    output_artifacts: Optional[Dict[str, Type[artifact.Artifact]]] = None,
    output_parameters: Optional[Dict[str, Any]] = None,
) -> AiPlatformCustomJobOp:
  """Gets an AiPlatformCustomJobOp from job spec and I/O definition."""
  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
  pipeline_component_spec = pipeline_spec_pb2.ComponentSpec()

  pipeline_task_spec.task_info.CopyFrom(
      pipeline_spec_pb2.PipelineTaskInfo(name=dsl_utils.sanitize_task_name(task_name)))

  # Iterate through the inputs/outputs declaration to get pipeline component
  # spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam):
      pipeline_component_spec.input_definitions.parameters[
        input_name].type = type_utils.get_parameter_type(param.param_type)
    else:
      pipeline_component_spec.input_definitions.parameters[
        input_name].type = type_utils.get_parameter_type(type(param))

  for input_name, art in input_artifacts.items():
    if not isinstance(art, dsl.PipelineParam):
      raise RuntimeError(
          'Got unresolved input artifact for input %s. Input '
          'artifacts must be connected to a producer task.' % input_name)
    pipeline_component_spec.input_definitions.artifacts[
      input_name].artifact_type.CopyFrom(
        type_utils.get_artifact_type_schema_message(art.param_type))

  for output_name, param_type in output_parameters.items():
    pipeline_component_spec.output_definitions.parameters[
      output_name].type = type_utils.get_parameter_type(param_type)

  for output_name, artifact_type in output_artifacts.items():
    pipeline_component_spec.output_definitions.artifacts[
      output_name].artifact_type.CopyFrom(artifact_type.get_ir_type())

  pipeline_component_spec.executor_label = dsl_utils.sanitize_executor_label(
      task_name)

  # Iterate through the inputs/outputs specs to get pipeline task spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam) and param.op_name:
      # If the param has a valid op_name, this should be a pipeline parameter
      # produced by an upstream task.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              task_output_parameter=pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec.TaskOutputParameterSpec(
                  producer_task=dsl_utils.sanitize_task_name(param.op_name),
                  output_parameter_key=param.name
              )))
    elif isinstance(param, dsl.PipelineParam) and not param.op_name:
      # If a valid op_name is missing, this should be a pipeline parameter.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              component_input_parameter=param.name))
    else:
      # If this is not a pipeline param, then it should be a value.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
                  constant_value=dsl_utils.get_value(param))))

  for input_name, art in input_artifacts.items():
    if art.op_name:
      # If the param has a valid op_name, this should be an artifact produced
      # by an upstream task.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              task_output_artifact=pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec.TaskOutputArtifactSpec(
                  producer_task=dsl_utils.sanitize_task_name(art.op_name),
                  output_artifact_key=art.name)))
    else:
      # Otherwise, this should be from the input of the subdag.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              component_input_artifact=art.name
          ))

  # TODO: Add task dependencies/trigger policies/caching/iterator
  pipeline_task_spec.component_ref.name = dsl_utils.sanitize_component_name(
      task_name)

  # Construct dummy I/O declaration for the op.
  # TODO: resolve name conflict instead of raising errors.
  dummy_outputs = collections.OrderedDict()
  for output_name, _ in output_artifacts.items():
    dummy_outputs[output_name] = _DUMMY_PATH

  for output_name, _ in output_parameters.items():
    if output_name in dummy_outputs:
      raise KeyError('Got name collision for output key %s. Consider renaming '
                     'either output parameters or output '
                     'artifacts.' % output_name)
    dummy_outputs[output_name] = _DUMMY_PATH

  dummy_inputs = collections.OrderedDict()
  for input_name, art in input_artifacts.items():
    dummy_inputs[input_name] = _DUMMY_PATH
  for input_name, param in input_parameters.items():
    if input_name in dummy_inputs:
      raise KeyError('Got name collision for input key %s. Consider renaming '
                     'either input parameters or input '
                     'artifacts.' % input_name)
    dummy_inputs[input_name] = _DUMMY_PATH

  # Construct the AIP (Unified) custom job op.
  return AiPlatformCustomJobOp(
      name=task_name,
      custom_job_spec=job_spec,
      component_spec=pipeline_component_spec,
      task_spec=pipeline_task_spec,
      task_inputs=[
          dsl.InputArgumentPath(
              argument=dummy_inputs[input_name],
              input=input_name,
              path=path,
          ) for input_name, path in dummy_inputs.items()
      ],
      task_outputs=dummy_outputs
  )
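The parameter wiring above distinguishes three cases; here is a minimal sketch constructing each variant directly with the pipeline_spec_pb2 messages. The import path and the concrete task, key, and value names are assumptions for illustration.

from kfp.pipeline_spec import pipeline_spec_pb2  # assumed module path for the IR protos

# 1. Output of an upstream task.
from_upstream = pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
    task_output_parameter=pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec
    .TaskOutputParameterSpec(
        producer_task='task-train', output_parameter_key='accuracy'))

# 2. An input of the enclosing (sub)DAG.
from_component_input = pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
    component_input_parameter='learning_rate')

# 3. A constant value supplied at authoring time.
from_constant = pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
    runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
        constant_value=pipeline_spec_pb2.Value(int_value=3)))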
Example #10
def _attach_v2_specs(
    task: _container_op.ContainerOp,
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
) -> None:
    """Attaches v2 specs to a ContainerOp object.

    Args:
      task: The ContainerOp object to attach IR specs.
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.
  """

    # Attach v2_specs to the ContainerOp object regardless of whether the
    # pipeline is being compiled to v1 (Argo YAML) or v2 (IR JSON). However,
    # the two cases behave differently, e.g. in how commands and arguments are
    # resolved and in error handling. Regarding error handling, v2 has a
    # stricter requirement on input type annotations. For instance, an input
    # without any type annotation is treated as an artifact, and if it is
    # paired with InputValuePlaceholder, an error is raised at compile time.
    # We cannot raise such an error in v1, as it would break existing pipelines.
    is_compiling_for_v2 = False
    for frame in inspect.stack():
        if '_create_pipeline_v2' in frame:
            is_compiling_for_v2 = True
            break

    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            elif is_compiling_for_v2 and input_key in importer_specs:
                raise TypeError(
                    'Input "{}" with type "{}" is not connected to any upstream output. '
                    'However it is used with InputPathPlaceholder. '
                    'If you want to import an existing artifact using a system-connected'
                    ' importer node, use InputUriPlaceholder instead. '
                    'Or if you just want to pass a string parameter, use string type and'
                    ' InputValuePlaceholder instead.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                    input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            if is_compiling_for_v2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            else:
                return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(
                    output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            else:
                return _output_artifact_path_placeholder(output_key)

        placeholder_resolver = ExtraPlaceholderResolver()

        def _resolve_ir_placeholders_v2(
            arg,
            component_spec: _structures.ComponentSpec,
            arguments: dict,
        ) -> str:
            inputs_dict = {
                input_spec.name: input_spec
                for input_spec in component_spec.inputs or []
            }
            if isinstance(arg, _structures.InputValuePlaceholder):
                input_name = arg.input_name
                input_value = arguments.get(input_name, None)
                if input_value is not None:
                    return _input_parameter_placeholder(input_name)
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.InputUriPlaceholder):
                input_name = arg.input_name
                if input_name in arguments:
                    input_uri = _input_artifact_uri_placeholder(input_name)
                    return input_uri
                else:
                    input_spec = inputs_dict[input_name]
                    if input_spec.optional:
                        return None
                    else:
                        raise ValueError(
                            'No value provided for input {}'.format(
                                input_name))

            elif isinstance(arg, _structures.OutputUriPlaceholder):
                output_name = arg.output_name
                output_uri = _output_artifact_uri_placeholder(output_name)
                return output_uri

            return placeholder_resolver.resolve_placeholder(
                arg=arg,
                component_spec=component_spec,
                arguments=arguments,
            )

        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
            placeholder_resolver=_resolve_ir_placeholders_v2,
        )
        return resolved_cmd

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

    # Keep track of auto-injected importer spec.
    importer_specs = {}

    # Check types of the reference arguments and serialize PipelineParams
    original_arguments = arguments
    arguments = arguments.copy()

    # Preserve input params for ContainerOp.inputs
    input_params = list(
        set([
            param for param in arguments.values()
            if isinstance(param, _pipeline_param.PipelineParam)
        ]))

    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, _pipeline_param.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            arguments[input_name] = str(argument_value)

            if type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = argument_value.name
            else:
                if argument_value.op_name:
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            dsl_utils.sanitize_task_name(
                                argument_value.op_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            argument_value.name)
                elif is_compiling_for_v2:
                    # argument_value.op_name could be None, in which case an
                    # importer node will be inserted later. Importer nodes are
                    # only applicable to the v2 engine.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = ''
                    type_schema = type_utils.get_input_artifact_type_schema(
                        input_name, component_spec.inputs)
                    importer_specs[
                        input_name] = importer_node.build_importer_spec(
                            input_type_schema=type_schema,
                            pipeline_param_name=argument_value.name)
        elif isinstance(argument_value, str):
            pipeline_params = _pipeline_param.extract_pipelineparams_from_any(
                argument_value)
            if pipeline_params and is_compiling_for_v2:
                # argument_value contains PipelineParam placeholders which need
                # to be replaced, and the corresponding inputs need to be added
                # to the task spec.
                for param in pipeline_params:
                    # Form the name for the compiler injected input, and make sure it
                    # doesn't collide with any existing input names.
                    additional_input_name = (
                        dsl_component_spec.
                        additional_input_name_for_pipelineparam(param))
                    for existing_input_name, _ in arguments.items():
                        if existing_input_name == additional_input_name:
                            raise ValueError(
                                'Name collision between existing input name '
                                '{} and compiler injected input name {}'.
                                format(existing_input_name,
                                       additional_input_name))

                    additional_input_placeholder = (
                        "{{{{$.inputs.parameters['{}']}}}}".format(
                            additional_input_name))
                    argument_value = argument_value.replace(
                        param.pattern, additional_input_placeholder)

                    # The output references are subject to change -- the
                    # producer task may not be within the same DAG.
                    if param.op_name:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                dsl_utils.sanitize_task_name(param.op_name))
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = param.name
                    else:
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = param.full_name

            input_type = component_spec._inputs_dict[input_name].type
            if type_utils.is_parameter_type(input_type):
                pipeline_task_spec.inputs.parameters[
                    input_name].runtime_value.constant_value.string_value = (
                        argument_value)
            elif is_compiling_for_v2:
                # An importer node with a constant artifact_uri value will be
                # inserted. Importer nodes are only applicable to the v2 engine.
                pipeline_task_spec.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = ''
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)
                importer_specs[input_name] = importer_node.build_importer_spec(
                    input_type_schema=type_schema,
                    constant_value=argument_value)
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, _container_op.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            if is_compiling_for_v2:
                raise NotImplementedError(
                    'Input argument supports only the following types: PipelineParam'
                    ', str, int, float. Got: "{}".'.format(argument_value))

    if not component_spec.name:
        component_spec.name = _components._default_component_name

    # task.name is unique at this point.
    pipeline_task_spec.task_info.name = (dsl_utils.sanitize_task_name(
        task.name))

    resolved_cmd = _resolve_commands_and_args_v2(component_spec=component_spec,
                                                 arguments=original_arguments)

    task.container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec(
            image=component_spec.implementation.container.image,
            command=resolved_cmd.command,
            args=resolved_cmd.args))

    # TODO(chensun): dedupe IR component_spec and container_spec
    pipeline_task_spec.component_ref.name = (dsl_utils.sanitize_component_name(
        task.name))
    executor_label = dsl_utils.sanitize_executor_label(task.name)

    task.component_spec = dsl_component_spec.build_component_spec_from_structure(
        component_spec, executor_label, arguments.keys())

    task.task_spec = pipeline_task_spec
    task.importer_specs = importer_specs

    # Override command and arguments if compiling to v2.
    if is_compiling_for_v2:
        task.command = resolved_cmd.command
        task.arguments = resolved_cmd.args

        # Limit this to v2 compilation only to avoid possible behavior changes in v1.
        task.inputs = input_params
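The v2-detection loop at the top of this variant relies on inspect.stack() returning FrameInfo named tuples, so the `in` check matches the caller's function name among the tuple's fields. A minimal self-contained sketch follows; the function names are hypothetical stand-ins for the compiler entry point.

import inspect

def _create_pipeline_v2():  # hypothetical caller, named like the compiler entry point
    return _detect()

def _detect() -> bool:
    # FrameInfo is a named tuple (frame, filename, lineno, function, ...), so
    # membership testing compares against each field, including the function name.
    return any('_create_pipeline_v2' in frame for frame in inspect.stack())

assert _create_pipeline_v2() is True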
Example #11
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
      op_to_parent_groups: Dict[str, List[str]],
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
      op_to_parent_groups: The dict of op name to parent groups. Key is the op's
        name. Value is a list of ancestor groups including the op itself. The
        list of a given op is sorted in a way that the farthest group is the
        first and the op itself is the last.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())

      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = (
          subgroup_task_spec.task_info.name or
          dsl_utils.sanitize_task_name(subgroup_key))
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = (
          subgroup_task_spec.component_ref.name or
          dsl_utils.sanitize_component_name(
              getattr(subgroup, 'human_name', subgroup_key)))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
        raise NotImplementedError(
            'dsl.graph_component is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
        raise NotImplementedError(
            'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.ContainerOp):
        if hasattr(subgroup, 'importer_spec'):
          importer_task_name = subgroup.task_spec.task_info.name
          importer_comp_name = subgroup.task_spec.component_ref.name
          importer_exec_label = subgroup.component_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              subgroup.task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              subgroup.component_spec)
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              subgroup.importer_spec)

      subgroup_inputs = inputs.get(subgroup.name, [])
      subgroup_params = [param for param, _ in subgroup_inputs]

      tasks_in_current_dag = [
          dsl_utils.sanitize_task_name(sub.name) for sub in subgroups
      ]

      input_parameters_in_current_dag = [
          input_name
          for input_name in group_component_spec.input_definitions.parameters
      ]
      input_artifacts_in_current_dag = [
          input_name
          for input_name in group_component_spec.input_definitions.artifacts
      ]

      is_parent_component_root = group_component_spec == pipeline_spec.root

      if isinstance(subgroup, dsl.ContainerOp):
        dsl_component_spec.update_task_inputs_spec(
            subgroup_task_spec,
            group_component_spec.input_definitions,
            subgroup_params,
            tasks_in_current_dag,
            input_parameters_in_current_dag,
            input_artifacts_in_current_dag,
        )

      if isinstance(subgroup, dsl.ParallelFor):
        if subgroup.parallelism is not None:
          warnings.warn(
              'Setting parallelism in ParallelFor is not supported yet. '
              'The setting is ignored.')

        # "Punch the hole", adding additional inputs (other than loop arguments
        # which will be handled separately) needed by its subgroup or tasks.
        loop_subgroup_params = []
        for param in subgroup_params:
          if isinstance(
              param, (_for_loop.LoopArguments, _for_loop.LoopArgumentVariable)):
            continue
          loop_subgroup_params.append(param)

        if subgroup.items_is_pipeline_param:
          # This loop_args is a 'withParam' rather than a 'withItems'.
          # i.e., rather than a static list, it is either the output of
          # another task or an input as global pipeline parameters.
          loop_subgroup_params.append(
              subgroup.loop_args.items_or_pipeline_param)

        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=loop_subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            loop_subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        if subgroup.items_is_pipeline_param:
          input_parameter_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(
                  subgroup.loop_args.items_or_pipeline_param))
          loop_arguments_item = '{}-{}'.format(
              input_parameter_name, _for_loop.LoopArguments.LOOP_ITEM_NAME_BASE)

          subgroup_component_spec.input_definitions.parameters[
              loop_arguments_item].type = pipeline_spec_pb2.PrimitiveType.STRING
          subgroup_task_spec.parameter_iterator.items.input_parameter = (
              input_parameter_name)
          subgroup_task_spec.parameter_iterator.item_input = (
              loop_arguments_item)

          # If the loop arguments itself is a loop arguments variable, handle
          # the subvar name.
          loop_args_name, subvar_name = (
              dsl_component_spec._exclude_loop_arguments_variables(
                  subgroup.loop_args.items_or_pipeline_param))
          if subvar_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].parameter_expression_selector = (
                    'parseJson(string_value)["{}"]'.format(subvar_name))
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].component_input_parameter = (
                    dsl_component_spec.additional_input_name_for_pipelineparam(
                        loop_args_name))

        else:
          input_parameter_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(
                  subgroup.loop_args.full_name))
          raw_values = subgroup.loop_args.to_list_for_task_yaml()

          subgroup_component_spec.input_definitions.parameters[
              input_parameter_name].type = pipeline_spec_pb2.PrimitiveType.STRING
          subgroup_task_spec.parameter_iterator.items.raw = json.dumps(
              raw_values, sort_keys=True)
          subgroup_task_spec.parameter_iterator.item_input = (
              input_parameter_name)
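          # With static items ('withItems'), the raw list is embedded directly;
          # e.g. loop_args of [1, 2, 3] serializes to items.raw == '[1, 2, 3]'
          # (illustrative values).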

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':

        # "punch the hole", adding inputs needed by its subgroup or tasks.
        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        condition = subgroup.condition

        operand1_value, operand2_value = self._resolve_condition_operands(
            condition.operand1, condition.operand2)

        condition_string = '{} {} {}'.format(operand1_value, condition.operator,
                                             operand2_value)

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))
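        # condition_string is a boolean expression over the resolved operands,
        # e.g. "inputs.parameters['pipelineparam--flip-output'].string_value == 'heads'"
        # (operand format and names here are illustrative).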

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      # Add the component spec if it does not already exist.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add task spec
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
              executor_label].custom_job.custom_job.update(custom_job_spec)

      # Add executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      # Ignore container_spec if custom_job_spec exists.
      if container_spec and not custom_job_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))

    # Surface metrics outputs to the top.
    self._populate_metrics_in_dag_outputs(
        group.ops,
        op_to_parent_groups,
        pipeline_spec,
    )
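
To round this off, a minimal sketch of the final deployment-spec merge performed above, assuming pipeline_spec_pb2 is importable from kfp.pipeline_spec; the executor label, image, and command are illustrative:

from google.protobuf import json_format
from kfp.pipeline_spec import pipeline_spec_pb2

deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
# A hypothetical container executor, using the same image/command/args fields
# as the container_spec built in the examples above.
container = deployment_config.executors['exec-print-op'].container
container.image = 'python:3.9'
container.command[:] = ['echo', 'hello world']

pipeline_spec = pipeline_spec_pb2.PipelineSpec()
# Merge the deployment config into the pipeline spec's Struct field, mirroring
# the json_format.MessageToDict(...) call in the example above.
pipeline_spec.deployment_spec.update(json_format.MessageToDict(deployment_config))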