Example #1
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
             artifact_class: Type[io_types.Artifact],
             reimport: bool = False) -> _container_op.ContainerOp:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

  Args:
    artifact_uri: The artifact uri to import from.
    artifact_type_schema: The user specified artifact type schema of the
      artifact to be imported.
    reimport: Whether to reimport the artifact. Defaults to False.

  Returns:
    A ContainerOp instance.

  Raises:
    ValueError if the passed in artifact_uri is neither a PipelineParam nor a
      constant string value.
  """

    if isinstance(artifact_uri, _pipeline_param.PipelineParam):
        input_param = artifact_uri
    elif isinstance(artifact_uri, str):
        input_param = _pipeline_param.PipelineParam(name='uri',
                                                    value=artifact_uri,
                                                    param_type='String')
    else:
        raise ValueError(
            'Importer got unexpected artifact_uri: {} of type: {}.'.format(
                artifact_uri, type(artifact_uri)))

    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

    task = _container_op.ContainerOp(
        name='importer',
        image='importer_image',  # TODO: need a v1 implementation of importer.
        file_outputs={
            OUTPUT_KEY:
            "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
        },
    )
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
    task.importer_spec = _build_importer_spec(
        artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
    task.task_spec = _build_importer_task_spec(importer_base_name=task.name,
                                               artifact_uri=artifact_uri)
    task.component_spec = _build_importer_component_spec(
        importer_base_name=task.name,
        artifact_type_schema=artifact_type_schema)
    task.inputs = [input_param]

    return task
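
For context, a minimal usage sketch of calling this importer from a pipeline (not part of the source above; the pipeline name, bucket URI, and the choice of the Dataset artifact class are hypothetical, and the import paths vary across KFP SDK versions):

# Hypothetical usage sketch; names and import paths are illustrative only.
from kfp import dsl
from kfp.dsl import importer  # actual import path depends on the SDK version
from kfp.dsl.io_types import Dataset


@dsl.pipeline(name='importer-demo')
def my_pipeline():
    # Bring an existing artifact into the pipeline by URI instead of
    # producing it with an upstream component.
    import_task = importer(
        artifact_uri='gs://my-bucket/datasets/train.csv',
        artifact_class=Dataset,
        reimport=False)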
Example #2
    def test_chained_call_resource_setter(self):
        task = _container_op.ContainerOp(name='test_task', image='python:3.7')
        task.container_spec = _PipelineContainerSpec()
        (task.set_cpu_limit('1')
         .set_memory_limit('1G')
         .add_node_selector_constraint('cloud.google.com/gke-accelerator',
                                       'nvidia-tesla-k80')
         .set_gpu_limit(1))

        expected_container_spec = text_format.Parse(
            _EXPECTED_CONTAINER_WITH_RESOURCE, _PipelineContainerSpec())

        self.assertDictEqual(
            json_format.MessageToDict(task.container_spec),
            json_format.MessageToDict(expected_container_spec))
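
The chained call above works because each resource setter returns the task itself. The _EXPECTED_CONTAINER_WITH_RESOURCE constant is defined elsewhere in the test module; a plausible sketch of its shape, assuming the resources field of PipelineContainerSpec mirrors the limits set above (the exact textproto is an assumption, not quoted from the source):

# Hypothetical textproto sketch of the constant referenced by the test.
_EXPECTED_CONTAINER_WITH_RESOURCE = """
resources {
  cpu_limit: 1.0
  memory_limit: 1.0
  accelerator {
    type: 'nvidia-tesla-k80'
    count: 1
  }
}
"""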
Example #3
    def test_run_as_aiplatform_custom_job_use_specified_worker_pool_specs(
            self):
        task = _container_op.ContainerOp(name='test-task',
                                         image='python:3.7',
                                         command=['python3', 'main.py'],
                                         arguments=['arg1', 'arg2'])
        run_as_aiplatform_custom_job(
            task,
            display_name='custom-job1',
            worker_pool_specs=[{
                'containerSpec': {
                    'imageUri': 'alpine',
                    'command': ['sh', '-c', 'echo 1'],
                },
                'replicaCount': '1',
                'machineSpec': {
                    'machineType': 'n1-standard-8',
                },
            }])

        expected_custom_job_spec = {
            'displayName': 'custom-job1',
            'jobSpec': {
                'workerPoolSpecs': [{
                    'containerSpec': {
                        'imageUri': 'alpine',
                        'command': ['sh', '-c', 'echo 1']
                    },
                    'replicaCount': '1',
                    'machineSpec': {
                        'machineType': 'n1-standard-8'
                    }
                }]
            }
        }

        self.maxDiff = None
        self.assertDictEqual(task.custom_job_spec, expected_custom_job_spec)
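
The expected spec mirrors the worker_pool_specs argument verbatim, which suggests that explicitly supplied pool specs pass through unchanged rather than being derived from the task's own image and command. A minimal check of that behavior (an illustrative assertion, not part of the original test):

# The explicit pool spec wins: the task's image ('python:3.7') does not
# appear in the resulting job spec.
assert (task.custom_job_spec['jobSpec']['workerPoolSpecs'][0]
        ['containerSpec']['imageUri'] == 'alpine')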
Example #4
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (v1 only) The component reference.

  Returns:
    A ContainerOp instance.
  """

  # Add component inputs with default value to the arguments dict if they are not
  # in the arguments dict already.
  arguments = arguments.copy()
  for input_spec in component_spec.inputs or []:
    if input_spec.name not in arguments and input_spec.default is not None:
      default_value = input_spec.default
      if input_spec.type == 'Integer':
        default_value = int(default_value)
      elif input_spec.type == 'Float':
        default_value = float(default_value)
      arguments[input_spec.name] = default_value

  # Check types of the reference arguments and serialize PipelineParams
  original_arguments = arguments
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      argument_type = argument_value.param_type
      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      arguments[input_name] = str(argument_value)
    if isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object was passed to component as an input argument. '
          'Pass a single output instead.')
  placeholder_resolver = ExtraPlaceholderResolver()
  resolved_cmd = _components._resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      placeholder_resolver=placeholder_resolver.resolve_placeholder,
  )

  container_spec = component_spec.implementation.container

  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

  output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
  output_paths.update(placeholder_resolver.output_paths)
  input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
  input_paths.update(placeholder_resolver.input_paths)

  artifact_argument_paths = [
      dsl.InputArgumentPath(
          argument=arguments[input_name],
          input=input_name,
          path=path,
      ) for input_name, path in input_paths.items()
  ]

  task = _container_op.ContainerOp(
      name=component_spec.name or _components._default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_paths,
      artifact_argument_paths=artifact_argument_paths,
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  if component_ref:
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

  task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor.
  # Outputs with non-pythonic names could not be accessed using their original
  # names. Now ContainerOp supports any output names, so we now use the
  # original output names. However, to support legacy pipelines, we also add
  # output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = _naming.generate_unique_name_conversion_table(
      output_names, _naming._sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  if container_spec.env:
    from kubernetes import client as k8s_client
    for name, value in container_spec.env.items():
      task.container.add_env_variable(
          k8s_client.V1EnvVar(name=name, value=value))

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  _attach_v2_specs(task, component_spec, original_arguments)

  return task
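
This factory is private; in normal use it is reached through the component-loading API. A minimal sketch of that call path (the component YAML is hypothetical, chosen to exercise the Integer default handling above):

# Hypothetical component to illustrate the call path; load_component_from_text
# returns a factory whose invocation ends in the function above.
from kfp import components

_ADD_COMPONENT_TEXT = """
name: Add
inputs:
- {name: a, type: Integer}
- {name: b, type: Integer, default: '2'}
implementation:
  container:
    image: python:3.7
    command:
    - python3
    - -c
    - 'import sys; print(int(sys.argv[1]) + int(sys.argv[2]))'
    - {inputValue: a}
    - {inputValue: b}
"""

add_op = components.load_component_from_text(_ADD_COMPONENT_TEXT)
task = add_op(a=3)  # b falls back to its default '2', cast to int above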
Example #5
    def test_run_as_aiplatform_custom_job_simple_mode(self):
        task = _container_op.ContainerOp(name='test-task',
                                         image='python:3.7',
                                         command=['python3', 'main.py'],
                                         arguments=['arg1', 'arg2'])
        run_as_aiplatform_custom_job(
            task,
            display_name='custom-job1',
            replica_count=10,
            machine_type='n1-standard-8',
            accelerator_type='NVIDIA_TESLA_K80',
            accelerator_count=2,
            boot_disk_type='pd-ssd',
            boot_disk_size_gb=200,
            timeout='3600s',
            restart_job_on_worker_restart=True,
            service_account='test-sa',
            network='projects/123/global/networks/mypvc',
            output_uri_prefix='gs://bucket/')

        expected_custom_job_spec = {
            'displayName': 'custom-job1',
            'jobSpec': {
                'workerPoolSpecs': [{
                    'replicaCount': '1',
                    'machineSpec': {
                        'machineType': 'n1-standard-8',
                        'acceleratorType': 'NVIDIA_TESLA_K80',
                        'acceleratorCount': 2
                    },
                    'containerSpec': {
                        'imageUri': 'python:3.7',
                        'command': ['python3', 'main.py'],
                        'args': ['arg1', 'arg2']
                    },
                    'diskSpec': {
                        'bootDiskType': 'pd-ssd',
                        'bootDiskSizeGb': 200
                    }
                }, {
                    'replicaCount': '9',
                    'machineSpec': {
                        'machineType': 'n1-standard-8',
                        'acceleratorType': 'NVIDIA_TESLA_K80',
                        'acceleratorCount': 2
                    },
                    'containerSpec': {
                        'imageUri': 'python:3.7',
                        'command': ['python3', 'main.py'],
                        'args': ['arg1', 'arg2']
                    },
                    'diskSpec': {
                        'bootDiskType': 'pd-ssd',
                        'bootDiskSizeGb': 200
                    }
                }],
                'scheduling': {
                    'timeout': '3600s',
                    'restartJobOnWorkerRestart': True
                },
                'serviceAccount': 'test-sa',
                'network': 'projects/123/global/networks/mypvc',
                'baseOutputDirectory': {
                    'outputUriPrefix': 'gs://bucket/'
                }
            }
        }
        self.maxDiff = None
        self.assertDictEqual(task.custom_job_spec, expected_custom_job_spec)
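
Note the split in the expected spec: replica_count=10 yields a chief pool with replicaCount '1' plus a second, identically configured pool holding the remaining nine replicas. A sketch of the implied arithmetic (an assumption about how the helper derives the pools, consistent with the assertion above):

# Implied pool derivation: one chief replica, the rest in a worker pool.
replica_count = 10
chief_replicas = 1
worker_replicas = replica_count - chief_replicas  # 9, matching the spec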