Пример #1
0
def two_step_pipeline_with_task_only_dependency() -> tfx_pipeline.Pipeline:
    """Builds a two-node pipeline whose nodes are linked only by task order.

    Neither container component declares inputs, outputs or parameters; the
    second node is made to depend on the first purely through
    `add_upstream_node`.

    Returns:
      A `tfx_pipeline.Pipeline` containing both nodes.
    """

    def _make_step(number):
        # Each step differs only by its ordinal, which appears in the
        # component name, the image name and the command.
        step_factory = container_component.create_container_component(
            name='Step {}'.format(number),
            inputs={},
            outputs={},
            parameters={},
            image='step-{}-image'.format(number),
            command=['run', 'step-{}'.format(number)])
        return step_factory()

    first_step = _make_step(1)
    second_step = _make_step(2)
    # No artifact flows between the nodes, so the ordering is stated
    # explicitly.
    second_step.add_upstream_node(first_step)

    return tfx_pipeline.Pipeline(
        pipeline_name='two-step-task-only-dependency-pipeline',
        pipeline_root=_TEST_PIPELINE_ROOT,
        components=[first_step, second_step],
    )
Пример #2
0
 def testCreateComponent(self):
     """Compares a YAML-loaded component against a hand-built reference.

     The component loaded from `kfp_container_component_test.yaml` must have
     the same type and the same `EXECUTOR_SPEC` as a component created
     directly with `create_container_component`.
     """
     yaml_path = os.path.join(self._testdata_path,
                              'kfp_container_component_test.yaml')
     component = kfp_container_component.load_kfp_yaml_container_component(
         yaml_path)

     # Command line the reference component is built with.
     expected_command = [
         'command1',
         'command2',
         'command3',
         placeholders.InputUriPlaceholder('Directory'),
         placeholders.InputValuePlaceholder('Subpath'),
         placeholders.OutputUriPlaceholder('File'),
         '--arg1',
         placeholders.InputUriPlaceholder('input1'),
         '--arg2',
         placeholders.InputValuePlaceholder('input2'),
         '--arg3',
         placeholders.OutputUriPlaceholder('output1'),
     ]
     expected_inputs = {
         'input1': standard_artifacts.String,
         'input2': standard_artifacts.String,
     }
     expected_outputs = {'output1': standard_artifacts.String}

     ref_component = container_component.create_container_component(
         name='Test_Kfp_Container_Component',
         image='image1',
         command=expected_command,
         inputs=expected_inputs,
         outputs=expected_outputs,
         parameters={},
     )

     self.assertEqual(type(component), type(ref_component))
     self.assertEqual(ref_component.EXECUTOR_SPEC, component.EXECUTOR_SPEC)
Пример #3
0
def tasks_for_pipeline_with_artifact_value_passing():
    """Returns the tasks of a pipeline where an artifact is consumed as value.

    A 'Produce' component writes a greeting to its `data` output, and a
    'Print' component receives that artifact through an
    `InputValuePlaceholder`.

    Returns:
      A list with the producer task followed by the print task.
    """
    cloud_sdk_image = 'gcr.io/ml-pipeline/mirrors/cloud-sdk'

    # Shell script run by the producer; "$0" and "$1" are bound to the two
    # placeholders that follow it in the command list.
    produce_script = """
            message="$0"
            output_data_uri="$1"
            output_data_path=$(mktemp)

            # Running the main code
            echo "Hello $message" >"$output_data_path"

            # Getting data out of the container
            gsutil cp -r "$output_data_path" "$output_data_uri"
          """

    producer_component = container_component.create_container_component(
        name='Produce',
        image=cloud_sdk_image,
        command=[
            'sh',
            '-exc',
            produce_script,
            placeholders.InputValuePlaceholder('message'),
            placeholders.OutputUriPlaceholder('data'),
        ],
        outputs={'data': simple_artifacts.File},
        parameters={'message': str},
    )

    print_value_component = container_component.create_container_component(
        name='Print',
        image=cloud_sdk_image,
        command=['echo', placeholders.InputValuePlaceholder('text')],
        inputs={'text': simple_artifacts.File},
    )

    producer_task = producer_component(message='World!')
    # The producer's `data` output channel is wired into the printer's `text`
    # input.
    print_task = print_value_component(text=producer_task.outputs['data'])
    return [producer_task, print_task]
Пример #4
0
def load_kfp_yaml_container_component(
        path: Text) -> Callable[..., base_component.BaseComponent]:
    """Creates a container-based component from a Kubeflow component spec.

    See
    https://www.kubeflow.org/docs/pipelines/reference/component-spec/

    Example:
      component = load_kfp_yaml_container_component(
        "kfp_pipelines_root/components/datasets/Chicago_Taxi_Trips/component.yaml"
      )

    Args:
      path: local file path of a Kubeflow Pipelines component YAML file.

    Returns:
      Container component that can be instantiated in a TFX pipeline.
    """
    # Decode explicitly as UTF-8 so that specs containing non-ASCII text load
    # the same way regardless of the platform's default locale encoding.
    with open(path, encoding='utf-8') as component_file:
        # NOTE(review): yaml.FullLoader is not meant for untrusted input;
        # consider yaml.safe_load if component files can come from sources
        # that are not trusted.
        data = yaml.load(component_file, Loader=yaml.FullLoader)
    _convert_target_fields_to_kv_pair(data)
    component_spec = json_format.ParseDict(
        data, kfp_component_spec_pb2.ComponentSpec())
    container = component_spec.implementation.container
    # The container's `command` entries come first, followed by its `args`,
    # each converted by the same helper.
    command = [
        _get_command_line_argument_type(argument)
        for argument in list(container.command) + list(container.args)
    ]
    # TODO(ericlege): Support classname to class translation in inputs.type
    # Until then every declared input and output is typed as a String
    # artifact.
    inputs = {
        item.name: standard_artifacts.String
        for item in component_spec.inputs
    }
    outputs = {
        item.name: standard_artifacts.String
        for item in component_spec.outputs
    }
    return container_component.create_container_component(
        name=component_spec.name,
        image=container.image,
        command=command,
        inputs=inputs,
        outputs=outputs,
        parameters={},
    )
Пример #5
0
            self._id = '{}.{}'.format(self.__class__.__name__, instance_name)
        else:
            self._id = self.__class__.__name__


# Container component with one Model input, one Model output and one string
# parameter, each of which is forwarded to the `transformer` command line.
dummy_transformer_component = container_component.create_container_component(
    name='DummyContainerSpecComponent',
    image='dummy/transformer',
    command=[
        'transformer',
        # Artifact locations are passed as URIs.
        '--input1', placeholders.InputUriPlaceholder('input1'),
        '--output1', placeholders.OutputUriPlaceholder('output1'),
        # The parameter is passed by value.
        '--param1', placeholders.InputValuePlaceholder('param1'),
    ],
    inputs={'input1': standard_artifacts.Model},
    outputs={'output1': standard_artifacts.Model},
    parameters={'param1': str},
)

dummy_producer_component = container_component.create_container_component(
    name='DummyProducerComponent',
    outputs={
        'output1': standard_artifacts.Model,
Пример #6
0
    def _wrap_container_component(
        self,
        component: base_node.BaseNode,
        component_launcher_class: Type[
            base_component_launcher.BaseComponentLauncher],
        component_config: Optional[base_component_config.BaseComponentConfig],
        pipeline: tfx_pipeline.Pipeline,
    ) -> base_node.BaseNode:
        """Wraps a component in a container component that launches it.

        The wrapped component, its launcher class path, its config and the
        pipeline-level settings are all serialized into command-line
        arguments of the returned container component.

        Args:
          component: Component to be executed.
          component_launcher_class: The class of the launcher to launch the
            component.
          component_config: component config to launch the component.
          pipeline: Logical pipeline that contains pipeline related
            information.

        Returns:
          A container component that runs the wrapped component upon
          execution.
        """
        launcher_class_path = '{}.{}'.format(
            component_launcher_class.__module__,
            component_launcher_class.__name__)
        component_json = json_utils.dumps(node_wrapper.NodeWrapper(component))

        # (flag, value) pairs, flattened below into the argument list.
        flag_value_pairs = (
            ('--pipeline_name', pipeline.pipeline_info.pipeline_name),
            ('--pipeline_root', pipeline.pipeline_info.pipeline_root),
            ('--run_id', pipeline.pipeline_info.run_id),
            ('--metadata_config',
             json_format.MessageToJson(
                 message=get_default_kubernetes_metadata_config(),
                 preserving_proto_field_name=True)),
            ('--beam_pipeline_args', json.dumps(pipeline.beam_pipeline_args)),
            ('--additional_pipeline_args',
             json.dumps(pipeline.additional_pipeline_args)),
            ('--component_launcher_class_path', launcher_class_path),
            ('--serialized_component', component_json),
            ('--component_config', json_utils.dumps(component_config)),
        )
        arguments = [token for pair in flag_value_pairs for token in pair]

        # Outputs/Parameters fields are left empty because that information
        # is already carried by the serialized component.
        wrapper_factory = container_component.create_container_component(
            name=component.__class__.__name__,
            outputs={},
            parameters={},
            image=self._config.tfx_image,
            command=_CONTAINER_COMMAND + arguments)
        return wrapper_factory().with_id(component.id + _WRAPPER_SUFFIX)
Пример #7
0
from tfx.components import CsvExampleGen
from tfx.utils.dsl_utils import external_input
from tfx.dsl.component.experimental import container_component
from tfx.dsl.component.experimental import placeholders
from tfx.types import standard_artifacts

# Location of data/data.csv relative to this file's directory.
data_path = Path(__file__).parent.joinpath("data", "data.csv")

# TODO: This fails with an error because the referenced script does not exist
# inside the container.
download_data_component = container_component.create_container_component(
    name='DownloadData',
    image='google/cloud-sdk:278.0.0',
    command=[
        'sh',
        '-exc',
        # "$0" is bound to the output URI placeholder that follows the script.
        """
        data_uri="$0"
        python container_component/download_data.py  --uri "$data_uri" 
        """,
        placeholders.OutputUriPlaceholder('data_uri'),
    ],
    outputs={'data_uri': standard_artifacts.ExternalArtifact},
)

xgb_component = container_component.create_container_component(
    name='XGBTrainer',
    inputs={
        'data': standard_artifacts.Examples
    },
    # outputs={
    #     'model': standard_artifacts.ExternalArtifact
Пример #8
0
# Container component that fetches a URL (wget, falling back to curl) and
# copies the result to its `data` output with gsutil.
downloader_component = container_component.create_container_component(
    name='DownloadFromHttp',
    # The component code uses gsutil to upload the data to GCS, so the
    # container image needs to have gsutil installed and configured.
    # Fixing b/150670779 by merging cl/294536017 will lift this limitation.
    image='google/cloud-sdk:278.0.0',
    command=[
        'sh',
        '-exc',
        # "$0" and "$1" are bound to the two placeholders after the script.
        '''
          url="$0"
          output_data_uri="$1"/data  # TODO(b/150515270) Remove when fixed.
          output_data_path=$(mktemp)

          # Running the main code
          wget "$0" -O "$output_data_path" || curl "$0" > "$output_data_path"

          # Getting data out of the container
          gsutil cp "$output_data_path" "$output_data_uri"
        ''',
        ph.exec_property('url'),
        ph.output('data')[0].uri,
    ],
    outputs={'data': standard_artifacts.ExternalArtifact},
    parameters={'url': str},
)