Python KubeflowDagRunner示例，tfx.orchestration.kubeflow.kubeflow_dag_runner.KubeflowDagRunner Python示例

示例#1

0

显示文件

文件： kubeflow_dag_runner_test.py 项目： robertlugg/tfx

    def testDefaultPipelineOperatorFuncs(self):
        """Checks that every container carries the default GCP credentials.

        Runs the two-step pipeline through a default-configured
        KubeflowDagRunner, opens the generated ``two_step_pipeline.tar.gz``
        under ``self.test_dir`` and inspects the container templates found in
        its ``pipeline.yaml``.
        """
        kubeflow_dag_runner.KubeflowDagRunner().run(_two_step_pipeline())
        file_path = os.path.join(self.test_dir, 'two_step_pipeline.tar.gz')
        self.assertTrue(tf.gfile.Exists(file_path))

        # Open the archive in its own `with` so the tar handle is closed too,
        # not only the extracted member.
        with tarfile.open(file_path) as archive:
            with archive.extractfile('pipeline.yaml') as pipeline_file:
                self.assertIsNotNone(pipeline_file)
                # safe_load: yaml.load() without an explicit Loader is
                # rejected by PyYAML >= 6 and can construct arbitrary objects
                # on older releases.
                pipeline = yaml.safe_load(pipeline_file)

        containers = [
            c for c in pipeline['spec']['templates'] if 'container' in c
        ]
        self.assertEqual(2, len(containers))

        # Check that each container has default GCP credentials. Looping
        # guarantees the second container is verified as well, not just the
        # first one.
        for container in containers:
            credentials = [
                env for env in container['container']['env']
                if env['name'] == 'GOOGLE_APPLICATION_CREDENTIALS'
            ]
            self.assertEqual(1, len(credentials))
            self.assertEqual('/secret/gcp-credentials/user-gcp-sa.json',
                             credentials[0]['value'])

示例#2

0

显示文件

文件： kubeflow_dag_runner-checkpoint.py 项目： risenW/tfx-adClickPrediction

def run():
    """Build the pipeline and pass it to a KubeflowDagRunner.

    The TFX container image comes from the ``KUBEFLOW_TFX_IMAGE`` environment
    variable (``None`` when unset). The default pod labels are extended with
    an SDK-environment label before the runner is created.
    """
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')

    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)

    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'advert-pred'})

    # Runner first, pipeline second: same construction order as a chained
    # ``Runner(...).run(create_pipeline(...))`` call.
    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_cfg, pod_labels_to_attach=labels)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        data_path=DATA_PATH,
        preprocessing_fn=PREPROCESSING_FN,
        run_fn=RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=EVAL_NUM_STEPS),
        eval_accuracy_threshold=EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
    )
    dag_runner.run(built_pipeline)

示例#3

0

显示文件

  def testTwoStepPipeline(self):
    """Sanity-checks the construction and dependencies for a 2-step pipeline.

    Compiles the two-step pipeline with a default KubeflowDagRunner, loads
    ``pipeline.yaml`` from the generated archive and verifies the container
    templates, the BigQuery component's command and the DAG task wiring.
    """
    kubeflow_dag_runner.KubeflowDagRunner().run(_two_step_pipeline())
    file_path = os.path.join(self.test_dir, 'two_step_pipeline.tar.gz')
    self.assertTrue(tf.io.gfile.exists(file_path))

    # Open the archive in its own `with` so the tar handle is closed too,
    # not only the extracted member.
    with tarfile.open(file_path) as archive:
      with archive.extractfile('pipeline.yaml') as pipeline_file:
        self.assertIsNotNone(pipeline_file)
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML >= 6 and can construct arbitrary objects on older releases.
        pipeline = yaml.safe_load(pipeline_file)

    containers = [
        c for c in pipeline['spec']['templates'] if 'container' in c
    ]
    self.assertEqual(2, len(containers))

    big_query_container = [
        c for c in containers if c['name'] == 'bigqueryexamplegen'
    ]
    self.assertEqual(1, len(big_query_container))
    self.assertEqual([
        'python',
        '/tfx-src/tfx/orchestration/kubeflow/container_entrypoint.py'
    ], big_query_container[0]['container']['command'])

    statistics_gen_container = [
        c for c in containers if c['name'] == 'statisticsgen'
    ]
    self.assertEqual(1, len(statistics_gen_container))

    # Ensure dependencies between components are captured.
    dag = [c for c in pipeline['spec']['templates'] if 'dag' in c]
    self.assertEqual(1, len(dag))

    self.assertEqual(
        {
            'tasks': [{
                'name': 'bigqueryexamplegen',
                'template': 'bigqueryexamplegen',
                'arguments': {
                    'parameters': [{
                        'name': 'pipeline-root',
                        'value': '{{inputs.parameters.pipeline-root}}'
                    }, {
                        'name': 'table-name',
                        'value': '{{inputs.parameters.table-name}}'
                    }]
                }
            }, {
                'name': 'statisticsgen',
                'template': 'statisticsgen',
                'arguments': {
                    'parameters': [{
                        'name': 'pipeline-root',
                        'value': '{{inputs.parameters.pipeline-root}}'
                    }]
                },
                'dependencies': ['bigqueryexamplegen'],
            }]
        }, dag[0]['dag'])

示例#4

0

显示文件

文件： taxi_pipeline_kubeflow_gcp.py 项目： rakesh283343/tfx

def main(unused_argv):
    """Create the pipeline and submit it to a KubeflowDagRunner.

    Args:
      unused_argv: Ignored; nothing in the body reads it.
    """
    # Default metadata config. These defaults work with a Kubeflow-based
    # installation of KF Pipelines; a lightweight deployment may need
    # different values.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The TFX image is taken from 'KUBEFLOW_TFX_IMAGE' when that environment
    # variable is defined (the tfx cli tool currently exports it); otherwise
    # None is passed through.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')

    # tfx_image selects the custom docker image to use.
    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
    built_pipeline = create_pipeline(
        pipeline_name=_pipeline_name,
        pipeline_root=_pipeline_root,
        module_file=_module_file,
        ai_platform_training_args=_ai_platform_training_args,
        ai_platform_serving_args=_ai_platform_serving_args,
    )
    dag_runner.run(built_pipeline)

示例#5

0

显示文件

def run():
    """Build the BigQuery-backed pipeline and run it with KubeflowDagRunner.

    Reads the TFX image from ``KUBEFLOW_TFX_IMAGE`` and stores 'tfx-template'
    in the environment under ``kubeflow_dag_runner.SDK_ENV_LABEL`` before the
    runner is created.
    """
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')
    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)

    os.environ[kubeflow_dag_runner.SDK_ENV_LABEL] = 'tfx-template'

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=config.PIPELINE_NAME,
        pipeline_root=pipeline_config.PIPELINE_ROOT_GCS,
        data_path=pipeline_config.DATA_PATH_KUBEFLOW,
        preprocessing_fn=config.PREPROCESSING_FN,
        run_fn=config.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=config.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=config.EVAL_NUM_STEPS),
        eval_accuracy_threshold=config.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=pipeline_config.SERVING_MODEL_DIR_GCS,
        query=config.BIG_QUERY_QUERY,
        # Beam runs with the DirectRunner arguments here. Optional
        # alternatives left disabled: config.DATAFLOW_BEAM_PIPELINE_ARGS for
        # beam_pipeline_args, and the GCP AI Platform training/serving args.
        beam_pipeline_args=(
            config.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS),
    )
    dag_runner.run(built_pipeline)

示例#6

0

显示文件

    def _compile_and_run_pipeline(self, pipeline: tfx_pipeline.Pipeline,
                                  **kwargs):
        """Compiles a TFX pipeline and runs it through the KFP client.

        The pipeline is compiled with KubeflowDagRunner, the resulting
        ``<pipeline_name>.tar.gz`` under ``self._test_dir`` is submitted via
        ``kfp.Client()`` (rather than through Argo), and the run is then
        checked for successful completion.

        Args:
          pipeline: The logical pipeline to run.
          **kwargs: Key-value pairs of runtime parameters passed to the
            pipeline execution.
        """
        kfp_client = kfp.Client(host=self._KFP_ENDPOINT)

        name = pipeline.pipeline_info.pipeline_name
        runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(),
            tfx_image=self._CONTAINER_IMAGE)
        dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
        dag_runner.run(pipeline)

        package_path = os.path.join(self._test_dir, '{}.tar.gz'.format(name))
        self.assertTrue(tf.io.gfile.exists(package_path))

        submitted = kfp_client.create_run_from_pipeline_package(
            pipeline_file=package_path, arguments=kwargs)

        self._assert_successful_run_completion(
            host=self._KFP_ENDPOINT,
            run_id=submitted.run_id,
            pipeline_name=name,
            timeout=self._TIME_OUT)

示例#7

0

显示文件

文件： kubeflow_runner.py 项目： EdwardCuiPeacock/node2vec_pipeline

def run(metadata_file: Optional[Text] = None):
    """Build the pipeline described by a metadata file and run it on Kubeflow.

    Args:
      metadata_file: Passed straight to ``get_metadata``; defaults to None.
    """
    # Default metadata config. These defaults work with a Kubeflow-based
    # installation of KF Pipelines (metadata is written to the MySQL database
    # inside the Kubeflow cluster); a lightweight deployment may need
    # different values.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    metadata = get_metadata(metadata_file)
    system_config = get_config(metadata, "system_configurations")
    model_config = get_config(metadata, "model_configurations")

    # The TFX image comes from the 'KUBEFLOW_TFX_IMAGE' environment variable,
    # which the tfx cli tool currently exports.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler
    # to pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get("KUBEFLOW_TFX_IMAGE")
    logging.info(f"Current tfx image used: {tfx_image}")

    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata,
        tfx_image=tfx_image,
    )

    labels = kubeflow_dag_runner.get_default_pod_labels()
    # The "<name>_<version>" string is used both as the SDK-env pod label and
    # as the pipeline name.
    versioned_name = (
        metadata["pipeline_name"] + "_" + metadata["pipeline_version"])
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: versioned_name})

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_cfg, pod_labels_to_attach=labels)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=versioned_name,
        pipeline_root=system_config["PIPELINE_ROOT"],
        query=model_config["query_script_path"],
        preprocessing_fn=system_config["preprocessing_fn"],
        run_fn=system_config["run_fn"],
        train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=100),
        eval_args=trainer_pb2.EvalArgs(splits=["train"], num_steps=50),
        model_serve_dir=system_config["MODEL_SERVE_DIR"],
        beam_pipeline_args=system_config["DATAFLOW_BEAM_PIPELINE_ARGS"],
        # AI Platform training args are only forwarded when the config flag
        # enables them; otherwise None is passed.
        ai_platform_training_args=(
            system_config["GCP_AI_PLATFORM_TRAINING_ARGS"]
            if system_config["enable_gpc_ai_platform_training"] else None),
        enable_cache=system_config["enable_cache"],
        # Both config sections are also handed downstream unchanged.
        system_config=system_config,
        model_config=model_config,
    )
    dag_runner.run(built_pipeline)

示例#8

0

显示文件

def run():
    """Build the pipeline and run it on Kubeflow with a fixed TFX image."""
    # Default metadata config. These defaults work with a Kubeflow-based
    # installation of KF Pipelines (metadata is written to the MySQL database
    # inside the Kubeflow cluster); a lightweight deployment may need
    # different values.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The image is hard-coded here rather than read from the
    # 'KUBEFLOW_TFX_IMAGE' environment variable.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler
    # to pipeline DSL file, instead of using environment vars.
    image = 'gcr.io/gcp-nyc/tfx-pipeline'

    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_cfg, pod_labels_to_attach=labels)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        gcp_project=configs.GOOGLE_CLOUD_PROJECT,
        gcs_bucket=configs.GCS_BUCKET_NAME,
        tcga_betas_query=configs.TCGA_BETAS_QUERY,
        tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
        tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
        cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
        cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
        cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
        pivot_query=configs.PIVOT_DATASET_QUERY,
        pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
        final_dataset_query=configs.TRAIN_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
        # Beam runs with the DirectRunner arguments here.
        # TODO(step 8): (Optional) pass configs.DATAFLOW_BEAM_PIPELINE_ARGS
        # as beam_pipeline_args to use Dataflow.
        # TODO(step 9): (Optional) pass configs.GCP_AI_PLATFORM_TRAINING_ARGS
        # and configs.GCP_AI_PLATFORM_SERVING_ARGS to use Cloud AI Platform.
        beam_pipeline_args=(
            configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS),
    )
    dag_runner.run(built_pipeline)

示例#9

0

显示文件

文件： kubeflow.py 项目： google/nitroml

def get_default_kubeflow_dag_runner():
  """Creates a KubeflowDagRunner from the default metadata config.

  The docker image is read from the ``KUBEFLOW_TFX_IMAGE`` environment
  variable (``None`` when unset) and logged before the runner is built.

  Returns:
    The ``KubeflowDagRunner`` built from that config.
  """
  default_metadata = (
      kubeflow_dag_runner.get_default_kubeflow_metadata_config())
  image = os.environ.get('KUBEFLOW_TFX_IMAGE')
  logging.info('Using "%s" as  the docker image.', image)

  runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=default_metadata, tfx_image=image)
  dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
  return dag_runner

示例#10

0

显示文件

def run():
    """Define a kubeflow pipeline and run it with KubeflowDagRunner.

    The TFX image is read from the ``KUBEFLOW_TFX_IMAGE`` environment variable
    (``None`` when unset), and the default pod labels are extended with an
    SDK-environment label before being attached to the runner.
    """

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # dict.update() mutates in place and returns None, so the labels must be
    # bound first and updated in a separate statement; chaining the two would
    # pass None as pod_labels_to_attach.
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=conf['kfp']['pipeline_name'],
            pipeline_root=conf['pipeline_root_dir'],
            data_path=conf['train_data'],
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            module_file='pjm_trainer.py',
            #   preprocessing_fn=configs.PREPROCESSING_FN,
            #   run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=conf['serving_model_dir'],
            # TODO(step 7): (Optional) Uncomment below to provide GCP related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))

示例#11

0

显示文件

文件： kubeflow_dag_runner_test.py 项目： suryaavala/tfx

    def testDefaultPipelineOperatorFuncs(self):
        """Compiles the two-step pipeline and expects two container templates.

        The archive ``two_step_pipeline.tar.gz`` is looked up relative to the
        current working directory.
        """
        kubeflow_dag_runner.KubeflowDagRunner().run(_two_step_pipeline())
        package_path = 'two_step_pipeline.tar.gz'
        self.assertTrue(fileio.exists(package_path))

        archive = tarfile.TarFile.open(package_path)
        with archive.extractfile('pipeline.yaml') as yaml_stream:
            self.assertIsNotNone(yaml_stream)
            workflow_spec = yaml.safe_load(yaml_stream)

        # Only templates that declare a 'container' entry are counted.
        container_templates = [
            template for template in workflow_spec['spec']['templates']
            if 'container' in template
        ]
        self.assertEqual(2, len(container_templates))

示例#12

0

显示文件

 def testPatcherWithOutputFile(self):
     """Runs a runner that has an explicit output filename under the patcher.

     The patcher is created with ``use_temporary_output_file=True``; the test
     expects the patch context to report that no temporary file was used and
     that the runner's own output filename was kept.
     """
     expected_name = 'foo.tar.gz'
     runner_patcher = kubeflow_dag_runner_patcher.KubeflowDagRunnerPatcher(
         call_real_run=False,
         build_image_fn=None,
         use_temporary_output_file=True)
     dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
         output_filename=expected_name)
     dummy_pipeline = tfx_pipeline.Pipeline('dummy', 'dummy_root')

     with runner_patcher.patch() as patch_context:
         dag_runner.run(dummy_pipeline)

     self.assertFalse(
         patch_context[runner_patcher.USE_TEMPORARY_OUTPUT_FILE])
     reported_path = patch_context[runner_patcher.OUTPUT_FILE_PATH]
     self.assertEqual(os.path.basename(reported_path), expected_name)
     self.assertEqual(dag_runner._output_filename, expected_name)

示例#13

0

显示文件

    def _compile_and_run_pipeline(self,
                                  pipeline: tfx_pipeline.Pipeline,
                                  parameters: Dict[Text, Any] = None):
        """Compiles a KFP pipeline, runs it and checks that it succeeded.

        Cleanup callbacks for the workflow, its output and its metadata are
        registered before the workflow is started. After the run, the
        workflow logs and status are fetched with the ``argo`` CLI in the
        ``kubeflow`` namespace.

        Args:
          pipeline: The logical pipeline to run.
          parameters: Value of runtime parameters of the pipeline.
        """
        name = pipeline.pipeline_info.pipeline_name
        runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(name),
            tfx_image=self._container_image)
        dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
        dag_runner.run(pipeline)

        package_path = os.path.join(self._test_dir, '{}.tar.gz'.format(name))
        self.assertTrue(tf.io.gfile.exists(package_path))
        # extract() is called without a target path, so the member lands in
        # the current working directory.
        # NOTE(review): the path built below assumes that directory is
        # self._test_dir - confirm against the test setup.
        archive = tarfile.TarFile.open(package_path)
        archive.extract('pipeline.yaml')
        workflow_yaml = os.path.join(self._test_dir, 'pipeline.yaml')
        self.assertIsNotNone(workflow_yaml)

        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        for cleanup in (self._delete_workflow,
                        self._delete_pipeline_output,
                        self._delete_pipeline_metadata):
            self.addCleanup(cleanup, name)

        # Run the pipeline to completion.
        self._run_workflow(workflow_yaml, name, parameters)

        # Obtain workflow logs.
        raw_logs = subprocess.check_output(
            ['argo', '--namespace', 'kubeflow', 'logs', '-w', name])
        logs_text = raw_logs.decode('utf-8')

        # Check if pipeline completed successfully.
        raw_status = subprocess.check_output(
            ['argo', '--namespace', 'kubeflow', 'get', name])
        status_text = raw_status.decode('utf-8')

        succeeded = re.search(
            r'^Status:\s+Succeeded$', status_text, flags=re.MULTILINE)
        failure_message = (
            'Pipeline {} failed to complete successfully:\n{}'
            '\nFailed workflow logs:\n{}'.format(name, status_text,
                                                 logs_text))
        self.assertIsNotNone(succeeded, failure_message)

示例#14

0

显示文件

  def testContainerComponent(self):
    """Compiles the container-component pipeline and inspects its one container.

    Expects exactly one container template in ``pipeline.yaml`` and that its
    argument list contains ``--node_id``.
    """
    dag_runner = kubeflow_dag_runner.KubeflowDagRunner()
    dag_runner.run(_container_component_pipeline())
    package_path = os.path.join(
        self.tmp_dir, 'container_component_pipeline.tar.gz')
    self.assertTrue(fileio.exists(package_path))

    archive = tarfile.TarFile.open(package_path)
    with archive.extractfile('pipeline.yaml') as yaml_stream:
      self.assertIsNotNone(yaml_stream)
      workflow_spec = yaml.safe_load(yaml_stream)

    container_templates = [
        template for template in workflow_spec['spec']['templates']
        if 'container' in template
    ]
    self.assertLen(container_templates, 1)
    self.assertIn('--node_id', container_templates[0]['container']['args'])

示例#15

0

显示文件

文件： kubeflow_dag_runner.py 项目： wendy2003888/tfx

def run():
    """Build the template pipeline and run it with KubeflowDagRunner."""
    # Default metadata config. These defaults work with a Kubeflow-based
    # installation of KF Pipelines (metadata is written to the MySQL database
    # inside the Kubeflow cluster); a lightweight deployment may need
    # different values.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The TFX image is taken from 'KUBEFLOW_TFX_IMAGE' when that environment
    # variable is defined (the tfx cli tool currently exports it).
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')

    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)

    # Set the SDK type label environment.
    os.environ[kubeflow_dag_runner.SDK_ENV_LABEL] = 'tfx-template'

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        data_path=DATA_PATH,
        # TODO(step 7): (Optional) pass query=configs.BIG_QUERY_QUERY to use
        # BigQueryExampleGen, together with
        # beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS.
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
        # TODO(step 8): (Optional) pass
        # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS to use
        # Dataflow.
        # TODO(step 9): (Optional) pass configs.GCP_AI_PLATFORM_TRAINING_ARGS
        # and configs.GCP_AI_PLATFORM_SERVING_ARGS to use Cloud AI Platform.
    )
    dag_runner.run(built_pipeline)

示例#16

0

显示文件

    def _compile_and_run_pipeline(self,
                                  pipeline: tfx_pipeline.Pipeline,
                                  workflow_name: Text = None,
                                  parameters: Dict[Text, Any] = None):
        """Compiles a KFP pipeline, runs it and checks that it succeeded.

        Cleanup callbacks are registered before the workflow is started.
        Afterwards the workflow logs are fetched with the ``argo`` CLI and
        the status reported by ``self._get_argo_pipeline_status`` must equal
        'Succeeded'.

        Args:
          pipeline: The logical pipeline to run.
          workflow_name: The argo workflow name, default to pipeline name.
          parameters: Value of runtime parameters of the pipeline.
        """
        name = pipeline.pipeline_info.pipeline_name
        runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(),
            tfx_image=self._CONTAINER_IMAGE)
        dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
        dag_runner.run(pipeline)

        package_path = os.path.join(self._test_dir, '{}.tar.gz'.format(name))
        self.assertTrue(tf.io.gfile.exists(package_path))
        # extract() is called without a target path, so the member lands in
        # the current working directory.
        # NOTE(review): the path built below assumes that directory is
        # self._test_dir - confirm against the test setup.
        archive = tarfile.TarFile.open(package_path)
        archive.extract('pipeline.yaml')
        workflow_yaml = os.path.join(self._test_dir, 'pipeline.yaml')
        self.assertIsNotNone(workflow_yaml)

        # A falsy workflow name falls back to the pipeline name.
        if not workflow_name:
            workflow_name = name

        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        self.addCleanup(self._delete_workflow, workflow_name)
        for cleanup in (self._delete_pipeline_metadata,
                        self._delete_pipeline_output):
            self.addCleanup(cleanup, name)

        # Run the pipeline to completion.
        self._run_workflow(workflow_yaml, workflow_name, parameters)

        # Obtain workflow logs.
        raw_logs = subprocess.check_output(
            ['argo', '--namespace', 'kubeflow', 'logs', '-w', workflow_name])
        logs_text = raw_logs.decode('utf-8')

        # Check if pipeline completed successfully.
        status = self._get_argo_pipeline_status(workflow_name)
        failure_message = (
            'Pipeline {} failed to complete successfully: {}'
            '\nFailed workflow logs:\n{}'.format(name, status, logs_text))
        self.assertEqual('Succeeded', status, failure_message)

示例#17

0

显示文件

文件： kubeflow_dag_runner.py 项目： yakmanyakmanyedatsme/BankRisk

def run():
    """Build the pipeline and run it with KubeflowDagRunner.

    The TFX image is read from the ``KUBEFLOW_TFX_IMAGE`` environment variable
    (``None`` when unset).
    """
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')

    # NOTE(review): metadata_config is not bound inside this function;
    # presumably it is a module-level name - verify it exists.
    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=image)

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
    built_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        bucket=BUCKET,
        csv_file=CSV_FILE,
        preprocessing_fn=configs.PREPROCESSING_FN,
        trainer_fn=configs.TRAINER_FN,
        train_args=configs.TRAIN_ARGS,
        eval_args=configs.EVAL_ARGS,
        serving_model_dir=SERVING_MODEL_DIR,
    )
    dag_runner.run(built_pipeline)

示例#18

0

显示文件

文件： kubeflow_dag_runner_test.py 项目： suryaavala/tfx

    def testVolumeMountingPipelineOperatorFuncs(self):
        """Checks that a PVC-mounting operator func reaches every container.

        The pipeline is compiled with ``onprem.mount_pvc`` as the only
        pipeline operator func. Each container template must then mount the
        volume, and the PVC definition must match wherever a 'volumes' entry
        is present (spec level or template level).
        """
        pvc_mount_op = onprem.mount_pvc('my-persistent-volume-claim',
                                        'my-volume-name',
                                        '/mnt/volume-mount-path')
        runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            pipeline_operator_funcs=[pvc_mount_op])

        dag_runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_cfg)
        dag_runner.run(_two_step_pipeline())
        package_path = 'two_step_pipeline.tar.gz'
        self.assertTrue(fileio.exists(package_path))

        archive = tarfile.TarFile.open(package_path)
        with archive.extractfile('pipeline.yaml') as yaml_stream:
            self.assertIsNotNone(yaml_stream)
            workflow_spec = yaml.safe_load(yaml_stream)

        container_templates = [
            template for template in workflow_spec['spec']['templates']
            if 'container' in template
        ]
        self.assertEqual(2, len(container_templates))

        expected_volumes = [{
            'name': 'my-volume-name',
            'persistentVolumeClaim': {
                'claimName': 'my-persistent-volume-claim'
            }
        }]
        expected_mounts = [{
            'name': 'my-volume-name',
            'mountPath': '/mnt/volume-mount-path'
        }]

        # Check that the PVC is specified for kfp<=0.1.31.1.
        spec = workflow_spec['spec']
        if 'volumes' in spec:
            self.assertEqual(expected_volumes, spec['volumes'])

        for template in container_templates:
            # Check that each container has the volume mounted.
            self.assertEqual(expected_mounts,
                             template['container']['volumeMounts'])

            # Check that each template has the PVC specified for
            # kfp>=0.1.31.2.
            if 'volumes' in template:
                self.assertEqual(expected_volumes, template['volumes'])

示例#19

0

显示文件

文件： kfp_timeseries_local_sin_wave.py 项目： weix0011/dataflow-sample-applications

def run():
    """Build the timeseries pipeline and run it with KubeflowDagRunner.

    The TFX image is read from the ``KUBEFLOW_TFX_IMAGE`` environment variable
    (``None`` when unset), and the default pod labels are extended with an
    SDK-environment label before being attached to the runner.
    """
    # Metadata config.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The TFX image is taken from 'KUBEFLOW_TFX_IMAGE' when that environment
    # variable is defined.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE')

    runner_cfg = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-timeseries'})

    dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_cfg, pod_labels_to_attach=labels)
    built_pipeline = timeseries_pipeline.create_pipeline(
        pipeline_name=config.PIPELINE_NAME,
        enable_cache=True,
        run_fn='timeseries.encoder_decoder.encoder_decoder_run_fn.run_fn',
        preprocessing_fn=(
            'timeseries.encoder_decoder.encoder_decoder_preprocessing.preprocessing_fn'
        ),
        data_path=DATA_PATH,
        pipeline_root=PIPELINE_ROOT,
        # NOTE(review): os.pathsep is the PATH-list separator, not a
        # directory separator or a sub-directory name, so this joins the
        # root with that single character. Kept as-is; confirm the intended
        # serving directory.
        serving_model_dir=os.path.join(config.PIPELINE_ROOT, os.pathsep),
        train_args=trainer_pb2.TrainArgs(num_steps=3360),
        eval_args=trainer_pb2.EvalArgs(num_steps=56),
        beam_pipeline_args=config.GCP_DATAFLOW_ARGS,
        trainer_custom_config={
            'train_batches': 500,
            'eval_batches': 250,
            'training_example_count': 28000,
            'eval_example_count': 14000,
            'timesteps': config.MODEL_CONFIG['timesteps'],
            'number_features': 6,
            'outer_units': 16,
            'inner_units': 4
        },
        transformer_custom_config=config.MODEL_CONFIG,
    )
    dag_runner.run(built_pipeline)

示例#20

0

显示文件

文件： kubeflow_runner.py 项目： davidcavazos/experiments

def run():
    """Build the template pipeline and submit it to the Kubeflow runner.

    Uses the default Kubeflow metadata config, the image named by the
    optional 'KUBEFLOW_TFX_IMAGE' environment variable, and the default pod
    labels extended with the 'tfx-template' SDK environment label.
    """
    # The defaults work with KF Pipelines installed through Kubeflow, where
    # metadata is written to the MySQL database inside the Kubeflow cluster.
    # With the lightweight deployment option the defaults may need to be
    # overridden.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The tfx CLI tool exports 'KUBEFLOW_TFX_IMAGE' to pass the image to the
    # pipeline; when it is defined that image is injected automatically.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    kfp_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)

    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=kfp_config, pod_labels_to_attach=labels)

    pipeline_kwargs = {
        'pipeline_name': configs.PIPELINE_NAME,
        'pipeline_root': PIPELINE_ROOT,
        # NOTE: Pass `query` (configs.BIG_QUERY_QUERY) instead of `data_path`
        # to use BigQueryExampleGen.
        'data_path': DATA_PATH,
        'preprocessing_fn': configs.PREPROCESSING_FN,
        'run_fn': configs.RUN_FN,
        'train_args': trainer_pb2.TrainArgs(
            num_steps=configs.TRAIN_NUM_STEPS),
        'eval_args': trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        'eval_accuracy_threshold': configs.EVAL_ACCURACY_THRESHOLD,
        # NOTE: To use BigQuery with Beam DirectRunner, additionally pass
        # `beam_pipeline_args` with the GCP settings from `configs`.
        'serving_model_dir': SERVING_MODEL_DIR,
    }
    runner.run(pipeline.create_pipeline(**pipeline_kwargs))

示例#21

0

显示文件

文件： kubeflow_runner.py 项目： MichalGasiorowski/tfx-titanic-training

    def run(self):
        """Create the pipeline from this object's settings and run it.

        The pipeline is built by `pipeline.create_pipeline` from the
        attributes stored on this instance and executed through
        `KubeflowDagRunner`.

        Returns:
            This instance, so that calls can be chained.
        """
        runner = kubeflow_dag_runner.KubeflowDagRunner(
            config=self.runner_config)

        pipeline_kwargs = dict(
            pipeline_name=self.PIPELINE_NAME,
            pipeline_root=self.PIPELINE_ROOT,
            data_root_uri=self.DATA_ROOT_URI,
            trainer_config=self.trainer_config,
            tuner_config=self.tuner_config,
            pusher_config=self.pusher_config,
            runtime_parameters_config=self.runtime_parameters_config,
            str_runtime_parameters_supported=True,
            int_runtime_parameters_supported=True,
            enable_cache=self.ENABLE_CACHE,
            code_folder=self.code_folder,
            local_run=self.LOCAL_RUN,
            beam_pipeline_args=self.beam_pipeline_args,
        )
        runner.run(pipeline.create_pipeline(**pipeline_kwargs))

        return self

示例#22

0

显示文件

    def testPatcher(self):
        """Checks the patcher's context entries and the rebuilt image name.

        Runs a dummy pipeline under the patcher and verifies that the
        temporary-output-file entries are present in the context, that the
        image build function was called once with the configured image, and
        that the runner config ends up holding the built image name.
        """
        requested_image = 'foo/bar'
        resolved_image = 'foo/bar@sha256:1234567890'
        build_image_stub = mock.MagicMock(return_value=resolved_image)

        patcher = kubeflow_dag_runner_patcher.KubeflowDagRunnerPatcher(
            call_real_run=True,
            build_image_fn=build_image_stub,
            use_temporary_output_file=True)
        dag_runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            tfx_image=requested_image)
        dag_runner = kubeflow_dag_runner.KubeflowDagRunner(
            config=dag_runner_config)
        dummy_pipeline = tfx_pipeline.Pipeline('dummy', 'dummy_root')

        with patcher.patch() as context:
            dag_runner.run(dummy_pipeline)

        # The patcher records its output-file settings in the context.
        self.assertTrue(context[patcher.USE_TEMPORARY_OUTPUT_FILE])
        self.assertIn(patcher.OUTPUT_FILE_PATH, context)

        # The configured image is built once and replaced by the result.
        build_image_stub.assert_called_once_with(requested_image)
        self.assertEqual(dag_runner_config.tfx_image, resolved_image)

示例#23

0

显示文件

文件： kubeflow_dag_runner.py 项目： thewertzgroup/covid

def run():
    """Build the BigQuery-backed pipeline and submit it to the Kubeflow runner.

    Uses the default Kubeflow metadata config and the image named by the
    optional 'KUBEFLOW_TFX_IMAGE' environment variable.
    """
    # The defaults work with KF Pipelines installed through Kubeflow, where
    # metadata is written to the MySQL database inside the Kubeflow cluster.
    # With the lightweight deployment option the defaults may need to be
    # overridden.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The tfx CLI tool exports 'KUBEFLOW_TFX_IMAGE' to pass the image to the
    # pipeline; when it is defined that image is injected automatically.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    kfp_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata, tfx_image=image)
    runner = kubeflow_dag_runner.KubeflowDagRunner(config=kfp_config)

    pipeline_kwargs = {
        'pipeline_name': configs.PIPELINE_NAME,
        'pipeline_root': PIPELINE_ROOT,
        # Step 7 (optional) is enabled here: examples come from BigQuery via
        # `query` rather than from `data_path`.
        'query': configs.BIG_QUERY_QUERY,
        'preprocessing_fn': configs.PREPROCESSING_FN,
        'trainer_fn': configs.TRAINER_FN,
        'train_args': configs.TRAIN_ARGS,
        'eval_args': configs.EVAL_ARGS,
        'serving_model_dir': SERVING_MODEL_DIR,
        # Step 7 (optional): GCP-related Beam configuration for BigQuery.
        # TODO(step 8): (Optional) Switch to configs.BEAM_PIPELINE_ARGS to
        #               use Dataflow.
        'beam_pipeline_args': configs.BIG_QUERY_BEAM_PIPELINE_ARGS,
        # TODO(step 9): (Optional) Add `ai_platform_training_args` and
        #               `ai_platform_serving_args` to use Cloud AI Platform.
    }
    runner.run(pipeline.create_pipeline(**pipeline_kwargs))

示例#24

0

显示文件

def _compile_pipeline(pipeline_def,
                      project_id,
                      pipeline_name,
                      pipeline_image,
                      pipeline_spec_path):
    """Compiles `pipeline_def` with the Kubeflow DAG runner.

    Args:
        pipeline_def: Pipeline object handed to `KubeflowDagRunner.run`.
        project_id: Not used by this function.
        pipeline_name: Not used by this function.
        pipeline_image: Not used by this function; no custom `tfx_image` is
            set on the runner config.
        pipeline_spec_path: Passed to the runner as `output_filename`.
    """
    # Only the default metadata config is supplied; a custom docker image
    # could be specified through the `tfx_image` argument.
    kfp_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=(
            kubeflow_dag_runner.get_default_kubeflow_metadata_config()))

    # Running the pipeline through this runner performs the compilation.
    kubeflow_dag_runner.KubeflowDagRunner(
        config=kfp_config,
        output_filename=pipeline_spec_path).run(pipeline_def)

示例#25

0

显示文件

文件： kubeflow_dag_runner_test.py 项目： robertlugg/tfx

    def testVolumeMountingPipelineOperatorFuncs(self):
        """Checks that a PVC mount operator is applied to every container.

        Compiles the two-step pipeline with an `onprem.mount_pvc` operator
        function and verifies that both container templates carry the volume
        mount and that the pipeline spec declares the persistent volume claim.
        """
        mount_volume_op = onprem.mount_pvc('my-persistent-volume-claim',
                                           'my-volume-name',
                                           '/mnt/volume-mount-path')
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            pipeline_operator_funcs=[mount_volume_op])

        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(
            _two_step_pipeline())
        file_path = os.path.join(self.test_dir, 'two_step_pipeline.tar.gz')
        self.assertTrue(tf.gfile.Exists(file_path))

        # Open the archive itself in a `with` block so its file handle is
        # closed, and check the extracted member before using it.
        with tarfile.open(file_path) as archive:
            pipeline_file = archive.extractfile('pipeline.yaml')
            self.assertIsNotNone(pipeline_file)
            with pipeline_file:
                # safe_load avoids the deprecated Loader-less yaml.load call.
                pipeline = yaml.safe_load(pipeline_file)

        containers = [
            c for c in pipeline['spec']['templates'] if 'container' in c
        ]
        self.assertEqual(2, len(containers))

        # Check that each container has the volume mounted.
        expected_mounts = [{
            'name': 'my-volume-name',
            'mountPath': '/mnt/volume-mount-path'
        }]
        for container in containers:
            self.assertEqual(expected_mounts,
                             container['container']['volumeMounts'])

        # Check that the PVC is specified.
        self.assertEqual([{
            'name': 'my-volume-name',
            'persistentVolumeClaim': {
                'claimName': 'my-persistent-volume-claim'
            }
        }], pipeline['spec']['volumes'])

示例#26

0

显示文件

文件： tfx-kfp.py 项目： rakesh283343/kfp_notebook_example

def main(unused_argv):
    """Builds the pipeline and submits it to the Kubeflow runner.

    When `FLAGS.distributed_training` is set, the module-level
    `_ai_platform_training_args` dict is updated in place with a custom
    scale tier before the pipeline is created.

    Args:
        unused_argv: Ignored.
    """
    # The defaults work with KF Pipelines installed through Kubeflow. With
    # the lightweight deployment option they may need to be overridden.
    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The tfx CLI tool exports 'KUBEFLOW_TFX_IMAGE' to pass the image to the
    # pipeline; when it is defined that image is used as the custom docker
    # image.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    kfp_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata,
        tfx_image=image)

    if FLAGS.distributed_training:
        # Machine types and replica counts for workers and parameter servers:
        # https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#ScaleTier
        distributed_overrides = {
            'scaleTier': 'CUSTOM',
            'masterType': 'large_model',
            'workerType': 'standard',
            'parameterServerType': 'standard',
            'workerCount': 2,
            'parameterServerCount': 1,
        }
        _ai_platform_training_args.update(distributed_overrides)

    runner = kubeflow_dag_runner.KubeflowDagRunner(config=kfp_config)
    pipeline_kwargs = dict(
        pipeline_name=_pipeline_name,
        pipeline_root=_pipeline_root,
        query=_query,
        module_file=_module_file,
        beam_pipeline_args=_beam_pipeline_args,
        ai_platform_training_args=_ai_platform_training_args,
        ai_platform_serving_args=_ai_platform_serving_args,
    )
    runner.run(_create_pipeline(**pipeline_kwargs))

示例#27

0

显示文件

文件： kubeflow_dag_runner_test.py 项目： suryaavala/tfx

    def testMountGcpServiceAccount(self):
        """Checks that every container gets the GCP credentials env var.

        Compiles the two-step pipeline with the default pipeline operator
        functions (`use_gcp_sa=True`) and verifies that each of the two
        container templates defines exactly one
        'GOOGLE_APPLICATION_CREDENTIALS' variable with the expected path.
        """
        operator_funcs = (
            kubeflow_dag_runner.get_default_pipeline_operator_funcs(
                use_gcp_sa=True))
        kubeflow_dag_runner.KubeflowDagRunner(
            config=kubeflow_dag_runner.KubeflowDagRunnerConfig(
                pipeline_operator_funcs=operator_funcs)).run(
                    _two_step_pipeline())
        file_path = 'two_step_pipeline.tar.gz'
        self.assertTrue(fileio.exists(file_path))

        # Open the archive itself in a `with` block so its file handle is
        # closed, and check the extracted member before using it.
        with tarfile.open(file_path) as archive:
            pipeline_file = archive.extractfile('pipeline.yaml')
            self.assertIsNotNone(pipeline_file)
            with pipeline_file:
                pipeline = yaml.safe_load(pipeline_file)

        containers = [
            c for c in pipeline['spec']['templates'] if 'container' in c
        ]
        self.assertEqual(2, len(containers))

        # Check that each container has default GCP credentials. Iterating
        # covers both containers; previously the first one was checked twice.
        for container in containers:
            credentials = [
                var for var in container['container']['env']
                if var['name'] == 'GOOGLE_APPLICATION_CREDENTIALS'
            ]
            self.assertEqual(1, len(credentials))
            self.assertEqual('/secret/gcp-credentials/user-gcp-sa.json',
                             credentials[0]['value'])

示例#28

0

显示文件

文件： titanic_keras_kfp.py 项目： wendy2003888/kagglepipe

def main(unused_argv):
    """Builds the Titanic Keras pipeline and submits it to the Kubeflow runner.

    All paths are derived from `FLAGS.project_root`, and a persistent volume
    claim is mounted into the pipeline via `onprem.mount_pvc`.

    Args:
        unused_argv: Ignored.
    """
    project_root = FLAGS.project_root
    serving_model_dir = os.path.join(project_root, 'serving_model',
                                     FLAGS.pipeline_name)
    module_file = os.path.join(FLAGS.project_root, 'titanic_keras_utils.py')
    # Pipeline artifacts are stored under this root directory.
    pipeline_root = os.path.join(FLAGS.project_root, 'pipeline')

    default_metadata = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # The tfx CLI tool exports 'KUBEFLOW_TFX_IMAGE' to pass the image to the
    # pipeline; when it is defined that image is used as the custom docker
    # image.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    # On K8s Engine (GKE) on Google Cloud Platform (GCP),
    # kubeflow_dag_runner.get_default_pipeline_operator_funcs() provides
    # GKE-specific defaults such as secrets. Here a PVC mount is used instead.
    operator_funcs = [
        onprem.mount_pvc(_persistent_volume_claim, _persistent_volume,
                         _persistent_volume_mount)
    ]
    kfp_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=default_metadata,
        tfx_image=image,
        pipeline_operator_funcs=operator_funcs)

    runner = kubeflow_dag_runner.KubeflowDagRunner(config=kfp_config)
    pipeline_kwargs = dict(
        pipeline_name=FLAGS.pipeline_name,
        pipeline_root=pipeline_root,
        data_root=FLAGS.data_root,
        module_file=module_file,
        serving_model_dir=serving_model_dir,
        # 0 means auto-detect based on the number of CPUs available during
        # execution time.
        direct_num_workers=0,
    )
    runner.run(create_tfx_pipeline(**pipeline_kwargs))

示例#29

0

显示文件

def main(_):
    """Generates the pipeline and runs it with the runner chosen by flags.

    `flags.FLAGS.runner` selects the orchestrator: 'local' runs the pipeline
    with `BeamDagRunner`, 'kubeflow' runs it with `KubeflowDagRunner`. Any
    other value terminates the process with exit status 1 and an error
    message on stderr.

    Args:
        _: Ignored.
    """
    # Imported locally so the function does not rely on the `exit` builtin,
    # which is only installed by the `site` module.
    import sys

    pipeline = generate_pipeline(flags.FLAGS.pipeline_name,
                                 flags.FLAGS.pipeline_root,
                                 flags.FLAGS.train_data, flags.FLAGS.test_data,
                                 flags.FLAGS.train_steps,
                                 flags.FLAGS.eval_steps,
                                 flags.FLAGS.pusher_target, flags.FLAGS.runner)

    if flags.FLAGS.runner == 'local':
        BeamDagRunner().run(pipeline)
    # A 'flink' runner would need slight changes to the TFX code to support
    # other Beam runners, e.g. BeamDagRunner(pipelineOptions).run(pipeline).
    elif flags.FLAGS.runner == 'kubeflow':
        metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
        )
        tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
        runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
        kubeflow_dag_runner.KubeflowDagRunner(
            config=runner_config).run(pipeline)
    else:
        # A string argument is printed to stderr and yields exit status 1,
        # so the status is unchanged but the failure is no longer silent.
        sys.exit('Unsupported runner: {!r} (expected "local" or '
                 '"kubeflow")'.format(flags.FLAGS.runner))

示例#30

0

显示文件

文件： kubeflow_dag_runner.py 项目： Shubhangiwaghmare7/Data-Science

def run():
    """Build the BigQuery-backed pipeline and submit it to the Kubeflow runner.

    Uses the default Kubeflow metadata config, the image named by the
    optional 'KUBEFLOW_TFX_IMAGE' environment variable, and the default pod
    labels extended with the 'tfx-template' SDK environment label.
    """

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # dict.update() mutates in place and returns None, so the labels must be
    # fetched first and updated in a separate statement; chaining the two
    # calls would bind `pod_labels` to None and drop the custom label.
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            query=configs.BIG_QUERY_QUERY,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))