Example #1
def run():
    """Define a pipeline to be executed using Kubeflow V2 runner."""

    runner_config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
        default_image=configs.PIPELINE_IMAGE)

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=_PIPELINE_ROOT,
        data_path=_DATA_PATH,
        # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
        # query=configs.BIG_QUERY_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=_SERVING_MODEL_DIR,
        # TODO(step 7): (Optional) Uncomment below to provide GCP-related
        #               config for BigQuery with Beam DirectRunner.
        # beam_pipeline_args=configs.
        # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        # TODO(step 8): (Optional) Uncomment below to use Dataflow.
        # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
    )

    runner = kubeflow_v2_dag_runner.KubeflowV2DagRunner(config=runner_config)

    runner.run(pipeline=dsl_pipeline)
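All five examples assume the TFX template project layout: a local configs module that centralizes pipeline constants, a pipeline module exposing create_pipeline(), and module-level path constants such as _PIPELINE_ROOT, _DATA_PATH, and _SERVING_MODEL_DIR. The exact values are project-specific; a minimal sketch of the referenced configs names (every value below is a placeholder inferred from the snippets, not taken from a real project) could look like:

# configs.py -- hypothetical values for illustration only.
PIPELINE_NAME = 'my_pipeline'
PIPELINE_IMAGE = 'gcr.io/my-project/my_pipeline'  # container image for this pipeline

# Dotted paths to user code, resolved by the Transform and Trainer components.
PREPROCESSING_FN = 'models.preprocessing.preprocessing_fn'
RUN_FN = 'models.model.run_fn'

TRAIN_NUM_STEPS = 100
EVAL_NUM_STEPS = 150
EVAL_ACCURACY_THRESHOLD = 0.6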
Example #2
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database
    # inside the Kubeflow cluster.
    metadata_config = (
        kubeflow_dag_runner.get_default_kubeflow_metadata_config())

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass it to the pipelines.
    # TODO(b/157598477): Find a better way to pass parameters from the CLI
    # handler to the pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    # Note: dict.update() mutates in place and returns None, so build the
    # labels in two steps instead of chaining the calls.
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
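Because dict.update() returns None, the two-step construction above is required; an equivalent single-expression form using dict unpacking, if you prefer it, would be:

# Merge the default pod labels with the template label in one expression.
pod_labels = {
    **kubeflow_dag_runner.get_default_pod_labels(),
    telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template',
}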
Example #3
def run():
  """Define a pipeline to be executed using Kubeflow V2 runner."""
  # TODO(b/157598477): Find a better way to pass parameters from the CLI
  # handler to the pipeline DSL file, instead of using environment vars.
  tfx_image = os.environ.get(labels.TFX_IMAGE_ENV)
  project_id = os.environ.get(labels.GCP_PROJECT_ID_ENV)
  api_key = os.environ.get(labels.API_KEY_ENV)

  runner_config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
      project_id=project_id,
      display_name='tfx-kubeflow-v2-pipeline-{}'.format(configs.PIPELINE_NAME),
      default_image=tfx_image)

  dsl_pipeline = pipeline.create_pipeline(
      pipeline_name=configs.PIPELINE_NAME,
      pipeline_root=_PIPELINE_ROOT,
      data_path=_DATA_PATH,
      # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
      # query=configs.BIG_QUERY_QUERY,
      preprocessing_fn=configs.PREPROCESSING_FN,
      run_fn=configs.RUN_FN,
      train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
      eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
      eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
      serving_model_dir=_SERVING_MODEL_DIR,
      # TODO(step 7): (Optional) Uncomment below to provide GCP-related
      #               config for BigQuery with Beam DirectRunner.
      # beam_pipeline_args=configs.
      # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
      # TODO(step 8): (Optional) Uncomment below to use Dataflow.
      # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
  )

  runner = kubeflow_v2_dag_runner.KubeflowV2DagRunner(config=runner_config)

  if os.environ.get(labels.RUN_FLAG_ENV, False):
    # Only trigger the execution when invoked by 'run' command.
    runner.run(pipeline=dsl_pipeline, api_key=api_key)
  else:
    runner.compile(pipeline=dsl_pipeline, write_out=True)
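One subtlety in the branch above: os.environ.get() returns a string whenever the variable is set, so any non-empty value (even 'false') takes the run path. If that matters in your setup, a stricter check is straightforward:

# Treat only an explicit, case-insensitive 'true' as a run request.
should_run = os.environ.get(labels.RUN_FLAG_ENV, '').lower() == 'true'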
Example #4
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database
    # inside the Kubeflow cluster.
    metadata_config = (
        tfx.orchestration.experimental.get_default_kubeflow_metadata_config())

    runner_config = tfx.orchestration.experimental.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=configs.PIPELINE_IMAGE)
    pod_labels = {
        'add-pod-env': 'true',
        tfx.orchestration.experimental.LABEL_KFP_SDK_ENV: 'tfx-template'
    }
    tfx.orchestration.experimental.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            # TODO(step 5): (Optional) Set the path of the customized schema.
            # schema_path=generated_schema_path,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=tfx.proto.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=tfx.proto.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
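Examples #4 and #5 reach everything through a tfx namespace object (tfx.proto.TrainArgs, tfx.orchestration.LocalDagRunner, and so on); in TFX 1.x that style conventionally comes from the public v1 API surface, presumably imported as:

# Public TFX 1.x API namespace (assumed from the call style above).
from tfx import v1 as tfx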
Example #5
def run():
    """Define a local pipeline."""

    tfx.orchestration.LocalDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=tfx.proto.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=tfx.proto.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            metadata_connection_config=(
                tfx.orchestration.metadata.sqlite_metadata_connection_config(
                    METADATA_PATH))))
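A runner module like this is typically executed directly (python local_runner.py), so these files usually end with a small entry point; a common pattern, not shown in the snippet above:

from absl import logging

# Run the pipeline when the module is executed as a script.
if __name__ == '__main__':
    logging.set_verbosity(logging.INFO)
    run()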