def run():
    """Define a kubeflow pipeline."""

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'advert-pred'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels).run(
            pipeline.create_pipeline(
                pipeline_name=PIPELINE_NAME,
                pipeline_root=PIPELINE_ROOT,
                data_path=DATA_PATH,
                preprocessing_fn=PREPROCESSING_FN,
                run_fn=RUN_FN,
                train_args=trainer_pb2.TrainArgs(num_steps=TRAIN_NUM_STEPS),
                eval_args=trainer_pb2.EvalArgs(num_steps=EVAL_NUM_STEPS),
                eval_accuracy_threshold=EVAL_ACCURACY_THRESHOLD,
                serving_model_dir=SERVING_MODEL_DIR,
            ))
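
Each run() variant in this listing follows the kubeflow_runner.py layout from the TFX templates: imports and module-level constants sit at the top of the file, and a small entry point at the bottom invokes run(). A minimal sketch of that surrounding boilerplate, assuming the usual template module names (the pipeline module and constants such as PIPELINE_NAME, PIPELINE_ROOT, DATA_PATH, and SERVING_MODEL_DIR are defined per project and are not shown in the snippets):

import os

from absl import logging

from tfx.orchestration.kubeflow import kubeflow_dag_runner
from tfx.proto import trainer_pb2
from tfx.utils import telemetry_utils

# Project-specific module that defines create_pipeline(); the name follows
# the template convention and may differ per project.
from pipeline import pipeline

if __name__ == '__main__':
    logging.set_verbosity(logging.INFO)
    run()
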
def run(metadata_file: Optional[Text] = None):
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    metadata = get_metadata(metadata_file)
    system_config = get_config(metadata, "system_configurations")
    model_config = get_config(metadata, "model_configurations")
    # tfx_image = system_config.get("TFX_IMAGE", None)
    tfx_image = os.environ.get("KUBEFLOW_TFX_IMAGE", None)
    logging.info(f"Current tfx image used: {tfx_image}")

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=tfx_image,
        #pipeline_operator_funcs=([set_memory_request_and_limits(
        #    system_config["memory_request"], system_config["memory_limit"])]),
    )
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({
        telemetry_utils.LABEL_KFP_SDK_ENV:
        metadata["pipeline_name"] + "_" + metadata["pipeline_version"]
    })

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=metadata["pipeline_name"] + "_" +
            metadata["pipeline_version"],
            pipeline_root=system_config["PIPELINE_ROOT"],
            query=model_config["query_script_path"],
            preprocessing_fn=system_config["preprocessing_fn"],
            run_fn=system_config["run_fn"],
            train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=100),
            eval_args=trainer_pb2.EvalArgs(splits=["train"], num_steps=50),
            model_serve_dir=system_config["MODEL_SERVE_DIR"],
            beam_pipeline_args=system_config["DATAFLOW_BEAM_PIPELINE_ARGS"],
            ai_platform_training_args=system_config[
                "GCP_AI_PLATFORM_TRAINING_ARGS"]
            if system_config["enable_gpc_ai_platform_training"] else None,
            # (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=system_config["GCP_AI_PLATFORM_SERVING_ARGS"],
            enable_cache=system_config["enable_cache"],
            system_config=system_config,  # passing config parameters downstream
            model_config=model_config,  # passing model parameters downstream
        ))
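
This variant depends on two helpers, get_metadata and get_config, that are not shown in the snippet. A minimal sketch, assuming the metadata file is YAML with top-level pipeline_name and pipeline_version keys plus system_configurations and model_configurations sections (the names used in the calls above):

import yaml

def get_metadata(metadata_file):
    """Load pipeline metadata from a YAML file (assumed format)."""
    with open(metadata_file) as f:
        return yaml.safe_load(f)

def get_config(metadata, section):
    """Return one configuration section, e.g. 'system_configurations'."""
    return metadata[section]

The commented-out pipeline_operator_funcs entry suggests a per-container resource helper; a hypothetical version built on the kfp v1 SDK's container resource setters could look like:

def set_memory_request_and_limits(memory_request, memory_limit):
    def _apply(container_op):
        container_op.container.set_memory_request(memory_request)
        container_op.container.set_memory_limit(memory_limit)
    return _apply
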
Example #3
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    # tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    tfx_image = 'gcr.io/gcp-nyc/tfx-pipeline'

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            gcp_project=configs.GOOGLE_CLOUD_PROJECT,
            gcs_bucket=configs.GCS_BUCKET_NAME,
            tcga_betas_query=configs.TCGA_BETAS_QUERY,
            tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
            tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
            cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
            cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
            cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
            pivot_query=configs.PIVOT_DATASET_QUERY,
            pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
            final_dataset_query=configs.TRAIN_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
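
Several examples in this listing reference module-level constants such as PIPELINE_ROOT, SERVING_MODEL_DIR, and DATA_PATH without defining them. In the TFX template they are typically derived from the configs module roughly as follows (a sketch of the template convention, not code from this snippet):

OUTPUT_DIR = os.path.join('gs://', configs.GCS_BUCKET_NAME)
PIPELINE_ROOT = os.path.join(OUTPUT_DIR, 'tfx_pipeline_output',
                             configs.PIPELINE_NAME)
SERVING_MODEL_DIR = os.path.join(PIPELINE_ROOT, 'serving_model')
DATA_PATH = 'gs://{}/tfx-template/data/'.format(configs.GCS_BUCKET_NAME)
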
Example #4
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=conf['kfp']['pipeline_name'],
            pipeline_root=conf['pipeline_root_dir'],
            data_path=conf['train_data'],
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            module_file='pjm_trainer.py',
            #   preprocessing_fn=configs.PREPROCESSING_FN,
            #   run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=conf['serving_model_dir'],
            # TODO(step 7): (Optional) Uncomment below to use provide GCP related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
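
The conf mapping used above is not defined in the snippet; given keys like conf['kfp']['pipeline_name'], it plausibly comes from a YAML file. A minimal loader (file name hypothetical):

import yaml

with open('conf.yaml') as f:
    conf = yaml.safe_load(f)
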
Example #5
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        ))
def run():
    """Define a kubeflow pipeline."""

    # Metadata config.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-timeseries'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        timeseries_pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            enable_cache=True,
            run_fn='timeseries.encoder_decoder.encoder_decoder_run_fn.run_fn',
            preprocessing_fn=
            'timeseries.encoder_decoder.encoder_decoder_preprocessing.preprocessing_fn',
            data_path=DATA_PATH,
            pipeline_root=PIPELINE_ROOT,
            serving_model_dir=os.path.join(config.PIPELINE_ROOT, os.pathsep),
            train_args=trainer_pb2.TrainArgs(num_steps=3360),
            eval_args=trainer_pb2.EvalArgs(num_steps=56),
            beam_pipeline_args=config.GCP_DATAFLOW_ARGS,
            trainer_custom_config={
                'train_batches': 500,
                'eval_batches': 250,
                'training_example_count': 28000,
                'eval_example_count': 14000,
                'timesteps': config.MODEL_CONFIG['timesteps'],
                'number_features': 6,
                'outer_units': 16,
                'inner_units': 4
            },
            transformer_custom_config=config.MODEL_CONFIG,
        ))
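
The trainer_custom_config dict is handed to the Trainer component and surfaces inside the training module as fn_args.custom_config. A minimal sketch of how the run_fn named above might consume it (body abbreviated, variable names assumed):

def run_fn(fn_args):
    custom = fn_args.custom_config
    timesteps = custom['timesteps']
    train_batches = custom['train_batches']
    # ... build the encoder/decoder model and fit it using these values ...
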
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            query=configs.BIG_QUERY_QUERY,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
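
The two AI Platform argument dicts passed here are, in the TFX template, plain dictionaries along roughly these lines (region and other values are placeholders, not taken from the snippet):

GCP_AI_PLATFORM_TRAINING_ARGS = {
    'project': GOOGLE_CLOUD_PROJECT,
    'region': 'us-central1',
    'masterConfig': {
        'imageUri': PIPELINE_IMAGE,
    },
}

GCP_AI_PLATFORM_SERVING_ARGS = {
    'model_name': PIPELINE_NAME,
    'project_id': GOOGLE_CLOUD_PROJECT,
    'regions': ['us-central1'],
}
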
Example #8
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=configs.PIPELINE_IMAGE)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        ))
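
The BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS referenced in the NOTE above is, in the template, a list of Beam flags that gives BigQueryExampleGen a GCP project and a temp location, roughly:

BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS = [
    '--project=' + GOOGLE_CLOUD_PROJECT,
    '--temp_location=' + os.path.join('gs://', GCS_BUCKET_NAME, 'tmp'),
]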