def run():
    """Build the advert-pred pipeline and submit it via the Kubeflow runner."""
    # Metadata defaults target a standard Kubeflow Pipelines installation.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # The tfx CLI exports KUBEFLOW_TFX_IMAGE so the container image can be
    # injected here; None falls back to the runner's default image.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    dag_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=image,
    )

    # Tag every pod so telemetry can attribute runs to this pipeline.
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'advert-pred'})

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        data_path=DATA_PATH,
        preprocessing_fn=PREPROCESSING_FN,
        run_fn=RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=EVAL_NUM_STEPS),
        eval_accuracy_threshold=EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
    )
    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=dag_config, pod_labels_to_attach=labels)
    runner.run(dsl_pipeline)
def run(metadata_file: Optional[Text] = None):
    """Define a kubeflow pipeline.

    Args:
      metadata_file: Optional path to the metadata file supplying the
        system/model configuration dictionaries.
    """
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    metadata = get_metadata(metadata_file)
    system_config = get_config(metadata, "system_configurations")
    model_config = get_config(metadata, "model_configurations")

    # The tfx CLI exports KUBEFLOW_TFX_IMAGE to pass the image to pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from the CLI
    # handler to the pipeline DSL file than environment variables.
    tfx_image = os.environ.get("KUBEFLOW_TFX_IMAGE", None)
    logging.info(f"Current tfx image used: {tfx_image}")

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=tfx_image,
    )

    # Name + version uniquely identify this pipeline in KFP and telemetry.
    full_name = metadata["pipeline_name"] + "_" + metadata["pipeline_version"]

    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: full_name})

    # Only pass AI Platform training args when the feature flag is on.
    training_args = None
    if system_config["enable_gpc_ai_platform_training"]:
        training_args = system_config["GCP_AI_PLATFORM_TRAINING_ARGS"]

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=full_name,
            pipeline_root=system_config["PIPELINE_ROOT"],
            query=model_config["query_script_path"],
            preprocessing_fn=system_config["preprocessing_fn"],
            run_fn=system_config["run_fn"],
            train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=100),
            eval_args=trainer_pb2.EvalArgs(splits=["train"], num_steps=50),
            model_serve_dir=system_config["MODEL_SERVE_DIR"],
            beam_pipeline_args=system_config["DATAFLOW_BEAM_PIPELINE_ARGS"],
            ai_platform_training_args=training_args,
            # (Optional) Uncomment below to serve from Cloud AI Platform.
            # ai_platform_serving_args=system_config["GCP_AI_PLATFORM_SERVING_ARGS"],
            enable_cache=system_config["enable_cache"],
            system_config=system_config,  # passing config parameters downstream
            model_config=model_config,  # passing model parameters downstream
        ))
def run():
    """Define a kubeflow pipeline."""
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # Image is pinned here instead of being read from KUBEFLOW_TFX_IMAGE.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler
    # to pipeline DSL file, instead of using environment vars.
    # tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    tfx_image = 'gcr.io/gcp-nyc/tfx-pipeline'
    dag_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # Tag pods so telemetry can attribute runs to this template.
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        gcp_project=configs.GOOGLE_CLOUD_PROJECT,
        gcs_bucket=configs.GCS_BUCKET_NAME,
        tcga_betas_query=configs.TCGA_BETAS_QUERY,
        tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
        tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
        cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
        cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
        cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
        pivot_query=configs.PIVOT_DATASET_QUERY,
        pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
        final_dataset_query=configs.TRAIN_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
        beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        # TODO(step 8): (Optional) Uncomment below to use Dataflow.
        # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
        # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
        # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
    )
    kubeflow_dag_runner.KubeflowDagRunner(
        config=dag_config, pod_labels_to_attach=labels).run(dsl_pipeline)
def run():
    """Define a kubeflow pipeline.

    Builds the pipeline from the `conf` dictionary and `configs` module and
    submits it through KubeflowDagRunner.
    """
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined (exported by the
    # tfx CLI).
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler
    # to pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # BUG FIX: dict.update() returns None, so chaining it directly off
    # get_default_pod_labels() left pod_labels as None and silently dropped
    # both the default pod labels and the telemetry label. Mutate in two steps.
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=conf['kfp']['pipeline_name'],
            pipeline_root=conf['pipeline_root_dir'],
            data_path=conf['train_data'],
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            module_file='pjm_trainer.py',
            # preprocessing_fn=configs.PREPROCESSING_FN,
            # run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=conf['serving_model_dir'],
            # TODO(step 7): (Optional) Uncomment below to use provide GCP
            # related config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
def run():
    """Define a kubeflow pipeline."""
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # The Kubeflow TFX image is injected when KUBEFLOW_TFX_IMAGE is defined;
    # the tfx CLI exports this environment variable.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler
    # to pipeline DSL file, instead of using environment vars.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    dag_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=image)

    # Tag pods so telemetry can attribute runs to this template.
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        data_path=DATA_PATH,
        # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
        # query=configs.BIG_QUERY_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
        # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
        # beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
    )
    kubeflow_dag_runner.KubeflowDagRunner(
        config=dag_config, pod_labels_to_attach=labels).run(dsl_pipeline)
def run():
    """Define a kubeflow pipeline."""
    # Metadata config for a standard Kubeflow Pipelines installation.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # The Kubeflow TFX image is injected when KUBEFLOW_TFX_IMAGE is defined.
    image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    dag_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=image)

    # Tag pods so telemetry can attribute runs to this pipeline.
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-timeseries'})

    # NOTE(review): joining PIPELINE_ROOT with os.pathsep (':' on POSIX) is
    # an odd serving path — confirm this is the intended directory.
    serving_dir = os.path.join(config.PIPELINE_ROOT, os.pathsep)

    ts_pipeline = timeseries_pipeline.create_pipeline(
        pipeline_name=config.PIPELINE_NAME,
        enable_cache=True,
        run_fn='timeseries.encoder_decoder.encoder_decoder_run_fn.run_fn',
        preprocessing_fn='timeseries.encoder_decoder.encoder_decoder_preprocessing.preprocessing_fn',
        data_path=DATA_PATH,
        pipeline_root=PIPELINE_ROOT,
        serving_model_dir=serving_dir,
        train_args=trainer_pb2.TrainArgs(num_steps=3360),
        eval_args=trainer_pb2.EvalArgs(num_steps=56),
        beam_pipeline_args=config.GCP_DATAFLOW_ARGS,
        trainer_custom_config={
            'train_batches': 500,
            'eval_batches': 250,
            'training_example_count': 28000,
            'eval_example_count': 14000,
            'timesteps': config.MODEL_CONFIG['timesteps'],
            'number_features': 6,
            'outer_units': 16,
            'inner_units': 4,
        },
        transformer_custom_config=config.MODEL_CONFIG,
    )
    kubeflow_dag_runner.KubeflowDagRunner(
        config=dag_config, pod_labels_to_attach=labels).run(ts_pipeline)
def run():
    """Define a kubeflow pipeline.

    Builds a BigQuery-sourced pipeline that trains and serves on Cloud AI
    Platform, and submits it through KubeflowDagRunner.
    """
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # BUG FIX: dict.update() returns None, so chaining it directly off
    # get_default_pod_labels() left pod_labels as None and silently dropped
    # both the default pod labels and the telemetry label. Mutate in two steps.
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            query=configs.BIG_QUERY_QUERY,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
def run():
    """Define a kubeflow pipeline."""
    # Metadata config. The defaults work with a KF Pipelines installation on
    # Kubeflow; a lightweight deployment may need overrides. Under Kubeflow,
    # ML metadata is written to the MySQL database inside the cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

    # Image comes from the project configuration rather than the environment.
    dag_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=configs.PIPELINE_IMAGE)

    # Tag pods so telemetry can attribute runs to this template.
    labels = kubeflow_dag_runner.get_default_pod_labels()
    labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

    dsl_pipeline = pipeline.create_pipeline(
        pipeline_name=configs.PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        data_path=DATA_PATH,
        # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
        # query=configs.BIG_QUERY_QUERY,
        preprocessing_fn=configs.PREPROCESSING_FN,
        run_fn=configs.RUN_FN,
        train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
        eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
        eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
        serving_model_dir=SERVING_MODEL_DIR,
        # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
        # beam_pipeline_args=configs.BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
    )
    kubeflow_dag_runner.KubeflowDagRunner(
        config=dag_config, pod_labels_to_attach=labels).run(dsl_pipeline)