vertex_training_custom_config=vertex_training_custom_config, serving_model_dir=serving_model_dir, # ai_platform_serving_args=ai_platform_serving_args ) p = pipeline.Pipeline( pipeline_name=pipeline_name, pipeline_root=pipeline_root, components=components, beam_pipeline_args=beam_pipeline_args, ) # Metadata config. The defaults works work with the installation of # KF Pipelines using Kubeflow. If installing KF Pipelines using the # lightweight deployment option, you may need to override the defaults. metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config( ) # This pipeline automatically injects the Kubeflow TFX image if the # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx # cli tool exports the environment variable to pass to the pipelines. tfx_image = os.environ.get( "KUBEFLOW_TFX_IMAGE", "gcr.io/oreilly-book/ml-pipelines-tfx-custom:latest", ) runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig( kubeflow_metadata_config=metadata_config, # Specify custom docker image to use. tfx_image=tfx_image, )
def main(argv):
    """Compile the TFX pipeline into a Kubeflow pipeline spec file.

    Builds runtime parameters for the data root and eval split name,
    assembles the pipeline via ``pipeline.create_pipeline``, and writes the
    compiled Kubeflow spec to ``FLAGS.pipeline_spec_path``.

    Args:
      argv: Unused positional command-line arguments (absl convention).
    """
    del argv  # Unused.

    beam_pipeline_args = [
        '--direct_running_mode=multi_processing',
        # 0 means auto-detect based on the number of CPUs available
        # during execution time.
        '--direct_num_workers=0',
    ]

    # Metadata is handled by the Kubeflow runner config below, so no
    # explicit MLMD connection config is passed to the pipeline.
    metadata_connection_config = None

    # Runtime parameters: values may be overridden per pipeline run.
    data_root_uri = data_types.RuntimeParameter(
        name='data-root-uri',
        ptype=str,
        default=FLAGS.data_root_uri)

    eval_split_name = data_types.RuntimeParameter(
        name='eval-split-name',
        ptype=str,
        default='eval')

    # Plain-dict form of example_gen_pb2.Output: 4 hash buckets for the
    # 'train' split, 1 for the (runtime-named) eval split.
    output_config = {
        "split_config": {
            "splits": [
                {"name": "train", "hash_buckets": 4},
                {"name": eval_split_name, "hash_buckets": 1},
            ]
        }
    }

    # Create the pipeline.
    pipeline_def = pipeline.create_pipeline(
        pipeline_name=FLAGS.pipeline_name,
        pipeline_root=FLAGS.pipeline_root,
        data_root_uri=data_root_uri,
        output_config=output_config,
        beam_pipeline_args=beam_pipeline_args,
        metadata_connection_config=metadata_connection_config)

    # Lazy %-formatting: the message is only built if this level is logged.
    logging.info('Compiling pipeline to: %s', FLAGS.pipeline_spec_path)

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # tfx_image deliberately not set: the runner falls back to its
        # default image. Pass tfx_image=... here to use a custom one.
    )

    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_filename=FLAGS.pipeline_spec_path)
    runner.run(pipeline_def)
def _get_kubeflow_metadata_config(
    self) -> kubeflow_pb2.KubeflowMetadataConfig:
  """Return the stock Kubeflow metadata (MLMD) configuration, unmodified."""
  return kubeflow_dag_runner.get_default_kubeflow_metadata_config()
def _get_kubeflow_metadata_config(
    self, pipeline_name: Text) -> kubeflow_pb2.KubeflowMetadataConfig:
  """Return the default Kubeflow metadata config with a per-pipeline DB name.

  Args:
    pipeline_name: Name used to derive the pipeline-specific MLMD database.

  Returns:
    A KubeflowMetadataConfig whose MySQL database name has been overridden.
  """
  metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
  # Point MLMD at a database dedicated to this pipeline.
  metadata_config.mysql_db_name.value = self._get_mlmd_db_name(pipeline_name)
  return metadata_config