def resnet_train( project_id, output, region='us-central1', model='bolts', version='beta1', tf_version='1.9', train_csv='gs://bolts_image_dataset/bolt_images_train.csv', validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv', labels='gs://bolts_image_dataset/labels.txt', depth=50, train_batch_size=1024, eval_batch_size=1024, steps_per_eval=250, train_steps=10000, num_train_images=218593, num_eval_images=54648, num_label_classes=10): preprocess = resnet_preprocess_op(project_id, output, train_csv, validation_csv, labels).apply(gcp.use_gcp_secret()) train = resnet_train_op(preprocess.output, output, region, depth, train_batch_size, eval_batch_size, steps_per_eval, train_steps, num_train_images, num_eval_images, num_label_classes, tf_version).apply(gcp.use_gcp_secret()) deploy = resnet_deploy_op(train.output, model, version, project_id, region, tf_version).apply(gcp.use_gcp_secret())
def kubeflow_training( output, project, evaluation='gs://ml-pipeline-playground/flower/eval100.csv', train='gs://ml-pipeline-playground/flower/train200.csv', schema='gs://ml-pipeline-playground/flower/schema.json', learning_rate=0.1, hidden_layer_size='100,50', steps=2000, target='label', workers=0, pss=0, preprocess_mode='local', predict_mode='local'): # TODO: use the argo job name as the workflow workflow = '{{workflow.name}}' preprocess = dataflow_tf_transform_op( train, evaluation, schema, project, preprocess_mode, '', '%s/%s/transformed' % (output, workflow)).apply( gcp.use_gcp_secret('user-gcp-sa')) training = kubeflow_tf_training_op( preprocess.output, schema, learning_rate, hidden_layer_size, steps, target, '', '%s/%s/train' % (output, workflow)).apply( gcp.use_gcp_secret('user-gcp-sa')) prediction = dataflow_tf_predict_op( evaluation, schema, target, training.output, predict_mode, project, '%s/%s/predict' % (output, workflow)).apply( gcp.use_gcp_secret('user-gcp-sa')) confusion_matrix = confusion_matrix_op( prediction.output, '%s/%s/confusionmatrix' % (output, workflow)).apply( gcp.use_gcp_secret('user-gcp-sa'))
def kubeflow_training(
        data_file='gs://images_pama/txt/class_ch.txt',
        image_data_pack='gs://images_pama/center_housing.tar.gz',
        parser='simple',
        skip=False,
        num_epochs='2000',
        gcs_weight_path='gs://images_pama/model/class_ch_model_frcnn.hdf5',
        number_of_rois='32',
        network='resnet50',
        prediction_dir='gs://images_pama/config.pickle',
):
    # set the flag to use GPU trainer (note: the flag is not consumed below)
    use_gpu = True

    training = kubeflow_tf_training_op(
        training_data_file=data_file,
        training_image_pack=image_data_pack,
        parser=parser,
        skip=skip,
        num_epochs=num_epochs,
        gcs_weight_path=gcs_weight_path
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    validation = kubeflow_tf_validation_op(
        validation_data_file=data_file,
        number_of_rois=number_of_rois,
        network=network,
        prediction_dir=prediction_dir,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
def kaggle_houseprice(bucket_name: str, commit_sha: str): import os stepDownloadData = dsl.ContainerOp( name='download dataset', image=os.path.join(args.gcr_address, 'kaggle_download:latest'), command=['python', 'download_data.py'], arguments=["--bucket_name", bucket_name], file_outputs={ 'train_dataset': '/train.txt', 'test_dataset': '/test.txt' }).apply(use_gcp_secret('user-gcp-sa')) stepVisualizeTable = dsl.ContainerOp( name='visualize dataset in table', image=os.path.join(args.gcr_address, 'kaggle_visualize_table:latest'), command=['python', 'visualize.py'], arguments=[ '--train_file_path', '%s' % stepDownloadData.outputs['train_dataset'] ], output_artifact_paths={ 'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json' }).apply(use_gcp_secret('user-gcp-sa')) stepVisualizeHTML = dsl.ContainerOp( name='visualize dataset in html', image=os.path.join(args.gcr_address, 'kaggle_visualize_html:latest'), command=['python', 'visualize.py'], arguments=[ '--train_file_path', '%s' % stepDownloadData.outputs['train_dataset'], '--commit_sha', commit_sha, '--bucket_name', bucket_name ], output_artifact_paths={ 'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json' }).apply(use_gcp_secret('user-gcp-sa')) stepTrainModel = dsl.ContainerOp( name='train model', image=os.path.join(args.gcr_address, 'kaggle_train:latest'), command=['python', 'train.py'], arguments=[ '--train_file', '%s' % stepDownloadData.outputs['train_dataset'], '--test_file', '%s' % stepDownloadData.outputs['test_dataset'], '--output_bucket', bucket_name ], file_outputs={ 'result': '/result_path.txt' }).apply(use_gcp_secret('user-gcp-sa')) stepSubmitResult = dsl.ContainerOp( name='submit result to kaggle competition', image=os.path.join(args.gcr_address, 'kaggle_submit:latest'), command=['python', 'submit_result.py'], arguments=[ '--result_file', '%s' % stepTrainModel.outputs['result'], '--submit_message', 'submit' ]).apply(use_gcp_secret('user-gcp-sa'))
def train_and_deploy_helper(preprocess, hparam_train):
    """Helper function called from the two pipeline functions"""

    # Step 3: Train the model some more, but on the pipelines cluster itself
    train_tuned = dsl.ContainerOp(
        name='traintuned',
        # image needs to be a compile-time string
        image='gcr.io/ai-analytics-solutions/babyweight-pipeline-traintuned:latest',
        arguments=[
            hparam_train.outputs['jobname'],
            preprocess.outputs['bucket']
        ],
        file_outputs={'train': '/output.txt'}
    ).apply(use_gcp_secret('user-gcp-sa'))
    train_tuned.set_memory_request('2G')
    train_tuned.set_cpu_request('1')

    # Step 4: Deploy the trained model to Cloud ML Engine
    deploy_cmle = dsl.ContainerOp(
        name='deploycmle',
        # image needs to be a compile-time string
        image='gcr.io/ai-analytics-solutions/babyweight-pipeline-deploycmle:latest',
        arguments=[
            train_tuned.outputs['train'],  # modeldir
            'babyweight',
            'mlp'
        ],
        file_outputs={
            'model': '/model.txt',
            'version': '/version.txt'
        }
    ).apply(use_gcp_secret('user-gcp-sa'))

    return deploy_cmle
def resnet_train( project_id, output, region='us-central1', model='bolts', version='beta1', tf_version='1.12', train_csv='gs://bolts_image_dataset/bolt_images_train.csv', validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv', labels='gs://bolts_image_dataset/labels.txt', depth=50, train_batch_size=1024, eval_batch_size=1024, steps_per_eval=250, train_steps=10000, num_train_images=218593, num_eval_images=54648, num_label_classes=10): output_dir = os.path.join(str(output), '{{workflow.name}}') preprocess_staging = os.path.join(output_dir, 'staging') preprocess_output = os.path.join(output_dir, 'preprocessed_output') train_output = os.path.join(output_dir, 'model') preprocess = resnet_preprocess_op( project_id, preprocess_output, preprocess_staging, train_csv, validation_csv, labels, train_batch_size, eval_batch_size).apply(gcp.use_gcp_secret()) train = resnet_train_op(project_id, preprocess_output, train_output, region, depth, train_batch_size, eval_batch_size, steps_per_eval, train_steps, num_train_images, num_eval_images, num_label_classes, tf_version).apply(gcp.use_gcp_secret()) train.after(preprocess) export_output = os.path.join(str(train.outputs['job_dir']), 'export') deploy = resnet_deploy_op(export_output, model, version, project_id, region, tf_version).apply(gcp.use_gcp_secret())
def kubeflow_training(
        output, project,
        evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
        train='gs://ml-pipeline-playground/flower/train200.csv',
        schema='gs://ml-pipeline-playground/flower/schema.json',
        learning_rate=0.1, hidden_layer_size='100,50', steps=2000,
        target='label', workers=0, pss=0,
        preprocess_mode='local', predict_mode='local', optimizer=''):
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema, gcp_project=project, run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(
        transformed_data_dir=preprocess.output,
        schema=schema, learning_rate=learning_rate,
        hidden_layer_size=hidden_layer_size, steps=steps, target=target,
        preprocessing_module='', optimizer='',
        training_output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        # No trailing comma here: the original assigned a one-element tuple to
        # training.image instead of the image string.
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:fe639f41661d8e17fcda64ff8242127620b80ba0'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(
        data_file_pattern=evaluation,
        schema=schema, target_column=target, model=training.output,
        run_mode=predict_mode, gcp_project=project, batch_size='',
        predictions_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    confusion_matrix = confusion_matrix_op(
        predictions=prediction.output,
        target_lambda='',
        output_dir=output_template
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
def xgb_train_pipeline( output, project, region='us-central1', train_data='gs://ml-pipeline-playground/sfpd/train.csv', eval_data='gs://ml-pipeline-playground/sfpd/eval.csv', schema='gs://ml-pipeline-playground/sfpd/schema.json', target='resolution', rounds=200, workers=2, true_label='ACTION', ): delete_cluster_op = DeleteClusterOp('delete-cluster', project, region).apply(gcp.use_gcp_secret('user-gcp-sa')) with dsl.ExitHandler(exit_op=delete_cluster_op): create_cluster_op = CreateClusterOp('create-cluster', project, region, output).apply(gcp.use_gcp_secret('user-gcp-sa')) analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output, schema, train_data, '%s/{{workflow.name}}/analysis' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) transform_op = TransformOp('transform', project, region, create_cluster_op.output, train_data, eval_data, target, analyze_op.output, '%s/{{workflow.name}}/transform' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) train_op = TrainerOp('train', project, region, create_cluster_op.output, transform_op.outputs['train'], transform_op.outputs['eval'], target, analyze_op.output, workers, rounds, '%s/{{workflow.name}}/model' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) predict_op = PredictOp('predict', project, region, create_cluster_op.output, transform_op.outputs['eval'], train_op.output, target, analyze_op.output, '%s/{{workflow.name}}/predict' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) confusion_matrix_op = ConfusionMatrixOp('confusion-matrix', predict_op.output, '%s/{{workflow.name}}/confusionmatrix' % output).apply(gcp.use_gcp_secret('user-gcp-sa')) roc_op = RocOp('roc', predict_op.output, true_label, '%s/{{workflow.name}}/roc' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))
def deploy(modeldir):
    deploy_cmle = dsl.ContainerOp(
        name='deploycmle',
        # image needs to be a compile-time string
        image='gcr.io/tenacious-camp-267214/babyweight-pipeline-deploycmle:latest',
        arguments=[
            modeldir,  # modeldir
            'babyweight',
            'mlp'
        ],
        file_outputs={
            'model': '/model.txt',
            'version': '/version.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))
    # A max cache staleness of "P0D" (zero days) effectively disables result
    # caching, so the deploy steps always re-run.
    deploy_cmle.execution_options.caching_strategy.max_cache_staleness = "P0D"

    deploy_app = dsl.ContainerOp(
        name='deployapp',
        # image needs to be a compile-time string
        image='gcr.io/tenacious-camp-267214/babyweight-pipeline-deployapp:latest',
        arguments=[
            deploy_cmle.outputs['model'],
            deploy_cmle.outputs['version']
        ],
        file_outputs={
            'appurl': '/appurl.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))
    deploy_app.execution_options.caching_strategy.max_cache_staleness = "P0D"
def automl1( #pylint: disable=unused-argument # There's now a more succinct way to define the pipeline params project_id: dsl.PipelineParam = dsl.PipelineParam(name='project-id', value='YOUR_PROJECT_HERE'), compute_region: dsl.PipelineParam = dsl.PipelineParam(name='compute-region', value='YOUR_REGION_HERE'), dataset_name: dsl.PipelineParam = dsl.PipelineParam(name='dataset-name', value='YOUR_DATASETNAME_HERE'), model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name', value='YOUR_MODELNAME_HERE'), csv_path: dsl.PipelineParam = dsl.PipelineParam(name='csv-path', value='YOUR_DATASET_PATH') ): dataset = dsl.ContainerOp( name='dataset', image='gcr.io/google-samples/automl-pipeline', arguments=["--project_id", project_id, "--operation", DATASET_OP, "--compute_region", compute_region, "--dataset_name", dataset_name, "--csv_path", csv_path], file_outputs={'dataset_id': '/dataset_id.txt', 'csv_path': '/csv_path.txt'} ).apply(gcp.use_gcp_secret('user-gcp-sa')) model = dsl.ContainerOp( name='model', image='gcr.io/google-samples/automl-pipeline', arguments=["--project_id", project_id, "--operation", MODEL_OP, "--compute_region", compute_region, "--model_name", model_name, "--csv_path", dataset.outputs['csv_path'], "--dataset_id", dataset.outputs['dataset_id']] ).apply(gcp.use_gcp_secret('user-gcp-sa')) model.after(dataset)
def gh_summ( #pylint: disable=unused-argument train_steps: 'Integer' = 2019300, project: String = 'YOUR_PROJECT_HERE', github_token: String = 'YOUR_GITHUB_TOKEN_HERE', working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE', checkpoint_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/', deploy_webapp: String = 'true', data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' ): copydata = copydata_op( data_dir=data_dir, checkpoint_dir=checkpoint_dir, model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER), action=COPY_ACTION, ).apply(gcp.use_gcp_secret('user-gcp-sa')) log_dataset = metadata_log_op(log_type=DATASET, workspace_name=WORKSPACE_NAME, run_name=dsl.RUN_ID_PLACEHOLDER, data_uri=data_dir) train = train_op(data_dir=data_dir, model_dir=copydata.outputs['copy_output_path'], action=TRAIN_ACTION, train_steps=train_steps, deploy_webapp=deploy_webapp).apply( gcp.use_gcp_secret('user-gcp-sa')) log_model = metadata_log_op(log_type=MODEL, workspace_name=WORKSPACE_NAME, run_name=dsl.RUN_ID_PLACEHOLDER, model_uri=train.outputs['train_output_path']) serve = dsl.ContainerOp( name='serve', image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v2', arguments=[ "--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ), "--model_path", train.outputs['train_output_path'] ]).apply(gcp.use_gcp_secret('user-gcp-sa')) log_dataset.after(copydata) log_model.after(train) train.set_gpu_limit(1) train.set_memory_limit('48G') with dsl.Condition(train.outputs['launch_server'] == 'true'): webapp = dsl.ContainerOp( name='webapp', image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v3ap', arguments=[ "--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ), "--github_token", github_token ]) webapp.after(serve)
def kubeflow_training(
        output, project,
        evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
        train='gs://ml-pipeline-playground/flower/train200.csv',
        schema='gs://ml-pipeline-playground/flower/schema.json',
        learning_rate=0.1, hidden_layer_size='100,50', steps=2000,
        target='label', workers=0, pss=0,
        preprocess_mode='local', predict_mode='local'):
    # TODO: use the argo job name as the workflow
    workflow = '{{workflow.name}}'

    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema, gcp_project=project, run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir='%s/%s/transformed' % (output, workflow)
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(
        transformed_data_dir=preprocess.output,
        schema=schema, learning_rate=learning_rate,
        hidden_layer_size=hidden_layer_size, steps=steps, target=target,
        preprocessing_module='',
        training_output_dir='%s/%s/train' % (output, workflow)
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        # No trailing comma here: the original assigned a one-element tuple to
        # training.image instead of the image string.
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:a277f87ea1d4707bf860d080d06639b7caf9a1cf'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(
        data_file_pattern=evaluation,
        schema=schema, target_column=target, model=training.output,
        run_mode=predict_mode, gcp_project=project,
        predictions_dir='%s/%s/predict' % (output, workflow)
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    confusion_matrix = confusion_matrix_op(
        predictions=prediction.output,
        output_dir='%s/%s/confusionmatrix' % (output, workflow)
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
def chicago_taxi_pipeline():
    gs_download_training_data_in_csv = chicago_taxi_dataset_op(
        gcs_path='gs://kf-demo-data-bucket/taxi_data.csv'
    ).apply(use_gcp_secret('user-gcp-sa')).output

    training_data_for_regression_in_csv = pandas_transform_csv_op(
        table=gs_download_training_data_in_csv,
        transform_code='''df.insert(0, "was_tipped", df["tips"] > 0); del df["tips"]''',
    ).output

    regression_data_visualization = visualization_op(
        train_file_path=training_data_for_regression_in_csv
    ).apply(use_gcp_secret('user-gcp-sa'))
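# For reference, what the transform_code string above does, sketched on a toy
# pandas frame; the extra column name is an illustrative assumption, only
# "tips" comes from the snippet above.
import pandas as pd

df = pd.DataFrame({'trip_miles': [1.2, 3.4], 'tips': [0.0, 2.5]})
df.insert(0, "was_tipped", df["tips"] > 0)  # new boolean label column at position 0
del df["tips"]                              # drop the raw tips column
print(df)                                   # remaining columns: was_tipped, trip_miles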
def taxi_cab_classification( output, project, column_names='gs://ml-pipeline-playground/tfx/taxi-cab-classification/column-names.json', key_columns='trip_start_timestamp', train='gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv', evaluation='gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv', mode='local', preprocess_module='gs://ml-pipeline-playground/tfx/taxi-cab-classification/preprocessing.py', learning_rate=0.1, hidden_layer_size='1500', steps=3000, analyze_slice_column='trip_start_hour'): tf_server_name = 'taxi-cab-classification-model-{{workflow.uid}}' validation = dataflow_tf_data_validation_op(train, evaluation, column_names, key_columns, project, mode, output ).apply(gcp.use_gcp_secret('user-gcp-sa')) preprocess = dataflow_tf_transform_op(train, evaluation, validation.outputs['schema'], project, mode, preprocess_module, output ).apply(gcp.use_gcp_secret('user-gcp-sa')) training = tf_train_op(preprocess.output, validation.outputs['schema'], learning_rate, hidden_layer_size, steps, 'tips', preprocess_module, output ).apply(gcp.use_gcp_secret('user-gcp-sa')) analysis = dataflow_tf_model_analyze_op(training.output, evaluation, validation.outputs['schema'], project, mode, analyze_slice_column, output ).apply(gcp.use_gcp_secret('user-gcp-sa')) prediction = dataflow_tf_predict_op(evaluation, validation.outputs['schema'], 'tips', training.output, mode, project, output ).apply(gcp.use_gcp_secret('user-gcp-sa')) cm = confusion_matrix_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa')) roc = roc_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa')) deploy = kubeflow_deploy_op(training.output, tf_server_name).apply(gcp.use_gcp_secret('user-gcp-sa'))
def preprocess_train_deploy(
        bucket: str = '<bucket>',
        cutoff_year: str = '2010',
        tag: str = '4',
        model: str = 'DeepModel'):
    """Pipeline to train financial time series model"""
    preprocess_op = Preprocess('preprocess', bucket, cutoff_year).apply(
        gcp.use_gcp_secret('user-gcp-sa'))
    #pylint: disable=unused-variable
    train_op = Train('train', preprocess_op.output, tag, bucket,
                     model).apply(gcp.use_gcp_secret('user-gcp-sa'))
    with dsl.Condition(train_op.outputs['accuracy'] > 0.7):
        deploy_op = Deploy('deploy', tag, bucket).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
def mnist_pipeline(
        storage_bucket: str,
        output_path: str,
):
    import os
    train_op = components.load_component_from_file('./train/component.yaml')
    train_step = train_op(storage_bucket=storage_bucket).apply(
        use_gcp_secret('user-gcp-sa'))

    visualize_op = components.load_component_from_file(
        './tensorboard/component.yaml')
    visualize_step = visualize_op(logdir='%s' % train_step.outputs['logdir'],
                                  output_path=output_path).apply(
                                      use_gcp_secret('user-gcp-sa'))
def preprocess_train_and_deploy( project='ai-analytics-solutions', bucket='ai-analytics-solutions-kfpdemo', start_year='2000' ): """End-to-end Pipeline to train and deploy babyweight model""" # Step 1: create training dataset using Apache Beam on Cloud Dataflow preprocess = dsl.ContainerOp( name='preprocess', # image needs to be a compile-time string image='gcr.io/ai-analytics-solutions/babyweight-pipeline-bqtocsv:latest', arguments=[ '--project', project, '--mode', 'cloud', '--bucket', bucket, '--start_year', start_year ], file_outputs={'bucket': '/output.txt'} ).apply(use_gcp_secret('user-gcp-sa')) # Step 2: Do hyperparameter tuning of the model on Cloud ML Engine hparam_train = dsl.ContainerOp( name='hypertrain', # image needs to be a compile-time string image='gcr.io/ai-analytics-solutions/babyweight-pipeline-hypertrain:latest', arguments=[ preprocess.outputs['bucket'] ], file_outputs={'jobname': '/output.txt'} ).apply(use_gcp_secret('user-gcp-sa')) # core ML part of pipeline deploy_cmle = train_and_deploy_helper(preprocess, hparam_train, True) # Step 5: Deploy web app deploy_app = dsl.ContainerOp( name='deployapp', # image needs to be a compile-time string image='gcr.io/ai-analytics-solutions/babyweight-pipeline-deployapp:latest', arguments=[ deploy_cmle.outputs['model'], deploy_cmle.outputs['version'] ], file_outputs={ 'appurl': '/appurl.txt' } ).apply(use_gcp_secret('user-gcp-sa'))
def my_pipeline(
        clusterproject='sparkpubsub',
        cluster='spark',
        region='europe-west4',
        staging='output-sparkpubsub-tweets',
        project='sparkpubsub',
        tableproject='sparkpubsub',
        dataset='wordcount',
        table='wordcount_output',
        output='gs://output-sparkpubsub-tweets/output/data.csv',
):
    spark_task = spark_run_op(
        clusterproject=clusterproject,
        cluster=cluster,
        region=region,
        staging=staging,
        project=project,            # project of output bucket
        tableproject=tableproject,  # table to read
        dataset=dataset,
        table=table,
        output=output,              # path of output data
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

# compile the bad boy:
# dsl-compile --py pipeline.py --output ./pipeline.tar.gz
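# A minimal sketch of doing the same compile/submit from the KFP v1 SDK
# instead of the dsl-compile CLI. The host URL is a placeholder and not taken
# from the snippet above.
import kfp
from kfp import compiler

# Compile to an Argo package (equivalent to the dsl-compile command above)...
compiler.Compiler().compile(my_pipeline, 'pipeline.tar.gz')

# ...or submit the pipeline function directly against a KFP endpoint.
client = kfp.Client(host='http://localhost:8080/pipeline')  # hypothetical endpoint
client.create_run_from_pipeline_func(my_pipeline, arguments={})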
def evaluate_model_op(pitch_type, dummy1=None):
    return dsl.ContainerOp(
        name='Evaluate Models',
        image='gcr.io/ross-kubeflow/evaluate-model:latest',
        arguments=['--pitch_type', pitch_type],
        file_outputs={
            'data': '/root/dummy.txt',
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
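# The otherwise-unused dummy1 parameter above is a common way to express an
# execution-order dependency on an upstream step's output. A hedged usage
# sketch; train_model_op and its 'data' output are assumptions for
# illustration, not part of the snippet above.
train_step = train_model_op('FT')  # hypothetical upstream op
eval_step = evaluate_model_op('FT', dummy1=train_step.outputs['data'])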
def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target: str, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'): return dsl.ContainerOp( name=step_name, image= 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2', #TODO-release: update the release tag for the next release arguments=[ '--transformed-data-dir', transformed_data_dir, '--schema', schema, '--learning-rate', learning_rate, '--hidden-layer-size', hidden_layer_size, '--steps', steps, '--target', target, '--preprocessing-module', preprocess_module, '--job-dir', training_output, ], file_outputs={ 'train': '/output.txt' }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'): return dsl.ContainerOp( name=step_name, image= 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.3-rc.2', #TODO-release: update the release tag for the next release arguments=[ '--train', train_data, '--eval', evaluation_data, '--schema', schema, '--project', project, '--mode', preprocess_mode, '--preprocessing-module', preprocess_module, '--output', transform_output, ], file_outputs={ 'transformed': '/output.txt' }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'GcsUri', column_names: 'GcsUri[text/json]', key_columns, project: 'GcpProject', mode, validation_output: 'GcsUri[Directory]', step_name='validation'): return dsl.ContainerOp( name=step_name, image= 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:0.1.3-rc.2', #TODO-release: update the release tag for the next release arguments=[ '--csv-data-for-inference', inference_data, '--csv-data-to-validate', validation_data, '--column-names', column_names, '--key-columns', key_columns, '--project', project, '--mode', mode, '--output', validation_output, ], file_outputs={ 'output': '/output.txt', 'schema': '/output_schema.json', }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def nyc_taxi_pipeline(project='kubeflow-xyz',
                      dataset='yellow_taxi',
                      bucket='gs://yellow-taxi-nyc',
                      start_date='2015-01-01',
                      end_date='2015-01-05'):
    extract = extract_op(project=project,
                         dataset=dataset,
                         bucket=bucket,
                         start_date=start_date,
                         end_date=end_date).apply(
                             gcp.use_gcp_secret('user-gcp-sa'))

    preprocessing = preprocessing_op(
        project=project,
        staging_bucket=extract.outputs['staging_bucket']).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
def wbc_pipeline(model_export_dir='export/wbc',
                 data_root='data/segmentation_WBC-master',
                 metadata_file_name='Class_Labels_of_{}.csv',
                 subset='Dataset1',
                 project='graphic-option-220202',
                 bucket_name='kf-test1234',
                 n_class="5",
                 resume_model='export/wbc/NFCM_model.pth',
                 epochs='50',
                 batch_size='32',
                 pvc_name=''):
    train = _train(data_root, metadata_file_name, subset, project, bucket_name,
                   n_class, epochs, batch_size, model_export_dir)
    # .set_gpu_limit(1)
    # train.add_node_selector_constraint('cloud.google.com/gke-nodepool', 'gpu-pool')
    # out = train.outputs['output']

    test = _test(data_root, metadata_file_name, subset, project, bucket_name,
                 n_class, resume_model, model_export_dir)
    test.after(train)

    steps = [train, test]
    for step in steps:
        if platform == 'GCP':
            step.apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_function_embedding_op( cluster_name: str, function_embeddings_bq_table: str, function_embeddings_dir: str, namespace: str, num_workers: int, project: 'GcpProject', saved_model_dir: 'GcsUri', worker_machine_type: str, workflow_id: str, working_dir: str, ): return dsl.ContainerOp( name='dataflow_function_embedding', image= 'gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e', command=['/usr/local/src/submit_code_embeddings_job.sh'], arguments=[ "--cluster=%s" % cluster_name, "--dataDir=%s" % 'gs://code-search-demo/20181104/data', "--functionEmbeddingsDir=%s" % function_embeddings_dir, "--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table, "--modelDir=%s" % saved_model_dir, "--namespace=%s" % namespace, "--numWorkers=%s" % num_workers, "--project=%s" % project, "--workerMachineType=%s" % worker_machine_type, "--workflowId=%s" % workflow_id, "--workingDir=%s" % working_dir, ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
def kubeflow_training( output, project, test='gs://dataset-image-train/TFRecords/images/test_labels.csv', train='gs://dataset-image-train/TFRecords/images/train_labels.csv', # schema='gs://ml-pipeline-playground/flower/schema.json', learning_rate=0.1, hidden_layer_size='100,50', steps=2000, target='label', workers=0, pss=0, preprocess_mode='local', predict_mode='local', ): output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data' # set the flag to use GPU trainer use_gpu = False preprocess = dataflow_tf_transform_op( training_data_file_pattern=train, evaluation_data_file_pattern=test, #schema=schema, gcp_project=project, run_mode=preprocess_mode, preprocessing_module='', transformed_data_dir=output_template).apply( gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'): return dsl.ContainerOp( name=step_name, image= 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.3-rc.2', #TODO-release: update the release tag for the next release arguments=[ '--data', evaluation_data, '--schema', schema, '--target', target, '--model', model, '--mode', predict_mode, '--project', project, '--output', prediction_output, ], file_outputs={ 'prediction': '/output.txt' }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', analyze_mode, analyze_slice_column, analysis_output: 'GcsUri', step_name='analysis'): return dsl.ContainerOp( name=step_name, image= 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.3-rc.2', #TODO-release: update the release tag for the next release arguments=[ '--model', model, '--eval', evaluation_data, '--schema', schema, '--project', project, '--mode', analyze_mode, '--slice-columns', analyze_slice_column, '--output', analysis_output, ], file_outputs={ 'analysis': '/output.txt' }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def prepare_component(text_path: dsl.PipelineParam,
                      out_pkl_path: dsl.PipelineParam):
    return kfp.dsl.ContainerOp(
        name='Prepare data component',
        image=f'{REGISTRY}/kf_prepare:1.0.0',
        arguments=['--text-path', text_path, '--pkl-path', out_pkl_path]
    ).apply(use_gcp_secret('user-gcp-sa'))
def dp_inf_pipe(
        model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name', value='MODEL_NAME'),
        model_path: dsl.PipelineParam = dsl.PipelineParam(name='model-path', value='MODEL_PATH'),
        num_gpus: dsl.PipelineParam = dsl.PipelineParam(name='num-gpus', value=0),
        # pred_inp_dir: dsl.PipelineParam = dsl.PipelineParam(name='pred_inp_dir', value='INPUT DIRECTORY FOR PREDICTION'),
        # model_location: dsl.PipelineParam = dsl.PipelineParam(name='model_location', value='TRAINED_MODEL_LOCATION'),
        # inf_batch_size: dsl.PipelineParam = dsl.PipelineParam(name='inf_batch_size', value=10)
):
    tfserve = dsl.ContainerOp(
        name='tfserve',
        image='gcr.io/speedy-aurora-193605/retina-tfserve:latest',
        arguments=[
            "--model_name", model_name,
            "--model_path", model_path,
            "--num_gpus", num_gpus,
        ],
        # file_outputs={'output': '/tmp/output'}
    ).apply(gcp.use_gcp_secret('admin-gcp-sa'))
def gh_summ( #pylint: disable=unused-argument train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps', value=2019300), project: dsl.PipelineParam = dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'), github_token: dsl.PipelineParam = dsl.PipelineParam( name='github-token', value='YOUR_GITHUB_TOKEN_HERE'), working_dir: dsl.PipelineParam = dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'), checkpoint_dir: dsl.PipelineParam = dsl.PipelineParam( name='checkpoint-dir', value='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000'), deploy_webapp: dsl.PipelineParam = dsl.PipelineParam(name='deploy-webapp', value='true'), data_dir: dsl.PipelineParam = dsl.PipelineParam( name='data-dir', value='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')): train = dsl.ContainerOp( name='train', image='gcr.io/google-samples/ml-pipeline-t2ttrain', arguments=["--data-dir", data_dir, "--checkpoint-dir", checkpoint_dir, "--model-dir", '%s/%s/model_output' % (working_dir, '{{workflow.name}}'), "--train-steps", train_steps, "--deploy-webapp", deploy_webapp], file_outputs={'output': '/tmp/output'} ).apply(gcp.use_gcp_secret('user-gcp-sa')) serve = dsl.ContainerOp( name='serve', image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}') ] ) serve.after(train) train.set_gpu_limit(4) with dsl.Condition(train.output == 'true'): webapp = dsl.ContainerOp( name='webapp', image='gcr.io/google-samples/ml-pipeline-webapp-launcher', arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), "--github_token", github_token] ) webapp.after(serve)
def __new__(cls, component_name, input_dict, output_dict, exec_properties): """Creates a new component. Args: component_name: TFX component name. input_dict: Dictionary of input names to TFX types, or kfp.dsl.PipelineParam representing input parameters. output_dict: Dictionary of output names to List of TFX types. exec_properties: Execution properties. Returns: Newly constructed TFX Kubeflow component instance. """ outputs = output_dict.keys() file_outputs = { output: '/output/ml_metadata/{}'.format(output) for output in outputs } for k, v in ExecutionProperties.exec_properties.items(): exec_properties[k] = v arguments = [ '--exec_properties', json.dumps(exec_properties), '--outputs', types.jsonify_tfx_type_dict(output_dict), component_name, ] for k, v in input_dict.items(): if isinstance(v, float) or isinstance(v, int): v = str(v) arguments.append('--{}'.format(k)) arguments.append(v) container_op = dsl.ContainerOp( name=component_name, command=_COMMAND, image=_KUBEFLOW_TFX_IMAGE, arguments=arguments, file_outputs=file_outputs, ).apply(gcp.use_gcp_secret('user-gcp-sa')) # Adds GCP authentication. # Add the Argo workflow ID to the container's environment variable so it # can be used to uniquely place pipeline outputs under the pipeline_root. field_path = "metadata.labels['workflows.argoproj.io/workflow']" container_op.add_env_variable( k8s_client.V1EnvVar( name='WORKFLOW_ID', value_from=k8s_client.V1EnvVarSource( field_ref=k8s_client.V1ObjectFieldSelector( field_path=field_path)))) named_outputs = {output: container_op.outputs[output] for output in outputs} # This allows user code to refer to the ContainerOp 'op' output named 'x' # as op.outputs.x component_outputs = type('Output', (), named_outputs) return type(component_name, (BaseComponent,), { 'container_op': container_op, 'outputs': component_outputs })
def workflow1(
        input_handle_eval: dsl.PipelineParam = dsl.PipelineParam(name='input-handle-eval', value='gs://aju-dev-demos-codelabs/KF/taxidata/eval/data.csv'),
        input_handle_train: dsl.PipelineParam = dsl.PipelineParam(name='input-handle-train', value='gs://aju-dev-demos-codelabs/KF/taxidata/train/data.csv'),
        outfile_prefix_eval: dsl.PipelineParam = dsl.PipelineParam(name='outfile-prefix-eval', value='eval_transformed'),
        outfile_prefix_train: dsl.PipelineParam = dsl.PipelineParam(name='outfile-prefix-train', value='train_transformed'),
        train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps', value=10000),
        project: dsl.PipelineParam = dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'),
        working_dir: dsl.PipelineParam = dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'),
        tft_setup_file: dsl.PipelineParam = dsl.PipelineParam(name='tft-setup-file', value='/ml/transform/setup.py'),
        tfma_setup_file: dsl.PipelineParam = dsl.PipelineParam(name='tfma-setup-file', value='/ml/analysis/setup.py'),
        workers: dsl.PipelineParam = dsl.PipelineParam(name='workers', value=1),
        pss: dsl.PipelineParam = dsl.PipelineParam(name='pss', value=1),
        max_rows: dsl.PipelineParam = dsl.PipelineParam(name='max-rows', value=10000),
        ts1: dsl.PipelineParam = dsl.PipelineParam(name='ts1', value=''),
        ts2: dsl.PipelineParam = dsl.PipelineParam(name='ts2', value=''),
        preprocessing_module1: dsl.PipelineParam = dsl.PipelineParam(name='preprocessing-module1', value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing.py'),
        preprocessing_module2: dsl.PipelineParam = dsl.PipelineParam(name='preprocessing-module2', value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing2.py'),
        preprocess_mode: dsl.PipelineParam = dsl.PipelineParam(name='preprocess-mode', value='local'),
        tfma_mode: dsl.PipelineParam = dsl.PipelineParam(name='tfma-mode', value='local')):

    tfteval = dsl.ContainerOp(
        name='tft-eval',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=["--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
                   "--working_dir", '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
                   "--project", project, "--mode", preprocess_mode,
                   "--setup_file", tft_setup_file, "--max-rows", '5000',
                   "--ts1", ts1, "--ts2", ts2, "--stage", "eval",
                   "--preprocessing-module", preprocessing_module1]
        # file_outputs = {'transformed': '/output.txt'}
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfttrain = dsl.ContainerOp(
        name='tft-train',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=["--input_handle", input_handle_train, "--outfile_prefix", outfile_prefix_train,
                   "--working_dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
                   "--project", project, "--mode", preprocess_mode,
                   "--setup_file", tft_setup_file, "--max_rows", max_rows,
                   "--ts1", ts1, "--ts2", ts2, "--stage", "train",
                   "--preprocessing_module", preprocessing_module1]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfteval2 = dsl.ContainerOp(
        name='tft-eval2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=["--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
                   "--working_dir", '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
                   "--project", project, "--mode", preprocess_mode,
                   "--setup_file", tft_setup_file, "--max_rows", '5000',
                   "--ts1", ts1, "--ts2", ts2, "--stage", "eval",
                   "--preprocessing_module", preprocessing_module2]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfttrain2 = dsl.ContainerOp(
        name='tft-train2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=["--input_handle", input_handle_train, "--outfile_prefix", outfile_prefix_train,
                   "--working_dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
                   "--project", project, "--mode", preprocess_mode,
                   "--setup_file", tft_setup_file, "--max_rows", max_rows,
                   "--ts1", ts1, "--ts2", ts2, "--stage", "train",
                   "--preprocessing_module", preprocessing_module2]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    train = dsl.ContainerOp(
        name='train',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
        arguments=["--tf-transform-dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
                   "--output-dir", '%s/%s/tf' % (working_dir, '{{workflow.name}}'),
                   "--working-dir", '%s/%s/tf/serving_model_dir' % (working_dir, '{{workflow.name}}'),
                   "--job-dir", '%s/%s/tf' % (working_dir, '{{workflow.name}}'),
                   "--train-files-dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
                   "--eval-files-dir", '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
                   "--train-files-prefix", outfile_prefix_train,
                   "--eval-files-prefix", outfile_prefix_eval,
                   "--train-steps", train_steps,
                   "--workers", workers, "--pss", pss]
        )
    train.after(tfteval)
    train.after(tfttrain)

    train2 = dsl.ContainerOp(
        name='train2',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
        arguments=["--tf-transform-dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
                   "--output-dir", '%s/%s/tf2' % (working_dir, '{{workflow.name}}'),
                   "--working-dir", '%s/%s/tf2/serving_model_dir' % (working_dir, '{{workflow.name}}'),
                   "--job-dir", '%s/%s/tf2' % (working_dir, '{{workflow.name}}'),
                   "--train-files-dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
                   "--eval-files-dir", '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
                   "--train-files-prefix", outfile_prefix_train,
                   "--eval-files-prefix", outfile_prefix_eval,
                   "--train-steps", train_steps,
                   "--workers", '1', "--pss", '1']
        )
    train2.after(tfteval2)
    train2.after(tfttrain2)

    analyze = dsl.ContainerOp(
        name='analyze',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
        arguments=["--input_csv", input_handle_eval,
                   "--tfma_run_dir", '%s/%s/tfma/output' % (working_dir, '{{workflow.name}}'),
                   "--eval_model_dir", '%s/%s/tf/eval_model_dir' % (working_dir, '{{workflow.name}}'),
                   "--mode", tfma_mode, "--setup_file", tfma_setup_file,
                   "--project", project]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    analyze2 = dsl.ContainerOp(
        name='analyze2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
        arguments=["--input_csv", input_handle_eval,
                   "--tfma_run_dir", '%s/%s/tfma2/output' % (working_dir, '{{workflow.name}}'),
                   "--eval_model_dir", '%s/%s/tf2/eval_model_dir' % (working_dir, '{{workflow.name}}'),
                   "--mode", tfma_mode, "--setup_file", tfma_setup_file,
                   "--project", project]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    cmleop = dsl.ContainerOp(
        name='cmleop',
        image='gcr.io/google-samples/ml-pipeline-cmle-op',
        arguments=["--gcs-path", '%s/%s/tf/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}'),
                   "--version-name", '{{workflow.name}}',
                   "--project", project]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    cmleop2 = dsl.ContainerOp(
        name='cmleop2',
        image='gcr.io/google-samples/ml-pipeline-cmle-op',
        arguments=["--gcs-path", '%s/%s/tf2/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}'),
                   "--version-name", '{{workflow.name}}_2',
                   "--project", project]
        ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    tfserving = dsl.ContainerOp(
        name='tfserving',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
        arguments=["--model_name", '{{workflow.name}}',
                   "--model_path", '%s/%s/tf/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}')]
        )
    tfserving2 = dsl.ContainerOp(
        name='tfserving2',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
        arguments=["--model_name", '{{workflow.name}}-2',
                   "--model_path", '%s/%s/tf2/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}')]
        )

    analyze.after(train)
    analyze2.after(train2)
    cmleop.after(train)
    cmleop2.after(train2)
    tfserving.after(train)
    tfserving2.after(train2)
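# The pipeline functions in this section assume a Kubeflow Pipelines v1 SDK
# preamble roughly like the sketch below; the exact imports vary per sample,
# and each pipeline function is typically decorated with @dsl.pipeline(...)
# before compilation. This is an assumed, minimal preamble, not code taken
# from any of the snippets above.
import json
import os

import kfp
import kfp.dsl as dsl
import kfp.gcp as gcp
import kfp.components as components
from kfp.gcp import use_gcp_secret
from kubernetes import client as k8s_client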