def save_most_frequent_word():
    """Build the count-then-save workflow inside an exit handler.

    Takes no arguments: the input message and the output path come from the
    free names ``message_param`` and ``output_path_param``, which are resolved
    outside this function.
    """
    on_exit = ExitHandlerOp('exiting')
    with dsl.ExitHandler(on_exit):
        # First step: the frequent-word op, with a memory request.
        word_counter = GetFrequentWordOp(
            name='get-Frequent',
            message=message_param,
        )
        word_counter.container.set_memory_request('200M')

        # Second step: consumes the first step's output.
        message_saver = SaveMessageOp(
            name='save',
            message=word_counter.output,
            output_path=output_path_param,
        )
        saver_container = message_saver.container
        saver_container.set_cpu_limit('0.5')
        saver_container.set_gpu_limit('2')
        message_saver.add_node_selector_constraint(
            'cloud.google.com/gke-accelerator',
            'nvidia-tesla-k80',
        )

        # The TPU settings are applied last, as an op modifier.
        tpu_modifier = gcp.use_tpu(
            tpu_cores=8,
            tpu_resource='v2',
            tf_version='1.12',
        )
        message_saver.apply(tpu_modifier)
def save_most_frequent_word(message: str, outputpath: str):
    """Describe the workflow: count the frequent word, then save it.

    Both steps are created inside ``dsl.ExitHandler`` with an
    ``ExitHandlerOp('exiting')`` as the handler op.

    Args:
        message: Forwarded to ``GetFrequentWordOp`` as its ``message``.
        outputpath: Forwarded to ``SaveMessageOp`` as its ``output_path``.
    """
    on_exit = ExitHandlerOp('exiting')
    with dsl.ExitHandler(on_exit):
        # First step: the frequent-word op, with a memory request.
        word_counter = GetFrequentWordOp(
            name='get-Frequent',
            message=message,
        )
        word_counter.container.set_memory_request('200M')

        # Second step: consumes the first step's output.
        message_saver = SaveMessageOp(
            name='save',
            message=word_counter.output,
            output_path=outputpath,
        )
        saver_container = message_saver.container
        saver_container.set_cpu_limit('0.5')
        saver_container.set_gpu_limit('2')
        message_saver.add_node_selector_constraint(
            'cloud.google.com/gke-accelerator',
            'nvidia-tesla-k80',
        )

        # The TPU settings are applied last, as an op modifier.
        tpu_modifier = gcp.use_tpu(
            tpu_cores=8,
            tpu_resource='v2',
            tf_version='1.12',
        )
        message_saver.apply(tpu_modifier)
def train_and_deploy(
        project=dsl.PipelineParam(name='project', value='dhodun1'),
        bucket=dsl.PipelineParam(name='bucket', value='gs://dhodun1-central1'),
        startYear=dsl.PipelineParam(name='startYear', value='2000')):
    """Define the Mask RCNN training pipeline (COCO preprocessing + training).

    Args:
        project: Pipeline parameter; not referenced in the body.
        bucket: Pipeline parameter passed as the only argument to each
            container.
        startYear: Pipeline parameter; not referenced in the body.
    """
    # Container images need to be compile-time strings.
    coco_image = 'gcr.io/dhodun1/preprocess-coco:latest'
    trainer_image = 'gcr.io/dhodun1/train-mask-rcnn'

    # NOTE(review): this handle is never read below and it reuses the step
    # name 'preprocess_coco' -- looks like a leftover; confirm before removing.
    reprocess_coco = dsl.ContainerOp(
        name='preprocess_coco',
        image=coco_image,
        arguments=[bucket],
        file_outputs={'bucket': '/output.txt'},
    )

    # NOTE(review): `start_step` is not assigned in this function; presumably
    # a module-level setting -- confirm.
    if start_step <= 1:
        preprocess_coco = dsl.ContainerOp(
            name='preprocess_coco',
            image=coco_image,
            arguments=[bucket],
            file_outputs={'bucket': '/output.txt'},
        )
        preprocess_coco.set_cpu_request('8')
        preprocess_coco.set_memory_request('30G')
    else:
        # Step skipped: stand-in object exposing the same `outputs` key.
        preprocess_coco = ObjectDict({'outputs': {'bucket': bucket}})

    if start_step <= 2:
        train_mask_rcnn = dsl.ContainerOp(
            name='train_mask_rcnn_tpu',
            image=trainer_image,
            arguments=[bucket],
            #file_outputs={'results': '/output.txt'}
        )
        tpu_modifier = use_tpu(tpu_cores=8, tpu_resource='v3', tf_version='1.12')
        train_mask_rcnn.apply(tpu_modifier)
        train_mask_rcnn.set_cpu_request('8')
        train_mask_rcnn.set_memory_request('30G')
def train_and_deploy(
        project='dhodun1',
        bucket='gs://maskrcnn-kfp',
):
    """Pipeline to train Mask RCNN.

    Builds three stages in order: COCO preprocessing, Mask RCNN training
    (optionally with the TPU op modifier applied), and two model exports
    ('jpeg' and 'tensor') that both read the training step's ``model_dir``
    output.

    Args:
        project: Not referenced in the body.
        bucket: Passed as the first argument to the preprocessing and
            training containers.
    """
    # TODO: non-camel-case was conflicting with the use_tpu op modifier
    usetpu = True
    istest = True

    # Fixed at 1, so every stage below is built. Later stages read outputs of
    # the ops created by earlier ones, so raising this value would leave those
    # names undefined unless stand-ins are supplied.
    start_step = 1

    if start_step <= 1:
        preprocess_coco = dsl.ContainerOp(
            name='preprocess_coco',
            # image needs to be compile-time string
            image='gcr.io/dhodun1/preprocess-coco:latest',
            arguments=[bucket],
            file_outputs={'coco_dir': '/output.txt'})
        preprocess_coco.container.set_cpu_request('8')
        preprocess_coco.container.set_memory_request('30G')

    if start_step <= 2:
        train_mask_rcnn = dsl.ContainerOp(
            name='train_mask_rcnn_tpu',
            # image needs to be a compile-time string
            image='gcr.io/dhodun1/train-mask-rcnn:latest',
            arguments=[
                bucket,
                preprocess_coco.outputs['coco_dir'],
                # Flags are handed to the container as strings.
                str(usetpu),
                str(istest),
            ],
            file_outputs={
                'model_dir': '/model_dir.txt',
                'mAP_box': '/map_box.txt',
                'mAP_segm': '/map_segm.txt',
            })
        train_mask_rcnn.after(preprocess_coco)
        train_mask_rcnn.container.set_cpu_request('8')
        train_mask_rcnn.container.set_memory_request('30G')
        #train_mask_rcnn_tpu.container.set_pull_image_policy('Always')
        if usetpu:
            train_mask_rcnn.apply(
                use_tpu(tpu_cores=8, tpu_resource='v3', tf_version='1.12'))
            # not needed now that i've consolidated TPU
            #train_mask_rcnn.container.image='gcr.io/dhodun1/train-mask-rcnn-tpu:latest'

    if start_step <= 3:
        # Both exports pass the training step's model_dir output twice; only
        # the first argument ('jpeg' vs 'tensor') differs.
        export_model_jpeg = export_op_fn(
            name='export_model_jpeg',
            arguments=[
                'jpeg',
                train_mask_rcnn.outputs['model_dir'],
                train_mask_rcnn.outputs['model_dir'],
            ])
        export_model_jpeg.after(train_mask_rcnn)

        export_model_tensor = export_op_fn(
            name='export_model_tensor',
            arguments=[
                'tensor',
                train_mask_rcnn.outputs['model_dir'],
                train_mask_rcnn.outputs['model_dir'],
            ])
        export_model_tensor.after(train_mask_rcnn)