Пример #1
0
def save_most_frequent_word():
    """Build the frequent-word / save workflow inside an exit handler.

    ``message_param`` and ``output_path_param`` are not defined in this
    function; they are read from the enclosing scope.
    """
    # The ops below are created while the dsl.ExitHandler context, built
    # around the 'exiting' op, is active.
    with dsl.ExitHandler(ExitHandlerOp('exiting')):
        word_counter = GetFrequentWordOp(
            name='get-Frequent',
            message=message_param,
        )
        word_counter.container.set_memory_request('200M')

        # The save step consumes the counter's output.
        message_saver = SaveMessageOp(
            name='save',
            message=word_counter.output,
            output_path=output_path_param,
        )
        saver_container = message_saver.container
        saver_container.set_cpu_limit('0.5')
        saver_container.set_gpu_limit('2')

        accelerator_label = 'cloud.google.com/gke-accelerator'
        accelerator_type = 'nvidia-tesla-k80'
        message_saver.add_node_selector_constraint(accelerator_label,
                                                   accelerator_type)

        tpu_modifier = gcp.use_tpu(
            tpu_cores=8, tpu_resource='v2', tf_version='1.12')
        message_saver.apply(tpu_modifier)
Пример #2
0
def save_most_frequent_word(message: str, outputpath: str):
    """Describe the workflow: find the most frequent word, then save it.

    Args:
        message: Forwarded as ``message`` to ``GetFrequentWordOp``.
        outputpath: Forwarded as ``output_path`` to ``SaveMessageOp``.
    """
    # The ops below are created while the dsl.ExitHandler context, built
    # around the 'exiting' op, is active.
    with dsl.ExitHandler(ExitHandlerOp('exiting')):
        word_counter = GetFrequentWordOp(name='get-Frequent',
                                         message=message)
        word_counter.container.set_memory_request('200M')

        # The save step consumes the counter's output.
        message_saver = SaveMessageOp(
            name='save',
            message=word_counter.output,
            output_path=outputpath,
        )
        saver_container = message_saver.container
        saver_container.set_cpu_limit('0.5')
        saver_container.set_gpu_limit('2')

        selector_key = 'cloud.google.com/gke-accelerator'
        selector_value = 'nvidia-tesla-k80'
        message_saver.add_node_selector_constraint(selector_key,
                                                   selector_value)

        tpu_settings = gcp.use_tpu(
            tpu_cores=8, tpu_resource='v2', tf_version='1.12')
        message_saver.apply(tpu_settings)
Пример #3
0
def train_and_deploy(project=dsl.PipelineParam(name='project',
                                               value='dhodun1'),
                     bucket=dsl.PipelineParam(name='bucket',
                                              value='gs://dhodun1-central1'),
                     startYear=dsl.PipelineParam(name='startYear',
                                                 value='2000')):
    """Pipeline to train Mask RCNN.

    Steps are gated on ``start_step``, which is not defined in this function
    and is read from the enclosing scope.

    Args:
        project: Pipeline parameter 'project'; not referenced by the steps
            visible here.
        bucket: Pipeline parameter 'bucket'; passed as the only argument to
            both the preprocessing and the training container.
        startYear: Pipeline parameter 'startYear'; not referenced by the
            steps visible here.
    """
    # An earlier revision also created an ungated, never-referenced copy of
    # the preprocessing op here (bound to `reprocess_coco`). It duplicated
    # the step below and ran even when start_step skipped preprocessing, so
    # it has been removed.

    if start_step <= 1:
        preprocess_coco = dsl.ContainerOp(
            name='preprocess_coco',
            # image needs to be compile-time string
            image='gcr.io/dhodun1/preprocess-coco:latest',
            arguments=[
                bucket,
            ],
            file_outputs={'bucket': '/output.txt'})
        preprocess_coco.set_cpu_request('8')
        preprocess_coco.set_memory_request('30G')
    else:
        # Preprocessing skipped: stand-in object exposing the same
        # outputs['bucket'] lookup, backed by the input parameter.
        preprocess_coco = ObjectDict({'outputs': {'bucket': bucket}})

    if start_step <= 2:
        # NOTE(review): this step receives `bucket` directly and does not
        # reference preprocess_coco, so no dependency between the two steps
        # is declared here -- confirm whether that is intended.
        train_mask_rcnn = dsl.ContainerOp(
            name='train_mask_rcnn_tpu',
            # image needs to be a compile-time string
            image='gcr.io/dhodun1/train-mask-rcnn',
            arguments=[
                bucket,
            ],
            #file_outputs={'results': '/output.txt'}
        )
        train_mask_rcnn.apply(
            use_tpu(tpu_cores=8, tpu_resource='v3', tf_version='1.12'))
        train_mask_rcnn.set_cpu_request('8')
        train_mask_rcnn.set_memory_request('30G')
Пример #4
0
def train_and_deploy(
    project='dhodun1',
    bucket='gs://maskrcnn-kfp',
    #TODO: non-camel-case was conflicting with the use_tpu op modifier
):
    """Pipeline to train Mask RCNN.

    Creates a preprocessing step, a training step and two model-export
    steps, each gated on the local ``start_step`` constant.

    Args:
        project: Not referenced by the steps visible here.
        bucket: Passed as the first argument to the preprocessing and
            training containers.
    """
    usetpu = True
    istest = True
    # NOTE: later steps reference the ops created by earlier ones
    # (preprocess_coco, train_mask_rcnn), which are only assigned inside
    # their own gates; raising start_step above 1 would therefore raise
    # NameError in the following step.
    start_step = 1

    if start_step <= 1:
        preprocess_coco = dsl.ContainerOp(
            name='preprocess_coco',
            # image needs to be compile-time string
            image='gcr.io/dhodun1/preprocess-coco:latest',
            arguments=[bucket],
            file_outputs={'coco_dir': '/output.txt'})
        preprocess_coco.container.set_cpu_request('8')
        preprocess_coco.container.set_memory_request('30G')

    if start_step <= 2:
        train_mask_rcnn = dsl.ContainerOp(
            name='train_mask_rcnn_tpu',
            # image needs to be a compile-time string
            image='gcr.io/dhodun1/train-mask-rcnn:latest',
            arguments=[
                bucket, preprocess_coco.outputs['coco_dir'],
                # The local boolean flags are passed as their string forms.
                str(usetpu),
                str(istest)
            ],
            file_outputs={
                'model_dir': '/model_dir.txt',
                'mAP_box': '/map_box.txt',
                'mAP_segm': '/map_segm.txt'
            })
        train_mask_rcnn.after(preprocess_coco)
        train_mask_rcnn.container.set_cpu_request('8')
        train_mask_rcnn.container.set_memory_request('30G')
        #train_mask_rcnn_tpu.container.set_pull_image_policy('Always')
        if usetpu:
            train_mask_rcnn.apply(
                use_tpu(tpu_cores=8, tpu_resource='v3', tf_version='1.12'))
            # not needed now that TPU has been consolidated
            #train_mask_rcnn.container.image='gcr.io/dhodun1/train-mask-rcnn-tpu:latest'

    if start_step <= 3:
        # Both export steps receive the training step's model_dir output
        # twice, after the format selector ('jpeg' / 'tensor').
        export_model_jpeg = export_op_fn(
            name='export_model_jpeg',
            arguments=[
                'jpeg', train_mask_rcnn.outputs['model_dir'],
                train_mask_rcnn.outputs['model_dir']
            ])
        export_model_jpeg.after(train_mask_rcnn)

        export_model_tensor = export_op_fn(
            name='export_model_tensor',
            arguments=[
                'tensor', train_mask_rcnn.outputs['model_dir'],
                train_mask_rcnn.outputs['model_dir']
            ])
        export_model_tensor.after(train_mask_rcnn)