def pipeline(my_pipe_param: list = [100, 200], my_pipe_param3: list = [1, 2]):
    loop_args = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]
    # `Loop(...).enumerate()` yields an (index, item) pair for each iteration.
    with Loop(loop_args).enumerate() as (i, item):  # 2 iterations in total
        op1_template = components.load_component_from_text(op1_yaml)
        op1 = op1_template(index=i, item=item.a)
        with dsl.ParallelFor(my_pipe_param) as inner_item:  # 4 iterations in total
            op11_template = components.load_component_from_text(op11_yaml)
            op11 = op11_template(item.a, inner_item)
            my_pipe_param2: List[int] = [4, 5]
            with Loop(my_pipe_param2).enumerate() as (j, inner_item):  # 8 iterations in total
                op12_template = components.load_component_from_text(op12_yaml)
                op12 = op12_template(
                    outter_index=i, index=j, item=item.b, inner_item=inner_item)
                with dsl.ParallelFor(my_pipe_param3) as inner_item:  # 16 iterations in total
                    op13_template = components.load_component_from_text(op13_yaml)
                    op13 = op13_template(item.b, inner_item)
        op2_template = components.load_component_from_text(op2_yaml)
        op2 = op2_template(item.b)

def my_pipeline(
    text_parameter: str = '[{"p_a": [{"q_a":1}, {"q_a":2}], "p_b": "hello"}, {"p_a": [{"q_a":11},{"q_a":22}], "p_b": "halo"}]'
):
    with dsl.ParallelFor(text_parameter) as item:
        with dsl.ParallelFor(item.p_a) as item_p_a:
            print_op(item_p_a.q_a)

def my_pipeline(loop_parameter: list = [
    {
        "p_a": [{"q_a": '1'}, {"q_a": '2'}],
        "p_b": "hello",
    },
    {
        "p_a": [{"q_a": '11'}, {"q_a": '22'}],
        "p_b": "halo",
    },
]):
    # Nested loop with withParams loop args
    with dsl.ParallelFor(loop_parameter) as item:
        with dsl.ParallelFor(item.p_a) as item_p_a:
            print_op(msg=item_p_a.q_a)

    # Nested loop with withItems loop args
    with dsl.ParallelFor(['1', '2']) as outer_item:
        print_op(msg=outer_item)
        with dsl.ParallelFor(['100', '200', '300']) as inner_item:
            print_op(msg=outer_item, msg2=inner_item)

def flip_component(flip_result, maxVal, my_pipe_param):
    with dsl.Condition(flip_result == 'heads'):
        print_flip = print_op(flip_result)
        flipA = flip_coin_op().after(print_flip)
        loop_args = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]
        with dsl.ParallelFor(loop_args) as item:
            op1 = dsl.ContainerOp(
                name="my-in-coop1",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo op1 %s %s" % (item.a, my_pipe_param)],
            )
            with dsl.ParallelFor([100, 200, 300]) as inner_item:
                op11 = dsl.ContainerOp(
                    name="my-inner-inner-coop",
                    image="library/bash:4.4.23",
                    command=["sh", "-c"],
                    arguments=[
                        "echo op1 %s %s %s" % (item.a, inner_item, my_pipe_param)
                    ],
                )
            op2 = dsl.ContainerOp(
                name="my-in-coop2",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo op2 %s" % item.b],
            )
        # Recursive call: keep flipping until the result is no longer 'heads'.
        flip_component(flipA.output, maxVal, my_pipe_param)

def my_pipeline(text_parameter: str = json.dumps([
    {'p_a': -1, 'p_b': 'hello'},
    {'p_a': 2, 'p_b': 'halo'},
    {'p_a': 3, 'p_b': 'ni hao'},
], sort_keys=True)):
    flip1 = flip_coin_op()

    with dsl.Condition(flip1.output != 'no-such-result'):  # always true
        args_generator = args_generator_op()
        with dsl.ParallelFor(args_generator.output) as item:
            print_op(text_parameter)
            with dsl.Condition(flip1.output == 'heads'):
                print_op(item.A_a)
            with dsl.Condition(flip1.output == 'tails'):
                print_op(item.B_b)
            with dsl.Condition(item.A_a == '1'):
                with dsl.ParallelFor([{'a': '-1'}, {'a': '-2'}]) as item:
                    print_op(item)

    with dsl.ParallelFor(text_parameter) as item:
        with dsl.Condition(item.p_a > 0):
            print_op(item.p_a)
            print_op(item.p_b)

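# Hedged usage sketch for the pipeline above: submitting it directly with the
# KFP v1 SDK client. The client assumes an in-cluster or otherwise configured
# KFP endpoint; the empty `arguments` dict just accepts the defaults.
import kfp

client = kfp.Client()
client.create_run_from_pipeline_func(my_pipeline, arguments={})
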
def pipeline(my_pipe_param: int = 10):
    loop_args = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]
    with dsl.ParallelFor(loop_args) as item:
        op1 = dsl.ContainerOp(
            name="my-in-coop1",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo op1 %s %s" % (item.a, my_pipe_param)],
        )
        with dsl.ParallelFor([100, 200, 300]) as inner_item:
            op11 = dsl.ContainerOp(
                name="my-inner-inner-coop",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=[
                    "echo op1 %s %s %s" % (item.a, inner_item, my_pipe_param)
                ],
            )
        op2 = dsl.ContainerOp(
            name="my-in-coop2",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo op2 %s" % item.b],
        )

    op_out = dsl.ContainerOp(
        name="my-out-cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo %s" % my_pipe_param],
    )

def nested_loop(param: list = ["a", "b", "c"]):
    # param of the inner loop is used inner-most --- works fine
    with dsl.ParallelFor(param):
        with dsl.ParallelFor(param):
            PrintOp('print-0', f"print {param}")

    # param of the inner loop is not used inner-most --- fails
    with dsl.ParallelFor(param):
        with dsl.ParallelFor(param):
            PrintOp('print-1', "print")

def pipeline(my_pipe_param: int = 10):
    loop_args = [1, 2]
    with dsl.ParallelFor(loop_args) as item:
        op1_template = components.load_component_from_text(op1_yaml)
        op1 = op1_template(item, my_pipe_param)
        with dsl.ParallelFor([100, 200, 300]) as inner_item:
            op11_template = components.load_component_from_text(op11_yaml)
            op11 = op11_template(item, inner_item, my_pipe_param)
        op2_template = components.load_component_from_text(op2_yaml)
        op2 = op2_template(item)

    op_out_template = components.load_component_from_text(op_out_yaml)
    op_out = op_out_template(my_pipe_param)

def huggingface_pipeline():
    dataset_dict_task = load_dataset_op(dataset_name='imdb')
    # Fan out over the dataset splits emitted by the loading task.
    with dsl.ParallelFor(dataset_dict_task.outputs['splits']) as split_name:
        dataset_task = split_dataset_op(
            dataset_dict=dataset_dict_task.outputs['dataset_dict'],
            split_name=split_name,
        )

def bikes_weather(  #pylint: disable=unused-argument
        train_epochs: int = 5,
        working_dir: str = 'gs://YOUR/GCS/PATH',  # for the full training jobs
        data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/',
        steps_per_epoch: int = -1,  # if -1, don't override normal calcs based on dataset size
        num_best_hps_list: list = [0],
        hptune_params: str = '[{"num_hidden_layers": %s, "learning_rate": %s, "hidden_size": %s}]' % (3, 1e-2, 64)):

    # Create a TensorBoard visualization for the parent directory of all
    # training runs, so that we can compare them.
    tb_viz = tb_op(log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER))

    # Start the full training runs in parallel.
    with dsl.ParallelFor(num_best_hps_list) as idx:
        train = train_op(
            data_dir=data_dir,
            workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx),
            tb_dir=tb_viz.outputs['log_dir_uri'],
            epochs=train_epochs,
            steps_per_epoch=steps_per_epoch,
            hp_idx=idx,
            hptune_results=hptune_params)

        serve = serve_op(
            model_path=train.outputs['train_output_path'],
            model_name='bikesw',
            namespace='default')

        train.set_gpu_limit(2)

def bikes_weather_hptune(  #pylint: disable=unused-argument
        tune_epochs: int = 2,
        train_epochs: int = 5,
        num_tuners: int = 8,
        bucket_name: str = 'YOUR_BUCKET_NAME',  # used for the HP dirs; don't include the 'gs://'
        tuner_dir_prefix: str = 'hptest',
        tuner_proj: str = 'p1',
        max_trials: int = 128,
        working_dir: str = 'gs://YOUR/GCS/PATH',  # for the full training jobs
        data_dir: str = 'gs://aju-dev-demos-codelabs/bikes_weather/',
        steps_per_epoch: int = -1,  # if -1, don't override normal calcs based on dataset size
        num_best_hps: int = 2,  # the N best parameter sets for full training
        # The indices to the best param sets; necessary in addition to the above param
        # because of how KFP loops work currently. Must be consistent with the above param.
        num_best_hps_list: list = [0, 1],
        thresholds: str = '{"root_mean_squared_error": 2000}'):

    hptune = dsl.ContainerOp(
        name='ktune',
        image='gcr.io/google-samples/ml-pipeline-bikes-dep:b97ee76',
        arguments=[
            '--epochs', tune_epochs,
            '--num-tuners', num_tuners,
            '--tuner-dir', '%s/%s' % (tuner_dir_prefix, dsl.RUN_ID_PLACEHOLDER),
            '--tuner-proj', tuner_proj,
            '--bucket-name', bucket_name,
            '--max-trials', max_trials,
            '--namespace', 'default',
            '--num-best-hps', num_best_hps,
            '--executions-per-trial', 2,
            '--deploy'
        ],
        file_outputs={'hps': '/tmp/hps.json'},
    )

    # Create a TensorBoard visualization for the parent directory of all
    # training runs, so that we can compare them.
    tb_viz = tb_op(log_dir_uri='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER))

    # Start the full training runs in parallel.
    with dsl.ParallelFor(num_best_hps_list) as idx:
        train = train_op(
            data_dir=data_dir,
            workdir='%s/%s' % (tb_viz.outputs['log_dir_uri'], idx),
            tb_dir=tb_viz.outputs['log_dir_uri'],
            epochs=train_epochs,
            steps_per_epoch=steps_per_epoch,
            hp_idx=idx,
            hptune_results=hptune.outputs['hps'])

        eval_metrics = eval_metrics_op(
            thresholds=thresholds,
            metrics=train.outputs['metrics_output_path'],
        )

        with dsl.Condition(eval_metrics.outputs['deploy'] == 'deploy'):
            serve = serve_op(
                model_path=train.outputs['train_output_path'],
                model_name='bikesw',
                namespace='default')

        train.set_gpu_limit(2)

def pipeline():
    loop_args = [{'A_a': 1, 'B_b': 2}, {'A_a': 10, 'B_b': 20}]
    with dsl.SubGraph(parallelism=2):
        with dsl.ParallelFor(loop_args) as item:
            print_op(item)
            print_op(item.A_a)
            print_op(item.B_b)

def my_pipeline():
    args_generator = args_generator_op()
    with dsl.ParallelFor(args_generator.output) as item:
        print_op(item)
        print_op(item.A_a)
        print_op(item.B_b)

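# A minimal sketch of what `args_generator_op` above could look like; the real
# component isn't shown, so this body is an assumption. For dsl.ParallelFor to
# fan out over a task output in KFP v1, the component just needs to emit a
# JSON-serialized list (here, dicts with the A_a and B_b keys the loop reads).
from kfp.components import func_to_container_op

@func_to_container_op
def args_generator_op() -> str:
    import json
    return json.dumps(
        [{'A_a': '1', 'B_b': '2'}, {'A_a': '10', 'B_b': '20'}],
        sort_keys=True)
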
def pipeline():
    op0 = dsl.ContainerOp(
        name="my-out-cop0",
        image='python:alpine3.6',
        command=["sh", "-c"],
        arguments=[
            'python -c "import json; import sys; json.dump([{\'a\': 1, \'b\': 2}, {\'a\': 10, \'b\': 20}], open(\'/tmp/out.json\', \'w\'))"'
        ],
        file_outputs={'out': '/tmp/out.json'},
    )

    with dsl.ParallelFor(op0.output) as item:
        with dsl.Condition(item.a == '1'):
            op1 = dsl.ContainerOp(
                name="my-in-cop1",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo do output op1 item.a: %s" % item.a],
            )
            op2 = dsl.ContainerOp(
                name="my-in-cop2",
                image="library/bash:4.4.23",
                command=["sh", "-c"],
                arguments=["echo do output op2 item.a: %s" % item.a],
            ).after(op1)

    op_out = dsl.ContainerOp(
        name="my-out-cop2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo do output op2, outp: %s" % op0.output],
    )

def pipeline():
    op0 = dsl.ContainerOp(
        name="my-out-cop0",
        image='python:alpine3.6',
        command=["sh", "-c"],
        arguments=[
            'python -c "import json; import sys; json.dump([i for i in range(20, 31)], open(\'/tmp/out.json\', \'w\'))"'
        ],
        file_outputs={'out': '/tmp/out.json'},
    )

    with dsl.ParallelFor(op0.output) as item:
        op1 = dsl.ContainerOp(
            name="my-in-cop1",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo do output op1 item: %s" % item],
        )

    op_out = dsl.ContainerOp(
        name="my-out-cop2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo do output op2, outp: %s" % op0.output],
    )

def pipeline():
    dump_loop_args_op = dump_loop_args()
    with dsl.SubGraph(parallelism=2):
        with dsl.ParallelFor(dump_loop_args_op.output) as item:
            print_op(item)
            print_op(item.A_a)
            print_op(item.B_b)

def demo_pipeline(
        fastqs=["/mnt/data/file1.fastq.gz", "/mnt/data/file2.fastq.gz"],
        leading: int = 5,
        trailing: int = 5,
        minlen: int = 80,
        sliding_window: str = "4:25",
        bar_color: str = "white",
        flier_color: str = "grey",
        plot_color: str = "darkgrid",
):
    """func_to_container_op simply converts the function into a factory that
    produces ops when called. add_pvolumes is a method of the op itself, so we
    need to apply it here, when the op is actually generated, NOT above where
    the trim_op factory is created.
    """
    with dsl.ParallelFor(fastqs) as fastq:
        trim_task = trim_op(
            fastq=fastq,
            leading=leading,
            trailing=trailing,
            minlen=minlen,
            sliding_window=sliding_window,
        ).add_pvolumes(
            {"/mnt/data": dsl.PipelineVolume(pvc="test-data-pv-claim")})

        _ = plot_op(
            fastq=fastq,
            trimmed_fastq=trim_task.outputs["trimmed_fastq"],
            bar_color=bar_color,
            flier_color=flier_color,
            plot_color=plot_color,
        ).add_pvolumes(
            {"/mnt/data": dsl.PipelineVolume(pvc="test-data-pv-claim")})

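# Hedged sketch of the factory pattern the docstring above describes. The
# trimming logic itself is not shown, so `trim` here is a hypothetical
# stand-in; the NamedTuple return is only there so the op exposes a
# `trimmed_fastq` output like the one the pipeline reads. func_to_container_op
# wraps a plain function into a component factory; calling the factory inside
# a pipeline produces an op, and op-level methods such as add_pvolumes apply
# to that op, not to the factory.
from typing import NamedTuple

from kfp.components import func_to_container_op

def trim(fastq: str, leading: int, trailing: int, minlen: int,
         sliding_window: str) -> NamedTuple('TrimOutput', [('trimmed_fastq', str)]):
    # Stand-in body; a real component would run the actual trimming tool here.
    return (fastq.replace('.fastq.gz', '.trimmed.fastq.gz'),)

trim_op = func_to_container_op(trim)
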
def my_pipeline(name: str = 'KFP'):
    print_task = print_op(text='Hello {}'.format(name))
    print_op(text='{}, again.'.format(print_task.output))

    new_value = f' and {name}.'
    with dsl.ParallelFor(['1', '2']) as item:
        print_op2(text1=item, text2=new_value)

def my_pipeline():
    loop_args = [{'A_a': '1', 'B_b': '2'}, {'A_a': '10', 'B_b': '20'}]
    with dsl.ParallelFor(loop_args) as item:
        print_op(item)
        print_op(item.A_a)
        print_op(item.B_b)

def pipeline(loopidy_doop: list = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]):
    op0 = dsl.ContainerOp(
        name="my-out-cop0",
        image='python:alpine3.6',
        command=["sh", "-c"],
        arguments=[
            'python -c "import json; import sys; json.dump([i for i in range(20, 31)], open(\'/tmp/out.json\', \'w\'))"'
        ],
        file_outputs={'out': '/tmp/out.json'},
    )

    with dsl.ParallelFor(loopidy_doop) as item:
        op1 = dsl.ContainerOp(
            name="my-in-cop1",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo no output global op1, item.a: %s" % item.a],
        ).after(op0)

    op_out = dsl.ContainerOp(
        name="my-out-cop2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo no output global op2, outp: %s" % op0.output],
    )

def pipeline():
    op0 = dsl.ContainerOp(
        name="gen-numbers",
        image='python:alpine3.6',
        command=["sh", "-c"],
        arguments=[
            'python -c "import random; import json; import sys; json.dump([i for i in range(20, 26)], open(\'/tmp/out.json\', \'w\'))"'
        ],
        file_outputs={'out': '/tmp/out.json'},
    )

    with dsl.ParallelFor(op0.output) as item:
        op1 = dsl.ContainerOp(
            name="my-item-print",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo do output op1 item: %s" % item],
        )

    op_out = dsl.ContainerOp(
        name="total",
        image="python:alpine3.6",
        command=["sh", "-c"],
        arguments=[
            'echo output gen-numbers: %s && python -c "print(sum(%s))"'
            % (op0.output, op0.output)
        ],
    )

def my_pipeline(greeting='this is a test for looping through parameters'):
    print_task = print_op(text=greeting)
    generate_task = generate_op()
    with dsl.ParallelFor(generate_task.output) as item:
        sum_task = sum_op(a=item.a, b=item.b)
        sum_task.after(print_task)
        # ignore_type() drops the output's type information so the SDK's
        # static type check allows passing it to print_op's input.
        print_task_2 = print_op(sum_task.output.ignore_type())

def pipeline(param: int = 10):
    loop_args = [1, 2]
    with dsl.ParallelFor(loop_args, parallelism=1) as item:
        op1_template = components.load_component_from_text(op1_yaml)
        op1 = op1_template(item, param)

        # Exit the loop early once the CEL condition evaluates to true.
        condi_1 = tekton.CEL_ConditionOp(f"{item} == 2").output
        with dsl.Condition(condi_1 == 'true'):
            tekton.Break()

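# The loop above relies on kfp-tekton extensions (CEL_ConditionOp and
# tekton.Break), so it must be compiled with the Tekton compiler rather than
# the default Argo one. A minimal sketch, assuming the kfp-tekton SDK is
# installed; the output filename is arbitrary.
from kfp_tekton.compiler import TektonCompiler

TektonCompiler().compile(pipeline, 'pipeline.yaml')
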
def my_pipeline(greeting: str = 'this is a test for looping through parameters'):
    print_task = print_op(text=greeting)
    generate_task = generate_op()
    with dsl.ParallelFor(generate_task.output) as item:
        concat_task = concat_op(a=item.a, b=item.b)
        concat_task.after(print_task)
        print_task_2 = print_op(concat_task.output)

def my_pipeline(
    msg: str = 'hello',
    loop_parameter: list = [
        {
            'A_a': 'heads',
            'B_b': ['A', 'B'],
        },
        {
            'A_a': 'tails',
            'B_b': ['X', 'Y', 'Z'],
        },
    ],
):
    flip = flip_coin_op()
    outer_args_generator = args_generator_op()

    with dsl.Condition(flip.output != 'no-such-result'):  # always true
        inner_arg_generator = args_generator_op()

        with dsl.ParallelFor(outer_args_generator.output) as item:
            print_text(msg=msg)

            with dsl.Condition(item.A_a == 'heads'):
                print_text(msg=item.B_b)

            with dsl.Condition(flip.output == 'heads'):
                print_text(msg=item.B_b)

            with dsl.Condition(item.A_a == 'tails'):
                with dsl.ParallelFor([{'a': '-1'}, {'a': '-2'}]) as inner_item:
                    print_struct(struct=inner_item)

            with dsl.ParallelFor(item.B_b) as item_b:
                print_text(msg=item_b)

            with dsl.ParallelFor(loop_parameter) as pipeline_item:
                print_text(msg=pipeline_item)

                with dsl.ParallelFor(inner_arg_generator.output) as inner_item:
                    print_text(msg=pipeline_item, msg2=inner_item.A_a)

            with dsl.ParallelFor(['1', '2']) as static_item:
                print_text(msg=static_item)

                with dsl.Condition(static_item == '1'):
                    print_text(msg='1')

    # Reference a loop item from a grandchild task.
    with dsl.ParallelFor(loop_parameter) as item:
        with dsl.Condition(item.A_a == 'heads'):
            with dsl.ParallelFor(item.B_b) as item_b:
                print_text(msg=item_b)

def my_pipeline(
    greeting: str = 'this is a test for looping through parameters',
):
    print_task = print_op(text=greeting)
    static_loop_arguments = [{'a': '1', 'b': '2'}, {'a': '10', 'b': '20'}]
    with dsl.ParallelFor(static_loop_arguments) as item:
        concat_task = concat_op(a=item.a, b=item.b)
        concat_task.after(print_task)
        print_task_2 = print_op(text=concat_task.output)

def my_pipeline(
    static_loop_arguments: List[dict] = _DEFAULT_LOOP_ARGUMENTS,
    greeting='this is a test for looping through parameters',
):
    print_task = print_op(text=greeting)
    with dsl.ParallelFor(static_loop_arguments) as item:
        sum_task = sum_op(a=item.a, b=item.b)
        sum_task.after(print_task)
        print_task_2 = print_op(sum_task.output.ignore_type())

def my_pipeline(
    static_loop_arguments: List[dict] = _DEFAULT_LOOP_ARGUMENTS,
    greeting: str = 'this is a test for looping through parameters',
):
    print_task = print_op(text=greeting)
    with dsl.ParallelFor(static_loop_arguments) as item:
        concat_task = concat_op(a=item.a, b=item.b)
        concat_task.after(print_task)
        print_task_2 = print_op(text=concat_task.output)

def pipeline():
    op0_template = components.load_component_from_text(op0_yaml)
    op0 = op0_template()

    with dsl.ParallelFor(op0.output) as item:
        op1_template = components.load_component_from_text(op1_yaml)
        op1 = op1_template(item)

    op_out_template = components.load_component_from_text(op_out_yaml)
    op_out = op_out_template(op0.output)

def conditions_and_loops(n='3', threshold='20'):
    produce_numbers_task = produce_numbers(n)
    with dsl.ParallelFor(produce_numbers_task.output) as loop_item:
        add_numbers_task = add_numbers(loop_item, '10')
        print_number_task = print_number(add_numbers_task.output)
        with dsl.Condition(print_number_task.output > threshold):
            notify_success()
        with dsl.Condition(print_number_task.output <= threshold):
            notify_failure()

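# Any of the plain-KFP pipelines above can be compiled to a workflow spec with
# the standard v1 SDK compiler; a minimal sketch, using the last pipeline and
# an arbitrary output filename:
from kfp import compiler

compiler.Compiler().compile(conditions_and_loops, 'conditions_and_loops.yaml')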