def compile_deployment_pipeline(self):
    """Compile pipeline in a deployment format."""
    operator_specs = self._create_operator_specs_json()
    graph = self._create_graph_json()

    @dsl.pipeline(name='Common Seldon Deployment.')
    def deployment_pipeline():
        seldonserving = SELDON_DEPLOYMENT.substitute({
            "namespace": KF_PIPELINES_NAMESPACE,
            "experimentId": self._experiment_id,
            "deploymentName": self._name,
            "componentSpecs": operator_specs,
            "graph": graph
        })

        seldon_deployment = json.loads(seldonserving)

        serve_op = dsl.ResourceOp(
            name="deployment",
            k8s_resource=seldon_deployment,
            success_condition="status.state == Available").set_timeout(300)

        for _, operator in self._operators.items():
            operator.build_operator()
            serve_op.after(operator.export_notebook)

    try:
        # compiler raises an exception, but produces a valid yaml
        compiler.Compiler().compile(deployment_pipeline,
                                    f'{self._experiment_id}.yaml')
    except RuntimeError:
        pass
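# The snippet above swallows the compiler's RuntimeError because a usable YAML
# file is still written. A minimal sketch (assuming a reachable Kubeflow
# Pipelines endpoint; the host and file name below are hypothetical) of how
# that YAML could then be uploaded with the standard KFP client:
import kfp

client = kfp.Client(host='http://localhost:8080/pipeline')  # hypothetical host
client.upload_pipeline('my-experiment-id.yaml',  # file written by compile_deployment_pipeline()
                       pipeline_name='seldon-deployment')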
def test_basic_workflow_without_decorator(self):
    """Test compiling a workflow and appending pipeline params."""
    test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
    sys.path.append(test_data_dir)
    import basic_no_decorator
    tmpdir = tempfile.mkdtemp()
    try:
        compiled_workflow = compiler.Compiler().create_workflow(
            basic_no_decorator.save_most_frequent_word,
            'Save Most Frequent',
            'Get Most Frequent Word and Save to GCS',
            [
                basic_no_decorator.message_param,
                basic_no_decorator.output_path_param
            ])
        with open(os.path.join(test_data_dir, 'basic_no_decorator.yaml'), 'r') as f:
            golden = yaml.safe_load(f)

        for workflow in golden, compiled_workflow:
            del workflow['metadata']

        self.assertEqual(golden, compiled_workflow)
    finally:
        shutil.rmtree(tmpdir)
def save_workflow(self, name, target, artifact_path=None, ttl=None):
    """Create and save a workflow as a yaml or archive file.

    :param name:          workflow name
    :param target:        target file path (can end with .yaml or .zip)
    :param artifact_path: target path/url for workflow artifacts, the string
                          '{{workflow.uid}}' will be replaced by workflow id
    :param ttl:           pipeline ttl in secs (after that the pods will be removed)
    """
    if not name or name not in self._workflows:
        raise ValueError('workflow {} not found'.format(name))

    workflow_path, code, _ = self._get_wf_file(name)
    pipeline = _create_pipeline(
        self, workflow_path, self._function_objects, secrets=self._secrets)

    artifact_path = artifact_path or self.artifact_path
    conf = new_pipe_meta(artifact_path, ttl=ttl)
    compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
    if code:
        remove(workflow_path)
serve = dsl.ContainerOp(
    name='serve',
    image='gcr.io/' + project_name + '/ml-pipeline-kubeflow-trtisserve',
    arguments=[
        "--trtserver_name", trtserver_name,
        '--model_version', model_version,
        '--orig_model_path', '%s/%s' % (mount_dir, model_name),
        "--model_path", '%s/%s' % (storage_bucket, model_dir)
    ])

webapp = dsl.ContainerOp(
    name='webapp',
    image='gcr.io/' + project_name + '/ml-pipeline-trtis-webapp-launcher',
    arguments=[
        "--workflow_name", '%s' % ('{{workflow.name}}', ),
        "--trtserver_name", trtserver_name,
        "--model_name", model_name,
        "--model_version", str(model_version),
        "--webapp_prefix", webapp_prefix,
        "--webapp_port", str(webapp_port)
    ])

training.after(preprocessing)
serve.after(training)
webapp.after(serve)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(end2end_demo, __file__ + '.tar.gz')
    image: python:alpine3.6
    command:
    - echo
    - {inputValue: msg}
""")


@dsl.pipeline(
    name='conditional-execution-pipeline',
    pipeline_root='dummy_root',
    description='Shows how to use dsl.Condition().')
def my_pipeline():
    flip = flip_coin_op()
    with dsl.Condition(flip.output == 'heads'):
        random_num_head = random_num_op(0, 9)()
        with dsl.Condition(random_num_head.output > 5):
            print_op(msg='heads and %s > 5!' % random_num_head.output)
        with dsl.Condition(random_num_head.output <= 5):
            print_op(msg='heads and %s <= 5!' % random_num_head.output)

    with dsl.Condition(flip.output == 'tails'):
        random_num_tail = random_num_op(10, 19)()
        with dsl.Condition(random_num_tail.output > 15):
            print_op(msg='tails and %s > 15!' % random_num_tail.output)
        with dsl.Condition(random_num_tail.output <= 15):
            print_op(msg='tails and %s <= 15!' % random_num_tail.output)


if __name__ == '__main__':
    compiler.Compiler().compile(
        pipeline_func=my_pipeline,
        package_path=__file__.replace('.py', '.yaml'))
test.add_volume_mount(data_volume_mount)
test.add_env_variable(data_directory_env)
test.add_env_variable(hydrosphere_address_env)
test.add_env_variable(application_name_env)
test.add_env_variable(signature_name_env)
test.add_env_variable(warmup_count_env)
test.add_env_variable(acceptable_accuracy_env)

# 6. Clean environment
clean = dsl.ContainerOp(
    name="clean",
    image="tidylobster/mnist-pipeline-clean:latest")
clean.after(test)

clean.add_volume(data_volume)
clean.add_volume_mount(data_volume_mount)
clean.add_env_variable(data_directory_env)
clean.add_volume(models_volume)
clean.add_volume_mount(models_volume_mount)
clean.add_env_variable(models_directory_env)


if __name__ == "__main__":
    import sys
    import kfp.compiler as compiler

    if len(sys.argv) != 2:
        print("Usage: python pipeline.py output_file")
        sys.exit(-1)

    filename = sys.argv[1]
    compiler.Compiler().compile(pipeline_definition, filename)
# register model
operations['register'] = dsl.ContainerOp(
    name='register',
    image='kubeflowregistry.azurecr.io/kubeflow/register:' + str(imagetag),
    command=['python'],
    arguments=[
        '/scripts/register.py',
        '--base_path', persistent_volume_path,
        '--model', 'latest.h5',
        '--model_name', model_name,
        '--tenant_id', tenant_id,
        '--service_principal_id', service_principal_id,
        '--service_principal_password', service_principal_password,
        '--subscription_id', subscription_id,
        '--resource_group', resource_group,
        '--workspace', workspace
    ])
operations['register'].after(operations['training'])

for _, op in operations.items():
    op.container.set_image_pull_policy("Always")
    op.add_volume(
        k8s_client.V1Volume(
            name='azure',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azure-managed-disk'))).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
@dsl.pipeline(
    name='FfDL pipeline',
    description='A pipeline for machine learning workflow using Fabric for Deep Learning and Seldon.')
def ffdlPipeline(
        GITHUB_TOKEN='',
        CONFIG_FILE_URL='https://raw.githubusercontent.com/user/repository/branch/creds.ini',
        model_def_file_path='gender-classification.zip',
        manifest_file_path='manifest.yml',
        model_deployment_name='gender-classifier',
        model_class_name='ThreeLayerCNN',
        model_class_file='gender_classification.py'):
    """A pipeline for end to end machine learning workflow."""

    create_secrets = configuration_op(
        token=GITHUB_TOKEN,
        url=CONFIG_FILE_URL,
        name=secret_name)

    train = train_op(
        model_def_file_path,
        manifest_file_path).apply(params.use_ai_pipeline_params(secret_name))

    serve = serve_op(
        train.output,
        model_deployment_name,
        model_class_name,
        model_class_file).apply(params.use_ai_pipeline_params(secret_name))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(ffdlPipeline, __file__ + '.tar.gz')
def test_operator_to_template(self):
    """Test converting operator to template."""
    with dsl.Pipeline('somename') as p:
        msg1 = dsl.PipelineParam('msg1')
        msg2 = dsl.PipelineParam('msg2', value='value2')
        op = dsl.ContainerOp(
            name='echo',
            image='image',
            command=['sh', '-c'],
            arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
            file_outputs={'merged': '/tmp/message.txt'})

    golden_output = {
        'container': {
            'image': 'image',
            'args': [
                'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
            ],
            'command': ['sh', '-c'],
        },
        'inputs': {
            'parameters': [
                {'name': 'msg1'},
                {'name': 'msg2', 'value': 'value2'},
            ]
        },
        'name': 'echo',
        'outputs': {
            'parameters': [{
                'name': 'echo-merged',
                'valueFrom': {'path': '/tmp/message.txt'}
            }],
            'artifacts': [{
                'name': 'mlpipeline-ui-metadata',
                'path': '/mlpipeline-ui-metadata.json',
                's3': {
                    'accessKeySecret': {
                        'key': 'accesskey',
                        'name': 'mlpipeline-minio-artifact',
                    },
                    'bucket': 'mlpipeline',
                    'endpoint': 'minio-service.kubeflow:9000',
                    'insecure': True,
                    'key': 'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz',
                    'secretKeySecret': {
                        'key': 'secretkey',
                        'name': 'mlpipeline-minio-artifact',
                    }
                }
            }, {
                'name': 'mlpipeline-metrics',
                'path': '/mlpipeline-metrics.json',
                's3': {
                    'accessKeySecret': {
                        'key': 'accesskey',
                        'name': 'mlpipeline-minio-artifact',
                    },
                    'bucket': 'mlpipeline',
                    'endpoint': 'minio-service.kubeflow:9000',
                    'insecure': True,
                    'key': 'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz',
                    'secretKeySecret': {
                        'key': 'secretkey',
                        'name': 'mlpipeline-minio-artifact',
                    }
                }
            }]
        }
    }

    self.maxDiff = None
    self.assertEqual(golden_output, compiler.Compiler()._op_to_template(op))
train_task = train_op(EPOCHS, STEPS, BATCH_SIZE, HIDDEN_LAYER_SIZE, LEARNING_RATE)\
    .add_pvolumes(pvolumes_dict)\
    .after(data_transformation_task)
train_task.container.working_dir = "/home/jovyan/kale/examples/taxi-cab-classification"
train_task.container.set_security_context(
    k8s_client.V1SecurityContext(run_as_user=0))

eval_task = eval_op(EPOCHS, STEPS, BATCH_SIZE, HIDDEN_LAYER_SIZE, LEARNING_RATE)\
    .add_pvolumes(pvolumes_dict)\
    .after(train_task)
eval_task.container.working_dir = "/home/jovyan/kale/examples/taxi-cab-classification"
eval_task.container.set_security_context(
    k8s_client.V1SecurityContext(run_as_user=0))


if __name__ == "__main__":
    pipeline_func = auto_generated_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'

    import kfp.compiler as compiler
    compiler.Compiler().compile(pipeline_func, pipeline_filename)

    # Get or create an experiment and submit a pipeline run
    import kfp
    client = kfp.Client()
    experiment = client.create_experiment('Taxicab')

    # Submit a pipeline run
    run_name = 'taxicab-rhxwc_run'
    run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, {})
import kfp
import os

component_root = "/home/jovyan/src"

# Load the component by calling load_component_from_file or load_component_from_url.
# To load the component, the pipeline author only needs to have access to the component.yaml file.
# The Kubernetes cluster executing the pipeline needs access to the container image specified in the component.
echo = kfp.components.load_component_from_file(
    os.path.join(component_root, 'component.yaml'))
# dummy_op = kfp.components.load_component_from_url('http://....../component.yaml')


# Define a pipeline and create a task from a component:
@kfp.dsl.pipeline(name='My pipeline', description='')
def my_pipeline():
    compo1 = echo(input_1_uri='https://www.w3.org/TR/PNG/iso_8859-1.txt')


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(my_pipeline, 'norok_reusable_compo_pipeline.tar.gz')
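# The commented-out dummy_op line above points at the URL-based variant. A
# minimal sketch of that loading path (the URL below is a placeholder, not a
# real component):
import kfp

dummy_op = kfp.components.load_component_from_url(
    'https://example.com/path/to/component.yaml')  # placeholder URL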
workflow = '{{workflow.name}}'

# set the flag to use GPU trainer
use_gpu = False

preprocess = dataflow_tf_transform_op(
    train, evaluation, schema, project, preprocess_mode, '',
    '%s/%s/transformed' % (output, workflow)).apply(
        gcp.use_gcp_secret('user-gcp-sa'))

training = kubeflow_tf_training_op(
    preprocess.output, schema, learning_rate, hidden_layer_size, steps, target,
    '', '%s/%s/train' % (output, workflow),
    use_gpu=use_gpu).apply(
        gcp.use_gcp_secret('user-gcp-sa'))

prediction = dataflow_tf_predict_op(
    evaluation, schema, target, training.output, predict_mode, project,
    '%s/%s/predict' % (output, workflow)).apply(
        gcp.use_gcp_secret('user-gcp-sa'))

confusion_matrix = confusion_matrix_op(
    prediction.output,
    '%s/%s/confusionmatrix' % (output, workflow)).apply(
        gcp.use_gcp_secret('user-gcp-sa'))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(kubeflow_training, __file__ + '.tar.gz')
)


@dsl.pipeline(
    name='bikes_weather',
    description='Model bike rental duration given weather')
def bikes_weather(  # pylint: disable=unused-argument
        working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
        data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/',
        epochs: 'Integer' = 1,
        steps_per_epoch: 'Integer' = -1,  # if -1, don't override normal calcs based on dataset size
        load_checkpoint: String = ''):

    train = train_op(
        data_dir=data_dir,
        workdir='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        load_checkpoint=load_checkpoint).apply(gcp.use_gcp_secret('user-gcp-sa'))

    serve = serve_op(
        model_path=train.outputs['train_output_path'],
        model_name='bikesw').apply(gcp.use_gcp_secret('user-gcp-sa'))

    train.set_gpu_limit(1)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz')
    volume=step2.pvolume
)

step3 = dsl.ContainerOp(
    name="step3_copy",
    image="library/bash:4.4.23",
    command=["sh", "-c"],
    arguments=["mkdir /data/step3 && "
               "cp -av /data/step2/file1 /data/step3/file3"],
    pvolumes={"/data": step2.pvolume}
)

step3_snap = dsl.VolumeSnapshotOp(
    name="step3_snap",
    resource_name="step3_snap",
    volume=step3.pvolume
)

step4 = dsl.ContainerOp(
    name="step4_output",
    image="library/bash:4.4.23",
    command=["cat", "/data/step2/file1", "/data/step3/file3"],
    pvolumes={"/data": step3.pvolume}
)


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(volume_snapshotop_sequential, __file__ + ".tar.gz")
    url=CONFIG_FILE_URL,
    name=secret_name)

# op2 - this operation trains the model with the model codes and data saved in the cloud object store
wml_train = train_op(
    config=get_configuration.output,
    train_code=train_code,
    execution_command=execution_command,
    framework=framework,
    framework_version=framework_version,
    runtime=runtime,
    runtime_version=runtime_version,
    run_definition=run_definition,
    run_name=run_name).apply(params.use_ai_pipeline_params(secret_name))

# op3 - this operation stores the model trained above
wml_store = store_op(
    wml_train.output, model_name).apply(params.use_ai_pipeline_params(secret_name))

# op4 - this operation deploys the model to a web service and runs scoring with the payload in the cloud object store
wml_deploy = deploy_op(
    wml_store.output, model_name, scoring_payload).apply(
        params.use_ai_pipeline_params(secret_name))


if __name__ == '__main__':
    # compile the pipeline
    import kfp.compiler as compiler
    pipeline_filename = kfp_wml_pipeline.__name__ + '.zip'
    compiler.Compiler().compile(kfp_wml_pipeline, pipeline_filename)
@dsl.pipeline(
    name='Exit Handler',
    description='Download a message and print it out. Exit Handler will run at the end.')
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""

    exit_op = dsl.ContainerOp(
        name='finally',
        image='library/bash:4.4.23',
        command=['echo', 'exit!'])

    with dsl.ExitHandler(exit_op):
        op1 = dsl.ContainerOp(
            name='download',
            image='google/cloud-sdk:216.0.0',
            command=['sh', '-c'],
            arguments=['gsutil cat %s | tee /tmp/results.txt' % url],
            file_outputs={'downloaded': '/tmp/results.txt'})

        op2 = dsl.ContainerOp(
            name='echo',
            image='library/bash:4.4.23',
            command=['sh', '-c'],
            arguments=['echo %s' % op1.output])


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(download_and_print, __file__ + '.zip')
        model='bolts',
        version='beta1',
        tf_version='1.9',
        train_csv='gs://bolts_image_dataset/bolt_images_train.csv',
        validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv',
        labels='gs://bolts_image_dataset/labels.txt',
        depth=50,
        train_batch_size=1024,
        eval_batch_size=1024,
        steps_per_eval=250,
        train_steps=10000,
        num_train_images=218593,
        num_eval_images=54648,
        num_label_classes=10):

    preprocess = resnet_preprocess_op(
        project_id, output, train_csv, validation_csv,
        labels).apply(gcp.use_gcp_secret())

    train = resnet_train_op(
        preprocess.output, output, region, depth, train_batch_size,
        eval_batch_size, steps_per_eval, train_steps, num_train_images,
        num_eval_images, num_label_classes, tf_version).apply(gcp.use_gcp_secret())

    deploy = resnet_deploy_op(
        train.output, model, version, project_id, region,
        tf_version).apply(gcp.use_gcp_secret())


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(resnet_train, __file__ + '.tar.gz')
from kfp.dsl import PipelineVolume


@dsl.pipeline(
    name="Volume Op DAG",
    description="The second example of the design doc.")
def volume_op_dag():
    dataset = PipelineVolume("your-dataset")

    step1 = dsl.ContainerOp(
        name="step1",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 1|tee /data/file1"],
        pvolumes={"/data": dataset})

    step2 = dsl.ContainerOp(
        name="step2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["cp /data/file1 /data/file2"],
        pvolumes={"/data": step1.pvolume})

    step3 = dsl.ContainerOp(
        name="step3",
        image="library/bash:4.4.23",
        command=["cat", "/mnt/file1", "/mnt/file2"],
        pvolumes={"/mnt": step2.pvolume})


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(volume_op_dag, __file__ + ".tar.gz")
        name='ccc',
        host_path=k8s_client.V1LocalVolumeSource(
            path="/mnt/xfs/project/camb/v8.2_arm"))).add_volume_mount(
    k8s_client.V1VolumeMount(
        mount_path="/home/Cambricon-Test-v8.2_arm", name='ccc')).add_volume(
    k8s_client.V1Volume(
        name='ddd',
        host_path=k8s_client.V1LocalVolumeSource(
            path="/mnt/xfs/project/camb/arm_v8.0/v8.0_arm/ARM64-v8.0/arm64/congcan"))).add_volume_mount(
    k8s_client.V1VolumeMount(mount_path="/home/congcan", name='ddd')).add_volume(
    k8s_client.V1Volume(
        name='eee',
        host_path=k8s_client.V1LocalVolumeSource(
            path="/mnt/xfs/project/camb/v8.0/Cambricon-MLU100/datasets"))).add_volume_mount(
    k8s_client.V1VolumeMount(mount_path="/home/datasets", name='eee')).add_volume(
    k8s_client.V1Volume(
        name='fff',
        host_path=k8s_client.V1LocalVolumeSource(
            path="/mnt/xfs/project/camb/v8.0/Cambricon-MLU100/models"))).add_volume_mount(
    k8s_client.V1VolumeMount(mount_path="/home/models", name='fff')).add_node_selector_constraint(
    'beta.kubernetes.io/arch', 'arm64')

camb.add_resource_limit("cambricon.com/mlu", "1")

# Mount the device driver from the node
device_name = "dev-cambricon"
camb.add_volume(
    k8s_client.V1Volume(
        name=device_name,
        host_path=k8s_client.V1HostPathVolumeSource(
            path="/dev/cambricon_c10Dev0"))).add_volume_mount(
    k8s_client.V1VolumeMount(
        name=device_name,
        mount_path="/dev/cambricon_c10Dev0")).add_node_selector_constraint(
    'beta.kubernetes.io/arch', 'arm64')

# Set the security policy
# camb._container.set_security_context(k8s_client.V1SecurityContext(privileged=True))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(demo, "demo.tar.gz")
        data='user-susan:/training',
        gpus=1,
        workers=1,
        cpu_limit='2',
        metric='images/sec',
        memory_limit='10Gi'):
    """A pipeline for end to end machine learning workflow."""

    env = ['NCCL_DEBUG=INFO', 'GIT_SYNC_BRANCH={0}'.format(git_sync_branch)]

    train = arena.mpi_job_op(
        name="all-reduce",
        image=image,
        env=env,
        data=[data],
        workers=workers,
        sync_source=sync_source,
        gpus=gpus,
        cpu_limit=cpu_limit,
        memory_limit=memory_limit,
        metrics=[metric],
        command="""
        mpirun python code/benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --model resnet101 \
        --batch_size {0} --variable_update horovod --optimizer {1}\
        --summary_verbosity=3 --save_summaries_steps=10
        """.format(batch_size, optimizer))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(mpirun_pipeline, __file__ + '.tar.gz')
transform_op = TransformOp(
    'transform', project, region, create_cluster_op.output,
    train_data, eval_data, target, analyze_op.output,
    '%s/{{workflow.name}}/transform' % output)

train_op = TrainerOp(
    'train', project, region, create_cluster_op.output,
    transform_op.outputs['train'], transform_op.outputs['eval'],
    target, analyze_op.output, workers, rounds,
    '%s/{{workflow.name}}/model' % output)

predict_op = PredictOp(
    'predict', project, region, create_cluster_op.output,
    transform_op.outputs['eval'], train_op.output, target,
    analyze_op.output, '%s/{{workflow.name}}/predict' % output)

confusion_matrix_op = ConfusionMatrixOp(
    'confusion-matrix', predict_op.output,
    '%s/{{workflow.name}}/confusionmatrix' % output)

roc_op = RocOp(
    'roc', predict_op.output, true_label,
    '%s/{{workflow.name}}/roc' % output)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(xgb_train_pipeline, __file__ + '.tar.gz')
name="my-in-coop2", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo op2 %s" % item.b], ) op_out = dsl.ContainerOp( name="my-out-cop", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo %s" % my_pipe_param], ) if __name__ == '__main__': from kfp import compiler import kfp import time client = kfp.Client(host='127.0.0.1:8080/pipeline') print(compiler.Compiler().compile(pipeline, package_path=None)) pkg_path = '/tmp/witest_pkg.tar.gz' compiler.Compiler().compile(pipeline, package_path=pkg_path) exp = client.create_experiment('withparams_exp') client.run_pipeline( experiment_id=exp.id, job_name='withitem_nested_{}'.format(time.time()), pipeline_package_path=pkg_path, params={}, )
    gcp.use_gcp_secret(
        secret_name='user-gcp-sa',
        secret_file_path_in_volume='/user-gcp-sa.json',
        volume_name='gcp-credentials-user-gcp-sa'))

tfserve = dsl.ContainerOp(
    name='tfserve',
    image='gcr.io/speedy-aurora-193605/retina-tfserve:latest',
    arguments=[
        "--model_name", model_name,
        "--model_path", save_model_dir,
        "--num_gpus", num_gpus_serve,
    ],
    # file_outputs={'output': '/tmp/output'}
).apply(
    gcp.use_gcp_secret(
        secret_name='admin-gcp-sa',
        secret_file_path_in_volume='/admin-gcp-sa.json',
        volume_name='gcp-credentials-admin-gcp-sa'))

train.set_gpu_limit('2')
train.set_memory_request('8G')
train.set_cpu_request('4')

tfserve.after(train)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(dp_inf_pipe, 'train_admin_sa.tar.gz')
def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
    if pipeline_export_format not in ["yaml", "py"]:
        raise ValueError("Pipeline export format {} not recognized.".format(pipeline_export_format))

    t0_all = time.time()
    timestamp = datetime.now().strftime("%m%d%H%M%S")
    pipeline_name = pipeline.name
    pipeline_version_name = f'{pipeline_name}-{timestamp}'
    # work around https://github.com/kubeflow/pipelines/issues/5172
    experiment_name = pipeline_name.lower()
    # Unique identifier for the pipeline run
    job_name = f'{pipeline_name}-{timestamp}'
    # Unique location on COS where the pipeline run artifacts
    # will be stored
    cos_directory = f'{pipeline_name}-{timestamp}'

    # Since pipeline_export_path may be relative to the notebook directory, ensure
    # we're using its absolute form.
    absolute_pipeline_export_path = get_absolute_path(self.root_dir, pipeline_export_path)

    runtime_configuration = self._get_metadata_configuration(
        namespace=MetadataManager.NAMESPACE_RUNTIMES,
        name=pipeline.runtime_config)

    api_endpoint = runtime_configuration.metadata['api_endpoint']
    namespace = runtime_configuration.metadata.get('user_namespace')
    engine = runtime_configuration.metadata.get('engine')
    cos_secret = runtime_configuration.metadata.get('cos_secret')

    if os.path.exists(absolute_pipeline_export_path) and not overwrite:
        raise ValueError("File " + absolute_pipeline_export_path + " already exists.")

    self.log_pipeline_info(pipeline_name, f"exporting pipeline as a .{pipeline_export_format} file")

    if pipeline_export_format != "py":
        # Export pipeline as static configuration file (YAML formatted)
        try:
            # Exported pipeline is not associated with an experiment
            # or a version. The association is established when the
            # pipeline is imported into KFP by the user.
            pipeline_function = lambda: self._cc_pipeline(pipeline,
                                                          pipeline_name,
                                                          cos_directory=cos_directory)  # nopep8
            if 'Tekton' == engine:
                self.log.info("Compiling pipeline for Tekton engine")
                kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path)
            else:
                self.log.info("Compiling pipeline for Argo engine")
                kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
        except Exception as ex:
            if ex.__cause__:
                raise RuntimeError(str(ex)) from ex
            raise RuntimeError('Error pre-processing pipeline {} for export at {}'.
                               format(pipeline_name, absolute_pipeline_export_path), str(ex)) from ex
    else:
        # Export pipeline as Python DSL
        # Load template from installed elyra package
        loader = PackageLoader('elyra', 'templates/kfp')
        template_env = Environment(loader=loader, trim_blocks=True)

        template_env.filters['to_basename'] = lambda path: os.path.basename(path)

        template = template_env.get_template('kfp_template.jinja2')

        defined_pipeline = self._cc_pipeline(pipeline,
                                             pipeline_name,
                                             pipeline_version=pipeline_version_name,
                                             experiment_name=experiment_name,
                                             cos_directory=cos_directory,
                                             export=True)

        description = f'Created with Elyra {__version__} pipeline editor using {pipeline.source}.'

        for key, operation in defined_pipeline.items():
            self.log.debug("component :\n "
                           "container op name : %s \n "
                           "inputs : %s \n "
                           "outputs : %s \n ",
                           operation.name,
                           operation.inputs,
                           operation.outputs)

        # The exported pipeline is by default associated with
        # an experiment.
        # The user can manually customize the generated code
        # and change the associations as desired.
        python_output = template.render(operations_list=defined_pipeline,
                                        pipeline_name=pipeline_name,
                                        pipeline_version=pipeline_version_name,
                                        experiment_name=experiment_name,
                                        run_name=job_name,
                                        engine=engine,
                                        cos_secret=cos_secret,
                                        namespace=namespace,
                                        api_endpoint=api_endpoint,
                                        pipeline_description=description,
                                        writable_container_dir=self.WCD)

        # Write to Python file and fix formatting
        with open(absolute_pipeline_export_path, "w") as fh:
            autopep_output = autopep8.fix_code(python_output)
            output_to_file = format_str(autopep_output, mode=FileMode())
            fh.write(output_to_file)

        self.log_pipeline_info(pipeline_name, "pipeline rendered", duration=(time.time() - t0_all))

    self.log_pipeline_info(pipeline_name,
                           f"pipeline exported: {pipeline_export_path}",
                           duration=(time.time() - t0_all))

    return pipeline_export_path  # Return the input value, not its absolute form
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import kfp.dsl as dsl


@dsl.pipeline(
    name="Param Substitutions",
    description="Test the same PipelineParam getting substituted in multiple "
                "places")
def param_substitutions():
    vop = dsl.VolumeOp(
        name="create_volume",
        resource_name="data",
        size="1Gi")

    op = dsl.ContainerOp(
        name="cop",
        image="image",
        arguments=["--param", vop.output],
        pvolumes={"/mnt": vop.volume})


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(param_substitutions, __file__ + '.tar.gz')
def process(self, pipeline):
    """Runs a pipeline on Kubeflow Pipelines

    Each time a pipeline is processed, a new version
    is uploaded and run under the same experiment name.
    """
    t0_all = time.time()
    timestamp = datetime.now().strftime("%m%d%H%M%S")

    runtime_configuration = self._get_metadata_configuration(
        namespace=MetadataManager.NAMESPACE_RUNTIMES,
        name=pipeline.runtime_config)

    api_endpoint = runtime_configuration.metadata['api_endpoint']
    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_bucket = runtime_configuration.metadata['cos_bucket']

    user_namespace = runtime_configuration.metadata.get('user_namespace')

    # TODO: try to encapsulate the info below
    api_username = runtime_configuration.metadata.get('api_username')
    api_password = runtime_configuration.metadata.get('api_password')

    engine = runtime_configuration.metadata.get('engine')

    pipeline_name = pipeline.name

    try:
        # Connect to the Kubeflow server, determine whether it is secured,
        # and if it is try to authenticate with the user-provided credentials
        # (if any were defined in the runtime configuration)
        endpoint = api_endpoint.replace('/pipeline', '')
        auth_info = \
            KfpPipelineProcessor._get_user_auth_session_cookie(endpoint,
                                                               api_username,
                                                               api_password)

        self.log.debug(f"Kubeflow authentication info: {auth_info}")

        if auth_info['endpoint_secured'] and \
           auth_info['authservice_session_cookie'] is None:
            # Kubeflow is secured but our attempt to authenticate did
            # not yield the expected results. Log the collected authentication
            # information and abort processing.
            self.log.warning(f"Kubeflow authentication info: {auth_info}")
            raise RuntimeError(f"Error connecting to Kubeflow at '{endpoint}'"
                               f": Authentication request failed. Check the "
                               f"Kubeflow Pipelines credentials in runtime "
                               f"configuration '{pipeline.runtime_config}'.")

        # Create a KFP client
        if 'Tekton' == engine:
            client = TektonClient(host=api_endpoint,
                                  cookies=auth_info['authservice_session_cookie'])
        else:
            client = ArgoClient(host=api_endpoint,
                                cookies=auth_info['authservice_session_cookie'])

        # Determine whether a pipeline with the provided
        # name already exists
        pipeline_id = client.get_pipeline_id(pipeline_name)
        if pipeline_id is None:
            # The KFP default version name is the pipeline
            # name
            pipeline_version_name = pipeline_name
        else:
            # Append timestamp to generate unique version name
            pipeline_version_name = f'{pipeline_name}-{timestamp}'

        # Establish a 1:1 relationship with an experiment
        # work around https://github.com/kubeflow/pipelines/issues/5172
        experiment_name = pipeline_name.lower()

        # Unique identifier for the pipeline run
        job_name = f'{pipeline_name}-{timestamp}'

        # Unique location on COS where the pipeline run artifacts
        # will be stored
        cos_directory = f'{pipeline_name}-{timestamp}'

    except MaxRetryError as ex:
        raise RuntimeError('Error connecting to pipeline server {}'.format(api_endpoint)) from ex
    except LocationValueError as lve:
        if api_username:
            raise ValueError("Failure occurred uploading pipeline, check your credentials") from lve
        else:
            raise lve

    # Verify that user-entered namespace is valid
    try:
        client.list_experiments(namespace=user_namespace, page_size=0)
    except ApiException as ae:
        error_msg = f"{ae.reason} ({ae.status})"
        if ae.body:
            error_body = json.loads(ae.body)
            error_msg += f": {error_body['error']}"
        if error_msg[-1] not in ['.', '?', '!']:
            error_msg += '.'
namespace = "namespace" if not user_namespace else f"namespace {user_namespace}" self.log.error(f"Error validating {namespace}: {error_msg}") raise RuntimeError(f"Error validating {namespace}: {error_msg} " + "Please validate your runtime configuration details and retry.") from ae self.log_pipeline_info(pipeline_name, "submitting pipeline") with tempfile.TemporaryDirectory() as temp_dir: pipeline_path = os.path.join(temp_dir, f'{pipeline_name}.tar.gz') self.log.debug("Creating temp directory %s", temp_dir) # Compile the new pipeline try: pipeline_function = lambda: self._cc_pipeline(pipeline, # nopep8 E731 pipeline_name=pipeline_name, pipeline_version=pipeline_version_name, experiment_name=experiment_name, cos_directory=cos_directory) if 'Tekton' == engine: kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, pipeline_path) else: kfp_argo_compiler.Compiler().compile(pipeline_function, pipeline_path) except Exception as ex: if ex.__cause__: raise RuntimeError(str(ex)) from ex raise RuntimeError('Error pre-processing pipeline {} for engine {} at {}'. format(pipeline_name, engine, pipeline_path), str(ex)) from ex self.log.debug("Kubeflow Pipeline was created in %s", pipeline_path) # Upload the compiled pipeline, create an experiment and run try: description = f"Created with Elyra {__version__} pipeline editor using '{pipeline.source}'." t0 = time.time() if pipeline_id is None: # Upload new pipeline. The call returns # a unique pipeline id. kfp_pipeline = \ client.upload_pipeline(pipeline_path, pipeline_name, description) pipeline_id = kfp_pipeline.id version_id = None else: # Upload a pipeline version. The call returns # a unique version id. kfp_pipeline = \ client.upload_pipeline_version(pipeline_path, pipeline_version_name, pipeline_id=pipeline_id) version_id = kfp_pipeline.id self.log_pipeline_info(pipeline_name, 'pipeline uploaded', duration=(time.time() - t0)) except MaxRetryError as ex: raise RuntimeError('Error connecting to pipeline server {}'.format(api_endpoint)) from ex except LocationValueError as lve: if api_username: raise ValueError("Failure occurred uploading pipeline, check your credentials") from lve else: raise lve # Create a new experiment. If it already exists this is # a no-op. experiment = client.create_experiment(name=experiment_name, namespace=user_namespace) self.log_pipeline_info(pipeline_name, f'Created experiment {experiment_name}', duration=(time.time() - t0_all)) # Run the pipeline (or specified pipeline version) run = client.run_pipeline(experiment_id=experiment.id, job_name=job_name, pipeline_id=pipeline_id, version_id=version_id) self.log_pipeline_info(pipeline_name, f"pipeline submitted: {api_endpoint}/#/runs/details/{run.id}", duration=(time.time() - t0_all)) return KfpPipelineProcessorResponse( run_url=f'{api_endpoint}/#/runs/details/{run.id}', object_storage_url=f'{cos_endpoint}', object_storage_path=f'/{cos_bucket}/{cos_directory}', ) return None
training_output = '%s/{{workflow.name}}/train' % output
analysis_output = '%s/{{workflow.name}}/analysis' % output
prediction_output = '%s/{{workflow.name}}/predict' % output
tf_server_name = 'taxi-cab-classification-model-{{workflow.name}}'

validation = dataflow_tf_data_validation_op(
    train, evaluation, column_names, key_columns, project,
    validation_mode, validation_output)
schema = '%s/schema.json' % validation.outputs['output']

preprocess = dataflow_tf_transform_op(
    train, evaluation, schema, project, preprocess_mode,
    preprocess_module, transform_output)

training = tf_train_op(
    preprocess.output, schema, learning_rate, hidden_layer_size,
    steps, target, preprocess_module, training_output)

analysis = dataflow_tf_model_analyze_op(
    training.output, evaluation, schema, project,
    analyze_mode, analyze_slice_column, analysis_output)

prediction = dataflow_tf_predict_op(
    evaluation, schema, target, training.output,
    predict_mode, project, prediction_output)

deploy = kubeflow_deploy_op(training.output, tf_server_name)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(taxi_cab_classification, __file__ + '.tar.gz')
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import kfp.dsl as dsl


class RandomFailure1Op(dsl.ContainerOp):
    """A component that fails randomly."""

    def __init__(self, exit_codes):
        super(RandomFailure1Op, self).__init__(
            name='random_failure',
            image='python:alpine3.6',
            command=['python', '-c'],
            arguments=[
                "import random; import sys; exit_code = random.choice([%s]); print(exit_code); sys.exit(exit_code)" % exit_codes
            ])


@dsl.pipeline(
    name='pipeline includes two steps which fail randomly.',
    description='shows how to use ContainerOp set_retry().')
def retry_sample_pipeline():
    op1 = RandomFailure1Op('0,1,2,3').set_retry(10)
    op2 = RandomFailure1Op('0,1').set_retry(5)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(retry_sample_pipeline, __file__ + '.tar.gz')
"--out-path", lr_prediction_path, "--c-param", lr_c_param, "--action", "train", "--model-path", lr_model_path, ], pvolumes={"/mnt": vectorize_step.pvolume}) try: seldon_config = yaml.load( open("../deploy_pipeline/seldon_production_pipeline.yaml")) except: # If this file is run from the project core directory seldon_config = yaml.load( open("deploy_pipeline/seldon_production_pipeline.yaml")) deploy_step = dsl.ResourceOp( name="seldondeploy", k8s_resource=seldon_config, attribute_outputs={"name": "{.metadata.name}"}) deploy_step.after(predict_step) if __name__ == '__main__': import kfp.compiler as compiler compiler.Compiler().compile(nlp_pipeline, __file__ + '.tar.gz')
    volume=vop.volume
)

compo2 = multiply(
    input_file='/data/input_compo2.txt',
    multiplier=7,
    output_uri='/data/output_compo2.txt',
    output_uri_in_file='/data/output_compo2_uri.txt',
    volume=vop.volume
)

# compo3 = concatenate(
#     input_file1='/data/input_compo1.txt',
#     input_file2='/data/input_compo2.txt',
#     output_uri='/data/output_compo3.txt',
#     output_uri_in_file='/data/output_compo3_uri.txt',
#     volume=vop.volume
# )
compo3 = concatenate(
    input_file1=compo1.outputs['output_uri_in_file'],
    input_file2=compo2.outputs['output_uri_in_file'],
    output_uri='/data/output_compo3.txt',
    output_uri_in_file='/data/output_compo3_uri.txt',
    volume=vop.volume
)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(my_pipeline, 'small_pipeline.tar.gz')