Example #1
    def compile_deployment_pipeline(self):
        """Compile pipeline in a deployment format."""
        operator_specs = self._create_operator_specs_json()
        graph = self._create_graph_json()

        @dsl.pipeline(name='Common Seldon Deployment.')
        def deployment_pipeline():
            seldonserving = SELDON_DEPLOYMENT.substitute({
                "namespace": KF_PIPELINES_NAMESPACE,
                "experimentId": self._experiment_id,
                "deploymentName": self._name,
                "componentSpecs": operator_specs,
                "graph": graph
            })

            seldon_deployment = json.loads(seldonserving)
            serve_op = dsl.ResourceOp(
                name="deployment",
                k8s_resource=seldon_deployment,
                success_condition="status.state == Available").set_timeout(300)

            for _, operator in self._operators.items():
                operator.build_operator()
                serve_op.after(operator.export_notebook)

        try:
            # the compiler raises an exception but still produces a valid YAML
            compiler.Compiler().compile(deployment_pipeline,
                                        f'{self._experiment_id}.yaml')
        except RuntimeError:
            pass
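
For context, SELDON_DEPLOYMENT above is presumably a string.Template holding a SeldonDeployment custom resource in JSON form, with placeholders matching the keys passed to substitute(). A minimal sketch of what such a template could look like (the field layout is illustrative and not the project's actual template):

from string import Template

# Hypothetical sketch of a SELDON_DEPLOYMENT template; the template actually
# used by compile_deployment_pipeline() may declare different fields.
SELDON_DEPLOYMENT = Template("""
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {"name": "$deploymentName", "namespace": "$namespace"},
    "spec": {
        "name": "$experimentId",
        "predictors": [{
            "name": "model",
            "replicas": 1,
            "componentSpecs": $componentSpecs,
            "graph": $graph
        }]
    }
}
""")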
Example #2
    def test_basic_workflow_without_decorator(self):
        """Test compiling a workflow and appending pipeline params."""
        test_data_dir = os.path.join(os.path.dirname(__file__), 'testdata')
        sys.path.append(test_data_dir)
        import basic_no_decorator
        tmpdir = tempfile.mkdtemp()
        try:
            compiled_workflow = compiler.Compiler().create_workflow(
                basic_no_decorator.save_most_frequent_word,
                'Save Most Frequent', 'Get Most Frequent Word and Save to GCS',
                [
                    basic_no_decorator.message_param,
                    basic_no_decorator.output_path_param
                ])
            with open(os.path.join(test_data_dir, 'basic_no_decorator.yaml'),
                      'r') as f:
                golden = yaml.safe_load(f)

            for workflow in golden, compiled_workflow:
                del workflow['metadata']

            self.assertEqual(golden, compiled_workflow)
        finally:
            shutil.rmtree(tmpdir)
Example #3
    def save_workflow(self, name, target, artifact_path=None, ttl=None):
        """create and save a workflow as a yaml or archive file

        :param name:   workflow name
        :param target: target file path (can end with .yaml or .zip)
        :param artifact_path:
                       target path/url for workflow artifacts, the string
                       '{{workflow.uid}}' will be replaced by workflow id
        :param ttl     pipeline ttl in secs (after that the pods will be removed)
        """
        if not name or name not in self._workflows:
            raise ValueError('workflow {} not found'.format(name))

        workflow_path, code, _ = self._get_wf_file(name)
        pipeline = _create_pipeline(self,
                                    workflow_path,
                                    self._function_objects,
                                    secrets=self._secrets)

        artifact_path = artifact_path or self.artifact_path
        conf = new_pipe_meta(artifact_path, ttl=ttl)
        compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
        if code:
            remove(workflow_path)
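
A hedged usage sketch for save_workflow, assuming proj is a project object on which a workflow named 'main' was already registered (the workflow name, paths, and ttl below are placeholders):

# Hypothetical call; 'main', the target path, and the artifact path are
# illustrative values, not taken from the original code.
proj.save_workflow('main',
                   target='/tmp/main_workflow.yaml',
                   artifact_path='s3://my-bucket/{{workflow.uid}}/',
                   ttl=3600)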
Example #4
        name='serve',
        image='gcr.io/' + project_name + '/ml-pipeline-kubeflow-trtisserve',
        arguments=[
            "--trtserver_name", trtserver_name, '--model_version',
            model_version, '--orig_model_path',
            '%s/%s' % (mount_dir, model_name), "--model_path",
            '%s/%s' % (storage_bucket, model_dir)
        ])

    webapp = dsl.ContainerOp(
        name='webapp',
        image='gcr.io/' + project_name + '/ml-pipeline-trtis-webapp-launcher',
        arguments=[
            "--workflow_name",
            '%s' % ('{{workflow.name}}', ), "--trtserver_name", trtserver_name,
            "--model_name", model_name, "--model_version",
            str(model_version), "--webapp_prefix", webapp_prefix,
            "--webapp_port",
            str(webapp_port)
        ])

    training.after(preprocessing)
    serve.after(training)
    webapp.after(serve)


if __name__ == '__main__':
    import kfp.compiler as compiler

    compiler.Compiler().compile(end2end_demo, __file__ + '.tar.gz')
Example #5
          image: python:alpine3.6
          command:
          - echo
          - {inputValue: msg}
      """)


@dsl.pipeline(name='conditional-execution-pipeline',
              pipeline_root='dummy_root',
              description='Shows how to use dsl.Condition().')
def my_pipeline():
    flip = flip_coin_op()
    with dsl.Condition(flip.output == 'heads'):
        random_num_head = random_num_op(0, 9)()
        with dsl.Condition(random_num_head.output > 5):
            print_op(msg='heads and %s > 5!' % random_num_head.output)
        with dsl.Condition(random_num_head.output <= 5):
            print_op(msg='heads and %s <= 5!' % random_num_head.output)

    with dsl.Condition(flip.output == 'tails'):
        random_num_tail = random_num_op(10, 19)()
        with dsl.Condition(random_num_tail.output > 15):
            print_op(msg='tails and %s > 15!' % random_num_tail.output)
        with dsl.Condition(random_num_tail.output <= 15):
            print_op(msg='tails and %s <= 15!' % random_num_tail.output)


if __name__ == '__main__':
    compiler.Compiler().compile(pipeline_func=my_pipeline,
                                package_path=__file__.replace('.py', '.yaml'))
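
To also submit the compiled YAML, a minimal sketch following the submission pattern used by other examples in this collection (the host, experiment name, and run name are placeholder values) could look like:

    # Hypothetical submission of the compiled package; adjust host and names.
    import kfp
    client = kfp.Client(host='http://localhost:8080/pipeline')
    experiment = client.create_experiment('conditional-execution')
    client.run_pipeline(experiment.id, 'conditional-execution-run',
                        __file__.replace('.py', '.yaml'), {})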
Example #6

    test.add_volume_mount(data_volume_mount)
    test.add_env_variable(data_directory_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env)
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)

    # 6. Clean environment
    clean = dsl.ContainerOp(name="clean",
                            image="tidylobster/mnist-pipeline-clean:latest")
    clean.after(test)

    clean.add_volume(data_volume)
    clean.add_volume_mount(data_volume_mount)
    clean.add_env_variable(data_directory_env)
    clean.add_volume(models_volume)
    clean.add_volume_mount(models_volume_mount)
    clean.add_env_variable(models_directory_env)


if __name__ == "__main__":
    import sys
    import kfp.compiler as compiler
    if len(sys.argv) != 2:
        print("Usage: python pipeline.py output_file")
        sys.exit(-1)

    filename = sys.argv[1]
    compiler.Compiler().compile(pipeline_definition, filename)
Example #7

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image='kubeflowregistry.azurecr.io/kubeflow/register:' + str(imagetag),
        command=['python'],
        arguments=[
            '/scripts/register.py', '--base_path', persistent_volume_path,
            '--model', 'latest.h5', '--model_name', model_name, '--tenant_id',
            tenant_id, '--service_principal_id', service_principal_id,
            '--service_principal_password', service_principal_password,
            '--subscription_id', subscription_id, '--resource_group',
            resource_group, '--workspace', workspace
        ])
    operations['register'].after(operations['training'])

    for _, op in operations.items():
        op.container.set_image_pull_policy("Always")
        op.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.
                V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk'))).add_volume_mount(
                        k8s_client.V1VolumeMount(mount_path='/mnt/azure',
                                                 name='azure'))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(tacosandburritos_train, __file__ + '.tar.gz')
Example #8
@dsl.pipeline(
    name='FfDL pipeline',
    description=
    'A pipeline for machine learning workflow using Fabric for Deep Learning and Seldon.'
)
def ffdlPipeline(
        GITHUB_TOKEN='',
        CONFIG_FILE_URL='https://raw.githubusercontent.com/user/repository/branch/creds.ini',
        model_def_file_path='gender-classification.zip',
        manifest_file_path='manifest.yml',
        model_deployment_name='gender-classifier',
        model_class_name='ThreeLayerCNN',
        model_class_file='gender_classification.py'):
    """A pipeline for end to end machine learning workflow."""

    create_secrets = configuration_op(token=GITHUB_TOKEN,
                                      url=CONFIG_FILE_URL,
                                      name=secret_name)

    train = train_op(model_def_file_path, manifest_file_path).apply(
        params.use_ai_pipeline_params(secret_name))

    serve = serve_op(train.output, model_deployment_name, model_class_name,
                     model_class_file).apply(
                         params.use_ai_pipeline_params(secret_name))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(ffdlPipeline, __file__ + '.tar.gz')
Example #9
    def test_operator_to_template(self):
        """Test converting operator to template"""

        with dsl.Pipeline('somename') as p:
            msg1 = dsl.PipelineParam('msg1')
            msg2 = dsl.PipelineParam('msg2', value='value2')
            op = dsl.ContainerOp(
                name='echo',
                image='image',
                command=['sh', '-c'],
                arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
                file_outputs={'merged': '/tmp/message.txt'})
        golden_output = {
            'container': {
                'image':
                'image',
                'args': [
                    'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
                ],
                'command': ['sh', '-c'],
            },
            'inputs': {
                'parameters': [
                    {
                        'name': 'msg1'
                    },
                    {
                        'name': 'msg2',
                        'value': 'value2'
                    },
                ]
            },
            'name': 'echo',
            'outputs': {
                'parameters': [{
                    'name': 'echo-merged',
                    'valueFrom': {
                        'path': '/tmp/message.txt'
                    }
                }],
                'artifacts': [{
                    'name': 'mlpipeline-ui-metadata',
                    'path': '/mlpipeline-ui-metadata.json',
                    's3': {
                        'accessKeySecret': {
                            'key': 'accesskey',
                            'name': 'mlpipeline-minio-artifact',
                        },
                        'bucket': 'mlpipeline',
                        'endpoint': 'minio-service.kubeflow:9000',
                        'insecure': True,
                        'key':
                        'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz',
                        'secretKeySecret': {
                            'key': 'secretkey',
                            'name': 'mlpipeline-minio-artifact',
                        }
                    }
                }, {
                    'name': 'mlpipeline-metrics',
                    'path': '/mlpipeline-metrics.json',
                    's3': {
                        'accessKeySecret': {
                            'key': 'accesskey',
                            'name': 'mlpipeline-minio-artifact',
                        },
                        'bucket': 'mlpipeline',
                        'endpoint': 'minio-service.kubeflow:9000',
                        'insecure': True,
                        'key':
                        'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz',
                        'secretKeySecret': {
                            'key': 'secretkey',
                            'name': 'mlpipeline-minio-artifact',
                        }
                    }
                }]
            }
        }

        self.maxDiff = None
        self.assertEqual(golden_output,
                         compiler.Compiler()._op_to_template(op))
Example #10

    train_task = train_op(EPOCHS, STEPS, BATCH_SIZE, HIDDEN_LAYER_SIZE, LEARNING_RATE)\
        .add_pvolumes(pvolumes_dict)\
        .after(data_transformation_task)
    train_task.container.working_dir = "/home/jovyan/kale/examples/taxi-cab-classification"
    train_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))

    eval_task = eval_op(EPOCHS, STEPS, BATCH_SIZE, HIDDEN_LAYER_SIZE, LEARNING_RATE)\
        .add_pvolumes(pvolumes_dict)\
        .after(train_task)
    eval_task.container.working_dir = "/home/jovyan/kale/examples/taxi-cab-classification"
    eval_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))


if __name__ == "__main__":
    pipeline_func = auto_generated_pipeline
    pipeline_filename = pipeline_func.__name__ + '.pipeline.tar.gz'
    import kfp.compiler as compiler
    compiler.Compiler().compile(pipeline_func, pipeline_filename)

    # Get or create an experiment and submit a pipeline run
    import kfp
    client = kfp.Client()
    experiment = client.create_experiment('Taxicab')

    # Submit a pipeline run
    run_name = 'taxicab-rhxwc_run'
    run_result = client.run_pipeline(
        experiment.id, run_name, pipeline_filename, {})
Example #11

import kfp
import os
component_root = "/home/jovyan/src"
# Load the component by calling load_component_from_file or load_component_from_url
# To load the component, the pipeline author only needs to have access to the component.yaml file.
# The Kubernetes cluster executing the pipeline needs access to the container image specified in the component.
echo = kfp.components.load_component_from_file(
    os.path.join(component_root, 'component.yaml'))
# dummy_op = kfp.components.load_component_from_url('http://....../component.yaml')


# Define a pipeline and create a task from a component:
@kfp.dsl.pipeline(name='My pipeline', description='')
def my_pipeline():

    compo1 = echo(input_1_uri='https://www.w3.org/TR/PNG/iso_8859-1.txt')


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(my_pipeline,
                                'norok_reusable_compo_pipeline.tar.gz')
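
For reference, the component.yaml loaded above is an ordinary KFP container component spec. A minimal sketch of a roughly equivalent component loaded inline with load_component_from_text (the name, input, and image are illustrative, not taken from the real file):

# Hypothetical inline component spec; the real 'echo' component.yaml may
# declare different inputs and a different image.
echo_inline = kfp.components.load_component_from_text('''
name: Echo
inputs:
- {name: input_1_uri, type: String}
implementation:
  container:
    image: library/bash:4.4.23
    command: [sh, -c, 'echo "$0"', {inputValue: input_1_uri}]
''')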
Example #12

    workflow = '{{workflow.name}}'
    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        train, evaluation, schema, project, preprocess_mode, '',
        '%s/%s/transformed' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
    training = kubeflow_tf_training_op(preprocess.output,
                                       schema,
                                       learning_rate,
                                       hidden_layer_size,
                                       steps,
                                       target,
                                       '',
                                       '%s/%s/train' % (output, workflow),
                                       use_gpu=use_gpu).apply(
                                           gcp.use_gcp_secret('user-gcp-sa'))
    prediction = dataflow_tf_predict_op(
        evaluation, schema, target, training.output, predict_mode, project,
        '%s/%s/predict' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
    confusion_matrix = confusion_matrix_op(
        prediction.output, '%s/%s/confusionmatrix' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(kubeflow_training, __file__ + '.tar.gz')
Example #13
)


@dsl.pipeline(name='bikes_weather',
              description='Model bike rental duration given weather')
def bikes_weather(  #pylint: disable=unused-argument
    working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
    data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/bikes_weather/',
    epochs: 'Integer' = 1,
    steps_per_epoch: 'Integer' = -1,  # if -1, don't override normal calcs based on dataset size
    load_checkpoint: String = ''):

    train = train_op(data_dir=data_dir,
                     workdir='%s/%s' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
                     epochs=epochs,
                     steps_per_epoch=steps_per_epoch,
                     load_checkpoint=load_checkpoint).apply(
                         gcp.use_gcp_secret('user-gcp-sa'))

    serve = serve_op(model_path=train.outputs['train_output_path'],
                     model_name='bikesw').apply(
                         gcp.use_gcp_secret('user-gcp-sa'))

    train.set_gpu_limit(1)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(bikes_weather, __file__ + '.tar.gz')
Example #14
        volume=step2.pvolume
    )

    step3 = dsl.ContainerOp(
        name="step3_copy",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["mkdir /data/step3 && "
                   "cp -av /data/step2/file1 /data/step3/file3"],
        pvolumes={"/data": step2.pvolume}
    )

    step3_snap = dsl.VolumeSnapshotOp(
        name="step3_snap",
        resource_name="step3_snap",
        volume=step3.pvolume
    )

    step4 = dsl.ContainerOp(
        name="step4_output",
        image="library/bash:4.4.23",
        command=["cat", "/data/step2/file1", "/data/step3/file3"],
        pvolumes={"/data": step3.pvolume}
    )


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(volume_snapshotop_sequential,
                                __file__ + ".tar.gz")
Example #15
                                         url=CONFIG_FILE_URL,
                                         name=secret_name)

    # op2 - this operation trains the model with the model codes and data saved in the cloud object store
    wml_train = train_op(config=get_configuration.output,
                         train_code=train_code,
                         execution_command=execution_command,
                         framework=framework,
                         framework_version=framework_version,
                         runtime=runtime,
                         runtime_version=runtime_version,
                         run_definition=run_definition,
                         run_name=run_name).apply(
                             params.use_ai_pipeline_params(secret_name))

    # op3 - this operation stores the model trained above
    wml_store = store_op(wml_train.output, model_name).apply(
        params.use_ai_pipeline_params(secret_name))

    # op4 - this operation deploys the model to a web service and run scoring with the payload in the cloud object store
    wml_deploy = deploy_op(wml_store.output, model_name,
                           scoring_payload).apply(
                               params.use_ai_pipeline_params(secret_name))


if __name__ == '__main__':
    # compile the pipeline
    import kfp.compiler as compiler
    pipeline_filename = kfp_wml_pipeline.__name__ + '.zip'
    compiler.Compiler().compile(kfp_wml_pipeline, pipeline_filename)
Example #16
@dsl.pipeline(
    name='Exit Handler',
    description=
    'Download a message and print it out. Exit Handler will run at the end.')
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""

    exit_op = dsl.ContainerOp(name='finally',
                              image='library/bash:4.4.23',
                              command=['echo', 'exit!'])

    with dsl.ExitHandler(exit_op):

        op1 = dsl.ContainerOp(
            name='download',
            image='google/cloud-sdk:216.0.0',
            command=['sh', '-c'],
            arguments=['gsutil cat %s | tee /tmp/results.txt' % url],
            file_outputs={'downloaded': '/tmp/results.txt'})

        op2 = dsl.ContainerOp(name='echo',
                              image='library/bash:4.4.23',
                              command=['sh', '-c'],
                              arguments=['echo %s' % op1.output])


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(download_and_print, __file__ + '.zip')
Example #17
        model='bolts',
        version='beta1',
        tf_version='1.9',
        train_csv='gs://bolts_image_dataset/bolt_images_train.csv',
        validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv',
        labels='gs://bolts_image_dataset/labels.txt',
        depth=50,
        train_batch_size=1024,
        eval_batch_size=1024,
        steps_per_eval=250,
        train_steps=10000,
        num_train_images=218593,
        num_eval_images=54648,
        num_label_classes=10):

    preprocess = resnet_preprocess_op(project_id, output, train_csv,
                                      validation_csv,
                                      labels).apply(gcp.use_gcp_secret())
    train = resnet_train_op(preprocess.output, output, region, depth,
                            train_batch_size, eval_batch_size, steps_per_eval,
                            train_steps, num_train_images, num_eval_images,
                            num_label_classes,
                            tf_version).apply(gcp.use_gcp_secret())
    deploy = resnet_deploy_op(train.output, model, version, project_id, region,
                              tf_version).apply(gcp.use_gcp_secret())


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(resnet_train, __file__ + '.tar.gz')
Example #18
from kfp.dsl import PipelineVolume


@dsl.pipeline(name="Volume Op DAG",
              description="The second example of the design doc.")
def volume_op_dag():

    dataset = PipelineVolume("your-dataset")

    step1 = dsl.ContainerOp(name="step1",
                            image="library/bash:4.4.23",
                            command=["sh", "-c"],
                            arguments=["echo 1|tee /data/file1"],
                            pvolumes={"/data": dataset})

    step2 = dsl.ContainerOp(name="step2",
                            image="library/bash:4.4.23",
                            command=["sh", "-c"],
                            arguments=["cp /data/file1 /data/file2"],
                            pvolumes={"/data": step1.pvolume})

    step3 = dsl.ContainerOp(name="step3",
                            image="library/bash:4.4.23",
                            command=["cat", "/mnt/file1", "/mnt/file2"],
                            pvolumes={"/mnt": step2.pvolume})


if __name__ == "__main__":
    import kfp.compiler as compiler
    compiler.Compiler().compile(volume_op_dag, __file__ + ".tar.gz")
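
PipelineVolume("your-dataset") above presumably refers to a pre-existing PVC named your-dataset. If no such PVC exists, a dsl.VolumeOp (used the same way in the param-substitution example later in this collection) could create one first; a hedged sketch with placeholder size and access mode:

    # Hypothetical alternative: create the dataset volume inside the pipeline
    # instead of referencing an existing PVC called "your-dataset".
    vop = dsl.VolumeOp(name="create-dataset",
                       resource_name="your-dataset",
                       size="1Gi",
                       modes=dsl.VOLUME_MODE_RWM)
    dataset = vop.volume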
Example #19
        name='ccc',
        host_path=k8s_client.V1LocalVolumeSource(path="/mnt/xfs/project/camb/v8.2_arm"))).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path="/home/Cambricon-Test-v8.2_arm", name='ccc')).add_volume(k8s_client.V1Volume(
        name='ddd',
        host_path=k8s_client.V1LocalVolumeSource(path="/mnt/xfs/project/camb/arm_v8.0/v8.0_arm/ARM64-v8.0/arm64/congcan"))).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path="/home/congcan", name='ddd')).add_volume(k8s_client.V1Volume(
        name='eee',
        host_path=k8s_client.V1LocalVolumeSource(path="/mnt/xfs/project/camb/v8.0/Cambricon-MLU100/datasets"))).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path="/home/datasets", name='eee')).add_volume(k8s_client.V1Volume(
        name='fff',
        host_path=k8s_client.V1LocalVolumeSource(path="/mnt/xfs/project/camb/v8.0/Cambricon-MLU100/models"))).add_volume_mount(
        k8s_client.V1VolumeMount(mount_path="/home/models", name='fff')).add_node_selector_constraint('beta.kubernetes.io/arch', 'arm64')


    camb.add_resource_limit("cambricon.com/mlu", "1")

    # Mount the device driver from the host node
    device_name = "dev-cambricon"
    camb.add_volume(k8s_client.V1Volume(name=device_name, host_path=k8s_client.V1HostPathVolumeSource(
        path="/dev/cambricon_c10Dev0"))).add_volume_mount(
        k8s_client.V1VolumeMount(name=device_name, mount_path="/dev/cambricon_c10Dev0")).add_node_selector_constraint('beta.kubernetes.io/arch', 'arm64')
    # Set the security policy
    # camb._container.set_security_context(k8s_client.V1SecurityContext(privileged=True))


if __name__ == '__main__':
    import kfp.compiler as compiler

    compiler.Compiler().compile(demo, "demo.tar.gz")

Example #20
                    data='user-susan:/training',
                    gpus=1,
                    workers=1,
                    cpu_limit='2',
                    metric='images/sec',
                    memory_limit='10Gi'):
    """A pipeline for end to end machine learning workflow."""

    env = ['NCCL_DEBUG=INFO', 'GIT_SYNC_BRANCH={0}'.format(git_sync_branch)]

    train = arena.mpi_job_op(name="all-reduce",
                             image=image,
                             env=env,
                             data=[data],
                             workers=workers,
                             sync_source=sync_source,
                             gpus=gpus,
                             cpu_limit=cpu_limit,
                             memory_limit=memory_limit,
                             metrics=[metric],
                             command="""
  	mpirun python code/benchmarks/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --model resnet101 \
  	--batch_size {0}  --variable_update horovod --optimizer {1}\
  	--summary_verbosity=3 --save_summaries_steps=10
  	""".format(batch_size, optimizer))


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(mpirun_pipeline, __file__ + '.tar.gz')
Example #21
        transform_op = TransformOp('transform', project, region,
                                   create_cluster_op.output, train_data,
                                   eval_data, target, analyze_op.output,
                                   '%s/{{workflow.name}}/transform' % output)

        train_op = TrainerOp('train', project, region,
                             create_cluster_op.output,
                             transform_op.outputs['train'],
                             transform_op.outputs['eval'], target,
                             analyze_op.output, workers, rounds,
                             '%s/{{workflow.name}}/model' % output)

        predict_op = PredictOp('predict', project, region,
                               create_cluster_op.output,
                               transform_op.outputs['eval'], train_op.output,
                               target, analyze_op.output,
                               '%s/{{workflow.name}}/predict' % output)

        confusion_matrix_op = ConfusionMatrixOp(
            'confusion-matrix', predict_op.output,
            '%s/{{workflow.name}}/confusionmatrix' % output)

        roc_op = RocOp('roc', predict_op.output, true_label,
                       '%s/{{workflow.name}}/roc' % output)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(xgb_train_pipeline, __file__ + '.tar.gz')
            name="my-in-coop2",
            image="library/bash:4.4.23",
            command=["sh", "-c"],
            arguments=["echo op2 %s" % item.b],
        )

    op_out = dsl.ContainerOp(
        name="my-out-cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo %s" % my_pipe_param],
    )


if __name__ == '__main__':
    from kfp import compiler
    import kfp
    import time
    client = kfp.Client(host='127.0.0.1:8080/pipeline')
    print(compiler.Compiler().compile(pipeline, package_path=None))

    pkg_path = '/tmp/witest_pkg.tar.gz'
    compiler.Compiler().compile(pipeline, package_path=pkg_path)
    exp = client.create_experiment('withparams_exp')
    client.run_pipeline(
        experiment_id=exp.id,
        job_name='withitem_nested_{}'.format(time.time()),
        pipeline_package_path=pkg_path,
        params={},
    )
Example #23
        gcp.use_gcp_secret(secret_name='user-gcp-sa',
                           secret_file_path_in_volume='/user-gcp-sa.json',
                           volume_name='gcp-credentials-user-gcp-sa'))

    tfserve = dsl.ContainerOp(
        name='tfserve',
        image='gcr.io/speedy-aurora-193605/retina-tfserve:latest',
        arguments=[
            "--model_name",
            model_name,
            "--model_path",
            save_model_dir,
            "--num_gpus",
            num_gpus_serve,
        ],
        # file_outputs={'output': '/tmp/output'}
    ).apply(
        gcp.use_gcp_secret(secret_name='admin-gcp-sa',
                           secret_file_path_in_volume='/admin-gcp-sa.json',
                           volume_name='gcp-credentials-admin-gcp-sa'))

    train.set_gpu_limit('2')
    train.set_memory_request('8G')
    train.set_cpu_request('4')
    tfserve.after(train)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(dp_inf_pipe, 'train_admin_sa.tar.gz')
Example #24
    def export(self, pipeline, pipeline_export_format, pipeline_export_path, overwrite):
        if pipeline_export_format not in ["yaml", "py"]:
            raise ValueError("Pipeline export format {} not recognized.".format(pipeline_export_format))

        t0_all = time.time()
        timestamp = datetime.now().strftime("%m%d%H%M%S")
        pipeline_name = pipeline.name
        pipeline_version_name = f'{pipeline_name}-{timestamp}'
        # work around https://github.com/kubeflow/pipelines/issues/5172
        experiment_name = pipeline_name.lower()
        # Unique identifier for the pipeline run
        job_name = f'{pipeline_name}-{timestamp}'
        # Unique location on COS where the pipeline run artifacts
        # will be stored
        cos_directory = f'{pipeline_name}-{timestamp}'

        # Since pipeline_export_path may be relative to the notebook directory, ensure
        # we're using its absolute form.
        absolute_pipeline_export_path = get_absolute_path(self.root_dir, pipeline_export_path)

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)
        api_endpoint = runtime_configuration.metadata['api_endpoint']
        namespace = runtime_configuration.metadata.get('user_namespace')
        engine = runtime_configuration.metadata.get('engine')
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if os.path.exists(absolute_pipeline_export_path) and not overwrite:
            raise ValueError("File " + absolute_pipeline_export_path + " already exists.")

        self.log_pipeline_info(pipeline_name, f"exporting pipeline as a .{pipeline_export_format} file")
        if pipeline_export_format != "py":
            # Export pipeline as static configuration file (YAML formatted)
            try:
                # Exported pipeline is not associated with an experiment
                # or a version. The association is established when the
                # pipeline is imported into KFP by the user.
                pipeline_function = lambda: self._cc_pipeline(pipeline,
                                                              pipeline_name,
                                                              cos_directory=cos_directory)  # nopep8

                if 'Tekton' == engine:
                    self.log.info("Compiling pipeline for Tekton engine")
                    kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, absolute_pipeline_export_path)
                else:
                    self.log.info("Compiling pipeline for Argo engine")
                    kfp_argo_compiler.Compiler().compile(pipeline_function, absolute_pipeline_export_path)
            except Exception as ex:
                if ex.__cause__:
                    raise RuntimeError(str(ex)) from ex
                raise RuntimeError('Error pre-processing pipeline {} for export at {}'.
                                   format(pipeline_name, absolute_pipeline_export_path), str(ex)) from ex
        else:
            # Export pipeline as Python DSL
            # Load template from installed elyra package

            loader = PackageLoader('elyra', 'templates/kfp')
            template_env = Environment(loader=loader, trim_blocks=True)

            template_env.filters['to_basename'] = lambda path: os.path.basename(path)

            template = template_env.get_template('kfp_template.jinja2')

            defined_pipeline = self._cc_pipeline(pipeline,
                                                 pipeline_name,
                                                 pipeline_version=pipeline_version_name,
                                                 experiment_name=experiment_name,
                                                 cos_directory=cos_directory,
                                                 export=True)

            description = f'Created with Elyra {__version__} pipeline editor using {pipeline.source}.'

            for key, operation in defined_pipeline.items():
                self.log.debug("component :\n "
                               "container op name : %s \n "
                               "inputs : %s \n "
                               "outputs : %s \n ",
                               operation.name,
                               operation.inputs,
                               operation.outputs)

            # The exported pipeline is by default associated with
            # an experiment.
            # The user can manually customize the generated code
            # and change the associations as desired.

            python_output = template.render(operations_list=defined_pipeline,
                                            pipeline_name=pipeline_name,
                                            pipeline_version=pipeline_version_name,
                                            experiment_name=experiment_name,
                                            run_name=job_name,
                                            engine=engine,
                                            cos_secret=cos_secret,
                                            namespace=namespace,
                                            api_endpoint=api_endpoint,
                                            pipeline_description=description,
                                            writable_container_dir=self.WCD)

            # Write to Python file and fix formatting
            with open(absolute_pipeline_export_path, "w") as fh:
                autopep_output = autopep8.fix_code(python_output)
                output_to_file = format_str(autopep_output, mode=FileMode())
                fh.write(output_to_file)

            self.log_pipeline_info(pipeline_name, "pipeline rendered", duration=(time.time() - t0_all))

        self.log_pipeline_info(pipeline_name,
                               f"pipeline exported: {pipeline_export_path}",
                               duration=(time.time() - t0_all))

        return pipeline_export_path  # Return the input value, not its absolute form
Example #25

import kfp.dsl as dsl


@dsl.pipeline(
    name="Param Substitutions",
    description="Test the same PipelineParam getting substituted in multiple "
    "places")
def param_substitutions():
    vop = dsl.VolumeOp(name="create_volume", resource_name="data", size="1Gi")

    op = dsl.ContainerOp(name="cop",
                         image="image",
                         arguments=["--param", vop.output],
                         pvolumes={"/mnt": vop.volume})


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(param_substitutions, __file__ + '.tar.gz')
Example #26
    def process(self, pipeline):
        """Runs a pipeline on Kubeflow Pipelines

        Each time a pipeline is processed, a new version
        is uploaded and run under the same experiment name.
        """

        t0_all = time.time()
        timestamp = datetime.now().strftime("%m%d%H%M%S")

        runtime_configuration = self._get_metadata_configuration(namespace=MetadataManager.NAMESPACE_RUNTIMES,
                                                                 name=pipeline.runtime_config)

        api_endpoint = runtime_configuration.metadata['api_endpoint']
        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        user_namespace = runtime_configuration.metadata.get('user_namespace')

        # TODO: try to encapsulate the info below
        api_username = runtime_configuration.metadata.get('api_username')
        api_password = runtime_configuration.metadata.get('api_password')

        engine = runtime_configuration.metadata.get('engine')

        pipeline_name = pipeline.name
        try:
            # Connect to the Kubeflow server, determine whether it is secured,
            # and if it is try to authenticate with the user-provided credentials
            # (if any were defined in the runtime configuration)

            endpoint = api_endpoint.replace('/pipeline', '')
            auth_info = \
                KfpPipelineProcessor._get_user_auth_session_cookie(endpoint,
                                                                   api_username,
                                                                   api_password)

            self.log.debug(f"Kubeflow authentication info: {auth_info}")

            if auth_info['endpoint_secured'] and \
               auth_info['authservice_session_cookie'] is None:
                # Kubeflow is secured but our attempt to authenticate did
                # not yield the expected results. Log the collected authentication
                # information and abort processing.
                self.log.warning(f"Kubeflow authentication info: {auth_info}")
                raise RuntimeError(f"Error connecting to Kubeflow at '{endpoint}'"
                                   f": Authentication request failed. Check the "
                                   f"Kubeflow Pipelines credentials in runtime "
                                   f"configuration '{pipeline.runtime_config}'.")

            # Create a KFP client
            if 'Tekton' == engine:
                client = TektonClient(host=api_endpoint,
                                      cookies=auth_info['authservice_session_cookie'])
            else:
                client = ArgoClient(host=api_endpoint,
                                    cookies=auth_info['authservice_session_cookie'])

            # Determine whether a pipeline with the provided
            # name already exists
            pipeline_id = client.get_pipeline_id(pipeline_name)
            if pipeline_id is None:
                # The KFP default version name is the pipeline
                # name
                pipeline_version_name = pipeline_name
            else:
                # Append timestamp to generate unique version name
                pipeline_version_name = f'{pipeline_name}-{timestamp}'
            # Establish a 1:1 relationship with an experiment
            # work around https://github.com/kubeflow/pipelines/issues/5172
            experiment_name = pipeline_name.lower()
            # Unique identifier for the pipeline run
            job_name = f'{pipeline_name}-{timestamp}'
            # Unique location on COS where the pipeline run artifacts
            # will be stored
            cos_directory = f'{pipeline_name}-{timestamp}'

        except MaxRetryError as ex:
            raise RuntimeError('Error connecting to pipeline server {}'.format(api_endpoint)) from ex
        except LocationValueError as lve:
            if api_username:
                raise ValueError("Failure occurred uploading pipeline, check your credentials") from lve
            else:
                raise lve

        # Verify that user-entered namespace is valid
        try:
            client.list_experiments(namespace=user_namespace,
                                    page_size=0)
        except ApiException as ae:
            error_msg = f"{ae.reason} ({ae.status})"
            if ae.body:
                error_body = json.loads(ae.body)
                error_msg += f": {error_body['error']}"
            if error_msg[-1] not in ['.', '?', '!']:
                error_msg += '.'

            namespace = "namespace" if not user_namespace else f"namespace {user_namespace}"

            self.log.error(f"Error validating {namespace}: {error_msg}")
            raise RuntimeError(f"Error validating {namespace}: {error_msg} " +
                               "Please validate your runtime configuration details and retry.") from ae

        self.log_pipeline_info(pipeline_name, "submitting pipeline")
        with tempfile.TemporaryDirectory() as temp_dir:
            pipeline_path = os.path.join(temp_dir, f'{pipeline_name}.tar.gz')

            self.log.debug("Creating temp directory %s", temp_dir)

            # Compile the new pipeline
            try:
                pipeline_function = lambda: self._cc_pipeline(pipeline,  # nopep8 E731
                                                              pipeline_name=pipeline_name,
                                                              pipeline_version=pipeline_version_name,
                                                              experiment_name=experiment_name,
                                                              cos_directory=cos_directory)
                if 'Tekton' == engine:
                    kfp_tekton_compiler.TektonCompiler().compile(pipeline_function, pipeline_path)
                else:
                    kfp_argo_compiler.Compiler().compile(pipeline_function, pipeline_path)
            except Exception as ex:
                if ex.__cause__:
                    raise RuntimeError(str(ex)) from ex
                raise RuntimeError('Error pre-processing pipeline {} for engine {} at {}'.
                                   format(pipeline_name, engine, pipeline_path), str(ex)) from ex

            self.log.debug("Kubeflow Pipeline was created in %s", pipeline_path)

            # Upload the compiled pipeline, create an experiment and run

            try:
                description = f"Created with Elyra {__version__} pipeline editor using '{pipeline.source}'."
                t0 = time.time()

                if pipeline_id is None:
                    # Upload new pipeline. The call returns
                    # a unique pipeline id.
                    kfp_pipeline = \
                        client.upload_pipeline(pipeline_path,
                                               pipeline_name,
                                               description)
                    pipeline_id = kfp_pipeline.id
                    version_id = None
                else:
                    # Upload a pipeline version. The call returns
                    # a unique version id.
                    kfp_pipeline = \
                        client.upload_pipeline_version(pipeline_path,
                                                       pipeline_version_name,
                                                       pipeline_id=pipeline_id)
                    version_id = kfp_pipeline.id

                self.log_pipeline_info(pipeline_name, 'pipeline uploaded', duration=(time.time() - t0))
            except MaxRetryError as ex:
                raise RuntimeError('Error connecting to pipeline server {}'.format(api_endpoint)) from ex

            except LocationValueError as lve:
                if api_username:
                    raise ValueError("Failure occurred uploading pipeline, check your credentials") from lve
                else:
                    raise lve

            # Create a new experiment. If it already exists this is
            # a no-op.
            experiment = client.create_experiment(name=experiment_name,
                                                  namespace=user_namespace)
            self.log_pipeline_info(pipeline_name,
                                   f'Created experiment {experiment_name}',
                                   duration=(time.time() - t0_all))

            # Run the pipeline (or specified pipeline version)
            run = client.run_pipeline(experiment_id=experiment.id,
                                      job_name=job_name,
                                      pipeline_id=pipeline_id,
                                      version_id=version_id)

            self.log_pipeline_info(pipeline_name,
                                   f"pipeline submitted: {api_endpoint}/#/runs/details/{run.id}",
                                   duration=(time.time() - t0_all))

            return KfpPipelineProcessorResponse(
                run_url=f'{api_endpoint}/#/runs/details/{run.id}',
                object_storage_url=f'{cos_endpoint}',
                object_storage_path=f'/{cos_bucket}/{cos_directory}',
            )

        return None
Example #27
    training_output = '%s/{{workflow.name}}/train' % output
    analysis_output = '%s/{{workflow.name}}/analysis' % output
    prediction_output = '%s/{{workflow.name}}/predict' % output
    tf_server_name = 'taxi-cab-classification-model-{{workflow.name}}'

    validation = dataflow_tf_data_validation_op(train, evaluation,
                                                column_names, key_columns,
                                                project, validation_mode,
                                                validation_output)
    schema = '%s/schema.json' % validation.outputs['output']

    preprocess = dataflow_tf_transform_op(train, evaluation, schema, project,
                                          preprocess_mode, preprocess_module,
                                          transform_output)
    training = tf_train_op(preprocess.output, schema, learning_rate,
                           hidden_layer_size, steps, target, preprocess_module,
                           training_output)
    analysis = dataflow_tf_model_analyze_op(training.output, evaluation,
                                            schema, project, analyze_mode,
                                            analyze_slice_column,
                                            analysis_output)
    prediction = dataflow_tf_predict_op(evaluation, schema, target,
                                        training.output, predict_mode, project,
                                        prediction_output)
    deploy = kubeflow_deploy_op(training.output, tf_server_name)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(taxi_cab_classification, __file__ + '.tar.gz')
Example #28

import kfp.dsl as dsl


class RandomFailure1Op(dsl.ContainerOp):
    """A component that fails randomly."""
    def __init__(self, exit_codes):
        super(RandomFailure1Op, self).__init__(
            name='random_failure',
            image='python:alpine3.6',
            command=['python', '-c'],
            arguments=[
                "import random; import sys; exit_code = random.choice([%s]); print(exit_code); sys.exit(exit_code)"
                % exit_codes
            ])


@dsl.pipeline(name='pipeline includes two steps which fail randomly.',
              description='shows how to use ContainerOp set_retry().')
def retry_sample_pipeline():
    op1 = RandomFailure1Op('0,1,2,3').set_retry(10)
    op2 = RandomFailure1Op('0,1').set_retry(5)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(retry_sample_pipeline, __file__ + '.tar.gz')
Example #29
                                       "--out-path",
                                       lr_prediction_path,
                                       "--c-param",
                                       lr_c_param,
                                       "--action",
                                       "train",
                                       "--model-path",
                                       lr_model_path,
                                   ],
                                   pvolumes={"/mnt": vectorize_step.pvolume})

    try:
        seldon_config = yaml.safe_load(
            open("../deploy_pipeline/seldon_production_pipeline.yaml"))
    except FileNotFoundError:
        # If this file is run from the project core directory
        seldon_config = yaml.safe_load(
            open("deploy_pipeline/seldon_production_pipeline.yaml"))

    deploy_step = dsl.ResourceOp(
        name="seldondeploy",
        k8s_resource=seldon_config,
        attribute_outputs={"name": "{.metadata.name}"})

    deploy_step.after(predict_step)


if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(nlp_pipeline, __file__ + '.tar.gz')
Example #30

        volume=vop.volume
    )
    
    compo2 = multiply(
        input_file='/data/input_compo2.txt',
        multiplier=7,
        output_uri='/data/output_compo2.txt',
        output_uri_in_file='/data/output_compo2_uri.txt',
        volume=vop.volume
    )
    
#     compo3 = concatenate(
#         input_file1='/data/input_compo1.txt',
#         input_file2='/data/input_compo2.txt',
#         output_uri='/data/output_compo3.txt',
#         output_uri_in_file='/data/output_compo3_uri.txt',
#         volume=vop.volume
#     )
    
    compo3 = concatenate(
        input_file1=compo1.outputs['output_uri_in_file'],
        input_file2=compo2.outputs['output_uri_in_file'],
        output_uri='/data/output_compo3.txt',
        output_uri_in_file='/data/output_compo3_uri.txt',
        volume=vop.volume
    )
    
if __name__ == '__main__':
    import kfp.compiler as compiler
    compiler.Compiler().compile(my_pipeline, 'small_pipeline.tar.gz')