def my_pipeline(msg1, json, kind, msg2='value2'):
    op = dsl.ContainerOp(
        name='echo',
        image='image',
        command=['sh', '-c'],
        arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
        file_outputs={'merged': '/tmp/message.txt'}) \
        .add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/secret/gcp-credentials',
            name='gcp-credentials')) \
        .add_env_variable(k8s_client.V1EnvVar(
            name='GOOGLE_APPLICATION_CREDENTIALS',
            value='/secret/gcp-credentials/user-gcp-sa.json'))
    res = dsl.ResourceOp(
        name="test-resource",
        k8s_resource=k8s_client.V1PersistentVolumeClaim(
            api_version="v1",
            kind=kind,
            metadata=k8s_client.V1ObjectMeta(name="resource")),
        attribute_outputs={"out": json})

    golden_output = {
        'container': {
            'image': 'image',
            'args': [
                'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
            ],
            'command': ['sh', '-c'],
            'env': [{
                'name': 'GOOGLE_APPLICATION_CREDENTIALS',
                'value': '/secret/gcp-credentials/user-gcp-sa.json'
            }],
            'volumeMounts': [{
                'mountPath': '/secret/gcp-credentials',
                'name': 'gcp-credentials',
            }]
        },
        'inputs': {
            'parameters': [
                {'name': 'msg1'},
                {'name': 'msg2'},
            ]
        },
        'name': 'echo',
        'outputs': {
            'artifacts': [
                {
                    'name': 'echo-merged',
                    'path': '/tmp/message.txt',
                },
            ],
            'parameters': [{
                'name': 'echo-merged',
                'valueFrom': {'path': '/tmp/message.txt'}
            }],
        }
    }
    res_output = {
        'inputs': {
            'parameters': [{'name': 'json'}, {'name': 'kind'}]
        },
        'name': 'test-resource',
        'outputs': {
            'parameters': [{
                'name': 'test-resource-manifest',
                'valueFrom': {'jsonPath': '{}'}
            }, {
                'name': 'test-resource-name',
                'valueFrom': {'jsonPath': '{.metadata.name}'}
            }, {
                'name': 'test-resource-out',
                'valueFrom': {'jsonPath': '{{inputs.parameters.json}}'}
            }]
        },
        'resource': {
            'action': 'create',
            'manifest': ("apiVersion: v1\n"
                         "kind: '{{inputs.parameters.kind}}'\n"
                         "metadata:\n"
                         "  name: resource\n")
        }
    }

    self.maxDiff = None
    self.assertEqual(golden_output,
                     compiler._op_to_template._op_to_template(op))
    self.assertEqual(res_output,
                     compiler._op_to_template._op_to_template(res))
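# The test above drives the compiler's internal _op_to_template helper
# directly. Outside of unit tests, such ops are compiled through the public
# KFP v1 API instead -- a minimal sketch, assuming the `kfp` SDK is installed
# (the pipeline name and output path below are illustrative):
from kfp import compiler, dsl

@dsl.pipeline(name='echo-pipeline')
def echo_pipeline(msg1='hello', msg2='world'):
    dsl.ContainerOp(name='echo',
                    image='image',
                    command=['sh', '-c'],
                    arguments=['echo %s %s' % (msg1, msg2)])

# Produces an Argo workflow package that can be uploaded to Kubeflow Pipelines.
compiler.Compiler().compile(echo_pipeline, 'echo_pipeline.yaml')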
def nlp_pipeline( csv_url="https://raw.githubusercontent.com/axsauze/reddit-classification-exploration/master/data/reddit_train.csv", csv_encoding="ISO-8859-1", features_column="BODY", labels_column="REMOVED", raw_text_path='/mnt/text.data', labels_path='/mnt/labels.data', clean_text_path='/mnt/clean.data', spacy_tokens_path='/mnt/tokens.data', tfidf_vectors_path='/mnt/tfidf.data', lr_prediction_path='/mnt/prediction.data', tfidf_model_path='/mnt/tfidf.model', lr_model_path='/mnt/lr.model', lr_c_param=0.1, tfidf_max_features=10000, tfidf_ngram_range=3, batch_size='100', github_branch='master'): """ Pipeline """ pvc_metadata = V1ObjectMeta(name="{{workflow.name}}-my-pvc", labels={ "branch": "{{workflow.parameters.github-branch}}", "app": "nlp" }) requested_resources = V1ResourceRequirements(requests={"storage": "1Gi"}) pvc_spec = V1PersistentVolumeClaimSpec(access_modes=["ReadWriteOnce"], resources=requested_resources) pvc = V1PersistentVolumeClaim(api_version="v1", kind="PersistentVolumeClaim", metadata=pvc_metadata, spec=pvc_spec) vop = dsl.VolumeOp(name="create-pvc", k8s_resource=pvc, modes=None) download_step = dsl.ContainerOp( name='data_downloader', image='maximmold/data_downloader:0.1', command="python", arguments=[ "/microservice/pipeline_step.py", "--labels-path", labels_path, "--features-path", raw_text_path, "--csv-url", csv_url, "--csv-encoding", csv_encoding, "--features-column", features_column, "--labels-column", labels_column ], pvolumes={"/mnt": vop.volume}) clean_step = dsl.ContainerOp(name='clean_text', image='maximmold/clean_text_transformer:0.1', command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", raw_text_path, "--out-path", clean_text_path, ], pvolumes={"/mnt": download_step.pvolume}) tokenize_step = dsl.ContainerOp(name='tokenize', image='maximmold/spacy_tokenizer:0.1', command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", clean_text_path, "--out-path", spacy_tokens_path, ], pvolumes={"/mnt": clean_step.pvolume}) vectorize_step = dsl.ContainerOp(name='vectorize', image='maximmold/tfidf_vectorizer:0.1', command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", spacy_tokens_path, "--out-path", tfidf_vectors_path, "--max-features", tfidf_max_features, "--ngram-range", tfidf_ngram_range, "--action", "train", "--model-path", tfidf_model_path, ], pvolumes={"/mnt": tokenize_step.pvolume}) predict_step = dsl.ContainerOp(name='predictor', image='maximmold/lr_text_classifier:0.1', command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", tfidf_vectors_path, "--labels-path", labels_path, "--out-path", lr_prediction_path, "--c-param", lr_c_param, "--action", "train", "--model-path", lr_model_path, ], pvolumes={"/mnt": vectorize_step.pvolume}) try: seldon_config = yaml.load( open("../deploy_pipeline/seldon_production_pipeline.yaml")) except: # If this file is run from the project core directory seldon_config = yaml.load( open("deploy_pipeline/seldon_production_pipeline.yaml")) deploy_step = dsl.ResourceOp( action="apply", name="seldondeploy", k8s_resource=seldon_config, attribute_outputs={"name": "{.metadata.name}"}) deploy_step.after(predict_step) delete_previous_pvc = dsl.ContainerOp( name="deletepreviouspvc", image="bitnami/kubectl", command="kubectl", arguments=[ "delete", "pvc", "-l", "app=nlp,branch={{workflow.parameters.github-branch}}", "--field-selector", "metadata.name!={{workflow.name}}-my-pvc", "--grace-period=0", "--force", "--wait=false" ]) delete_previous_pvc.after(deploy_step) 
patch_pvc_finalizer = dsl.ContainerOp( name="patchpvcfinalizer", image="bitnami/kubectl", command=["bash"], arguments=[ "-c", 'for j in $(kubectl get pvc -o name -l app=nlp,branch={{workflow.parameters.github-branch}} --field-selector metadata.name!={{workflow.name}}-my-pvc -n kubeflow); do kubectl patch $j -p ' "'" '{"metadata":{"finalizers": []}}' "'" ' -n kubeflow --type=merge; done' ]) patch_pvc_finalizer.after(delete_previous_pvc)
def nlp_pipeline(
        namespace="kubeflow",
        seldon_server="SKLEARN_SERVER",
        model_path="gs://seldon-models/v1.13.0-dev/sklearn/iris",
        gateway_endpoint="istio-ingressgateway.istio-system.svc.cluster.local",
        retries=3,
        replicas=10,
        workers=100,
        input_path="data/input-data.txt",
        output_path="data/output-data.txt",
):
    """Pipeline"""
    vop = dsl.VolumeOp(
        name="seldon-batch-pvc",
        resource_name="seldon-batch-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size="2Mi",
    )

    seldon_deployment_yaml = f"""
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: "{{{{workflow.name}}}}"
  namespace: "{namespace}"
spec:
  name: "{{{{workflow.name}}}}"
  predictors:
    - graph:
        children: []
        implementation: "{seldon_server}"
        modelUri: "{model_path}"
        name: classifier
      name: default
"""

    deploy_step = dsl.ResourceOp(
        name="deploy_seldon",
        action="create",
        k8s_resource=yaml.safe_load(seldon_deployment_yaml),
    )

    # Note: the jsonpath expression needs its closing brace inside the quotes,
    # otherwise the shell command is malformed.
    scale_and_wait = dsl.ContainerOp(
        name="scale_and_wait_seldon",
        image="bitnami/kubectl:1.17",
        command="bash",
        arguments=[
            "-c",
            f"sleep 10 && kubectl scale --namespace {namespace} --replicas={replicas} sdep/{{{{workflow.name}}}} && sleep 2 && kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id={{{{workflow.name}}}} -o jsonpath='{{.items[0].metadata.name}}')",
        ],
    )

    download_from_object_store = dsl.ContainerOp(
        name="download-from-object-store",
        image="minio/mc:RELEASE.2020-04-17T08-55-48Z",
        command="sh",
        arguments=[
            "-c",
            f"mc config host add minio-local http://minio.default.svc.cluster.local:9000 minioadmin minioadmin && mc cp minio-local/{input_path} /assets/input-data.txt",
        ],
        pvolumes={"/assets": vop.volume},
    )

    batch_process_step = dsl.ContainerOp(
        name="data_downloader",
        image="seldonio/seldon-core-s2i-python37:1.1.1-rc",
        command="seldon-batch-processor",
        arguments=[
            "--deployment-name", "{{workflow.name}}",
            "--namespace", namespace,
            "--host", gateway_endpoint,
            "--retries", retries,
            "--input-data-path", "/assets/input-data.txt",
            "--output-data-path", "/assets/output-data.txt",
            "--benchmark",
        ],
        pvolumes={"/assets": vop.volume},
    )

    upload_to_object_store = dsl.ContainerOp(
        name="upload-to-object-store",
        image="minio/mc:RELEASE.2020-04-17T08-55-48Z",
        command="sh",
        arguments=[
            "-c",
            f"mc config host add minio-local http://minio.default.svc.cluster.local:9000 minioadmin minioadmin && mc cp /assets/output-data.txt minio-local/{output_path}",
        ],
        pvolumes={"/assets": vop.volume},
    )

    delete_step = dsl.ResourceOp(
        name="delete_seldon",
        action="delete",
        k8s_resource=yaml.safe_load(seldon_deployment_yaml),
    )

    scale_and_wait.after(deploy_step)
    download_from_object_store.after(scale_and_wait)
    batch_process_step.after(download_from_object_store)
    upload_to_object_store.after(batch_process_step)
    delete_step.after(upload_to_object_store)
def mabdeploy_seldon(docker_image1='seldonio/mock_classifier:1.0',
                     docker_image2='seldonio/mock_classifier:1.0',
                     mab_router_image='seldonio/mab_epsilon_greedy:1.1'):
    # Serve two models, load-balanced as a multi-armed bandit, as per
    # https://github.com/SeldonIO/seldon-core/blob/master/notebooks/helm_examples.ipynb
    # In this example no volumes or buckets are required, as the models are
    # baked into the images. Seldon can also be used with volumes -- see the
    # seldon tf mnist example.
    mabjson_template = Template("""
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier-mab"
    },
    "spec": {
        "name": "mnist-classifier-mab",
        "predictors": [
            {
                "name": "abtest",
                "replicas": 1,
                "componentSpecs": [{
                    "spec": {
                        "containers": [
                            {
                                "image": "$image1",
                                "imagePullPolicy": "IfNotPresent",
                                "name": "classifier-1",
                                "resources": {
                                    "requests": {
                                        "memory": "1Mi"
                                    }
                                }
                            }],
                        "terminationGracePeriodSeconds": 20
                    }
                }, {
                    "metadata": {
                        "labels": {
                            "version": "v2"
                        }
                    },
                    "spec": {
                        "containers": [
                            {
                                "image": "$image2",
                                "imagePullPolicy": "IfNotPresent",
                                "name": "classifier-2",
                                "resources": {
                                    "requests": {
                                        "memory": "1Mi"
                                    }
                                }
                            }
                        ],
                        "terminationGracePeriodSeconds": 20
                    }
                }, {
                    "spec": {
                        "containers": [{
                            "image": "$router",
                            "name": "eg-router"
                        }],
                        "terminationGracePeriodSeconds": 20
                    }
                }],
                "graph": {
                    "name": "eg-router",
                    "type": "ROUTER",
                    "parameters": [
                        {
                            "name": "n_branches",
                            "value": "2",
                            "type": "INT"
                        },
                        {
                            "name": "epsilon",
                            "value": "0.2",
                            "type": "FLOAT"
                        },
                        {
                            "name": "verbose",
                            "value": "1",
                            "type": "BOOL"
                        }
                    ],
                    "children": [
                        {
                            "name": "classifier-1",
                            "endpoint": {
                                "type": "REST"
                            },
                            "type": "MODEL",
                            "children": []
                        },
                        {
                            "name": "classifier-2",
                            "endpoint": {
                                "type": "REST"
                            },
                            "type": "MODEL",
                            "children": []
                        }
                    ]
                }
            }
        ]
    }
}
""")
    mabjson = mabjson_template.substitute({
        'image1': str(docker_image1),
        'image2': str(docker_image2),
        'router': str(mab_router_image)
    })
    mabdeployment = json.loads(mabjson)

    deploy = dsl.ResourceOp(name="deploy",
                            k8s_resource=mabdeployment,
                            action="apply",
                            success_condition='status.state == Available')
def patch_notebook_volume_mounts_op(tasks: List[models.Task], namespace: str):
    """
    Creates a kfp.dsl.ResourceOp that patches the notebook server,
    setting volumes and volumeMounts for all tasks.

    Parameters
    ----------
    tasks : List[models.Task]
    namespace : str

    Returns
    -------
    kfp.dsl.ResourceOp
    """
    k8s_resource = {
        "apiVersion": "kubeflow.org/v1",
        "kind": "Notebook",
        "metadata": {
            "name": "server",
            "namespace": namespace,
            "labels": {"app": "server"},
        },
        "spec": {
            "template": {
                "spec": {
                    "containers": [
                        {
                            "name": "server",
                            "image": "platiagro/platiagro-notebook-image:0.3.0",
                            "env": [
                                {"name": "EXPERIMENT_ID", "value": "notebook"},
                                {"name": "OPERATOR_ID", "value": "notebook"},
                                {
                                    "name": "MINIO_ENDPOINT",
                                    "value": "minio.platiagro:9000",
                                },
                                {
                                    "name": "MINIO_ACCESS_KEY",
                                    "valueFrom": {
                                        "secretKeyRef": {
                                            "key": "MINIO_ACCESS_KEY",
                                            "name": "minio-secrets",
                                        }
                                    },
                                },
                                {
                                    "name": "MINIO_SECRET_KEY",
                                    "valueFrom": {
                                        "secretKeyRef": {
                                            "key": "MINIO_SECRET_KEY",
                                            "name": "minio-secrets",
                                        }
                                    },
                                },
                            ],
                            "volumeMounts": [
                                {
                                    "mountPath": "/home/jovyan/tasks",
                                    "name": "vol-tasks",
                                },
                                {
                                    "mountPath": "/home/jovyan/experiments",
                                    "name": "vol-experiments",
                                },
                                {"mountPath": "/tmp/data", "name": "vol-datasets"},
                            ],
                        }
                    ],
                    "serviceAccountName": "default-editor",
                    "ttlSecondsAfterFinished": 300,
                    "volumes": [
                        {
                            "name": "vol-tasks",
                            "persistentVolumeClaim": {"claimName": "vol-tasks"},
                        },
                        {
                            "name": "vol-experiments",
                            "persistentVolumeClaim": {"claimName": "vol-experiments"},
                        },
                        {
                            "name": "vol-datasets",
                            "persistentVolumeClaim": {"claimName": "vol-datasets"},
                        },
                    ],
                }
            }
        },
    }

    for task in tasks:
        name = f"vol-task-{task.uuid}"
        mount_path = f"{TASK_VOLUME_MOUNT_PATH}/{task.name}"
        k8s_resource["spec"]["template"]["spec"]["volumes"].append(
            {
                "name": name,
                "persistentVolumeClaim": {
                    "claimName": name,
                },
            }
        )
        k8s_resource["spec"]["template"]["spec"]["containers"][0][
            "volumeMounts"
        ].append(
            {
                "mountPath": mount_path,
                "name": name,
            }
        )

    return dsl.ResourceOp(
        name="patch-notebook",
        k8s_resource=k8s_resource,
        action="apply",
        attribute_outputs={
            "name": "{.metadata.name}",
            "created_at": datetime.utcnow().isoformat(),
        },  # gives this ResourceOp a unique cache key
    )
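# A minimal usage sketch for the op above, assuming it is called from inside a
# @dsl.pipeline function. The SimpleNamespace stand-ins are hypothetical; the
# real models.Task comes from this project's models package, and only the
# `uuid` and `name` attributes are read here:
from types import SimpleNamespace

@dsl.pipeline(name="patch-notebook-pipeline")
def patch_notebook_pipeline():
    tasks = [SimpleNamespace(uuid="0000-1111", name="my-task")]
    patch_notebook_volume_mounts_op(tasks=tasks, namespace="anonymous")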
def mnist_tf_volume(
        docker_repo_training='seldonio/deepmnistclassifier_trainer',
        docker_tag_training='0.3',
        docker_repo_serving='seldonio/deepmnistclassifier_runtime',
        docker_tag_serving='0.3'):
    # Use a volume for storing the model: the model is saved to the volume
    # and mounted into a pre-defined image for serving. Alternatively the
    # model can be baked into the image -- for that see mabdeploy-seldon.py.
    # Requires seldon v0.3.0 or higher.
    modelvolop = dsl.VolumeOp(name="modelpvc",
                              resource_name="modelpvc",
                              size="50Mi",
                              modes=dsl.VOLUME_MODE_RWO)

    tfjobjson_template = Template("""
{
    "apiVersion": "kubeflow.org/v1beta1",
    "kind": "TFJob",
    "metadata": {
        "name": "mnist-train-{{workflow.uid}}",
        "ownerReferences": [
            {
                "apiVersion": "argoproj.io/v1alpha1",
                "kind": "Workflow",
                "controller": true,
                "name": "{{workflow.name}}",
                "uid": "{{workflow.uid}}"
            }
        ]
    },
    "spec": {
        "tfReplicaSpecs": {
            "Worker": {
                "replicas": 1,
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "image": "$dockerrepotraining:$dockertagtraining",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "restartPolicy": "OnFailure",
                        "volumes": [
                            {
                                "name": "persistent-storage",
                                "persistentVolumeClaim": {
                                    "claimName": "$modelpvc"
                                }
                            }
                        ]
                    }
                },
                "tfReplicaType": "MASTER"
            }
        }
    }
}
""")
    tfjobjson = tfjobjson_template.substitute({
        'dockerrepotraining': str(docker_repo_training),
        'dockertagtraining': str(docker_tag_training),
        'modelpvc': modelvolop.outputs["name"]
    })
    tfjob = json.loads(tfjobjson)

    train = dsl.ResourceOp(
        name="train",
        k8s_resource=tfjob,
        success_condition='status.replicaStatuses.Worker.succeeded == 1')

    seldon_serving_json_template = Template("""
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [
                    {
                        "spec": {
                            "containers": [
                                {
                                    "image": "$dockerreposerving:$dockertagserving",
                                    "imagePullPolicy": "Always",
                                    "name": "mnist-classifier",
                                    "volumeMounts": [
                                        {
                                            "mountPath": "/data",
                                            "name": "persistent-storage"
                                        }
                                    ]
                                }
                            ],
                            "terminationGracePeriodSeconds": 1,
                            "volumes": [
                                {
                                    "name": "persistent-storage",
                                    "persistentVolumeClaim": {
                                        "claimName": "$modelpvc"
                                    }
                                }
                            ]
                        }
                    }
                ],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "mnist-classifier",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}
""")
    seldon_serving_json = seldon_serving_json_template.substitute({
        'dockerreposerving': str(docker_repo_serving),
        'dockertagserving': str(docker_tag_serving),
        'modelpvc': modelvolop.outputs["name"]
    })
    seldon_deployment = json.loads(seldon_serving_json)

    serve = dsl.ResourceOp(
        name='serve',
        k8s_resource=seldon_deployment,
        success_condition='status.state == Available').after(train)
def mnist_train_pipeline(docker_org="index.docker.io/seldonio",
                         train_container_version="0.2",
                         serve_container_version="0.1"):
    vop = dsl.VolumeOp(name="create_pvc",
                       resource_name="nfs-1",
                       modes=dsl.VOLUME_MODE_RWO,
                       size="10G")
    volume = vop.volume

    train = dsl.ContainerOp(
        name='sk-train',
        image=f"{docker_org}/skmnistclassifier_trainer:{train_container_version}",
        pvolumes={"/data": volume})

    seldon_serving_json_template = Template("""
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [
                    {
                        "spec": {
                            "containers": [
                                {
                                    "image": "$dockerreposerving:$dockertagserving",
                                    "imagePullPolicy": "Always",
                                    "name": "mnist-classifier",
                                    "volumeMounts": [
                                        {
                                            "mountPath": "/data",
                                            "name": "persistent-storage"
                                        }
                                    ]
                                }
                            ],
                            "terminationGracePeriodSeconds": 1,
                            "volumes": [
                                {
                                    "name": "persistent-storage",
                                    "persistentVolumeClaim": {
                                        "claimName": "$modelpvc"
                                    }
                                }
                            ]
                        }
                    }
                ],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "mnist-classifier",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}
""")
    seldon_serving_json = seldon_serving_json_template.substitute({
        'dockerreposerving': f"{docker_org}/skmnistclassifier_runtime",
        'dockertagserving': str(serve_container_version),
        'modelpvc': vop.outputs["name"]
    })
    seldon_deployment = json.loads(seldon_serving_json)

    serve = dsl.ResourceOp(
        name='serve',
        k8s_resource=seldon_deployment,
        success_condition='status.state == Available').after(train)
def resource_pipeline():
    op = dsl.ResourceOp(name='resource-job',
                        k8s_resource=json.loads(_job_manifest),
                        action='create')
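# _job_manifest is defined elsewhere in the original module. A plausible
# minimal value is sketched below -- hypothetical, for illustration only:
_job_manifest = """
{
    "apiVersion": "batch/v1",
    "kind": "Job",
    "metadata": {
        "name": "resource-job"
    },
    "spec": {
        "template": {
            "spec": {
                "containers": [{
                    "name": "main",
                    "image": "alpine:3.12",
                    "command": ["echo", "hello"]
                }],
                "restartPolicy": "Never"
            }
        }
    }
}
"""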
def trusted_ai(
        namespace: str = "anonymous",
        fgsm_attack_epsilon: str = '0.2',
        model_class_file: str = 'PyTorchModel.py',
        model_class_name: str = 'ThreeLayerCNN',
        feature_testset_path: str = 'processed_data/X_test.npy',
        label_testset_path: str = 'processed_data/y_test.npy',
        protected_label_testset_path: str = 'processed_data/p_test.npy',
        favorable_label: str = '0.0',
        unfavorable_label: str = '1.0',
        privileged_groups: str = "[{'race': 0.0}]",
        unprivileged_groups: str = "[{'race': 4.0}]",
        loss_fn: str = 'torch.nn.CrossEntropyLoss()',
        optimizer: str = 'torch.optim.Adam(model.parameters(), lr=0.001)',
        clip_values: str = '(0, 1)',
        nb_classes: str = '2',
        input_shape: str = '(1,3,64,64)'):
    job_manifest = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": "trusted-ai-train-job",
            "namespace": namespace
        },
        "spec": {
            "ttlSecondsAfterFinished": 100,
            "template": {
                "metadata": {
                    "annotations": {
                        "sidecar.istio.io/inject": "false"
                    }
                },
                "spec": {
                    "restartPolicy": "Never",
                    "containers": [{
                        "name": "classification-training",
                        "image": "aipipeline/gender-classification:latest",
                        "command": [
                            "python", "-u",
                            "gender_classification_training.py",
                            "--data_bucket", "mlpipeline",
                            "--result_bucket", "mlpipeline"
                        ],
                        "env": [{
                            'name': 'S3_ENDPOINT',
                            'value': 'minio-service.kubeflow:9000'
                        }]
                    }]
                }
            }
        }
    }

    train_step = dsl.ResourceOp(name="trust-ai-train-step",
                                k8s_resource=job_manifest,
                                action='create',
                                success_condition='status.succeeded > 0',
                                failure_condition='status.failed > 0')

    fairness_check = fairness_check_ops(
        model_id='training-example',
        model_class_file=model_class_file,
        model_class_name=model_class_name,
        feature_testset_path=feature_testset_path,
        label_testset_path=label_testset_path,
        protected_label_testset_path=protected_label_testset_path,
        favorable_label=favorable_label,
        unfavorable_label=unfavorable_label,
        privileged_groups=privileged_groups,
        unprivileged_groups=unprivileged_groups,
        data_bucket_name='mlpipeline',
        result_bucket_name='mlpipeline').after(
            train_step).set_image_pull_policy("Always")

    robustness_check = robustness_check_ops(
        model_id='training-example',
        epsilon=fgsm_attack_epsilon,
        model_class_file=model_class_file,
        model_class_name=model_class_name,
        feature_testset_path=feature_testset_path,
        label_testset_path=label_testset_path,
        loss_fn=loss_fn,
        optimizer=optimizer,
        clip_values=clip_values,
        nb_classes=nb_classes,
        input_shape=input_shape,
        data_bucket_name='mlpipeline',
        result_bucket_name='mlpipeline').after(
            train_step).set_image_pull_policy("Always")
def undeploy():
    # `resource` is assumed to be defined at module scope (elided from this snippet).
    dsl.ResourceOp(name="undeploy", k8s_resource=resource, action="delete")
def mnist_tf(docker_secret='docker-config',
             training_repo='https://github.com/kubeflow/example-seldon.git',
             training_branch='master',
             training_files='./example-seldon/models/tf_mnist/train/*',
             docker_repo_training='seldonio/deepmnistclassifier_trainer',
             docker_tag_training='0.3',
             serving_repo='https://github.com/kubeflow/example-seldon.git',
             serving_branch='master',
             serving_files='./example-seldon/models/tf_mnist/runtime/*',
             docker_repo_serving='seldonio/deepmnistclassifier_runtime',
             docker_tag_serving='0.3'):
    # We will be pushing images, so a docker secret is needed. Create one from
    # a local config with:
    # kubectl create secret generic docker-config --from-file=config.json=${DOCKERHOME}/config.json --type=kubernetes.io/config
    secret = k8s_client.V1Volume(
        name="docker-config-secret",
        secret=k8s_client.V1SecretVolumeSource(secret_name=docker_secret))

    # Use one volume for storing the model...
    modelvolop = dsl.VolumeOp(name="modelpvc",
                              resource_name="modelpvc",
                              size="50Mi",
                              modes=dsl.VOLUME_MODE_RWO)

    # ...and another as a working directory shared between steps.
    wkdirop = dsl.VolumeOp(name="wkdirpvc",
                           resource_name="wkdirpvc",
                           size="50Mi",
                           modes=dsl.VOLUME_MODE_RWO)

    # Clone the training code and move it to the workspace dir, as kaniko
    # (next step) expects it there.
    clone = dsl.ContainerOp(
        name="clone",
        image="alpine/git:latest",
        command=["sh", "-c"],
        arguments=[
            "git clone --depth 1 --branch " + str(training_branch) + " " +
            str(training_repo) + "; cp " + str(training_files) +
            " /workspace; ls /workspace/;"
        ],
        pvolumes={"/workspace": wkdirop.volume})

    # Build and push the image for training.
    build = dsl.ContainerOp(
        name="build",
        image="gcr.io/kaniko-project/executor:latest",
        arguments=[
            "--dockerfile", "Dockerfile", "--destination",
            str(docker_repo_training) + ":" + str(docker_tag_training)
        ],
        pvolumes={"/workspace": clone.pvolume, "/root/.docker/": secret})

    tfjobjson_template = Template("""
{
    "apiVersion": "kubeflow.org/v1beta1",
    "kind": "TFJob",
    "metadata": {
        "name": "mnist-train-{{workflow.uid}}",
        "ownerReferences": [
            {
                "apiVersion": "argoproj.io/v1alpha1",
                "kind": "Workflow",
                "controller": true,
                "name": "{{workflow.name}}",
                "uid": "{{workflow.uid}}"
            }
        ]
    },
    "spec": {
        "tfReplicaSpecs": {
            "Worker": {
                "replicas": 1,
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "image": "$dockerrepotraining:$dockertagtraining",
                                "name": "tensorflow",
                                "volumeMounts": [
                                    {
                                        "mountPath": "/data",
                                        "name": "persistent-storage"
                                    }
                                ]
                            }
                        ],
                        "restartPolicy": "OnFailure",
                        "volumes": [
                            {
                                "name": "persistent-storage",
                                "persistentVolumeClaim": {
                                    "claimName": "$modelpvc"
                                }
                            }
                        ]
                    }
                }
            }
        }
    }
}
""")
    tfjobjson = tfjobjson_template.substitute({
        'dockerrepotraining': str(docker_repo_training),
        'dockertagtraining': str(docker_tag_training),
        'modelpvc': modelvolop.outputs["name"]
    })
    tfjob = json.loads(tfjobjson)

    train = dsl.ResourceOp(
        name="train",
        k8s_resource=tfjob,
        success_condition='status.replicaStatuses.Worker.succeeded == 1'
    ).after(build)

    # Prepare the serving code.
    clone_serving = dsl.ContainerOp(
        name="clone_serving",
        image="alpine/git:latest",
        command=["sh", "-c"],
        arguments=[
            "rm -rf /workspace/*; git clone --depth 1 --branch " +
            str(serving_branch) + " " + str(serving_repo) + "; cp " +
            str(serving_files) + " /workspace; ls /workspace/;"
        ],
        pvolumes={"/workspace": wkdirop.volume}).after(train)

    build_serving = dsl.ContainerOp(
        name="build_serving",
        image="gcr.io/kaniko-project/executor:latest",
        arguments=[
            "--dockerfile", "Dockerfile", "--destination",
            str(docker_repo_serving) + ":" + str(docker_tag_serving)
        ],
        pvolumes={"/workspace": clone_serving.pvolume,
                  "/root/.docker/": secret}).after(clone_serving)

    seldon_serving_json_template = Template("""
{
    "apiVersion": "machinelearning.seldon.io/v1alpha2",
    "kind": "SeldonDeployment",
    "metadata": {
        "labels": {
            "app": "seldon"
        },
        "name": "mnist-classifier"
    },
    "spec": {
        "annotations": {
            "deployment_version": "v1",
            "project_name": "MNIST Example"
        },
        "name": "mnist-classifier",
        "predictors": [
            {
                "annotations": {
                    "predictor_version": "v1"
                },
                "componentSpecs": [
                    {
                        "spec": {
                            "containers": [
                                {
                                    "image": "$dockerreposerving:$dockertagserving",
                                    "imagePullPolicy": "Always",
                                    "name": "mnist-classifier",
                                    "volumeMounts": [
                                        {
                                            "mountPath": "/data",
                                            "name": "persistent-storage"
                                        }
                                    ]
                                }
                            ],
                            "terminationGracePeriodSeconds": 1,
                            "volumes": [
                                {
                                    "name": "persistent-storage",
                                    "persistentVolumeClaim": {
                                        "claimName": "$modelpvc"
                                    }
                                }
                            ]
                        }
                    }
                ],
                "graph": {
                    "children": [],
                    "endpoint": {
                        "type": "REST"
                    },
                    "name": "mnist-classifier",
                    "type": "MODEL"
                },
                "name": "mnist-classifier",
                "replicas": 1
            }
        ]
    }
}
""")
    seldon_serving_json = seldon_serving_json_template.substitute({
        'dockerreposerving': str(docker_repo_serving),
        'dockertagserving': str(docker_tag_serving),
        'modelpvc': modelvolop.outputs["name"]
    })
    seldon_deployment = json.loads(seldon_serving_json)

    serve = dsl.ResourceOp(
        name='serve',
        k8s_resource=seldon_deployment,
        success_condition='status.state == Available'
    ).after(build_serving)
def create_resource_op(operators, project_id, experiment_id, deployment_id,
                       deployment_name):
    """
    Creates a kfp.dsl.ResourceOp from a list of operators.

    Parameters
    ----------
    operators : list
    project_id : str
    experiment_id : str
    deployment_id : str
    deployment_name : str

    Returns
    -------
    kfp.dsl.ResourceOp
    """
    component_specs = []

    for operator in operators:
        memory_limit = operator.task.memory_limit
        if memory_limit is None:
            memory_limit = TASK_DEFAULT_MEMORY_LIMIT
        memory_request = operator.task.memory_request
        if memory_request is None:
            memory_request = TASK_DEFAULT_MEMORY_REQUEST
        component_specs.append(
            COMPONENT_SPEC.substitute({
                "image": TASK_DEFAULT_DEPLOYMENT_IMAGE,
                "operatorId": operator.uuid,
                "experimentId": experiment_id,
                "deploymentId": deployment_id,
                "taskId": operator.task.uuid,
                "memoryLimit": memory_limit,
                "memoryRequest": memory_request,
                "taskName": operator.task.name,
                "nvidiaVisibleDevices": TASK_NVIDIA_VISIBLE_DEVICES,
            }))

    first = None
    graph = defaultdict(list)
    for operator in operators:
        if len(operator.dependencies) == 0:
            first = operator.uuid
        for dependency_id in operator.dependencies:
            graph[dependency_id].append({operator.uuid: graph[operator.uuid]})

    if first is None:
        raise ValueError("deployment can't have cycles")

    def build_graph(operator_id, children):
        if len(children) > 1:
            raise ValueError("deployment can't have multiple dependencies")
        elif len(children) == 1:
            child_operator_id, children = next(iter(children[0].items()))
            children = build_graph(child_operator_id, children)
        else:
            children = ""

        return GRAPH.substitute({
            "name": operator_id,
            "children": children,
        })

    graph = build_graph(operator_id=first, children=graph[first])
    graph = loads(graph)
    graph["logger"] = {
        "mode": "response",
        "url": f"{SELDON_LOGGER_ENDPOINT}/projects/{project_id}/deployments/{deployment_id}/responses",
    }

    seldon_deployment = SELDON_DEPLOYMENT.substitute({
        "namespace": KF_PIPELINES_NAMESPACE,
        "deploymentId": deployment_id,
        "componentSpecs": ",".join(component_specs),
        "graph": dumps(graph),
        "projectId": project_id,
        "restTimeout": SELDON_REST_TIMEOUT,
    })
    sdep_resource = loads(seldon_deployment)

    # mounts the "/tmp/data" volume from the experiment (if it exists)
    sdep_resource = mount_volume_from_experiment(sdep_resource, experiment_id)

    resource_op = dsl.ResourceOp(
        name="deployment",
        k8s_resource=sdep_resource,
        success_condition="status.state == Available",
    ).set_timeout(int(DEPLOYMENT_INIT_TIMEOUT))

    return resource_op
def fashion_mnist_pipeline(name="fashion-mnist-{{workflow.uid}}",
                           katib_namespace="<SET_USER_NAMESPACE>",
                           goal=0.9,
                           max_trial_count=12,
                           parallel_trial_count=3,
                           training_steps=5,
                           training_image="<SET_TRAINING_IMAGE_NAME>",
                           training_namespace="kubeflow",
                           export_dir="gs://<SET_BUCKET_NAME>/export/002"):
    ### Step 1: Hyper-parameter tuning with Katib
    objectiveConfig = {
        "type": "maximize",
        "goal": goal,
        "objectiveMetricName": "val_accuracy",
        "additionalMetricNames": ["loss", "accuracy"]
    }
    algorithmConfig = {"algorithmName": "random"}
    metricsCollectorSpec = {"collector": {"kind": "StdOut"}}
    parameters = [
        {
            "name": "--tf-learning-rate",
            "parameterType": "double",
            "feasibleSpace": {
                "min": "0.001",
                "max": "0.05"
            }
        },
    ]
    rawTemplate = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": "{{.Trial}}",
            "namespace": "{{.NameSpace}}"
        },
        "spec": {
            "template": {
                "spec": {
                    "restartPolicy": "Never",
                    "containers": [{
                        "name": "{{.Trial}}",
                        "image": str(training_image),
                        "imagePullPolicy": "Always",
                        "command": [
                            "python /opt/model.py --tf-mode=local {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"
                        ]
                    }]
                }
            }
        }
    }
    trialTemplate = {"goTemplate": {"rawTemplate": json.dumps(rawTemplate)}}

    katib_experiment_launcher_op = components.load_component_from_url(
        'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/katib-launcher/component.yaml'
    )
    katib_op = katib_experiment_launcher_op(
        experiment_name=name,
        experiment_namespace=katib_namespace,
        parallel_trial_count=parallel_trial_count,
        max_trial_count=max_trial_count,
        objective=str(objectiveConfig),
        algorithm=str(algorithmConfig),
        trial_template=str(trialTemplate),
        parameters=str(parameters),
        metrics_collector=str(metricsCollectorSpec),
        delete_finished_experiment=False)

    ### Step 2: Convert the tuning result to extract the optimal hyperparameters
    convert_op = components.func_to_container_op(convert_result)
    op2 = convert_op(katib_op.output)

    ### Step 3: Training
    training_template = Template("""
{
    "apiVersion": "kubeflow.org/v1",
    "kind": "TFJob",
    "metadata": {
        "generateName": "tfjob",
        "name": "$name",
        "namespace": "$namespace"
    },
    "spec": {
        "tfReplicaSpecs": {
            "Chief": {
                "replicas": 1,
                "restartPolicy": "OnFailure",
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "name": "tensorflow",
                                "image": "$image",
                                "command": [
                                    "python",
                                    "/opt/model.py",
                                    "--tf-export-dir=$export",
                                    "--tf-mode=gcs",
                                    "--tf-train-steps=$training_steps",
                                    "$args"
                                ],
                                "env": [
                                    {
                                        "name": "GOOGLE_APPLICATION_CREDENTIALS",
                                        "value": "/var/secrets/user-gcp-sa.json"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "name": "sa",
                                        "mountPath": "/var/secrets",
                                        "readOnly": true
                                    }
                                ]
                            }
                        ],
                        "volumes": [
                            {
                                "name": "sa",
                                "secret": {
                                    "secretName": "user-gcp-sa"
                                }
                            }
                        ]
                    }
                }
            },
            "Worker": {
                "replicas": 2,
                "restartPolicy": "OnFailure",
                "template": {
                    "spec": {
                        "containers": [
                            {
                                "name": "tensorflow",
                                "image": "$image",
                                "command": [
                                    "python",
                                    "/opt/model.py",
                                    "--tf-export-dir=$export",
                                    "--tf-mode=gcs",
                                    "--tf-train-steps=$training_steps",
                                    "$args"
                                ],
                                "env": [
                                    {
                                        "name": "GOOGLE_APPLICATION_CREDENTIALS",
                                        "value": "/var/secrets/user-gcp-sa.json"
                                    }
                                ],
                                "volumeMounts": [
                                    {
                                        "name": "sa",
                                        "mountPath": "/var/secrets",
                                        "readOnly": true
                                    }
                                ]
                            }
                        ],
                        "volumes": [
                            {
                                "name": "sa",
                                "secret": {
                                    "secretName": "user-gcp-sa"
                                }
                            }
                        ]
                    }
                }
            }
        }
    }
}
""")
    trainingjson = training_template.substitute({
        'name': str(name),
        'namespace': str(training_namespace),
        'image': str(training_image),
        'export': str(export_dir),
        'training_steps': training_steps,
        'args': op2.output
    })
    trainingdeployment = json.loads(trainingjson)

    train = dsl.ResourceOp(
        name="train",
        k8s_resource=trainingdeployment,
        action="apply",
        success_condition="status.replicaStatuses.Worker.succeeded==2,status.replicaStatuses.Chief.succeeded==1")
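# convert_result is defined elsewhere in the original example. A hypothetical
# sketch of what such a converter might look like -- it would parse the Katib
# launcher's best-trial output and emit extra command-line args for the TFJob
# template above (the output format assumed here is illustrative):
def convert_result(experiment_result) -> str:
    import json
    params = json.loads(experiment_result)
    return " ".join("%s=%s" % (p["name"], p["value"]) for p in params)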
def nlp_pipeline( csv_url="https://raw.githubusercontent.com/axsauze/reddit-classification-exploration/master/data/reddit_train.csv", csv_encoding="ISO-8859-1", features_column="BODY", labels_column="REMOVED", raw_text_path="/mnt/text.data", labels_path="/mnt/labels.data", clean_text_path="/mnt/clean.data", spacy_tokens_path="/mnt/tokens.data", tfidf_vectors_path="/mnt/tfidf.data", lr_prediction_path="/mnt/prediction.data", tfidf_model_path="/mnt/tfidf.model", lr_model_path="/mnt/lr.model", lr_c_param=0.1, tfidf_max_features=10000, tfidf_ngram_range=3, batch_size="100", ): """ Pipeline """ vop = dsl.VolumeOp(name="my-pvc", resource_name="my-pvc", modes=dsl.VOLUME_MODE_RWO, size="20Mi") download_step = dsl.ContainerOp( name="data_downloader", image="data_downloader:0.1", command="python", arguments=[ "/microservice/pipeline_step.py", "--labels-path", labels_path, "--features-path", raw_text_path, "--csv-url", csv_url, "--csv-encoding", csv_encoding, "--features-column", features_column, "--labels-column", labels_column, ], pvolumes={"/mnt": vop.volume}, ) clean_step = dsl.ContainerOp( name="clean_text", image="clean_text_transformer:0.1", command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", raw_text_path, "--out-path", clean_text_path, ], pvolumes={"/mnt": download_step.pvolume}, ) tokenize_step = dsl.ContainerOp( name="tokenize", image="spacy_tokenizer:0.1", command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", clean_text_path, "--out-path", spacy_tokens_path, ], pvolumes={"/mnt": clean_step.pvolume}, ) vectorize_step = dsl.ContainerOp( name="vectorize", image="tfidf_vectorizer:0.1", command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", spacy_tokens_path, "--out-path", tfidf_vectors_path, "--max-features", tfidf_max_features, "--ngram-range", tfidf_ngram_range, "--action", "train", "--model-path", tfidf_model_path, ], pvolumes={"/mnt": tokenize_step.pvolume}, ) predict_step = dsl.ContainerOp( name="predictor", image="lr_text_classifier:0.1", command="python", arguments=[ "/microservice/pipeline_step.py", "--in-path", tfidf_vectors_path, "--labels-path", labels_path, "--out-path", lr_prediction_path, "--c-param", lr_c_param, "--action", "train", "--model-path", lr_model_path, ], pvolumes={"/mnt": vectorize_step.pvolume}, ) try: seldon_config = yaml.load( open("../deploy_pipeline/seldon_production_pipeline.yaml")) except: # If this file is run from the project core directory seldon_config = yaml.load( open("deploy_pipeline/seldon_production_pipeline.yaml")) deploy_step = dsl.ResourceOp( name="seldondeploy", k8s_resource=seldon_config, attribute_outputs={"name": "{.metadata.name}"}, ) deploy_step.after(predict_step)
def nlp_pipeline( csv_url="https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_multilingual_UK_v1_00.tsv.gz", embed_weights_url="http://nlp.stanford.edu/data/glove.42B.300d.zip", features_column="review_body", labels_column="product_category", raw_text_path='/mnt/text.data', labels_path='/mnt/labels.data', data_folder='/mnt/data', clean_text_path='/mnt/clean.data', tokens_path='/mnt/tokens.data', tfidf_vectors_path='/mnt/tfidf.data', model_prediction_path='/mnt/predicted_train.data', tfidf_model_path='/mnt/tfidf.model', word_index_path='/mnt/word_index.data', embedded_matrix_path='/mnt/embedded_matrix.data', pre_embedded_weights='/mnt/data/glove.42B.300d.txt', train_ratio=0.98, validation_ratio=0.01, test_ratio=0.01, num_words=20000, sentence_max_length=50, deep_model='/mnt/deep_model.model', batch_size='100'): """ Pipeline """ vop = dsl.VolumeOp(name='lr-pvc', resource_name="lr-pvc", modes=["ReadWriteMany"], size="30Gi") download_step = dsl.ContainerOp( name='data_downloader', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/data_downloader/pipeline_step.py", "--labels-path", labels_path, "--data-folder", data_folder, "--features-path", raw_text_path, "--csv-url", csv_url, "--features-column", features_column, "--labels-column", labels_column ], pvolumes={"/mnt": vop.volume}) clean_step = dsl.ContainerOp( name='clean_text', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/clean_text/pipeline_step.py", "--in-path", raw_text_path, "--out-path", clean_text_path, ], pvolumes={ "/mnt": vop.volume }).after(download_step) data_split_step = dsl.ContainerOp( name='data_splitter', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/train_val_test/pipeline_step.py", "--in-path", clean_text_path, "--labels-path", labels_path, "--out-folder", data_folder, "--train-ratio", train_ratio, "--validation-ratio", validation_ratio, "--test-ratio", test_ratio ], pvolumes={ "/mnt": vop.volume }).after(clean_step) tfidf_step = dsl.ContainerOp( name='tfidf', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/tfidf_vectorizer/pipeline_step.py", "--in-path", "/mnt/data/train.data", "--out-path", tokens_path, "--model-path", "/mnt/tfidf.model", "--action", "train", "--ngram-range", 2, "--max-features", 1000, ], pvolumes={ "/mnt": vop.volume }).after(data_split_step) tfidf_step_val = dsl.ContainerOp( name='tfidf_val', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/tfidf_vectorizer/pipeline_step.py", "--in-path", "/mnt/data/val.data", "--out-path", "/mnt/data/tokenized_val.data", "--model-path", "/mnt/tfidf.model", "--action", "predict", ], pvolumes={ "/mnt": vop.volume }).after(tfidf_step) tfidf_step_test = dsl.ContainerOp( name='tfidf_test', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/tfidf_vectorizer/pipeline_step.py", "--in-path", "/mnt/data/test.data", "--out-path", "/mnt/data/tokenized_test.data", "--model-path", "/mnt/tfidf.model", "--action", "predict", ], pvolumes={ "/mnt": vop.volume }).after(tfidf_step) train_step = dsl.ContainerOp( name='predictor', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/lr_text_model/pipeline_step.py", "--in-path", tokens_path, "--out-path", model_prediction_path, "--model-path", 
'/mnt/lr_text.model', "--action", "train", "--c-param", 0.1, ], pvolumes={ "/mnt": vop.volume }).after(tfidf_step) predict_val = dsl.ContainerOp( name='val_predictor', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/lr_text_model/pipeline_step.py", "--in-path", "/mnt/data/tokenized_val.data", "--out-path", '/mnt/predicted_val.data', "--model-path", '/mnt/lr_text.model', "--action", "predict", ], pvolumes={ "/mnt": vop.volume }).after(train_step, tfidf_step_val) predict_test = dsl.ContainerOp( name='test_predictor', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/lr_text_model/pipeline_step.py", "--in-path", "/mnt/data/tokenized_test.data", "--out-path", '/mnt/predicted_test.data', "--model-path", '/mnt/lr_text.model', "--action", "predict", ], pvolumes={ "/mnt": vop.volume }).after(train_step, tfidf_step_test) evaluate_model = dsl.ContainerOp( name='model_evaluator', image='docker.io/cyferino/component-kubeflow:0.0.14', command="python", arguments=[ "/src/pipeline_steps/evaluate_model/pipeline_step.py", "--data-folder", "/mnt/data", "--predicted-train-data", '/mnt/predicted_train.data', "--predicted-val-data", '/mnt/predicted_val.data', "--predicted-test-data", '/mnt/predicted_test.data' ], pvolumes={ "/mnt": vop.volume }, file_outputs={ 'mlpipeline-metrics': '/mlpipeline-metrics.json', 'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json' }, output_artifact_paths={ 'mlpipeline-metrics': '/mlpipeline-metrics.json', 'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json' }).after(train_step, predict_val, predict_test) seldon_config = yaml.load(open("seldon_production_pipeline.yaml")) deploy_step = dsl.ResourceOp( name="seldondeploy", k8s_resource=seldon_config, attribute_outputs={"name": "{.metadata.name}"}) deploy_step.after(train_step)
def resourceop_basic():
    # Start a container. Print out env vars.
    op = dsl.ResourceOp(name='test-step',
                        k8s_resource=json.loads(_CONTAINER_MANIFEST),
                        action='create')
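# In KFP v1 a ResourceOp exposes two default outputs -- the applied manifest
# and the resource name -- which downstream steps can consume. A minimal
# sketch, assuming the same _CONTAINER_MANIFEST module-level variable as the
# snippet above:
def resourceop_outputs_example():
    op = dsl.ResourceOp(name='test-step',
                        k8s_resource=json.loads(_CONTAINER_MANIFEST),
                        action='create')
    # Pass the created resource's name to a follow-up step.
    dsl.ContainerOp(name='print-resource-name',
                    image='alpine:3.12',
                    command=['echo'],
                    arguments=[op.outputs['name']])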