def deploy_kubeflow(test_case):
  """Deploy Kubeflow (tf-job-operator + argo) into the test namespace via ksonnet.

  NOTE(review): this definition appears to be shadowed by a later
  kustomize-based `deploy_kubeflow` in the same module — confirm which one
  is intended to win.

  Args:
    test_case: The test case object; its test_suite.test_dir is used as the
      working directory for the ksonnet app.
  """
  args = parse_args()
  test_dir = test_case.test_suite.test_dir
  src_root_dir = args.src_root_dir
  namespace = args.namespace
  api_client = deploy_utils.create_k8s_client()
  app_dir = deploy_utils.setup_ks_app(test_dir, src_root_dir, namespace,
                                      args.github_token, api_client)

  # Generate the ksonnet components for the operators we deploy.
  util.run(["ks", "generate", "tf-job-operator", "tf-job-operator"],
           cwd=app_dir)
  util.run(
      ["ks", "generate", "argo", "kubeflow-argo", "--name=kubeflow-argo"],
      cwd=app_dir)

  # Point both components at the test namespace and pin the TFJob version.
  param_commands = (
      "ks param set tf-job-operator namespace " + namespace,
      "ks param set tf-job-operator tfJobVersion v1beta1",
      "ks param set kubeflow-argo namespace " + namespace,
  )
  for param_command in param_commands:
    util.run(param_command.split(), cwd=app_dir)

  apply_command = [
      "ks", "apply", "default", "-c", "tf-job-operator", "-c", "kubeflow-argo"
  ]
  if args.as_gcloud_user:
    account = deploy_utils.get_gcp_identity()
    logging.info("Impersonate %s", account)
    # If we don't use --as to impersonate the service account then we
    # observe RBAC errors when doing certain operations. The problem appears
    # to be that we end up using the in cluster config (e.g. pod service
    # account) and not the GCP service account which has more privileges.
    apply_command.append("--as=" + account)
  util.run(apply_command, cwd=app_dir)

  # Verify that the TfJob operator is actually deployed.
  tf_job_deployment_name = "tf-job-operator-v1beta1"
  logging.info("Verifying TfJob controller started.")
  util.wait_for_deployment(api_client, namespace, tf_job_deployment_name)

  # Verify that the Argo operator is deployed.
  argo_deployment_name = "workflow-controller"
  logging.info("Verifying Argo controller started.")
  util.wait_for_deployment(api_client, namespace, argo_deployment_name)

  # Switch to the default namespace to set up nfs-volume and nfs-server.
  namespace = "default"
  deploy_utils.set_clusterrole(namespace)
def deploy_kubeflow(test_case):  # pylint: disable=unused-argument
  """Deploy Kubeflow (argo + tf-job-operator) from kustomize manifests.

  Args:
    test_case: Unused; kept to match the test-harness entry-point signature.
  """
  args = parse_args()
  src_root_dir = args.src_root_dir
  namespace = args.namespace
  api_client = deploy_utils.create_k8s_client()

  manifest_repo_dir = path.join(src_root_dir, "kubeflow", "manifests")
  argo_manifest_dir = path.join(manifest_repo_dir, "argo", "base")
  tfoperator_manifest_dir = path.join(manifest_repo_dir, "tf-training",
                                      "tf-job-operator", "base")

  deploy_utils.setup_test(api_client, namespace)

  apply_args = "-f -"
  if args.as_gcloud_user:
    account = deploy_utils.get_gcp_identity()
    logging.info("Impersonate %s", account)
    # If we don't use --as to impersonate the service account then we
    # observe RBAC errors when doing certain operations. The problem appears
    # to be that we end up using the in cluster config (e.g. pod service
    # account) and not the GCP service account which has more privileges.
    apply_args = " ".join(["--as=" + account, apply_args])

  def _kustomize_deploy(component, manifest_dir):
    # Rewrite the manifest's namespace, then pipe the rendered YAML into
    # kubectl apply (the pipe requires a shell).
    logging.info("Deploying %s", component)
    util.run(["kustomize", "edit", "set", "namespace", namespace],
             cwd=manifest_dir)
    util.run(["sh", "-c", "kustomize build | kubectl apply " + apply_args],
             cwd=manifest_dir)

  _kustomize_deploy("argo", argo_manifest_dir)
  _kustomize_deploy("tf-job-operator", tfoperator_manifest_dir)

  # Verify that the TfJob operator is actually deployed.
  tf_job_deployment_name = "tf-job-operator"
  logging.info("Verifying TfJob controller started.")
  util.wait_for_deployment(api_client, namespace, tf_job_deployment_name)

  # Verify that the Argo operator is deployed.
  argo_deployment_name = "workflow-controller"
  logging.info("Verifying Argo controller started.")
  util.wait_for_deployment(api_client, namespace, argo_deployment_name)

  deploy_utils.set_clusterrole(namespace)
def run_smoke_test(test_case):
  """Run a kubebench smoke-test job and exit with the job's status.

  Exits the process: status 0 when the kubebench job succeeds, 1 otherwise.

  Args:
    test_case: The test case object; its test_suite.test_dir is used as the
      working directory for the ksonnet app.
  """
  args = parse_args()
  test_dir = test_case.test_suite.test_dir
  src_root_dir = args.src_root_dir
  namespace = args.namespace
  api_client = deploy_utils.create_k8s_client()
  app_dir = deploy_utils.setup_ks_app(test_dir, src_root_dir, namespace,
                                      args.github_token, api_client)

  job_name = "smoke-test-job"
  pvc_name = "kubebench-pvc"
  pvc_mount = "/kubebench"
  config_name = "job-config"

  # The kubebench job itself runs in the default namespace.
  namespace = "default"

  # Generate the kubebench job component.
  util.run([
      "ks", "generate", "kubebench-job-legacy", job_name, "--name=" + job_name,
      "--namespace=" + namespace
  ], cwd=app_dir)

  # Each entry is "<param> <value...>"; all are applied with
  # `ks param set <job_name> ...`.
  param_settings = [
      "name " + job_name,
      "namespace " + namespace,
      "config_image gcr.io/xyhuang-kubeflow/kubebench-configurator:v20180809-1",
      "report_image gcr.io/xyhuang-kubeflow/kubebench-tf-cnn-csv-reporter:v20180522-1",
      "config_args -- --config-file=" + pvc_mount + "/config/" + config_name +
      ".yaml",
      "report_args -- --output-file=" + pvc_mount + "/output/results.csv",
      "pvc_name " + pvc_name,
      "pvc_mount " + pvc_mount,
  ]
  for setting in param_settings:
    util.run(("ks param set " + job_name + " " + setting).split(), cwd=app_dir)

  apply_command = ["ks", "apply", "default", "-c", "smoke-test-job"]
  if args.as_gcloud_user:
    account = deploy_utils.get_gcp_identity()
    logging.info("Impersonate %s", account)
    # If we don't use --as to impersonate the service account then we
    # observe RBAC errors when doing certain operations. The problem appears
    # to be that we end up using the in cluster config (e.g. pod service
    # account) and not the GCP service account which has more privileges.
    apply_command.append("--as=" + account)
  util.run(apply_command, cwd=app_dir)

  # Snapshot the pods, give the job time to run, then snapshot again.
  util.run(("kubectl get pods -n " + namespace).split(), cwd=app_dir)
  time.sleep(240)
  util.run(("kubectl get pods -n " + namespace).split(), cwd=app_dir)

  succeeded = deploy_utils.check_kb_job(job_name, namespace)
  if not succeeded:
    logging.error("Job FAILED.")
  deploy_utils.cleanup_kb_job(app_dir, job_name)
  sys.exit(0 if succeeded else 1)