Example #1
def deploy_kubeflow(test_case):
    """Deploy Kubeflow."""
    args = parse_args()
    test_dir = test_case.test_suite.test_dir
    src_root_dir = args.src_root_dir
    namespace = args.namespace
    api_client = deploy_utils.create_k8s_client()
    app_dir = deploy_utils.setup_ks_app(test_dir, src_root_dir, namespace,
                                        args.github_token, api_client)

    # Deploy Kubeflow
    util.run(["ks", "generate", "tf-job-operator", "tf-job-operator"],
             cwd=app_dir)
    util.run(
        ["ks", "generate", "argo", "kubeflow-argo", "--name=kubeflow-argo"],
        cwd=app_dir)
    cmd = "ks param set tf-job-operator namespace " + namespace
    util.run(cmd.split(), cwd=app_dir)
    # cmd = "ks param set tf-job-operator tfJobImage \
    #         gcr.io/kubeflow-images-public/tf_operator:v20180522-77375baf"
    # util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set tf-job-operator tfJobVersion v1beta1"
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set kubeflow-argo namespace " + namespace
    util.run(cmd.split(), cwd=app_dir)
    apply_command = [
        "ks", "apply", "default", "-c", "tf-job-operator", "-c",
        "kubeflow-argo"
    ]
    if args.as_gcloud_user:
        account = deploy_utils.get_gcp_identity()
        logging.info("Impersonate %s", account)
        # If we don't use --as to impersonate the service account, we observe
        # RBAC errors on certain operations. The problem appears to be that we
        # end up using the in-cluster config (i.e., the pod's service account)
        # rather than the GCP service account, which has more privileges.
        apply_command.append("--as=" + account)
    util.run(apply_command, cwd=app_dir)

    # Verify that the TfJob operator is actually deployed.
    tf_job_deployment_name = "tf-job-operator-v1beta1"
    logging.info("Verifying TfJob controller started.")
    util.wait_for_deployment(api_client, namespace, tf_job_deployment_name)

    # Verify that the Argo operator is deployed.
    argo_deployment_name = "workflow-controller"
    logging.info("Verifying Argo controller started.")
    util.wait_for_deployment(api_client, namespace, argo_deployment_name)

    # Change the namespace to "default" to set up the NFS volume and NFS server.
    namespace = "default"

    deploy_utils.set_clusterrole(namespace)
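
This example drives the ksonnet CLI through util.run, a helper from Kubeflow's test utilities. As a rough guide to what that helper does, here is a minimal stand-in, assuming it only needs to execute a command in a working directory, log the output, and fail on a non-zero exit (the real helper may do more):

import logging
import subprocess

def run(command, cwd=None):
    # Hypothetical stand-in for util.run: execute the command, capture
    # combined stdout/stderr, log it, and raise on a non-zero exit code.
    logging.info("Running: %s (cwd=%s)", " ".join(command), cwd)
    result = subprocess.run(command, cwd=cwd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT, text=True)
    logging.info(result.stdout)
    if result.returncode != 0:
        raise RuntimeError("Command %s failed with exit code %d" %
                           (command, result.returncode))
    return result.stdout
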
Example #2
def deploy_kubeflow(test_case):  # pylint: disable=unused-argument
    """Deploy Kubeflow."""
    args = parse_args()
    src_root_dir = args.src_root_dir
    namespace = args.namespace
    api_client = deploy_utils.create_k8s_client()

    manifest_repo_dir = path.join(src_root_dir, "kubeflow", "manifests")
    argo_manifest_dir = path.join(manifest_repo_dir, "argo", "base")
    tfoperator_manifest_dir = path.join(manifest_repo_dir, "tf-training",
                                        "tf-job-operator", "base")

    deploy_utils.setup_test(api_client, namespace)

    apply_args = "-f -"
    if args.as_gcloud_user:
        account = deploy_utils.get_gcp_identity()
        logging.info("Impersonate %s", account)
        # If we don't use --as to impersonate the service account, we observe
        # RBAC errors on certain operations. The problem appears to be that we
        # end up using the in-cluster config (i.e., the pod's service account)
        # rather than the GCP service account, which has more privileges.
        apply_args = " ".join(["--as=" + account, apply_args])

    # Deploy argo
    logging.info("Deploying argo")
    util.run(["kustomize", "edit", "set", "namespace", namespace],
             cwd=argo_manifest_dir)
    util.run(["sh", "-c", "kustomize build | kubectl apply " + apply_args],
             cwd=argo_manifest_dir)

    # Deploy tf-job-operator
    logging.info("Deploying tf-job-operator")
    util.run(["kustomize", "edit", "set", "namespace", namespace],
             cwd=tfoperator_manifest_dir)
    util.run(["sh", "-c", "kustomize build | kubectl apply " + apply_args],
             cwd=tfoperator_manifest_dir)

    # Verify that the TfJob operator is actually deployed.
    tf_job_deployment_name = "tf-job-operator"
    logging.info("Verifying TfJob controller started.")
    util.wait_for_deployment(api_client, namespace, tf_job_deployment_name)

    # Verify that the Argo operator is deployed.
    argo_deployment_name = "workflow-controller"
    logging.info("Verifying Argo controller started.")
    util.wait_for_deployment(api_client, namespace, argo_deployment_name)

    deploy_utils.set_clusterrole(namespace)
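
Both examples finish by blocking on util.wait_for_deployment until the operators' Deployments come up. A minimal sketch of such a wait, assuming the official kubernetes Python client and polling until at least one replica reports available (the real helper's timeout and error handling may differ):

import logging
import time

from kubernetes import client

def wait_for_deployment(api_client, namespace, name, timeout_seconds=600):
    # Hypothetical sketch of a deployment wait: poll the Deployment until
    # at least one replica is available, or give up when the timeout elapses.
    apps = client.AppsV1Api(api_client)
    end_time = time.time() + timeout_seconds
    while time.time() < end_time:
        deployment = apps.read_namespaced_deployment(name, namespace)
        if (deployment.status.available_replicas or 0) >= 1:
            logging.info("Deployment %s is available.", name)
            return deployment
        time.sleep(10)
    raise TimeoutError("Deployment %s in namespace %s did not become "
                       "available within %d seconds." %
                       (name, namespace, timeout_seconds))
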
Example #3
def run_smoke_test(test_case):
    """Run a smoke test."""
    args = parse_args()
    test_dir = test_case.test_suite.test_dir
    src_root_dir = args.src_root_dir
    namespace = args.namespace
    api_client = deploy_utils.create_k8s_client()
    app_dir = deploy_utils.setup_ks_app(test_dir, src_root_dir, namespace,
                                        args.github_token, api_client)

    job_name = "smoke-test-job"
    pvc_name = "kubebench-pvc"
    pvc_mount = "/kubebench"
    config_name = "job-config"

    # Set the namespace of the Kubebench job to "default".
    namespace = "default"
    # Deploy Kubebench
    util.run([
        "ks", "generate", "kubebench-job-legacy", job_name,
        "--name=" + job_name, "--namespace=" + namespace
    ], cwd=app_dir)
    cmd = "ks param set " + job_name + " name " + job_name
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + " namespace " + namespace
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + \
            " config_image gcr.io/xyhuang-kubeflow/kubebench-configurator:v20180809-1"
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + \
            " report_image gcr.io/xyhuang-kubeflow/kubebench-tf-cnn-csv-reporter:v20180522-1"
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + " config_args -- --config-file=" + pvc_mount + \
            "/config/" + config_name + ".yaml"
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + " report_args -- --output-file=" + pvc_mount + \
            "/output/results.csv"
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + " pvc_name " + pvc_name
    util.run(cmd.split(), cwd=app_dir)
    cmd = "ks param set " + job_name + " pvc_mount " + pvc_mount
    util.run(cmd.split(), cwd=app_dir)

    apply_command = ["ks", "apply", "default", "-c", "smoke-test-job"]
    if args.as_gcloud_user:
        account = deploy_utils.get_gcp_identity()
        logging.info("Impersonate %s", account)
        # If we don't use --as to impersonate the service account, we observe
        # RBAC errors on certain operations. The problem appears to be that we
        # end up using the in-cluster config (i.e., the pod's service account)
        # rather than the GCP service account, which has more privileges.
        apply_command.append("--as=" + account)
    util.run(apply_command, cwd=app_dir)

    cmd = "kubectl get pods -n " + namespace
    util.run(cmd.split(), cwd=app_dir)
    # Give the Kubebench job time to complete before checking its status.
    time.sleep(240)
    cmd = "kubectl get pods -n " + namespace
    util.run(cmd.split(), cwd=app_dir)

    ret = deploy_utils.check_kb_job(job_name, namespace)
    if not ret:
        logging.error("Job FAILED.")
        deploy_utils.cleanup_kb_job(app_dir, job_name)
        sys.exit(1)

    deploy_utils.cleanup_kb_job(app_dir, job_name)
    sys.exit(0)
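
A design note on the fixed time.sleep(240) above: a polling loop fails faster on broken jobs and tolerates slow ones. A generic sketch of such a loop, where check_fn stands in for a hypothetical "job has finished" predicate (deploy_utils.check_kb_job reports pass/fail rather than completion, so a separate completion check would be needed to use it here):

import logging
import time

def wait_for_condition(check_fn, timeout_seconds=600, poll_interval=10):
    # Hypothetical polling helper: call check_fn until it returns True,
    # or raise once the timeout elapses.
    end_time = time.time() + timeout_seconds
    while time.time() < end_time:
        if check_fn():
            return
        logging.info("Condition not met; sleeping %ds.", poll_interval)
        time.sleep(poll_interval)
    raise TimeoutError("Condition not met within %d seconds." % timeout_seconds)
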