示例#1
0
def test_katib_is_ready(record_xml_attribute, namespace):
    """Wait for each Katib deployment to start in the given namespace.

    Args:
      record_xml_attribute: Forwarded to util.set_pytest_junit together
        with this test's name.
      namespace: The namespace Kubeflow is deployed to.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_katib_is_ready")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    katib_deployments = (
        "katib-controller",
        "katib-db",
        "katib-manager",
        "katib-ui",
    )
    for katib_deployment in katib_deployments:
        logging.info("Verifying that deployment %s started...",
                     katib_deployment)
        # The trailing 10 is handed to util.wait_for_deployment unchanged
        # (presumably a timeout -- confirm against that helper).
        util.wait_for_deployment(api_client, namespace, katib_deployment, 10)
示例#2
0
def test_kf_is_ready(namespace, use_basic_auth):
    """Wait for the expected Kubeflow deployments and stateful sets.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: When truthy the "basic-auth" deployment is expected;
        otherwise "iap-enabler" is expected instead.
    """

    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    # Exactly one auth-specific deployment is checked, after the common ones.
    auth_deployment = "basic-auth" if use_basic_auth else "iap-enabler"

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    expected_deployments = [
        "argo-ui",
        "centraldashboard",
        "cert-manager",
        "cloud-endpoints-controller",
        "jupyter-web-app",
        "ml-pipeline",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "notebooks-controller",
        "tf-job-operator",
        "profiles",
        "pytorch-operator",
        "studyjob-controller",
        "workflow-controller",
        auth_deployment,
    ]

    expected_stateful_sets = ["backend-updater"]

    # TODO(jlewi): Might want to parallelize this.
    for expected in expected_deployments:
        logging.info("Verifying that deployment %s started...", expected)
        util.wait_for_deployment(api_client, namespace, expected)

    for expected in expected_stateful_sets:
        logging.info("Verifying that statefulset %s started...", expected)
        util.wait_for_statefulset(api_client, namespace, expected)
示例#3
0
def test_gcp_access(record_xml_attribute, namespace, app_path, project):
    """Test that Kubeflow on GCP was configured with workload identity and
    GCP service account credentials.

    The checks only run when get_platform_app_name(app_path) reports the
    "gcp" platform; for any other platform the test returns without
    checking anything.

    Args:
      record_xml_attribute: Forwarded to util.set_pytest_junit together
        with this test's name.
      namespace: The namespace Kubeflow is deployed to.
      app_path: Passed to get_platform_app_name to obtain the platform and
        the app name.
      project: GCP project ID used to build the service account names.

    Raises:
      Exception: If the user service account's IAM policy has no
        roles/owner binding, if the admin service account is not a member
        of that binding, or if there is no roles/iam.workloadIdentityUser
        binding.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_gcp_access")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    platform, app_name = get_platform_app_name(app_path)
    if platform != "gcp":
        return

    # check secret
    util.check_secret(api_client, namespace, "user-gcp-sa")

    cred = GoogleCredentials.get_application_default()
    # Create the Cloud IAM service object
    service = googleapiclient.discovery.build('iam',
                                              'v1',
                                              credentials=cred)

    user_sa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (
        project, app_name, project)
    admin_sa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (
        app_name, project)

    request = service.projects().serviceAccounts().getIamPolicy(
        resource=user_sa)
    response = request.execute()

    # A policy without any bindings omits the "bindings" key, and the same
    # role may appear in more than one binding, so default to an empty list
    # and merge members per role instead of overwriting them.
    role_to_members = {}
    for binding in response.get('bindings', []):
        members = role_to_members.setdefault(binding['role'], set())
        members.update(binding.get('members', []))

    if 'roles/owner' not in role_to_members:
        raise Exception("roles/owner missing in iam-policy of %s" % user_sa)

    if admin_sa not in role_to_members['roles/owner']:
        # "%v" is not a valid Python conversion and raised ValueError here
        # instead of reporting the problem; use "%s".
        raise Exception("Admin %s should be owner of user %s" %
                        (admin_sa, user_sa))

    workload_identity_role = 'roles/iam.workloadIdentityUser'
    if workload_identity_role not in role_to_members:
        raise Exception(
            "roles/iam.workloadIdentityUser missing in iam-policy of %s" %
            user_sa)
示例#4
0
def deploy_kubeflow(_):
    """Wait for Jupyter and the core operator deployments to start.

    The namespace comes from the arguments returned by parse_args(); the
    single positional argument is ignored.
    """
    namespace = parse_args().namespace
    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    # Verify that Jupyter is actually deployed.
    logging.info("Verifying TfHub started.")
    util.wait_for_statefulset(api_client, namespace, "jupyter")

    # Verify that core components are actually deployed.
    core_deployments = (
        "tf-job-operator-v1beta1",
        "pytorch-operator",
        "studyjob-controller",
    )
    for core_deployment in core_deployments:
        logging.info("Verifying that %s started...", core_deployment)
        util.wait_for_deployment(api_client, namespace, core_deployment)
示例#5
0
def deploy_kubeflow(_):
  """Wait for the TfJob operator, Jupyter and the PyTorch operator.

  The namespace comes from the arguments returned by parse_args(); the
  single positional argument is ignored.
  """
  namespace = parse_args().namespace
  api_client = deploy_utils.create_k8s_client()

  util.load_kube_config()

  # Verify that the TfJob operator is actually deployed.
  logging.info("Verifying TfJob controller started.")
  util.wait_for_deployment(api_client, namespace, "tf-job-operator-v1beta1")

  # Verify that Jupyter is actually deployed.
  logging.info("Verifying TfHub started.")
  util.wait_for_statefulset(api_client, namespace, "jupyter")

  # Verify that PyTorch Operator actually deployed
  logging.info("Verifying PyTorchJob controller started.")
  util.wait_for_deployment(api_client, namespace, "pytorch-operator")
示例#6
0
def test_kf_is_ready(namespace, use_basic_auth, use_istio):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the core deployments, the ingress-related
    deployments, the stateful sets and finally the Knative deployments.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: When truthy the "basic-auth-login" deployment is
        expected in namespace; otherwise "iap-enabler" is expected among
        the ingress-related deployments.
      use_istio: When truthy, ingress-related resources are looked up in
        the "istio-system" namespace instead of namespace.
    """

    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    deployment_names = [
        "argo-ui",
        "centraldashboard",
        "cloud-endpoints-controller",
        "jupyter-web-app-deployment",
        "metadata-db",
        "metadata-deployment",
        "metadata-ui",
        "ml-pipeline",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "notebook-controller-deployment",
        "tf-job-operator",
        "pytorch-operator",
        "katib-controller",
        "workflow-controller",
    ]

    stateful_set_names = [
        "kfserving-controller-manager",
    ]

    ingress_related_deployments = []
    # backend-updater is expected for both auth modes.
    ingress_related_stateful_sets = ["backend-updater"]

    if use_basic_auth:
        deployment_names.append("basic-auth-login")
    else:
        ingress_related_deployments.append("iap-enabler")

    # TODO(jlewi): Might want to parallelize this.
    for deployment_name in deployment_names:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, namespace, deployment_name, 10)

    ingress_namespace = "istio-system" if use_istio else namespace
    for deployment_name in ingress_related_deployments:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, ingress_namespace,
                                 deployment_name, 10)

    all_stateful_sets = [(namespace, name) for name in stateful_set_names]
    all_stateful_sets.extend((ingress_namespace, name)
                             for name in ingress_related_stateful_sets)

    for ss_namespace, name in all_stateful_sets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, name)
        except Exception:
            # Collect debug information by running describe. This is
            # best-effort: if describe itself fails, log that and still
            # surface the original wait failure rather than masking it.
            try:
                util.run([
                    "kubectl", "-n", ss_namespace, "describe",
                    "statefulsets", name
                ])
            except Exception:
                logging.exception("Could not describe stateful set %s.%s",
                                  ss_namespace, name)
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    knative_namespace = "knative-serving"
    knative_related_deployments = [
        "activator",
        "autoscaler",
        "controller",
    ]
    for deployment_name in knative_related_deployments:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, knative_namespace,
                                 deployment_name, 10)
示例#7
0
def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the core deployments, the ingress-related
    deployments, the stateful sets and finally the Knative deployments.
    The platform read from app.yaml adds platform-specific components.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: Only consulted on the "gcp" platform. When truthy the
        "basic-auth-login" deployment is expected in namespace; otherwise
        "iap-enabler" is expected among the ingress-related deployments.
      use_istio: When truthy, ingress-related resources are looked up in
        the "istio-system" namespace instead of namespace.
      app_path: Directory containing app.yaml; its spec.platform value
        selects the platform-specific components.
    """
    set_logging()
    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    # TODO(yanniszark): This list is incomplete and missing a lot of components.
    deployment_names = [
        "argo-ui",
        "centraldashboard",
        "jupyter-web-app-deployment",
        "minio",
        "ml-pipeline",
        "ml-pipeline-persistenceagent",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "ml-pipeline-viewer-controller-deployment",
        "mysql",
        "notebook-controller-deployment",
        "profiles-deployment",
        "pytorch-operator",
        "tf-job-operator",
        "workflow-controller",
    ]

    stateful_set_names = []

    with open(os.path.join(app_path, "app.yaml")) as f:
        kfdef = yaml.safe_load(f)
    platform = kfdef["spec"]["platform"]

    ingress_related_deployments = [
        "istio-citadel",
        "istio-egressgateway",
        "istio-galley",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "istio-tracing",
        "kiali",
        "prometheus",
    ]
    ingress_related_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_related_deployments = [
        "activator",
        "autoscaler",
        "controller",
    ]

    if platform == "gcp":
        deployment_names.append("cloud-endpoints-controller")
        stateful_set_names.append("kfserving-controller-manager")
        # backend-updater is expected on gcp for both auth modes.
        ingress_related_stateful_sets.append("backend-updater")
        if use_basic_auth:
            deployment_names.append("basic-auth-login")
        else:
            ingress_related_deployments.append("iap-enabler")
    elif platform == "existing_arrikto":
        deployment_names.append("dex")
        ingress_related_deployments.append("authservice")
        # No Knative deployments are checked on this platform.
        knative_related_deployments = []

    # TODO(jlewi): Might want to parallelize this.
    for deployment_name in deployment_names:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, namespace, deployment_name, 10)

    ingress_namespace = "istio-system" if use_istio else namespace
    for deployment_name in ingress_related_deployments:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, ingress_namespace,
                                 deployment_name, 10)

    all_stateful_sets = [(namespace, name) for name in stateful_set_names]
    all_stateful_sets.extend((ingress_namespace, name)
                             for name in ingress_related_stateful_sets)

    for ss_namespace, name in all_stateful_sets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, name)
        except Exception:
            # Collect debug information by running describe. This is
            # best-effort: if describe itself fails, log that and still
            # surface the original wait failure rather than masking it.
            try:
                util.run([
                    "kubectl", "-n", ss_namespace, "describe",
                    "statefulsets", name
                ])
            except Exception:
                logging.exception("Could not describe stateful set %s.%s",
                                  ss_namespace, name)
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    for deployment_name in knative_related_deployments:
        logging.info("Verifying that deployment %s started...",
                     deployment_name)
        util.wait_for_deployment(api_client, knative_namespace,
                                 deployment_name, 10)