示例#1
0
def check_statefulsets_ready(record_xml_attribute, namespace, name, stateful_sets):
  """Verify that every listed stateful set becomes ready.

  Args:
    record_xml_attribute: Forwarded to util.set_pytest_junit.
    namespace: The namespace to check.
    name: Test name forwarded to util.set_pytest_junit.
    stateful_sets: Iterable of stateful set names to wait for in namespace.

  Raises:
    Exception: If waiting on a stateful set fails. The message names the
      stateful set that failed and the original error is chained as the cause.
  """
  set_logging()
  # TODO(jlewi): Should we do this in the calling function)?
  util.set_pytest_junit(record_xml_attribute, name)

  # Need to activate account for scopes.
  if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
    util.run(["gcloud", "auth", "activate-service-account",
              "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]])

  api_client = deploy_utils.create_k8s_client()

  util.load_kube_config()

  for set_name in stateful_sets:
    logging.info("Verifying that stateful set %s.%s started...", namespace,
                 set_name)
    try:
      util.wait_for_statefulset(api_client, namespace, set_name)
    except Exception as e:
      # Collect debug information by running describe
      util.run(["kubectl", "-n", namespace, "describe", "statefulsets",
                set_name])
      # Report the stateful set that actually failed (not the test name) and
      # keep the underlying error attached as the cause.
      raise Exception(
        f"Stateful set {namespace}.{set_name} is not ready") from e
示例#2
0
def test_katib_is_ready(record_xml_attribute, namespace):
    """Check that the Katib deployments have started.

    Waits, in order, for the katib-controller, katib-mysql,
    katib-db-manager and katib-ui deployments in the given namespace.

    Args:
        record_xml_attribute: Forwarded to util.set_pytest_junit.
        namespace: The namespace Kubeflow is deployed to.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_katib_is_ready")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        activate_cmd = ["gcloud", "auth", "activate-service-account"]
        activate_cmd.append("--key-file=" + key_file)
        util.run(activate_cmd)

    api_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    katib_deployments = (
        "katib-controller",
        "katib-mysql",
        "katib-db-manager",
        "katib-ui",
    )
    for katib_deployment in katib_deployments:
        logging.info("Verifying that deployment %s started...",
                     katib_deployment)
        util.wait_for_deployment(api_client, namespace, katib_deployment, 10)
示例#3
0
def test_gcp_kf_admin_wi(record_xml_attribute, namespace, app_name, platform,
                         project):
  """Test that the kubeflow admin SA has proper workload identity binding.

  Fetches the IAM policy of the `<app_name>-admin` GCP service account in
  `project` and checks that roles/iam.workloadIdentityUser is granted to each
  expected Kubernetes service account.

  Args:
    record_xml_attribute: Forwarded to util.set_pytest_junit.
    namespace: The namespace Kubeflow is deployed to.
    app_name: Prefix of the admin service account name.
    platform: Deployment platform; the test is skipped unless it is "gcp".
    project: GCP project that owns the admin service account.

  Raises:
    Exception: If the policy has no roles/iam.workloadIdentityUser binding.
    AssertionError: If an expected member is missing from that binding.
  """
  set_logging()
  util.set_pytest_junit(record_xml_attribute, "test_gcp_kf_admin_wi")

  # Need to activate account for scopes.
  if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
    util.run(["gcloud", "auth", "activate-service-account",
              "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]])

  # The returned client is not used below; the call is kept in case it
  # performs setup. NOTE(review): confirm whether it can be dropped.
  deploy_utils.create_k8s_client()

  if platform != "gcp":
    # pytest.skip raises, so nothing after it executes.
    pytest.skip("Not running on GCP")

  cred = GoogleCredentials.get_application_default()
  # Create the Cloud IAM service object
  service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)

  adminGcpSa = ('projects/%s/serviceAccounts/'
                '%s-admin@%s.iam.gserviceaccount.com') % (
                  project, app_name, project)

  request = service.projects().serviceAccounts().getIamPolicy(
    resource=adminGcpSa)
  response = request.execute()

  # A policy without any bindings omits the key entirely; treat that as an
  # empty list so the explicit "missing role" error below is raised instead
  # of a KeyError. Members of repeated bindings for one role are merged
  # rather than overwritten.
  roleToMembers = {}
  for binding in response.get('bindings', []):
    roleToMembers.setdefault(binding['role'], set()).update(
      binding['members'])

  workloadIdentityRole = 'roles/iam.workloadIdentityUser'
  if workloadIdentityRole not in roleToMembers:
    raise Exception("roles/iam.workloadIdentityUser missing in iam-policy of "
                    "service account %s" % adminGcpSa)

  account_str = "{project}.svc.id.goog[{namespace}/{account}]"

  # Expected workload identity users of the admin service account
  expected_wi_sa = [(namespace, "kf-admin"),
                    (namespace, "profiles-controller-service-account"),
                    ("istio-system", "kf-admin")]

  for sa_namespace, sa_account in expected_wi_sa:
    gcp_sa = account_str.format(project=project, namespace=sa_namespace,
                                account=sa_account)

    error_message = ("GCP SA {0} missing workload identity binding for "
                     "{1}").format(adminGcpSa, gcp_sa)

    binding = "serviceAccount:" + gcp_sa
    assert binding in roleToMembers[workloadIdentityRole], error_message
示例#4
0
def test_gcp_access(record_xml_attribute, namespace, app_path, project):
    """Test that Kubeflow on GCP was configured with workload identity and
    GCP service account credentials.

    On any platform other than "gcp" the function returns after the common
    setup without checking anything. On GCP it checks that the "user-gcp-sa"
    secret exists and inspects the IAM policy of the `<app_name>-user`
    service account.

    Args:
        record_xml_attribute: Forwarded to util.set_pytest_junit.
        namespace: The namespace Kubeflow is deployed to.
        app_path: Passed to get_platform_app_name to obtain the platform and
            app name.
        project: GCP project that owns the service accounts.

    Raises:
        Exception: If roles/owner is missing from the user service account's
            policy, if the admin service account is not an owner, or if
            roles/iam.workloadIdentityUser is missing.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_gcp_access")

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
        ])

    api_client = deploy_utils.create_k8s_client()

    platform, app_name = get_platform_app_name(app_path)
    if platform != "gcp":
        return

    # check secret
    util.check_secret(api_client, namespace, "user-gcp-sa")

    cred = GoogleCredentials.get_application_default()
    # Create the Cloud IAM service object
    service = googleapiclient.discovery.build('iam',
                                              'v1',
                                              credentials=cred)

    userSa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (
        project, app_name, project)
    adminSa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (
        app_name, project)

    request = service.projects().serviceAccounts().getIamPolicy(
        resource=userSa)
    response = request.execute()

    # A policy without any bindings omits the key entirely; treat that as an
    # empty list so the explicit errors below are raised instead of a
    # KeyError. Members of repeated bindings for one role are merged rather
    # than overwritten.
    roleToMembers = {}
    for binding in response.get('bindings', []):
        roleToMembers.setdefault(binding['role'], set()).update(
            binding['members'])

    if 'roles/owner' not in roleToMembers:
        raise Exception("roles/owner missing in iam-policy of %s" % userSa)

    if adminSa not in roleToMembers['roles/owner']:
        # "%v" is not a valid Python conversion and made this line raise
        # ValueError instead of the intended error; use "%s".
        raise Exception("Admin %s should be owner of user %s" %
                        (adminSa, userSa))

    workloadIdentityRole = 'roles/iam.workloadIdentityUser'
    if workloadIdentityRole not in roleToMembers:
        raise Exception(
            "roles/iam.workloadIdentityUser missing in iam-policy of %s" %
            userSa)
示例#5
0
def check_deployments_ready(record_xml_attribute, namespace, name, deployments,
                            cluster_name):
    """Wait for each of the given deployments to start.

    Args:
        record_xml_attribute: Forwarded to util.set_pytest_junit.
        namespace: The namespace Kubeflow is deployed to.
        name: Test name forwarded to util.set_pytest_junit.
        deployments: Iterable of deployment names to wait for in namespace.
        cluster_name: Passed to kfctl_aws_util.aws_auth_load_kubeconfig.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, name)

    # Kubeconfig is loaded through the AWS helper before the client is built.
    kfctl_aws_util.aws_auth_load_kubeconfig(cluster_name)
    k8s_client = deploy_utils.create_k8s_client()

    for target in deployments:
        logging.info("Verifying that deployment %s started...", target)
        util.wait_for_deployment(k8s_client, namespace, target, 10)
示例#6
0
def check_deployments_ready(record_xml_attribute, namespace, name, deployments):
  """Wait for each of the given deployments to start.

  Args:
    record_xml_attribute: Forwarded to util.set_pytest_junit.
    namespace: The namespace Kubeflow is deployed to.
    name: Test name forwarded to util.set_pytest_junit.
    deployments: Iterable of deployment names to wait for in namespace.
  """
  set_logging()
  # TODO(jlewi): Should we do this in the calling function)?
  util.set_pytest_junit(record_xml_attribute, name)

  # Need to activate account for scopes.
  key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
  if key_file:
    activate_cmd = ["gcloud", "auth", "activate-service-account"]
    activate_cmd.append("--key-file=" + key_file)
    util.run(activate_cmd)

  k8s_client = deploy_utils.create_k8s_client()
  util.load_kube_config()

  for target in deployments:
    logging.info("Verifying that deployment %s started...", target)
    util.wait_for_deployment(k8s_client, namespace, target, 10)
示例#7
0
def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth,
                     use_istio, app_path):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the core deployments in `namespace`, the
    ingress-related deployments, the stateful sets, and finally the knative
    deployments. The exact lists depend on the platform reported by
    get_platform_app_name(app_path).

    Args:
        record_xml_attribute: Forwarded to util.set_pytest_junit.
        namespace: The namespace Kubeflow is deployed to.
        use_basic_auth: On GCP, selects basic-auth-login instead of
            iap-enabler.
        use_istio: When truthy, ingress components are looked up in
            "istio-system" instead of `namespace`.
        app_path: Passed to get_platform_app_name to obtain the platform.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        activate_cmd = ["gcloud", "auth", "activate-service-account"]
        activate_cmd.append("--key-file=" + key_file)
        util.run(activate_cmd)

    api_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    def wait_for_deployments(target_namespace, names):
        # Wait for every named deployment in target_namespace, in order.
        for deployment in names:
            logging.info("Verifying that deployment %s started...",
                         deployment)
            util.wait_for_deployment(api_client, target_namespace,
                                     deployment, 10)

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    # TODO(yanniszark): This list is incomplete and missing a lot of components.
    core_deployments = [
        "argo-ui",
        "centraldashboard",
        "jupyter-web-app-deployment",
        "minio",
        "ml-pipeline",
        "ml-pipeline-persistenceagent",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "ml-pipeline-viewer-controller-deployment",
        "mysql",
        "notebook-controller-deployment",
        "profiles-deployment",
        "pytorch-operator",
        "tf-job-operator",
        "workflow-controller",
    ]
    core_stateful_sets = []

    platform, _ = get_platform_app_name(app_path)

    ingress_deployments = [
        "istio-egressgateway",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "istio-tracing",
        "prometheus",
    ]
    ingress_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_deployments = ["activator", "autoscaler", "controller"]

    if platform == "gcp":
        core_deployments.append("cloud-endpoints-controller")
        core_stateful_sets.append("kfserving-controller-manager")
        if use_basic_auth:
            core_deployments.append("basic-auth-login")
        else:
            ingress_deployments.append("iap-enabler")
        # backend-updater is expected with either auth mode.
        ingress_stateful_sets.append("backend-updater")
    elif platform == "existing_arrikto":
        core_deployments.append("dex")
        ingress_deployments.append("authservice")
        knative_deployments = []

    ingress_namespace = namespace
    if use_istio:
        ingress_namespace = "istio-system"

    # TODO(jlewi): Might want to parallelize this.
    wait_for_deployments(namespace, core_deployments)
    wait_for_deployments(ingress_namespace, ingress_deployments)

    stateful_set_targets = [(namespace, ss) for ss in core_stateful_sets]
    stateful_set_targets += [(ingress_namespace, ss)
                             for ss in ingress_stateful_sets]

    for ss_namespace, ss_name in stateful_set_targets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, ss_name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, ss_name)
        except BaseException:
            # Collect debug information by running describe, then let the
            # original error propagate unchanged.
            util.run([
                "kubectl", "-n", ss_namespace, "describe", "statefulsets",
                ss_name
            ])
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    wait_for_deployments(knative_namespace, knative_deployments)
示例#8
0
def test_kf_is_ready(record_xml_attribute, namespace, use_basic_auth, app_path,
                     cluster_name):
    """Test that Kubeflow was successfully deployed.

    Waits, in order, for the platform-specific deployments in `namespace`,
    the ingress-related deployments in "istio-system", the stateful sets,
    the ingress (on AWS only), the knative deployments, Dex in the "auth"
    namespace and the cert-manager deployments.

    Args:
        record_xml_attribute: Forwarded to util.set_pytest_junit.
        namespace: The namespace Kubeflow is deployed to.
        use_basic_auth: On GCP, selects basic-auth-login instead of
            iap-enabler.
        app_path: Passed to get_platform_app_name to obtain the platform.
        cluster_name: Passed to kfctl_aws_util.aws_auth_load_kubeconfig.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_kf_is_ready")

    kfctl_aws_util.aws_auth_load_kubeconfig(cluster_name)
    api_client = deploy_utils.create_k8s_client()

    def wait_for_deployments(target_namespace, names):
        # Wait for every named deployment in target_namespace, in order.
        for deployment in names:
            logging.info("Verifying that deployment %s started...",
                         deployment)
            util.wait_for_deployment(api_client, target_namespace,
                                     deployment, 10)

    # Verify that components are actually deployed.
    core_deployments = []
    core_stateful_sets = []

    platform, _ = get_platform_app_name(app_path)

    # TODO(PatrickXYS): not sure why istio-galley can't found
    ingress_namespace = "istio-system"
    ingress_deployments = [
        "cluster-local-gateway",
        "istio-citadel",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "prometheus",
    ]
    ingress_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_deployments = [
        "activator",
        "autoscaler",
        "autoscaler-hpa",
        "controller",
        "networking-istio",
        "webhook",
    ]

    if platform == "gcp":
        core_deployments.append("cloud-endpoints-controller")
        core_stateful_sets.append("kfserving-controller-manager")
        if use_basic_auth:
            core_deployments.append("basic-auth-login")
        else:
            ingress_deployments.append("iap-enabler")
        # backend-updater is expected with either auth mode.
        ingress_stateful_sets.append("backend-updater")
    elif platform == "existing_arrikto":
        core_deployments.append("dex")
        ingress_deployments.append("authservice")
        knative_deployments = []
    elif platform == "aws":
        # TODO(PatrickXYS): Extend List with AWS Deployment
        core_deployments.append("alb-ingress-controller")

    # TODO(jlewi): Might want to parallelize this.
    wait_for_deployments(namespace, core_deployments)
    wait_for_deployments(ingress_namespace, ingress_deployments)

    stateful_set_targets = [(namespace, ss) for ss in core_stateful_sets]
    stateful_set_targets += [(ingress_namespace, ss)
                             for ss in ingress_stateful_sets]

    for ss_namespace, ss_name in stateful_set_targets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, ss_name)
        try:
            util.wait_for_statefulset(api_client, ss_namespace, ss_name)
        except BaseException:
            # Collect debug information by running describe, then let the
            # original error propagate unchanged.
            util.run([
                "kubectl", "-n", ss_namespace, "describe", "statefulsets",
                ss_name
            ])
            raise

    # Check if Ingress is Ready and Healthy
    if platform == "aws":
        for ingress_name in ("istio-ingress",):
            logging.info("Verifying that ingress %s started...", ingress_name)
            util.wait_for_ingress(api_client, ingress_namespace, ingress_name,
                                  10)

    wait_for_deployments(knative_namespace, knative_deployments)

    # Check if Dex is Ready and Healthy
    wait_for_deployments("auth", ["dex"])

    # Check if Cert-Manager is Ready and Healthy
    wait_for_deployments("cert-manager", [
        "cert-manager",
        "cert-manager-cainjector",
        "cert-manager-webhook",
    ])