def test_katib_is_ready(record_xml_attribute, namespace):
    """Verify that the Katib deployments have started.

    Args:
      record_xml_attribute: Passed through to util.set_pytest_junit together
        with this test's name.
      namespace: The namespace Kubeflow is deployed to.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_katib_is_ready")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=" + key_file,
        ])

    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    katib_deployments = (
        "katib-controller",
        "katib-db",
        "katib-manager",
        "katib-ui",
    )
    for katib_deployment in katib_deployments:
        logging.info("Verifying that deployment %s started...",
                     katib_deployment)
        # The trailing 10 is forwarded as-is to wait_for_deployment
        # (presumably a timeout -- confirm against util).
        util.wait_for_deployment(k8s_client, namespace, katib_deployment, 10)
def test_kf_is_ready(namespace, use_basic_auth):
    """Verify that the Kubeflow deployments and statefulsets have started.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: When truthy the "basic-auth" deployment is expected;
        otherwise the "iap-enabler" deployment is expected.
    """
    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=" + key_file,
        ])

    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    auth_deployment = "basic-auth" if use_basic_auth else "iap-enabler"
    expected_deployments = [
        "argo-ui",
        "centraldashboard",
        "cert-manager",
        "cloud-endpoints-controller",
        "jupyter-web-app",
        "ml-pipeline",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "notebooks-controller",
        "tf-job-operator",
        "profiles",
        "pytorch-operator",
        "studyjob-controller",
        "workflow-controller",
        # The auth-specific deployment is checked last.
        auth_deployment,
    ]
    expected_stateful_sets = ["backend-updater"]

    # TODO(jlewi): Might want to parallelize this.
    for deployment in expected_deployments:
        logging.info("Verifying that deployment %s started...", deployment)
        util.wait_for_deployment(k8s_client, namespace, deployment)

    for stateful_set in expected_stateful_sets:
        logging.info("Verifying that statefulset %s started...", stateful_set)
        util.wait_for_statefulset(k8s_client, namespace, stateful_set)
def test_gcp_access(record_xml_attribute, namespace, app_path, project):
    """Test that Kubeflow on GCP has the expected service account setup.

    On the "gcp" platform this checks that the "user-gcp-sa" secret exists
    and that the IAM policy of the user service account grants roles/owner
    to the admin service account and contains a
    roles/iam.workloadIdentityUser binding. On any other platform the test
    performs no checks.

    Args:
      record_xml_attribute: Passed through to util.set_pytest_junit together
        with this test's name.
      namespace: The namespace Kubeflow is deployed to.
      app_path: Passed to get_platform_app_name to obtain the platform and
        the application name.
      project: GCP project id used to build the service account names.

    Raises:
      Exception: If a required role or member is missing from the IAM
        policy of the user service account.
    """
    set_logging()
    util.set_pytest_junit(record_xml_attribute, "test_gcp_access")

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=" + key_file,
        ])

    api_client = deploy_utils.create_k8s_client()

    platform, app_name = get_platform_app_name(app_path)
    if platform != "gcp":
        # Everything below is specific to GCP deployments.
        return

    # check secret
    util.check_secret(api_client, namespace, "user-gcp-sa")

    cred = GoogleCredentials.get_application_default()
    # Create the Cloud IAM service object
    service = googleapiclient.discovery.build('iam', 'v1', credentials=cred)

    user_sa = 'projects/%s/serviceAccounts/%s-user@%s.iam.gserviceaccount.com' % (
        project, app_name, project)
    admin_sa = 'serviceAccount:%s-admin@%s.iam.gserviceaccount.com' % (
        app_name, project)

    request = service.projects().serviceAccounts().getIamPolicy(
        resource=user_sa)
    response = request.execute()

    # A policy without any bindings omits the 'bindings' key altogether, and
    # the same role may appear in more than one binding, so read the key
    # defensively and merge members per role instead of overwriting.
    role_to_members = {}
    for binding in response.get('bindings', []):
        members = role_to_members.setdefault(binding['role'], set())
        members.update(binding.get('members', []))

    if 'roles/owner' not in role_to_members:
        raise Exception("roles/owner missing in iam-policy of %s" % user_sa)

    if admin_sa not in role_to_members['roles/owner']:
        # Note: '%v' is not a valid Python conversion and would raise
        # ValueError instead of producing this message.
        raise Exception("Admin %s should be owner of user %s" %
                        (admin_sa, user_sa))

    workload_identity_role = 'roles/iam.workloadIdentityUser'
    if workload_identity_role not in role_to_members:
        raise Exception(
            "roles/iam.workloadIdentityUser missing in iam-policy of %s" %
            user_sa)
def deploy_kubeflow(_):
    """Check that Jupyter and the core Kubeflow operators have started.

    The target namespace is taken from parse_args(). The "jupyter"
    statefulset is awaited first, followed by each core deployment.

    Args:
      _: Unused.
    """
    namespace = parse_args().namespace
    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    # Verify that Jupyter is actually deployed.
    logging.info("Verifying TfHub started.")
    util.wait_for_statefulset(k8s_client, namespace, "jupyter")

    # Verify that core components are actually deployed.
    core_deployments = (
        "tf-job-operator-v1beta1",
        "pytorch-operator",
        "studyjob-controller",
    )
    for core_deployment in core_deployments:
        logging.info("Verifying that %s started...", core_deployment)
        util.wait_for_deployment(k8s_client, namespace, core_deployment)
def deploy_kubeflow(_):
    """Check that the TfJob operator, Jupyter and PyTorch operator started.

    The target namespace is taken from parse_args(). Resources are awaited
    one after another in the order listed below.

    Args:
      _: Unused.
    """
    namespace = parse_args().namespace
    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    # (log message, wait helper, resource name), in the order to verify.
    checks = (
        # Verify that the TfJob operator is actually deployed.
        ("Verifying TfJob controller started.",
         util.wait_for_deployment,
         "tf-job-operator-v1beta1"),
        # Verify that Jupyter is actually deployed.
        ("Verifying TfHub started.",
         util.wait_for_statefulset,
         "jupyter"),
        # Verify that PyTorch Operator actually deployed
        ("Verifying PyTorchJob controller started.",
         util.wait_for_deployment,
         "pytorch-operator"),
    )
    for message, wait_for_resource, resource_name in checks:
        logging.info(message)
        wait_for_resource(k8s_client, namespace, resource_name)
def test_kf_is_ready(namespace, use_basic_auth, use_istio):
    """Verify that the Kubeflow deployments and statefulsets have started.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: When truthy the "basic-auth-login" deployment is
        expected in `namespace`; otherwise "iap-enabler" is expected among
        the ingress resources.
      use_istio: When truthy the ingress resources are looked up in the
        "istio-system" namespace, otherwise in `namespace`.
    """
    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=" + key_file,
        ])

    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    core_deployments = [
        "argo-ui",
        "centraldashboard",
        "cloud-endpoints-controller",
        "jupyter-web-app-deployment",
        "metadata-db",
        "metadata-deployment",
        "metadata-ui",
        "ml-pipeline",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "notebook-controller-deployment",
        "tf-job-operator",
        "pytorch-operator",
        "katib-controller",
        "workflow-controller",
    ]
    core_stateful_sets = ["kfserving-controller-manager"]

    ingress_deployments = []
    # backend-updater is expected with basic auth and with IAP alike.
    ingress_stateful_sets = ["backend-updater"]
    if use_basic_auth:
        core_deployments.append("basic-auth-login")
    else:
        ingress_deployments.append("iap-enabler")

    ingress_namespace = "istio-system" if use_istio else namespace

    # TODO(jlewi): Might want to parallelize this.
    deployment_targets = [(namespace, name) for name in core_deployments]
    deployment_targets += [(ingress_namespace, name)
                           for name in ingress_deployments]
    for target_namespace, deployment in deployment_targets:
        logging.info("Verifying that deployment %s started...", deployment)
        util.wait_for_deployment(k8s_client, target_namespace, deployment, 10)

    stateful_set_targets = [(namespace, name) for name in core_stateful_sets]
    stateful_set_targets += [(ingress_namespace, name)
                             for name in ingress_stateful_sets]
    for ss_namespace, stateful_set in stateful_set_targets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, stateful_set)
        try:
            util.wait_for_statefulset(k8s_client, ss_namespace, stateful_set)
        except:
            # Deliberately broad: collect debug information by running
            # describe, then always re-raise the original failure.
            util.run([
                "kubectl",
                "-n",
                ss_namespace,
                "describe",
                "statefulsets",
                stateful_set,
            ])
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    knative_namespace = "knative-serving"
    for knative_deployment in ("activator", "autoscaler", "controller"):
        logging.info("Verifying that deployment %s started...",
                     knative_deployment)
        util.wait_for_deployment(k8s_client, knative_namespace,
                                 knative_deployment, 10)
def test_kf_is_ready(namespace, use_basic_auth, use_istio, app_path):
    """Verify that the Kubeflow deployments and statefulsets have started.

    The expected set of resources depends on the platform read from
    spec.platform in <app_path>/app.yaml.

    Args:
      namespace: The namespace Kubeflow is deployed to.
      use_basic_auth: On the "gcp" platform, selects "basic-auth-login"
        (truthy) or "iap-enabler" (falsy) as an expected deployment.
      use_istio: When truthy the ingress resources are looked up in the
        "istio-system" namespace, otherwise in `namespace`.
      app_path: Directory containing the app.yaml file to read.
    """
    set_logging()
    logging.info("Using namespace %s", namespace)

    # Need to activate account for scopes.
    key_file = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    if key_file:
        util.run([
            "gcloud",
            "auth",
            "activate-service-account",
            "--key-file=" + key_file,
        ])

    k8s_client = deploy_utils.create_k8s_client()
    util.load_kube_config()

    # Verify that components are actually deployed.
    # TODO(jlewi): We need to parameterize this list based on whether
    # we are using IAP or basic auth.
    # TODO(yanniszark): This list is incomplete and missing a lot of
    # components.
    core_deployments = [
        "argo-ui",
        "centraldashboard",
        "jupyter-web-app-deployment",
        "minio",
        "ml-pipeline",
        "ml-pipeline-persistenceagent",
        "ml-pipeline-scheduledworkflow",
        "ml-pipeline-ui",
        "ml-pipeline-viewer-controller-deployment",
        "mysql",
        "notebook-controller-deployment",
        "profiles-deployment",
        "pytorch-operator",
        "tf-job-operator",
        "workflow-controller",
    ]
    core_stateful_sets = []

    with open(os.path.join(app_path, "app.yaml")) as app_file:
        kfdef = yaml.safe_load(app_file)
    platform = kfdef["spec"]["platform"]

    ingress_deployments = [
        "istio-citadel",
        "istio-egressgateway",
        "istio-galley",
        "istio-ingressgateway",
        "istio-pilot",
        "istio-policy",
        "istio-sidecar-injector",
        "istio-telemetry",
        "istio-tracing",
        "kiali",
        "prometheus",
    ]
    ingress_stateful_sets = []

    knative_namespace = "knative-serving"
    knative_deployments = ["activator", "autoscaler", "controller"]

    if platform == "gcp":
        core_deployments.append("cloud-endpoints-controller")
        core_stateful_sets.append("kfserving-controller-manager")
        # backend-updater is expected with basic auth and with IAP alike.
        ingress_stateful_sets.append("backend-updater")
        if use_basic_auth:
            core_deployments.append("basic-auth-login")
        else:
            ingress_deployments.append("iap-enabler")
    elif platform == "existing_arrikto":
        core_deployments.append("dex")
        ingress_deployments.append("authservice")
        # No knative deployments are verified on this platform.
        knative_deployments = []

    ingress_namespace = "istio-system" if use_istio else namespace

    # TODO(jlewi): Might want to parallelize this.
    deployment_targets = [(namespace, name) for name in core_deployments]
    deployment_targets += [(ingress_namespace, name)
                           for name in ingress_deployments]
    for target_namespace, deployment in deployment_targets:
        logging.info("Verifying that deployment %s started...", deployment)
        util.wait_for_deployment(k8s_client, target_namespace, deployment, 10)

    stateful_set_targets = [(namespace, name) for name in core_stateful_sets]
    stateful_set_targets += [(ingress_namespace, name)
                             for name in ingress_stateful_sets]
    for ss_namespace, stateful_set in stateful_set_targets:
        logging.info("Verifying that stateful set %s.%s started...",
                     ss_namespace, stateful_set)
        try:
            util.wait_for_statefulset(k8s_client, ss_namespace, stateful_set)
        except:
            # Deliberately broad: collect debug information by running
            # describe, then always re-raise the original failure.
            util.run([
                "kubectl",
                "-n",
                ss_namespace,
                "describe",
                "statefulsets",
                stateful_set,
            ])
            raise

    # TODO(jlewi): We should verify that the ingress is created and healthy.

    for knative_deployment in knative_deployments:
        logging.info("Verifying that deployment %s started...",
                     knative_deployment)
        util.wait_for_deployment(k8s_client, knative_namespace,
                                 knative_deployment, 10)