Exemplo n.º 1
0
def test_deploy_pytorchjob(record_xml_attribute, kfctl_repo_path, namespace):
    """Deploy a PyTorchJob and verify its master/worker pods were created.

    Args:
      record_xml_attribute: Test fixture provided by pytest.
      kfctl_repo_path: Path to the kubeflow/kfctl repo checkout containing
        the pytorch_job.yaml test data.
      namespace: Namespace the PyTorchJob pods are expected to run in.

    Raises:
      ValueError: If any of the expected pods is not found.
    """
    # Sibling tests all set the junit attribute; keep this one consistent.
    util.set_pytest_junit(record_xml_attribute, "test_deploy_pytorchjob")
    util.load_kube_config()
    util.load_kube_credentials()
    logging.info("using kfctl repo: %s" % kfctl_repo_path)
    util.run([
        "kubectl", "apply", "-f",
        os.path.join(
            kfctl_repo_path,
            "py/kubeflow/kfctl/testing/pytests/testdata/pytorch_job.yaml")
    ])
    api_client = k8s_client.ApiClient()
    api = k8s_client.CoreV1Api(api_client)

    # If the call throws exception, let it emit as an error case.
    resp = api.list_namespaced_pod(namespace)

    expected = {
        "pytorch-mnist-ddp-cpu-master-0",
        "pytorch-mnist-ddp-cpu-worker-0",
    }
    actual = {pod.metadata.name for pod in resp.items}

    missing = expected - actual
    if missing:
        raise ValueError(
            "; ".join("pod %s is not found" % n for n in sorted(missing)))
Exemplo n.º 2
0
def test_kfam(record_xml_attribute):
  """End-to-end test of the KFAM (profiles) service.

  Execs into the jupyter-web-app pod and curls the profiles-kfam service to
  create a new profile, then verifies that the profile was created.

  Args:
    record_xml_attribute: Test fixture provided by pytest.
  """
  util.set_pytest_junit(record_xml_attribute, "test_kfam_e2e")
  util.load_kube_config()
  util.load_kube_credentials()

  # NOTE: the single quotes around the go-template survive split(' ') and are
  # passed to kubectl as part of the argument, so the pod name presumably
  # comes back wrapped in quotes; the [1:-1] slice strips them off again.
  getcmd = "kubectl get pods -n kubeflow -l=app=jupyter-web-app --template '{{range.items}}{{.metadata.name}}{{end}}'"
  jupyterpod = util.run(getcmd.split(' '))[1:-1]

  logging.info("accessing kfam svc from jupyter pod %s" % jupyterpod)

  # Give the KFAM service a moment to become ready before hitting it.
  sleep(10)
  # Profile Creation
  profile_name = "testprofile-%s" % uuid.uuid4().hex[0:7]
  util.run(['kubectl', 'exec', jupyterpod, '-n', 'kubeflow', '--', 'curl',
            '--silent', '-X', 'POST', '-d',
            '{"metadata":{"name":"%s"},"spec":{"owner":{"kind":"User","name":"*****@*****.**"}}}' % profile_name,
            'profiles-kfam.kubeflow:8081/kfam/v1/profiles'])

  assert verify_profile_creation(jupyterpod, profile_name)
Exemplo n.º 3
0
def test_jupyter(record_xml_attribute, kfctl_repo_path, namespace):
  """Test the jupyter notebook by deploying it and checking its service.

  Args:
    record_xml_attribute: Test fixture provided by pytest.
    kfctl_repo_path: Path to the kubeflow/kfctl repo checkout containing
      the jupyter_test.yaml test data.
    namespace: Namespace to run in.

  Raises:
    ValueError: If the jupyter-test service is not found.
  """
  # Sibling tests all set the junit attribute; keep this one consistent.
  util.set_pytest_junit(record_xml_attribute, "test_jupyter")
  util.load_kube_config()
  util.load_kube_credentials()
  logging.info("using kfctl repo: %s" % kfctl_repo_path)
  util.run(["kubectl", "apply", "-f",
            os.path.join(kfctl_repo_path,
                         "py/kubeflow/kfctl/testing/pytests/testdata/jupyter_test.yaml")])
  api_client = k8s_client.ApiClient()
  api = k8s_client.CoreV1Api(api_client)

  # Verify the service the YAML above should have created actually exists.
  resp = api.list_namespaced_service(namespace)
  names = [service.metadata.name for service in resp.items]
  if "jupyter-test" not in names:
    raise ValueError("not able to find jupyter-test service.")
Exemplo n.º 4
0
def test_build_kfctl_go(record_xml_attribute, app_name, app_path, project,
                        use_basic_auth, use_istio, config_path,
                        build_and_apply, kfctl_repo_path,
                        cluster_creation_script, self_signed_cert, values):
    """Build kfctl from source and use it to deploy Kubeflow.

    Args:
      record_xml_attribute: Test fixture provided by pytest.
      app_name: Kubeflow deployment name.
      app_path: The path to the Kubeflow app.
      project: The GCP project to use.
      use_basic_auth: Whether to use basic_auth.
      use_istio: Whether to use Istio or not.
      config_path: Path to the KFDef spec file; may contain {placeholders}
        that are filled in from `values`.
      build_and_apply: Whether to build and apply or apply.
      kfctl_repo_path: Path to the kubeflow/kfctl repo.
      cluster_creation_script: Optional script invoked to create a new cluster.
      self_signed_cert: Whether to use a self-signed cert for ingress.
      values: Comma separated list of key=value pairs substituted into
        config_path.
    """
    util.set_pytest_junit(record_xml_attribute, "test_build_kfctl_go")

    # Need to activate account for scopes.
    if os.getenv("GOOGLE_APPLICATION_CREDENTIALS"):
        util.run([
            "gcloud", "auth", "activate-service-account",
            "--key-file=" + os.environ["GOOGLE_APPLICATION_CREDENTIALS"]
        ])

    # TODO(yanniszark): split this into a separate workflow step
    if cluster_creation_script:
        logging.info("Cluster creation script specified: %s",
                     cluster_creation_script)
        util.run(["/bin/bash", "-c", cluster_creation_script])

    logging.info("using kfctl repo: %s", kfctl_repo_path)

    # Substitute any {key} placeholders in the config path.
    if values:
        substitutions = dict(pair.split("=") for pair in values.split(","))
        config_path = config_path.format(**substitutions)
        logging.info("config_path after substitution: %s", config_path)

    kfctl_path = kfctl_util.build_kfctl_go(kfctl_repo_path)
    app_path = kfctl_util.kfctl_deploy_kubeflow(app_path, project,
                                                use_basic_auth, use_istio,
                                                config_path, kfctl_path,
                                                build_and_apply)
    if not cluster_creation_script:
        kfctl_util.verify_kubeconfig(app_path)

    # Use self-signed cert for testing to prevent quota limiting.
    if self_signed_cert:
        logging.info("Configuring self signed certificate")
        util.load_kube_credentials()
        client = k8s_client.ApiClient()
        endpoint = "{0}.endpoints.{1}.cloud.goog".format(app_name, project)
        logging.info("Configuring self signed cert for %s", endpoint)
        util.use_self_signed_for_ingress("istio-system", "envoy-ingress",
                                         endpoint, client)
Exemplo n.º 5
0
def check_if_kfapp_exists(project, name, zone): # pylint: disable=too-many-branches
  """Check if a deployment with the specified name already exists.

  Args:
    project: The GCP project to check.
    name: Name of the deployment-manager deployment / KFApp.
    zone: GCP zone of the cluster.

  Returns:
    True if the deployment exists and its ingress is already set up;
    False otherwise. When the deployment exists but the ingress is missing,
    a conflicting istio-ingressgateway service (if any) is deleted so a
    rerun of apply can succeed, and False is returned.

  Raises:
    ApiNotEnabledError: If the deployment manager API had to be enabled;
      callers are expected to retry.
  """
  credentials = GoogleCredentials.get_application_default()
  dm = discovery.build("deploymentmanager", "v2", credentials=credentials)

  deployments_client = dm.deployments()
  enable_api = False
  try:
    deployments_client.get(project=project, deployment=name).execute()
  except errors.HttpError as e:
    if not e.content:
      raise
    error_content = json.loads(e.content)
    if error_content.get("error", {}).get("code", 0) == 404: # pylint: disable=no-else-return
      return False
    elif error_content.get("error", {}).get("code", 0) == 403:
      # We get a 403 if the deployment manager API isn't enabled
      logging.info("Fetching deployment %s in project %s returned error:\n%s",
                   name, project, error_content)
      enable_api = True
    else:
      raise

  if enable_api:
    logging.info("Enabling the deployment manager api.")
    util.run(["gcloud", "--project=" + project, "services", "enable",
              "deploymentmanager.googleapis.com"])
    logging.info("Api enabled; raising ApiNotEnabledError to force retry")
    raise ApiNotEnabledError

  # TODO(jlewi): It would be better to get the actual zone of the deployment
  util.run(["gcloud", "--project=" + project, "container", "clusters",
            "get-credentials", "--zone=" + zone, name])
  logging.info("Checking if project %s kfapp %s finished setup.", project, name)
  util.load_kube_credentials()

  # TODO(jlewi): This is a bit of a hack for v0.6. For v0.6 we check if the
  # ingress already exists and if it does we report it as true and otherwise
  # false. The reasoning is if the ingress doesn't exist we want to see
  # if we can fix/resume the deployment by running reapply
  # With v0.7 kfctl apply should be an idempotent operation so we can always
  # rerun apply; but with v0.6 rerunning apply if the ingress exists results
  # in an error.
  api_client = k8s_client.ApiClient()
  v1 = k8s_client.CoreV1Api(api_client)
  ingress_namespace = "istio-system"
  ingress_name = "envoy-ingress"

  extensions = k8s_client.ExtensionsV1beta1Api(api_client)

  missing_ingress = True
  try:
    logging.info("Trying to read ingress %s.%s", ingress_name,
                 ingress_namespace)
    extensions.read_namespaced_ingress(ingress_name, ingress_namespace)
    missing_ingress = False
    logging.info("Ingress %s.%s exists", ingress_name, ingress_namespace)
  except rest.ApiException as e:
    if e.status == 404:
      logging.info("Project: %s, KFApp: %s is missing ingress %s.%s",
                   project, name, ingress_namespace, ingress_name)
      missing_ingress = True
    else:
      raise

  if missing_ingress:
    # Check if the service istio-ingressgateway already exists
    # if it does we need to delete it before rerunning apply.
    service_name = "istio-ingressgateway"
    # BUGFIX: this branch is only entered when the ingress is MISSING; the
    # old log message wrongly claimed the ingress exists.
    logging.info("ingress %s.%s is missing; checking if service %s.%s exists",
                 ingress_namespace, ingress_name, ingress_namespace,
                 service_name)

    has_service = False
    try:
      v1.read_namespaced_service(service_name, ingress_namespace)
      has_service = True
    except rest.ApiException as e:
      if e.status == 404:
        logging.info("Project: %s, KFApp: %s is missing service %s.%s",
                     project, name, ingress_namespace, service_name)
      else:
        raise

    if has_service:
      logging.info("Deleting service: %s.%s", ingress_namespace, service_name)
      v1.delete_namespaced_service(service_name, ingress_namespace,
                                   body=k8s_client.V1DeleteOptions())
      logging.info("Deleted service: %s.%s", ingress_namespace, service_name)

    return False


  return True
Exemplo n.º 6
0
def deploy_with_kfctl_go(kfctl_path, args, app_dir, env, labels=None): # pylint: disable=too-many-branches
  """Deploy Kubeflow using kfctl go binary.

  Args:
    kfctl_path: Path to the kfctl binary.
    args: Parsed command-line args; uses kfctl_config, email, project, zone,
      setup_project and use_self_cert.
    app_dir: Directory to write the app (and its config file) into.
    env: Environment variables to pass to the kfctl subprocesses.
    labels: Optional dict of labels merged into the KFDef metadata.
  """
  # username and password are passed as env vars and won't appear in the logs
  #
  # We need to edit and rewrite the config file to the app dir because
  # kfctl uses the path of the config file as the app dir.s
  logging.warning("Loading configs %s.", args.kfctl_config)

  if args.kfctl_config.startswith("http"):
    response = requests.get(args.kfctl_config)
    raw_config = response.content
  else:
    with open(args.kfctl_config) as hf:
      raw_config = hf.read()

  # BUGFIX: yaml.load without an explicit Loader is deprecated and unsafe;
  # the config may come from a remote URL, so parse it with safe_load.
  config_spec = yaml.safe_load(raw_config)

  # We need to specify a valid email because
  #  1. We need to create appropriate RBAC rules to allow the current user
  #     to create the required K8s resources.
  #  2. Setting the IAM policy will fail if the email is invalid.
  email = args.email

  if not email:
    logging.info("email not set trying to get default from gcloud")
    email = util.run(["gcloud", "auth", "list",
                      "--filter", "status:ACTIVE", "--format", "value(account)"])

  if not email:
    raise ValueError("Could not determine GCP account being used.")

  kfdef_version = config_spec["apiVersion"].strip().lower()

  if kfdef_version == KFDEF_V1ALPHA1:
    config_spec = build_v06_spec(config_spec, args.project, email, args.zone,
                                 args.setup_project)
  else:
    config_spec = build_v07_spec(config_spec, args.project, email, args.zone,
                                 args.setup_project)

  config_spec["spec"] = util.filter_spartakus(config_spec["spec"])

  # Remove name because we will auto infer from directory.
  if "name" in config_spec["metadata"]:
    logging.info("Deleting name in kfdef spec.")
    del config_spec["metadata"]["name"]

  app_name = os.path.basename(app_dir)
  if "labels" not in config_spec["metadata"]:
    config_spec["metadata"]["labels"] = {}

  if labels:
    config_spec["metadata"]["labels"].update(labels)

  logging.info("KFDefSpec:\n%s", yaml.safe_dump(config_spec))

  if kfdef_version == KFDEF_V1ALPHA1:
    logging.info("Deploying using v06 syntax")

    logging.info("Checking if deployment %s already exists in project %s",
                 args.project, app_name)

    if check_if_kfapp_exists(args.project, app_name, args.zone):
      # With v0.6 kfctl can't successfully run apply a 2nd time so if
      # the deployment already exists we can't redeploy.
      logging.info("Deployment %s already exists in project %s; not "
                   "redeploying", args.project, app_name)
      return

    with tempfile.NamedTemporaryFile(prefix="tmpkf_config", suffix=".yaml",
                                     delete=False) as hf:
      config_file = hf.name
      logging.info("Writing file %s", config_file)
      yaml.dump(config_spec, hf)

    util.run([kfctl_path, "init", app_dir, "-V", "--config=" + config_file],
             env=env)

    util.run([kfctl_path, "generate", "-V", "all"], env=env, cwd=app_dir)

    util.run([kfctl_path, "apply", "-V", "all"], env=env, cwd=app_dir)
  else:
    logging.info("Deploying using v07 syntax")

    if not os.path.exists(app_dir):
      logging.info("Creating app dir %s", app_dir)
      os.makedirs(app_dir)

    config_file = os.path.join(app_dir, "kf_config.yaml")
    with open(config_file, "w") as hf:
      logging.info("Writing file %s", config_file)
      yaml.dump(config_spec, hf)

    util.run([kfctl_path, "apply", "-V", "-f", config_file], env=env)

  # We will hit lets encrypt rate limiting with the managed certificates
  # So create a self signed certificate and update the ingress to use it.
  if args.use_self_cert:
    logging.info("Configuring self signed certificate")

    util.load_kube_credentials()

    api_client = k8s_client.ApiClient()
    ingress_namespace = "istio-system"
    ingress_name = "envoy-ingress"
    tls_endpoint = "{0}.endpoints.{1}.cloud.goog".format(app_name, args.project)
    logging.info("Configuring self signed cert for %s", tls_endpoint)
    util.use_self_signed_for_ingress(ingress_namespace, ingress_name,
                                     tls_endpoint, api_client)
Exemplo n.º 7
0
    def delete(
            self,
            project_base_name,
            start_index,
            end_index,
            kfname,  # pylint: disable=too-many-arguments
            job_file=None,
            output_dir=None,
            namespace=DEFAULT_NAMESPACE):
        """Launch one K8s Job per project to delete many Kubeflow instances.

    Args:
      project_base_name: The base name for the projects; should end with "-".
      start_index: First project index (inclusive).
      end_index: Last project index (exclusive).
      kfname: The name of the kubeflow app.
      job_file: Path to the YAML file containing the K8s Job used as the
        template for the launched jobs; defaults to self._default_job_file().
      output_dir: Directory to write the rendered job specs to; a temporary
        directory is created when not supplied.
      namespace: Namespace to create the jobs in.
    """
        util.load_kube_credentials()

        # API client for talking to the K8s master.
        client = k8s_client.ApiClient()
        batch = k8s_client.BatchV1Api(client)

        if not job_file:
            job_file = self._default_job_file()

        if not os.path.exists(job_file):
            raise ValueError("job file {0} does not exist".format(job_file))

        logging.info("Job file: %s", job_file)

        if not output_dir:
            output_dir = tempfile.mkdtemp()

        logging.info("output_dir: %s", output_dir)

        # A label shared by every job in this batch so that all of them can
        # be waited on together afterwards.
        label = (datetime.datetime.now().strftime("%Y%m%d-%H%M%S-") +
                 uuid.uuid4().hex[0:4])

        for i in range(start_index, end_index):
            target_project = "{0}{1}".format(project_base_name, i)
            logging.info("Processing project=%s", target_project)

            spec = self._create_delete_job_spec(job_file, label,
                                                target_project, kfname,
                                                namespace)

            spec_path = os.path.join(output_dir,
                                     "delete-{0}.yaml".format(target_project))
            logging.info("Writing job spec to %s", spec_path)
            with open(spec_path, "w") as out:
                yaml.safe_dump(spec, out)

            # Submit the job to the cluster.
            logging.info("Creating job")
            created = batch.create_namespaced_job(
                spec["metadata"]["namespace"], spec)

            logging.info("Created job %s.%s:\n%s",
                         created.metadata.namespace,
                         created.metadata.name,
                         yaml.safe_dump(created.to_dict()))

        self.wait_for_jobs(namespace, "group=" + label)