Пример #1
0
def use_cluster_credentials(cluster_name):
    """
    Drop the user into a login shell that is authenticated against a cluster.

    The cluster's config file is validated and loaded, and the user's shell
    (read from the `SHELL` environment variable) is then started while the
    cluster's auth context is active, so commands typed in that shell work
    against the named cluster.

    This is the implementation of the `use-cluster-credentials` CLI command
    and is not meant to be called from anywhere else in the deployer.
    """
    validate_cluster_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as stream:
        cluster_spec = yaml.load(stream)
    cluster = Cluster(cluster_spec, cluster_file.parent)

    # Cluster.auth() is a context manager, so it has to be entered with
    # `with` rather than called like an ordinary function.
    with cluster.auth():
        # The child shell inherits this process's environment (KUBECONFIG
        # included). Python blocks here until the user exits that shell and
        # then carries on as normal.
        # TODO: Figure out how to change the PS1 env var of the spawned shell
        # to change the prompt to f"cluster-{cluster.spec['name']}", so it is
        # visually obvious that the user is operating in a different shell.
        login_shell = [os.environ["SHELL"], "-l"]
        subprocess.check_call(login_shell)
Пример #2
0
def deploy(cluster_name, hub_name, config_path, dask_gateway_version):
    """
    Deploy one or more hubs in a given cluster

    Args:
        cluster_name: Name of the cluster whose hubs are deployed.
        hub_name: Name of a single hub to deploy. When falsy, every hub
            listed for the cluster is deployed in turn.
        config_path: Path to the encrypted file that holds the `auth0`
            credentials and the `secret_key` used below.
        dask_gateway_version: Passed through unchanged to each hub's
            `deploy()` call.

    Raises:
        ValueError: If `hub_name` is given but the cluster has no hub with
            that name.
    """
    validate_cluster_config(cluster_name)
    validate_hub_config(cluster_name, hub_name)
    assert_single_auth_method_enabled(cluster_name, hub_name)

    with get_decrypted_file(config_path) as decrypted_file_path:
        with open(decrypted_file_path) as f:
            config = yaml.load(f)

    # Most of our hubs use Auth0 for Authentication. This lets us programmatically
    # determine what auth provider each hub uses - GitHub, Google, etc. Without
    # this, we'd have to manually generate credentials for each hub - and we
    # don't want to do that. Auth0 domains are tied to an account, and
    # this is our auth0 domain for the paid account that 2i2c has.
    auth0 = config["auth0"]

    k = KeyProvider(auth0["domain"], auth0["client_id"],
                    auth0["client_secret"])

    # Each hub needs a unique proxy.secretToken. However, we don't want
    # to manually generate & save it. We also don't want it to change with
    # each deploy - that causes a pod restart with downtime. So instead,
    # we generate it based on a single secret key (`PROXY_SECRET_KEY`)
    # combined with the name of each hub. This way, we get unique,
    # cryptographically secure proxy.secretTokens without having to
    # keep much state. We can rotate them by changing `PROXY_SECRET_KEY`.
    # However, if `PROXY_SECRET_KEY` leaks, that means all the hub's
    # proxy.secretTokens have leaked. So let's be careful with that!
    SECRET_KEY = bytes.fromhex(config["secret_key"])

    config_file_path = find_absolute_path_to_cluster_file(cluster_name)
    with open(config_file_path) as f:
        cluster = Cluster(yaml.load(f), config_file_path.parent)

    with cluster.auth():
        hubs = cluster.hubs
        if hub_name:
            hub = next((hub for hub in hubs if hub.spec["name"] == hub_name),
                       None)
            # Fail with a clear message instead of an AttributeError on
            # `None.spec` when the requested hub is not part of this cluster.
            if hub is None:
                raise ValueError(
                    f"Hub '{hub_name}' not found in cluster '{cluster_name}'"
                )
            print_colour(f"Deploying hub {hub.spec['name']}...")
            hub.deploy(k, SECRET_KEY, dask_gateway_version)
        else:
            for i, hub in enumerate(hubs):
                print_colour(
                    f"{i+1} / {len(hubs)}: Deploying hub {hub.spec['name']}..."
                )
                hub.deploy(k, SECRET_KEY, dask_gateway_version)
Пример #3
0
def deploy_support(cluster_name, cert_manager_version):
    """Install or upgrade the support components on a cluster.

    The cluster and support configuration are validated first. If the loaded
    cluster has no support configuration, nothing is deployed.

    Args:
        cluster_name (str): The name of the cluster to deploy support components to
        cert_manager_version (str): The version of cert-manager to deploy to the
            cluster, in the form vX.Y.Z. where X.Y.Z is valid SemVer.
    """
    validate_cluster_config(cluster_name)
    validate_support_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    with open(cluster_file) as stream:
        cluster_spec = yaml.load(stream)
    cluster = Cluster(cluster_spec, cluster_file.parent)

    # Nothing to do for clusters that do not declare any support config.
    if not cluster.support:
        return

    with cluster.auth():
        cluster.deploy_support(cert_manager_version=cert_manager_version)
Пример #4
0
def main():
    """
    Entry point of the deployer command line interface.

    Builds an argument parser with one subparser per subcommand, parses the
    command line, and then calls the function that implements the selected
    subcommand with the parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description="""A command line tool to perform various functions related
        to deploying and maintaining a JupyterHub running on kubernetes
        infrastructure
        """)
    commands = parser.add_subparsers(
        required=True,
        dest="action",
        help="Available subcommands",
    )

    # --- Arguments shared by several subcommands ---
    # NOTE: This parent parser must be created with add_help=False, otherwise
    #       running `python deployer --help` fails with a "conflicting option
    #       strings" error.
    cluster_parent = argparse.ArgumentParser(add_help=False)
    cluster_parent.add_argument(
        "cluster_name",
        type=str,
        help="The name of the cluster to perform actions on",
    )

    # --- Subcommand definitions (register new subcommands below) ---
    # deploy
    deploy_cmd = commands.add_parser(
        "deploy",
        parents=[cluster_parent],
        help="Install/upgrade the helm charts of JupyterHubs on a cluster",
    )
    deploy_cmd.add_argument(
        "hub_name",
        nargs="?",
        help="The hub, or list of hubs, to install/upgrade the helm chart for",
    )
    deploy_cmd.add_argument(
        "--config-path",
        # The default is a path relative to the PROJECT ROOT
        default="shared/deployer/enc-auth-providers-credentials.secret.yaml",
        help="File to read secret deployment configuration from",
    )
    deploy_cmd.add_argument(
        "--dask-gateway-version",
        type=str,
        # Keep this in sync with the version listed in daskhub's Chart.yaml
        # https://github.com/2i2c-org/infrastructure/blob/HEAD/helm-charts/daskhub/Chart.yaml#L14
        default="2022.6.1",
        help="For daskhubs, the version of dask-gateway to install for the CRDs. Default: 2022.6.1",
    )

    # validate
    validate_cmd = commands.add_parser(
        "validate",
        parents=[cluster_parent],
        help="Validate the cluster.yaml configuration itself, as well as the provided non-encrypted helm chart values files for each hub or the specified hub.",
    )
    validate_cmd.add_argument(
        "hub_name",
        nargs="?",
        help="The hub, or list of hubs, to validate provided non-encrypted helm chart values for.",
    )

    # deploy-support
    support_cmd = commands.add_parser(
        "deploy-support",
        parents=[cluster_parent],
        help="Install/upgrade the support helm release on a given cluster",
    )
    support_cmd.add_argument(
        "--cert-manager-version",
        type=str,
        default="v1.3.1",
        help="The version of cert-manager to deploy in the form vX.Y.Z. Defaults to v1.3.1",
    )

    # deploy-grafana-dashboards (takes only the shared cluster_name argument)
    commands.add_parser(
        "deploy-grafana-dashboards",
        parents=[cluster_parent],
        help="Deploy grafana dashboards to a cluster for monitoring JupyterHubs. deploy-support must be run first!",
    )

    # use-cluster-credentials (takes only the shared cluster_name argument)
    commands.add_parser(
        "use-cluster-credentials",
        parents=[cluster_parent],
        help="Modify the current kubeconfig with the deployer's access credentials for the named cluster",
    )

    # generate-helm-upgrade-jobs
    # This one deliberately does not inherit from the shared parent parser.
    upgrade_jobs_cmd = commands.add_parser(
        "generate-helm-upgrade-jobs",
        help="Generate a set of matrix jobs to perform a helm upgrade in parallel across clusters and hubs. Emit JSON to stdout that can be read by the strategy.matrix field of a GitHub Actions workflow.",
    )
    upgrade_jobs_cmd.add_argument(
        "filepaths",
        nargs="?",
        type=_converted_string_to_list,
        help="A singular or space-delimited list of added or modified filepaths in the repo",
    )

    # run-hub-health-check
    health_cmd = commands.add_parser(
        "run-hub-health-check",
        parents=[cluster_parent],
        help="Run a health check against a given hub deployed on a given cluster",
    )
    health_cmd.add_argument(
        "hub_name",
        help="The hub to run health checks against.",
    )
    health_cmd.add_argument(
        "--check-dask-scaling",
        action="store_true",
        help="For daskhubs, optionally check that dask workers can be scaled",
    )

    # exec-homes-shell
    homes_cmd = commands.add_parser(
        "exec-homes-shell",
        parents=[cluster_parent],
        help="Pop a shell with home directories of given hub mounted",
    )
    homes_cmd.add_argument(
        "hub_name",
        help="The deployed hub whose home directories are to be examined",
    )
    # --- End of subcommand definitions ---

    args = parser.parse_args()

    def run_validate():
        # Validation covers the cluster file, the support config and the hubs
        validate_cluster_config(args.cluster_name)
        validate_support_config(args.cluster_name)
        validate_hub_config(args.cluster_name, args.hub_name)

    # Each handler reads its arguments lazily, so only the attributes that
    # belong to the selected subcommand are ever accessed.
    handlers = {
        "deploy": lambda: deploy(
            args.cluster_name,
            args.hub_name,
            args.config_path,
            dask_gateway_version=args.dask_gateway_version,
        ),
        "exec-homes-shell": lambda: exec_homes_shell(
            args.cluster_name, args.hub_name
        ),
        "validate": run_validate,
        "deploy-support": lambda: deploy_support(
            args.cluster_name,
            cert_manager_version=args.cert_manager_version,
        ),
        "deploy-grafana-dashboards": lambda: deploy_grafana_dashboards(
            args.cluster_name
        ),
        "use-cluster-credentials": lambda: use_cluster_credentials(
            args.cluster_name
        ),
        "generate-helm-upgrade-jobs": lambda: generate_helm_upgrade_jobs(
            args.filepaths
        ),
        "run-hub-health-check": lambda: run_hub_health_check(
            args.cluster_name,
            args.hub_name,
            check_dask_scaling=args.check_dask_scaling,
        ),
    }

    handler = handlers.get(args.action)
    if handler is not None:
        handler()
Пример #5
0
def main():
    """
    Entry point of the deployer command line interface.

    Builds an argument parser with one subparser per subcommand, parses the
    command line, and then calls the function that implements the selected
    subcommand with the parsed arguments.
    """
    parser = argparse.ArgumentParser(
        description="""A command line tool to perform various functions related
        to deploying and maintaining a JupyterHub running on kubernetes
        infrastructure
        """)
    commands = parser.add_subparsers(
        required=True,
        dest="action",
        help="Available subcommands",
    )

    # --- Arguments shared by every subcommand ---
    # NOTE: This parent parser must be created with add_help=False, otherwise
    #       running `python deployer --help` fails with a "conflicting option
    #       strings" error.
    cluster_parent = argparse.ArgumentParser(add_help=False)
    cluster_parent.add_argument(
        "cluster_name",
        type=str,
        help="The name of the cluster to perform actions on",
    )

    # --- Subcommand definitions (register new subcommands below) ---
    # deploy
    deploy_cmd = commands.add_parser(
        "deploy",
        parents=[cluster_parent],
        help="Install/upgrade the helm charts of JupyterHubs on a cluster",
    )
    deploy_cmd.add_argument(
        "hub_name",
        nargs="?",
        help="The hub, or list of hubs, to install/upgrade the helm chart for",
    )
    deploy_cmd.add_argument(
        "--skip-hub-health-test",
        action="store_true",
        help="Bypass the hub health test",
    )
    deploy_cmd.add_argument(
        "--config-path",
        # The default is a path relative to the PROJECT ROOT
        default="shared/deployer/enc-auth-providers-credentials.secret.yaml",
        help="File to read secret deployment configuration from",
    )

    # validate
    validate_cmd = commands.add_parser(
        "validate",
        parents=[cluster_parent],
        help="Validate the cluster.yaml configuration itself, as well as the provided non-encrypted helm chart values files for each hub or the specified hub.",
    )
    validate_cmd.add_argument(
        "hub_name",
        nargs="?",
        help="The hub, or list of hubs, to validate provided non-encrypted helm chart values for.",
    )

    # deploy-support (takes only the shared cluster_name argument)
    commands.add_parser(
        "deploy-support",
        parents=[cluster_parent],
        help="Install/upgrade the support helm release on a given cluster",
    )

    # deploy-grafana-dashboards (takes only the shared cluster_name argument)
    commands.add_parser(
        "deploy-grafana-dashboards",
        parents=[cluster_parent],
        help="Deploy grafana dashboards to a cluster for monitoring JupyterHubs. deploy-support must be run first!",
    )

    # use-cluster-credentials (takes only the shared cluster_name argument)
    commands.add_parser(
        "use-cluster-credentials",
        parents=[cluster_parent],
        help="Modify the current kubeconfig with the deployer's access credentials for the named cluster",
    )
    # --- End of subcommand definitions ---

    args = parser.parse_args()

    def run_validate():
        # Validation covers the cluster file, the support config and the hubs
        validate_cluster_config(args.cluster_name)
        validate_support_config(args.cluster_name)
        validate_hub_config(args.cluster_name, args.hub_name)

    # Each handler reads its arguments lazily, so only the attributes that
    # belong to the selected subcommand are ever accessed.
    handlers = {
        "deploy": lambda: deploy(
            args.cluster_name,
            args.hub_name,
            args.skip_hub_health_test,
            args.config_path,
        ),
        "validate": run_validate,
        "deploy-support": lambda: deploy_support(args.cluster_name),
        "deploy-grafana-dashboards": lambda: deploy_grafana_dashboards(
            args.cluster_name
        ),
        "use-cluster-credentials": lambda: use_cluster_credentials(
            args.cluster_name
        ),
    }

    handler = handlers.get(args.action)
    if handler is not None:
        handler()
Пример #6
0
def deploy_grafana_dashboards(cluster_name):
    """
    Deploy the JupyterHub Grafana dashboards to a cluster's Grafana instance.

    The dashboards and the `deploy.py` script that installs them come from
    https://github.com/jupyterhub/grafana-dashboards, which is cloned into a
    local `grafana_dashboards` directory and removed again once the
    deployment attempt has finished.

    The function returns early, after printing a message, when the cluster
    has no support configuration or when no Grafana ingress host can be
    found in the support values file.

    Raises:
        ValueError: If the decrypted secret file has no `grafana_token` key.
    """
    validate_cluster_config(cluster_name)
    validate_support_config(cluster_name)

    cluster_file = find_absolute_path_to_cluster_file(cluster_name)
    cluster_dir = cluster_file.parent
    with open(cluster_file) as stream:
        cluster_spec = yaml.load(stream)
    cluster = Cluster(cluster_spec, cluster_dir)

    # Without a support chart there is no Grafana to deploy dashboards to
    if not cluster.support:
        print_colour(
            "Support chart has not been deployed. Skipping Grafana dashboards deployment..."
        )
        return

    # The Grafana token lives in an encrypted file next to the cluster file
    token_file = cluster_dir.joinpath("enc-grafana-token.secret.yaml")
    with get_decrypted_file(token_file) as decrypted_path, open(
            decrypted_path) as stream:
        config = yaml.load(stream)

    # Refuse to continue if the secret file does not carry the token
    if "grafana_token" not in config:
        raise ValueError(
            f"`grafana_token` not provided in secret file! Please add it and try again: {token_file}"
        )

    # FIXME: We assume grafana_url and uses_tls config will be defined in the first
    #        file listed under support.helm_chart_values_files.
    values_files = cluster.support.get("helm_chart_values_files", [])
    first_values_file = values_files[0]
    with open(cluster_dir.joinpath(first_values_file)) as stream:
        support_values = yaml.load(stream)

    # Both the hosts and the TLS settings sit under grafana.ingress
    ingress = support_values.get("grafana", {}).get("ingress", {})
    hosts = ingress.get("hosts", {})
    tls = ingress.get("tls", {})

    if not hosts:
        print_colour(
            "Couldn't find `config.grafana.ingress.hosts`. Skipping Grafana dashboards deployment..."
        )
        return

    # Any truthy TLS config means Grafana is served over https
    scheme = "https" if tls else "http"
    grafana_url = f"{scheme}://{hosts[0]}"

    # The jupyterhub/grafana-dashboards deployer does the actual deployment
    print_colour("Cloning jupyterhub/grafana-dashboards...")

    clone_dir = "grafana_dashboards"
    clone_cmd = [
        "git",
        "clone",
        "https://github.com/jupyterhub/grafana-dashboards",
        clone_dir,
    ]
    subprocess.check_call(clone_cmd)

    # Keep the current environment so the deployer can still find jsonnet
    # and grafonnet, and add the token on top of it.
    deploy_env = {**os.environ, "GRAFANA_TOKEN": config["grafana_token"]}

    try:
        print_colour(f"Deploying grafana dashboards to {cluster_name}...")
        subprocess.check_call(
            ["./deploy.py", grafana_url],
            env=deploy_env,
            cwd=clone_dir,
        )
        print_colour(f"Done! Dashboards deployed to {grafana_url}.")
    finally:
        # Always remove the cloned repo again.
        # A temp directory is not used here because the deployer could not
        # call jsonnet from one, possibly because a temp file gets opened
        # more than once
        # (https://docs.python.org/3.8/library/tempfile.html#tempfile.NamedTemporaryFile)
        shutil.rmtree(clone_dir)