Пример #1
0
 def load_manifest_from_file(filename: str, type: Type[T]) -> T:
     """Parse a YAML manifest file into ``type`` and push it to SSM.

     The file is read with ``yaml.safe_load`` after the include constructor
     (rooted at the file's directory) and the two injector helpers have been
     registered. The raw mapping gets an ``SsmParameterName`` entry derived
     from its ``Name`` key before it is handed to the schema.

     :param filename: path of the manifest file (made absolute internally).
     :param type: either ``Manifest`` or ``FoundationManifest``.
     :return: the deserialized manifest, after it was dumped through
         ``ManifestSerDe.dump_manifest_to_ssm``.
     :raises ValueError: if ``type`` is neither of the two supported classes.
     """
     _logger.debug("Loading manifest file (%s)", filename)
     filepath = os.path.abspath(filename)
     _logger.debug("filepath: %s", filepath)
     filedir: str = os.path.dirname(filepath)
     utils.print_dir(dir=filedir)

     # Includes are resolved relative to the manifest's own directory.
     YamlIncludeConstructor.add_to_loader_class(loader_class=yaml.SafeLoader,
                                                base_dir=filedir)
     # NOTE(review): judging by their names these register SSM-parameter and
     # environment-variable substitution on the loader -- confirm.
     _add_ssm_param_injector()
     _add_env_var_injector()

     with open(filepath, "r") as stream:
         raw: Dict[str, Any] = cast(Dict[str, Any], yaml.safe_load(stream))
     _logger.debug("raw: %s", raw)

     # Pick the SSM prefix and the class owning the schema in one place.
     schema_owner: Any
     if type is Manifest:
         ssm_prefix = "/orbit"
         schema_owner = Manifest
     elif type is FoundationManifest:
         ssm_prefix = "/orbit-f"
         schema_owner = FoundationManifest
     else:
         raise ValueError("Unknown 'manifest' Type")

     raw["SsmParameterName"] = f"{ssm_prefix}/{raw['Name']}/manifest"
     loaded = schema_owner.Schema().load(data=raw,
                                         many=False,
                                         partial=False,
                                         unknown=EXCLUDE)
     manifest: T = cast(T, loaded)
     ManifestSerDe.dump_manifest_to_ssm(manifest=manifest)
     return manifest
Пример #2
0
def destroy_kubeflow(context: Context) -> None:
    """Tear down the Kubeflow resources of the environment's EKS cluster.

    Does nothing when the ``eksctl-orbit-<env>-cluster`` CloudFormation stack
    does not exist, or when ``kubectl get namespace kubeflow`` reports that
    the namespace is not found. Otherwise the Kubeflow config is regenerated
    under ``.orbit.out/<env>/kubeflow/<cluster>`` and ``./delete_kf.sh`` is
    run from that directory.

    The delete script is attempted until it succeeds; each
    ``FailedShellCommand`` counts as a failed attempt and is followed by a
    300 second pause. After three failed attempts the function gives up
    silently.

    :param context: environment context; only ``context.name`` is read here
        directly, the object is also passed on to helpers.
    """
    stack_name: str = f"orbit-{context.name}"
    final_eks_stack_name: str = f"eksctl-{stack_name}-cluster"
    _logger.debug("EKSCTL stack name: %s", final_eks_stack_name)

    if not cfn.does_stack_exist(stack_name=final_eks_stack_name):
        return

    kubectl.write_kubeconfig(context=context)

    # Nothing to destroy if the kubeflow namespace is already gone.
    for line in sh.run_iterating("kubectl get namespace kubeflow"):
        if '"kubeflow" not found' in line:
            return

    cluster_name = f"orbit-{context.name}"
    output_path = os.path.join(".orbit.out", context.name, "kubeflow",
                               cluster_name)
    gen_kubeflow_config(context, output_path, cluster_name)

    _logger.debug("Destroying Kubeflow")
    output_path = os.path.abspath(output_path)
    _logger.debug(f"kubeflow config dir: {output_path}")
    utils.print_dir(output_path)

    timeouts = 0
    while timeouts < 3:
        try:
            _logger.info("Deleting kubeflow resources")
            sh.run("./delete_kf.sh", cwd=output_path)
        except FailedShellCommand:
            _logger.info(
                "The command returned a non-zero exit code. Retrying to delete resources"
            )
            timeouts += 1
            time.sleep(300)
        else:
            # Success: stop retrying. Without this the loop would re-run the
            # delete script forever, since only failures advance the counter.
            break
Пример #3
0
def update_docker_file(context: "Context", dir: str) -> None:
    """Resolve the placeholders of ``<dir>/Dockerfile`` in place.

    If no ``Dockerfile`` exists in ``dir`` the function only logs and lists
    the directory. Otherwise the file content is passed through
    ``utils.resolve_parameters`` with the keys ``region``, ``account``,
    ``env`` and ``jupyter_user_base`` and written back to the same path.

    ``jupyter_user_base`` points at the environment's ECR repository when the
    jupyter-user image source is ``"code"``; otherwise it is built from the
    image's configured repository. Both use the configured image version as
    the tag.

    :param context: environment context supplying account, region, name and
        image settings.
    :param dir: directory expected to contain the ``Dockerfile``.
    """
    _logger.debug("Docker directory before building: %s", os.path.abspath(dir))
    utils.print_dir(dir)
    docker_file = os.path.join(dir, "Dockerfile")
    if not os.path.exists(docker_file):
        return

    _logger.info("Building DockerFile %s", docker_file)
    tag = context.images.jupyter_user.version
    image_source = context.images.jupyter_user.get_source(
        account_id=context.account_id, region=context.region)
    if image_source == "code":
        jupyter_user_base = f"{context.account_id}.dkr.ecr.{context.region}.amazonaws.com/orbit-{context.name}/jupyter-user:{tag}"
    else:
        jupyter_user_base = f"{context.images.jupyter_user.repository}:{tag}"

    with open(docker_file, "r") as reader:
        content: str = reader.read()

    substitutions = {
        "region": context.region,
        "account": context.account_id,
        "env": context.name,
        "jupyter_user_base": jupyter_user_base,
    }
    content = utils.resolve_parameters(content, substitutions)

    with open(docker_file, "w") as writer:
        writer.write(content)
def remote_cli(command: str, args: Tuple[str]) -> None:
    """Dispatch ``command`` to the matching ``RemoteCommands`` attribute.

    Intended to run inside CodeBuild: debug logging is switched on with the
    remote format, the working directory tree is listed (minus a few noisy
    folders), and the attribute of ``RemoteCommands`` named ``command`` is
    called with ``args``.

    :param command: attribute name to look up on ``RemoteCommands``.
    :param args: argument tuple handed to the resolved function as-is.
    :raises AttributeError: if ``RemoteCommands`` has no such attribute.
    """
    enable_debug(format=DEBUG_LOGGING_FORMAT_REMOTE)
    # Imported only after debug logging has been enabled, as in the original.
    from aws_orbit.remote_files import REMOTE_FUNC_TYPE, RemoteCommands

    _logger.debug("Remote bundle structure:")
    ignored_dirs = ["__pycache__", "cdk", ".venv", ".mypy_cache"]
    print_dir(os.getcwd(), exclude=ignored_dirs)

    handler: REMOTE_FUNC_TYPE = getattr(RemoteCommands, command)
    handler(args)
def deploy(
    plugin_id: str,
    context: "Context",
    team_context: "TeamContext",
    parameters: Dict[str, Any],
) -> None:
    """Package and install this plugin's Helm chart for a team.

    The release is named ``<team>-<plugin_id>`` (underscores in ``plugin_id``
    are replaced by dashes) and installed into the team's namespace from the
    team Helm repository.

    :param plugin_id: plugin identifier; normalized to use dashes.
    :param context: environment context (region, account id, name).
    :param team_context: team context (name, team Helm repository).
    :param parameters: plugin parameters; not read by this function.
    :raises Exception: if the team has no Helm repository configured.
    """
    _logger.debug("Team Env name: %s | Team name: %s", context.name,
                  team_context.name)
    plugin_id = plugin_id.replace("_", "-")
    _logger.debug("plugin_id: %s", plugin_id)
    release_name = f"{team_context.name}-{plugin_id}"

    _logger.info("Checking Chart %s is installed...", release_name)
    if helm.is_exists_chart_release(release_name, team_context.name):
        # Nothing is uninstalled here; the install below simply runs against
        # the existing release, so the message must not claim a removal.
        _logger.info(
            "Chart %s already installed, proceeding to install over the existing release",
            release_name)

    vars: Dict[str, Optional[str]] = dict(
        team=team_context.name,
        region=context.region,
        account_id=context.account_id,
        env_name=context.name,
        plugin_id=plugin_id,
    )

    chart_path = helm.create_team_charts_copy(team_context=team_context,
                                              path=CHART_PATHS,
                                              target_path=plugin_id)
    _logger.debug("package dir")
    utils.print_dir(CHART_PATHS)
    _logger.debug("copy chart dir")
    utils.print_dir(chart_path)

    # Fail with a clear message instead of handing None to helm.add_repo.
    if not team_context.team_helm_repository:
        raise Exception("Missing team helm repository")
    repo_location = team_context.team_helm_repository
    repo = team_context.name
    helm.add_repo(repo=repo, repo_location=repo_location)
    chart_name, chart_version, chart_package = helm.package_chart(
        repo=repo, chart_path=chart_path, values=vars)

    _logger.info("Chart %s installing ", release_name)
    helm.install_chart(
        repo=repo,
        namespace=team_context.name,
        name=release_name,
        chart_name=chart_name,
        chart_version=chart_version,
    )

    # NOTE(review): the chart is packaged a second time with identical
    # arguments; kept because package_chart's side effects are not visible
    # from here -- confirm whether this call is needed.
    chart_name, chart_version, chart_package = helm.package_chart(
        repo=repo, chart_path=chart_path, values=vars)
    _logger.info(
        f"Sagemaker Operator Helm Chart {chart_name}@{chart_version} installed for {team_context.name} at {chart_package}"
    )
Пример #6
0
def deploy(plugin_id: str, context: "Context", team_context: "TeamContext",
           parameters: Dict[str, Any]) -> None:
    """Install the script-running chart for a team.

    A copy of the chart is made for the team, the mandatory ``script``
    parameter is rendered with ``utils.resolve_parameters`` and written to
    ``script.txt`` inside that copy, and the chart is then packaged and
    installed as release ``<team>-<plugin_id>`` in the team's namespace.

    :param plugin_id: plugin identifier; underscores become dashes.
    :param context: environment context (region, account, images, toolkit).
    :param team_context: team context (name; used to init the team repo).
    :param parameters: must contain ``script``; ``restartPolicy`` is optional
        and defaults to ``"Never"``.
    :raises Exception: if ``parameters`` has no ``script`` key.
    """
    _logger.debug("Team Env name: %s | Team name: %s", context.name,
                  team_context.name)
    plugin_id = plugin_id.replace("_", "-")
    _logger.debug("plugin_id: %s", plugin_id)

    chart_path = helm.create_team_charts_copy(team_context=team_context,
                                              path=CHART_PATH)
    _logger.debug("package dir")
    utils.print_dir(CHART_PATH)
    _logger.debug("copy chart dir")
    utils.print_dir(chart_path)

    # Development builds always re-pull the image.
    is_dev_build = aws_orbit.__version__.endswith(".dev0")
    chart_values: Dict[str, Optional[str]] = {
        "team": team_context.name,
        "region": context.region,
        "account_id": context.account_id,
        "env_name": context.name,
        "tag": context.images.jupyter_hub.version,
        "restart_policy": parameters.get("restartPolicy", "Never"),
        "plugin_id": plugin_id,
        "toolkit_s3_bucket": context.toolkit.s3_bucket,
        "image_pull_policy": "Always" if is_dev_build else "IfNotPresent",
    }

    if "script" not in parameters:
        raise Exception(f"Plugin {plugin_id} must define parameter 'script'")
    script_file = os.path.join(chart_path, "script.txt")
    script_body = utils.resolve_parameters(parameters["script"], chart_values)
    with open(script_file, "w") as handle:
        handle.write(script_body)

    repo_location = helm.init_team_repo(context=context,
                                        team_context=team_context)
    repo = team_context.name
    _logger.debug(script_body)
    helm.add_repo(repo=repo, repo_location=repo_location)
    chart_name, chart_version, chart_package = helm.package_chart(
        repo=repo, chart_path=chart_path, values=chart_values)
    release_name = f"{team_context.name}-{plugin_id}"
    helm.install_chart(
        repo=repo,
        namespace=team_context.name,
        name=release_name,
        chart_name=chart_name,
        chart_version=chart_version,
    )
Пример #7
0
def destroy_teams(context: "Context") -> None:
    """Best-effort ``kubectl delete`` of the generated teams manifest.

    Skipped entirely when the ``eksctl-orbit-<env>-cluster`` CloudFormation
    stack does not exist. A failing delete command is logged at debug level
    and otherwise ignored.

    :param context: environment context; ``context.name`` selects the
        cluster and the object is forwarded to the manifest helpers.
    """
    eks_stack_name: str = f"eksctl-orbit-{context.name}-cluster"
    _logger.debug("EKSCTL stack name: %s", eks_stack_name)
    if not cfn.does_stack_exist(stack_name=eks_stack_name):
        return

    sh.run(
        f"eksctl utils write-kubeconfig --cluster orbit-{context.name} --set-kubeconfig-context"
    )
    k8s_context = get_k8s_context(context=context)
    _logger.debug("kubectl k8s_context: %s", k8s_context)
    _logger.debug("Attempting kubectl delete")
    output_path = _generate_teams_manifest(context=context)
    utils.print_dir(dir=output_path)

    delete_command = (
        f"kubectl delete -f {output_path} --grace-period=0 --force "
        f"--ignore-not-found --wait=false --context {k8s_context}")
    try:
        sh.run(delete_command)
    except exceptions.FailedShellCommand as ex:
        # Let's leave for eksctl, it will destroy everything anyway...
        _logger.debug("Skipping: %s", ex)
Пример #8
0
def deploy_kubeflow(context: Context) -> None:
    """Apply the Kubeflow configuration to the environment's EKS cluster.

    No-op when the ``eksctl-orbit-<env>-cluster`` CloudFormation stack is
    absent. Otherwise the config is generated under
    ``.orbit.out/<env>/kubeflow/<cluster>``, ``./apply_kf.sh`` is run from
    there, and after a 120 second pause the
    ``nvidia-device-plugin-daemonset`` DaemonSet is deleted from the
    ``kubeflow`` namespace.

    :param context: environment context; ``context.name`` names the cluster.
    """
    stack_name: str = f"orbit-{context.name}"
    final_eks_stack_name: str = f"eksctl-{stack_name}-cluster"
    _logger.debug("EKSCTL stack name: %s", final_eks_stack_name)

    if not cfn.does_stack_exist(stack_name=final_eks_stack_name):
        return

    cluster_name = f"orbit-{context.name}"
    relative_dir = os.path.join(".orbit.out", context.name, "kubeflow",
                                cluster_name)
    gen_kubeflow_config(context, relative_dir, cluster_name)

    _logger.debug("Deploying Kubeflow")
    output_path = os.path.abspath(relative_dir)
    _logger.debug(f"kubeflow config dir: {output_path}")
    utils.print_dir(output_path)

    sh.run("./apply_kf.sh", cwd=output_path)
    # Fixed pause between applying Kubeflow and removing the daemonset.
    time.sleep(120)
    sh.run("kubectl delete ds -n kubeflow nvidia-device-plugin-daemonset")
Пример #9
0
def update_docker_file(account_id: str, region: str, env: str, tag: str,
                       dir: str) -> None:
    """Resolve the placeholders of ``<dir>/Dockerfile`` in place.

    When the file exists, its content is run through
    ``utils.resolve_parameters`` with ``region``, ``account``, ``env`` and
    ``jupyter_user_base`` (the environment's ECR jupyter-user image at
    ``tag``) and written back. When it does not exist, nothing is modified.

    :param account_id: AWS account id used in the ECR registry host.
    :param region: AWS region used in the ECR registry host.
    :param env: environment name used in the repository path.
    :param tag: image tag of the jupyter-user base image.
    :param dir: directory expected to contain the ``Dockerfile``.
    """
    _logger.debug("Docker directory before building: %s", os.path.abspath(dir))
    utils.print_dir(dir)
    docker_file = os.path.join(dir, "Dockerfile")
    if not os.path.exists(docker_file):
        return

    _logger.info("Building DockerFile %s", docker_file)
    jupyter_user_base = f"{account_id}.dkr.ecr.{region}.amazonaws.com/orbit-{env}/jupyter-user:{tag}"
    _logger.debug(
        f"update_docker_file: jupyter_user_base =  {jupyter_user_base}")

    with open(docker_file, "r") as reader:
        content: str = reader.read()

    substitutions = {
        "region": region,
        "account": account_id,
        "env": env,
        "jupyter_user_base": jupyter_user_base,
    }
    content = utils.resolve_parameters(content, substitutions)

    with open(docker_file, "w") as writer:
        writer.write(content)
def deploy(
    plugin_id: str,
    context: "Context",
    team_context: "TeamContext",
    parameters: Dict[str, Any],
) -> None:
    """Deploy the Lustre storage class for a team and package the user chart.

    Steps, in order:

    1. If release ``<team>-<plugin_id>`` is not yet installed in the team
       namespace: open TCP port 988 on the team security group for the
       cluster security group, then package and install (without upgrade)
       the ``fsx-storageclass`` chart from the team Helm repository.
    2. Call ``get_user_pv`` for the file system named
       ``lustre-<team>-fs-<plugin_id>``.
    3. Package the ``fsx-filesystem`` chart into the ``<team>--user`` Helm
       repository.

    :param plugin_id: plugin identifier; underscores become dashes.
    :param context: environment context (region, account, networking, images,
        cluster security group).
    :param team_context: team context (name, security group, scratch bucket,
        team/user Helm repositories).
    :param parameters: optional ``storage`` (default ``"1200Gi"``) and
        ``folder`` (default ``"data"``).
    :raises Exception: if the team or user Helm repository is not set.
    """
    _logger.debug("Team Env name: %s | Team name: %s", context.name,
                  team_context.name)
    plugin_id = plugin_id.replace("_", "-")
    _logger.debug("plugin_id: %s", plugin_id)
    release_name = f"{team_context.name}-{plugin_id}"
    _logger.info("Checking Chart %s is installed...", release_name)

    fs_name = f"lustre-{team_context.name}-fs-{plugin_id}"

    vars: Dict[str, Optional[str]] = dict(
        team=team_context.name,
        region=context.region,
        account_id=context.account_id,
        env_name=context.name,
        plugin_id=plugin_id,
        deploymentType="SCRATCH_2",
        sg=team_context.team_security_group_id,
        subnet=context.networking.data.nodes_subnets[0],
        s3importpath=
        f"s3://{team_context.scratch_bucket}/{team_context.name}/lustre",
        s3exportpath=
        f"s3://{team_context.scratch_bucket}/{team_context.name}/lustre",
        storage=parameters.get("storage", "1200Gi"),
        folder=parameters.get("folder", "data"),
        k8s_utilities_image=
        f"{context.images.k8s_utilities.repository}:{context.images.k8s_utilities.version}",
        fs_name=fs_name,
    )

    if not helm.is_exists_chart_release(release_name, team_context.name):
        # This branch runs only when the release is absent, so the message
        # must announce an installation, not a skip.
        _logger.info("Chart %s not installed yet, installing", release_name)

        ec2.authorize_security_group_ingress(
            group_id=cast(str, team_context.team_security_group_id),
            ip_permissions=[
                IpPermission(
                    from_port=988,
                    to_port=988,
                    ip_protocol="tcp",
                    user_id_group_pairs=[
                        UserIdGroupPair(
                            description="All from Cluster",
                            group_id=cast(str, context.cluster_sg_id),
                        )
                    ],
                )
            ],
        )

        chart_path = helm.create_team_charts_copy(team_context=team_context,
                                                  path=TEAM_CHARTS_PATH,
                                                  target_path=plugin_id)
        _logger.debug("package dir")
        utils.print_dir(TEAM_CHARTS_PATH)
        _logger.debug("copy chart dir")
        utils.print_dir(chart_path)

        if not team_context.team_helm_repository:
            raise Exception("Missing team helm repository")

        repo_location = team_context.team_helm_repository

        repo = team_context.name
        helm.add_repo(repo=repo, repo_location=repo_location)
        chart_name, chart_version, chart_package = helm.package_chart(
            repo=repo,
            chart_path=os.path.join(chart_path, "fsx-storageclass"),
            values=vars)
        helm.install_chart_no_upgrade(
            repo=repo,
            namespace=team_context.name,
            name=release_name,
            chart_name=chart_name,
            chart_version=chart_version,
        )

    get_user_pv(fs_name, plugin_id, context, team_context, vars)

    # install this package at the user helm repository such that its installed on every user space
    chart_path = helm.create_team_charts_copy(team_context=team_context,
                                              path=USER_CHARTS_PATH,
                                              target_path=plugin_id)

    if not team_context.user_helm_repository:
        raise Exception("Missing user helm repository")
    user_location = team_context.user_helm_repository

    user_repo = team_context.name + "--user"
    helm.add_repo(repo=user_repo, repo_location=user_location)

    chart_name, chart_version, chart_package = helm.package_chart(
        repo=user_repo,
        chart_path=os.path.join(chart_path, "fsx-filesystem"),
        values=vars)
    _logger.info(
        f"Lustre Helm Chart {chart_name}@{chart_version} installed for {team_context.name} at {chart_package}"
    )
Пример #11
0
def deploy(plugin_id: str, context: "Context", team_context: "TeamContext",
           parameters: Dict[str, Any]) -> None:
    """Install the Lustre storage-class chart and package the filesystem chart.

    Returns immediately when release ``<team>-<plugin_id>`` already exists in
    the team namespace. Otherwise a previous storage class is deleted on a
    best-effort basis, TCP port 988 is opened on the team security group for
    the cluster security group, the ``fsx_storageclass`` chart is packaged
    and installed without upgrade, and the ``fsx_filesystem`` chart is
    packaged into the team repository.

    :param plugin_id: plugin identifier; underscores become dashes.
    :param context: environment context (region, account, networking,
        cluster security group).
    :param team_context: team context (name, security group, scratch bucket).
    :param parameters: plugin parameters; not read by this function.
    """
    _logger.debug("Team Env name: %s | Team name: %s", context.name,
                  team_context.name)
    plugin_id = plugin_id.replace("_", "-")
    _logger.debug("plugin_id: %s", plugin_id)
    release_name = f"{team_context.name}-{plugin_id}"
    _logger.info("Checking Chart %s is installed...", release_name)
    if helm.is_exists_chart_release(release_name, team_context.name):
        _logger.info("Chart %s already installed, skipping installation",
                     release_name)
        return

    # Best effort: a failure to delete the old storage class is only logged.
    try:
        sh.run(
            f"kubectl delete sc fsx-lustre-{team_context.name}-fast-fs-lustre")
    except Exception as err:
        _logger.error(
            f"Deleting prior sc 'fsx-lustre-{team_context.name}-fast-fs-lustre' failed with:%s",
            str(err))

    chart_values: Dict[str, Optional[str]] = {
        "team": team_context.name,
        "region": context.region,
        "account_id": context.account_id,
        "env_name": context.name,
        "plugin_id": plugin_id,
        "deploymentType": "SCRATCH_2",
        "sg": team_context.team_security_group_id,
        "subnet": context.networking.data.nodes_subnets[0],
        "s3importpath":
        f"s3://{team_context.scratch_bucket}/{team_context.name}/lustre",
        "s3exportpath":
        f"s3://{team_context.scratch_bucket}/{team_context.name}/lustre",
    }

    cluster_pair = UserIdGroupPair(description="All from Cluster",
                                   group_id=cast(str, context.cluster_sg_id))
    port_988_rule = IpPermission(
        from_port=988,
        to_port=988,
        ip_protocol="tcp",
        user_id_group_pairs=[cluster_pair],
    )
    ec2.authorize_security_group_ingress(
        group_id=cast(str, team_context.team_security_group_id),
        ip_permissions=[port_988_rule],
    )

    chart_path = helm.create_team_charts_copy(team_context=team_context,
                                              path=CHARTS_PATH)
    _logger.debug("package dir")
    utils.print_dir(CHARTS_PATH)
    _logger.debug("copy chart dir")
    utils.print_dir(chart_path)

    repo_location = helm.init_team_repo(context=context,
                                        team_context=team_context)
    repo = team_context.name
    helm.add_repo(repo=repo, repo_location=repo_location)

    storageclass_dir = os.path.join(chart_path, "fsx_storageclass")
    chart_name, chart_version, chart_package = helm.package_chart(
        repo=repo, chart_path=storageclass_dir, values=chart_values)
    helm.install_chart_no_upgrade(
        repo=repo,
        namespace=team_context.name,
        name=release_name,
        chart_name=chart_name,
        chart_version=chart_version,
    )

    filesystem_dir = os.path.join(chart_path, "fsx_filesystem")
    chart_name, chart_version, chart_package = helm.package_chart(
        repo=repo, chart_path=filesystem_dir, values=chart_values)
    _logger.info(
        f"Lustre Helm Chart {chart_name}@{chart_version} installed for {team_context.name} at {chart_package}"
    )
def helm_package(plugin_id: str, context: "Context",
                 team_context: "TeamContext",
                 parameters: Dict[str, Any]) -> Tuple[str, str, str]:
    """Render the plugin script into a chart copy and package the chart.

    The container image is chosen as follows: the configured jupyter-user
    image when ``parameters`` has no ``image``; the configured utility-data
    image when the given ``image`` contains
    ``aws-orbit-workbench/utility-data``; otherwise the given ``image``
    verbatim. The mandatory ``script`` parameter is resolved against the
    chart values and written to ``team-script-launcher/script.txt`` in the
    chart copy before packaging.

    :param plugin_id: plugin identifier, also used as chart copy target path.
    :param context: environment context (region, account, images, toolkit).
    :param team_context: team context (name, team Helm repository).
    :param parameters: requires ``script``; optional ``image``, ``tag``,
        ``restartPolicy`` (default ``"Never"``), ``uid`` (default ``"1000"``)
        and ``gid`` (default ``"100"``).
    :return: ``(chart_name, chart_version, chart_package)`` as returned by
        ``helm.package_chart``.
    :raises Exception: if ``script`` is missing or the team has no Helm
        repository.
    """
    chart_path = helm.create_team_charts_copy(team_context=team_context,
                                              path=CHART_PATH,
                                              target_path=plugin_id)
    _logger.debug("copy chart dir")
    utils.print_dir(chart_path)

    if "image" in parameters:
        requested_image = parameters["image"]
        if "aws-orbit-workbench/utility-data" in requested_image:
            image = f"{context.images.utility_data.repository}:{context.images.utility_data.version}"
        else:
            image = requested_image
    else:
        image = f"{context.images.jupyter_user.repository}:{context.images.jupyter_user.version}"

    _logger.debug(f"For plugin {plugin_id} using image: {image}")

    # Development builds always re-pull the image.
    is_dev_build = aws_orbit.__version__.endswith(".dev0")
    chart_values: Dict[str, Optional[str]] = {
        "team": team_context.name,
        "region": context.region,
        "account_id": context.account_id,
        "env_name": context.name,
        "tag": (parameters["tag"] if "tag" in parameters else
                context.images.jupyter_user.version),
        "restart_policy": parameters.get("restartPolicy", "Never"),
        "plugin_id": plugin_id,
        "toolkit_s3_bucket": context.toolkit.s3_bucket,
        "image_pull_policy": "Always" if is_dev_build else "IfNotPresent",
        "image": image,
        "uid": parameters.get("uid", "1000"),
        "gid": parameters.get("gid", "100"),
    }

    if "script" not in parameters:
        raise Exception(f"Plugin {plugin_id} must define parameter 'script'")
    launcher_dir = os.path.join(chart_path, "team-script-launcher")
    script_file = os.path.join(launcher_dir, "script.txt")
    script_body = utils.resolve_parameters(parameters["script"], chart_values)
    with open(script_file, "w") as handle:
        handle.write(script_body)

    if not team_context.team_helm_repository:
        raise Exception("Missing team helm repository")

    repo_location = team_context.team_helm_repository
    repo = team_context.name
    _logger.debug(script_body)
    _init_team_repo(context=context,
                    team_context=team_context,
                    repo_location=repo_location)
    helm.add_repo(repo=repo, repo_location=repo_location)
    packaged = helm.package_chart(repo=repo,
                                  chart_path=launcher_dir,
                                  values=chart_values)
    chart_name, chart_version, chart_package = packaged
    return (chart_name, chart_version, chart_package)