示例#1
0
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    alpha_ema,
    cluster_summaries,
    pricing_file,
    links_file,
    node_labels,
):
    """Collect resource data from all clusters and write the HTML report.

    Aggregates per-pod cost/requests/usage into applications (and their
    components), namespaces and teams, optionally reusing a pickled data
    cache, and finally renders everything via write_report().

    Returns the cluster summaries dict (either freshly collected or loaded
    from the cache).
    """
    notifications: List[tuple] = []

    if pricing_file:
        # override the built-in node pricing table with user-provided data
        pricing.regenerate_cost_dict(pricing_file)

    if links_file:
        with open(links_file, "rb") as fd:
            links = yaml.safe_load(fd)
    else:
        links = {}

    start = datetime.datetime.utcnow()

    out = OutputManager(Path(output_dir))
    # the data collection might take a long time, so first write index.html
    # to give users feedback that Kubernetes Resource Report has started
    # first copy CSS/JS/..
    out.copy_static_assets()
    write_loading_page(out)

    pickle_file_name = "dump.pickle"

    if use_cache and out.exists(pickle_file_name):
        # reuse previously collected data instead of querying all clusters
        with out.open(pickle_file_name, "rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]

    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            alpha_ema,
            cluster_summaries,
            no_ingress_status,
            node_labels,
        )
        teams = {}

    applications: Dict[str, dict] = {}
    namespace_usage: Dict[tuple, dict] = {}

    for cluster_id, summary in sorted(cluster_summaries.items()):
        # aggregate each pod into its application and, within the
        # application, into its component
        for _k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "components": {},
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            component = app["components"].get(
                pod["component"],
                {
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "clusters": set(),
                },
            )
            for r in "cpu", "memory":
                for key in "requests", "usage":
                    app[key][r] = app[key].get(r, 0) + pod.get(key, {}).get(
                        r, 0)
                    component[key][r] = component[key].get(r, 0) + pod.get(
                        key, {}).get(r, 0)
            aggregate_recommendation(pod, app)
            aggregate_recommendation(pod, component)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            app["team"] = pod["team"]

            component["cost"] += pod["cost"]
            component["slack_cost"] += pod.get("slack_cost", 0)
            component["pods"] += 1
            component["clusters"].add(cluster_id)

            app["components"][pod["component"]] = component
            applications[pod["application"]] = app

        # aggregate the same pods per (namespace, cluster) pair
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = (namespace["requests"].get(r, 0) +
                                            pod["requests"][r])
                namespace["usage"][r] = namespace["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            aggregate_recommendation(pod, namespace)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    if application_registry:
        resolve_application_ids(applications, application_registry)

    aggregate_by_team(applications, teams)

    def cluster_name(cluster_id):
        # sort key for a team's clusters; None for unknown cluster IDs
        # (hoisted out of the loop below: it does not depend on the team)
        try:
            return cluster_summaries[cluster_id]["cluster"].name
        except KeyError:
            return None

    for team in teams.values():
        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # propagate the (possibly registry-resolved) team back onto each pod
    for _cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications[pod["application"]]
            pod["team"] = app["team"]

    # enrich namespace aggregates with per-namespace metadata
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace_ = namespace_usage.get((ns, cluster_id))
            if namespace_:
                namespace_["email"] = ns_values["email"]
                namespace_["status"] = ns_values["status"]

    if not use_cache:
        # best-effort: a failed cache write must not break report generation
        try:
            with out.open(pickle_file_name, "wb") as fd:
                pickle.dump(
                    {
                        "cluster_summaries": cluster_summaries,
                        "teams": teams,
                        "applications": applications,
                        "namespace_usage": namespace_usage,
                    },
                    fd,
                )
        except Exception as e:
            logger.error(f"Could not dump pickled cache data: {e}")

    write_report(
        out,
        start,
        notifications,
        cluster_summaries,
        namespace_usage,
        applications,
        product,
        ecosystem,
        environment,
        teams,
        node_labels,
        links,
        alpha_ema,
    )

    return cluster_summaries
示例#2
0
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
):
    """Collect resource data from all clusters and write TSV/JSON/HTML reports.

    Aggregates per-pod cost/requests/usage into applications, writes the
    clusters/ingresses/pods/slack TSV files, renders the Jinja2 HTML pages
    and copies static assets.  Returns the cluster summaries dict.
    """
    notifications = []

    output_path = Path(output_dir)

    if pricing_file:
        # override the built-in node pricing table with user-provided data
        pricing.NODE_COSTS_MONTHLY = pricing.regenerate_cost_dict(pricing_file)

    start = datetime.datetime.utcnow()

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # reuse previously collected data instead of querying all clusters
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]

    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
        )
        teams = {}

    # BUGFIX: applications used to be loaded from the pickle on the cache
    # path and then re-aggregated from the cached summaries below, which
    # double-counted every application's cost/pods/requests.  Always rebuild
    # the aggregation from the summaries instead.
    applications = {}

    total_allocatable = collections.defaultdict(int)
    total_requests = collections.defaultdict(int)
    total_user_requests = collections.defaultdict(int)

    for cluster_id, summary in sorted(cluster_summaries.items()):
        for r in "cpu", "memory":
            total_allocatable[r] += summary["allocatable"][r]
            total_requests[r] += summary["requests"][r]
            total_user_requests[r] += summary["user_requests"][r]

        # NOTE: per-cluster cost_per_cpu/cost_per_memory are computed where
        # they are actually used (slack TSV below); the dead copies that
        # used to live here were removed.
        for _k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get("usage", {}).get(
                    r, 0
                )
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    # propagate the (possibly registry-resolved) team back onto each pod;
    # every pod's application is guaranteed to exist in `applications`
    # because the dict was built from these same pods above
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications[pod["application"]]
            pod["team"] = app["team"]

    if not use_cache:
        with pickle_path.open("wb") as fd:
            pickle.dump(
                {
                    "cluster_summaries": cluster_summaries,
                    "teams": teams,
                    "applications": applications,
                },
                fd,
            )

    logger.info("Writing clusters.tsv..")
    with (output_path / "clusters.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        for cluster_id, summary in sorted(cluster_summaries.items()):
            worker_instance_type = set()
            kubelet_version = set()
            for node in summary["nodes"].values():
                if node["role"] == "worker":
                    worker_instance_type.add(node["instance_type"])
                kubelet_version.add(node["kubelet_version"])
            fields = [
                cluster_id,
                summary["cluster"].api_server_url,
                summary["master_nodes"],
                summary["worker_nodes"],
                ",".join(worker_instance_type),
                ",".join(kubelet_version),
            ]
            for x in ["capacity", "allocatable", "requests", "usage"]:
                fields += [
                    round(summary[x]["cpu"], 2),
                    int(summary[x]["memory"] / ONE_MEBI),
                ]
            fields += [round(summary["cost"], 2)]
            writer.writerow(fields)

    logger.info("Writing ingresses.tsv..")
    with (output_path / "ingresses.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        for cluster_id, summary in sorted(cluster_summaries.items()):
            for ingress in summary["ingresses"]:
                writer.writerow(
                    [cluster_id, summary["cluster"].api_server_url] + ingress
                )

    logger.info("Writing pods.tsv..")
    with (output_path / "pods.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        with (output_path / "slack.tsv").open("w") as csvfile2:
            slackwriter = csv.writer(csvfile2, delimiter="\t")
            for cluster_id, summary in sorted(cluster_summaries.items()):
                cpu_slack = collections.Counter()
                memory_slack = collections.Counter()
                for k, pod in summary["pods"].items():
                    namespace, name = k
                    requests = pod["requests"]
                    application = pod["application"] or name.rsplit("-", 1)[0]
                    usage = pod.get("usage", collections.defaultdict(float))
                    cpu_slack[(namespace, application)] += (
                        requests["cpu"] - usage["cpu"]
                    )
                    memory_slack[(namespace, application)] += (
                        requests["memory"] - usage["memory"]
                    )
                    writer.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            pod["application"],
                            requests["cpu"],
                            requests["memory"],
                            usage["cpu"],
                            usage["memory"],
                        ]
                    )
                cost_per_cpu = summary["cost"] / summary["allocatable"]["cpu"]
                cost_per_memory = summary["cost"] / summary["allocatable"]["memory"]
                for namespace_name, slack in cpu_slack.most_common(20):
                    namespace, name = namespace_name
                    slackwriter.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            "cpu",
                            "{:3.2f}".format(slack),
                            "${:.2f} potential monthly savings".format(
                                slack * cost_per_cpu
                            ),
                        ]
                    )
                for namespace_name, slack in memory_slack.most_common(20):
                    namespace, name = namespace_name
                    slackwriter.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            "memory",
                            "{:6.0f}Mi".format(slack / ONE_MEBI),
                            "${:.2f} potential monthly savings".format(
                                slack * cost_per_memory
                            ),
                        ]
                    )

    templates_path = Path(__file__).parent / "templates"
    env = Environment(
        loader=FileSystemLoader(str(templates_path)),
        autoescape=select_autoescape(["html", "xml"]),
    )
    env.filters["money"] = filters.money
    env.filters["cpu"] = filters.cpu
    env.filters["memory"] = filters.memory
    total_cost = sum([s["cost"] for s in cluster_summaries.values()])
    total_hourly_cost = total_cost / HOURS_PER_MONTH
    now = datetime.datetime.utcnow()
    context = {
        "notifications": notifications,
        "cluster_summaries": cluster_summaries,
        "teams": teams,
        "applications": applications,
        "total_worker_nodes": sum(
            [s["worker_nodes"] for s in cluster_summaries.values()]
        ),
        "total_allocatable": total_allocatable,
        "total_requests": total_requests,
        "total_usage": {
            "cpu": sum(s["usage"]["cpu"] for s in cluster_summaries.values()),
            "memory": sum(s["usage"]["memory"] for s in cluster_summaries.values()),
        },
        "total_user_requests": total_user_requests,
        "total_pods": sum([len(s["pods"]) for s in cluster_summaries.values()]),
        "total_cost": total_cost,
        "total_cost_per_user_request_hour": {
            # max(..., 1) guards against division by zero for empty clusters
            "cpu": 0.5 * total_hourly_cost / max(total_user_requests["cpu"], 1),
            "memory": 0.5 * total_hourly_cost / max(
                total_user_requests["memory"] / ONE_GIBI, 1),
        },
        "total_slack_cost": sum([a["slack_cost"] for a in applications.values()]),
        "now": now,
        "duration": (now - start).total_seconds(),
        "version": __version__,
    }

    metrics = calculate_metrics(context)

    with (output_path / "metrics.json").open("w") as fd:
        json.dump(metrics, fd)

    for page in ["index", "clusters", "ingresses", "teams", "applications", "pods"]:
        file_name = "{}.html".format(page)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template(file_name)
        context["page"] = page
        template.stream(**context).dump(str(output_path / file_name))

    for cluster_id, summary in cluster_summaries.items():
        page = "clusters"
        file_name = "cluster-{}.html".format(cluster_id)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template("cluster.html")
        context["page"] = page
        context["cluster_id"] = cluster_id
        context["summary"] = summary
        template.stream(**context).dump(str(output_path / file_name))

    with (output_path / "cluster-metrics.json").open("w") as fd:
        json.dump(
            {
                cluster_id: {
                    key: {
                        # JSON object keys must be strings: join tuple keys
                        k if isinstance(k, str) else '/'.join(k): v
                        for k, v in value.items()
                    } if hasattr(value, 'items') else value
                    for key, value in summary.items()
                    if key != 'cluster'
                }
                for cluster_id, summary in cluster_summaries.items()
            },
            fd,
            default=json_default
        )

    for team_id, team in teams.items():
        page = "teams"
        file_name = "team-{}.html".format(team_id)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template("team.html")
        context["page"] = page
        context["team_id"] = team_id
        context["team"] = team
        template.stream(**context).dump(str(output_path / file_name))

    with (output_path / "team-metrics.json").open("w") as fd:
        json.dump(
            {
                team_id: {
                    **team,
                    "application": {
                        app_id: app
                        for app_id, app in applications.items()
                        if app["team"] == team_id
                    }
                }
                for team_id, team in teams.items()
            },
            fd,
            default=json_default
        )

    with (output_path / "application-metrics.json").open("w") as fd:
        json.dump(applications, fd, default=json_default)

    assets_path = output_path / "assets"
    assets_path.mkdir(exist_ok=True)

    assets_source_path = templates_path / "assets"

    for path in assets_source_path.iterdir():
        if path.match("*.js") or path.match("*.css") or path.match("*.png"):
            shutil.copy(str(path), str(assets_path / path.name))

    return cluster_summaries
示例#3
0
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
    links_file,
    node_label,
):
    """Collect resource data from all clusters and write the HTML report.

    Aggregates per-pod cost/requests/usage into applications, namespaces
    and teams, optionally reusing a pickled data cache, and finally renders
    everything via write_report().  Returns the cluster summaries dict.
    """
    notifications = []

    output_path = Path(output_dir)

    if pricing_file:
        # override the built-in node pricing table with user-provided data
        pricing.regenerate_cost_dict(pricing_file)

    if links_file:
        with open(links_file, 'rb') as fd:
            links = yaml.safe_load(fd)
    else:
        links = {}

    start = datetime.datetime.utcnow()

    # the data collection might take a long time, so first write index.html
    # to give users feedback that Kubernetes Resource Report has started
    # first copy CSS/JS/..
    copy_static_assets(output_path)
    write_loading_page(output_path)

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # reuse previously collected data instead of querying all clusters
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]

    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
            node_label,
        )
        teams = {}

    applications = {}
    namespace_usage = {}

    for cluster_id, summary in sorted(cluster_summaries.items()):
        # aggregate each pod into its application
        for _k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(
                    r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

        # aggregate the same pods per (namespace, cluster) pair
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = namespace["requests"].get(
                    r, 0) + pod["requests"][r]
                namespace["usage"][r] = namespace["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    def cluster_name(cluster_id):
        # sort key for a team's clusters; None for unknown cluster IDs
        # (hoisted out of the loop below: it does not depend on the team)
        try:
            return cluster_summaries[cluster_id]["cluster"].name
        except KeyError:
            return None

    for team in teams.values():
        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # propagate the (possibly registry-resolved) team back onto each pod;
    # every pod's application is guaranteed to exist in `applications`
    # because the dict was built from these same pods above
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications[pod["application"]]
            pod["team"] = app["team"]

    # enrich namespace aggregates with per-namespace metadata
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace = namespace_usage.get((ns, cluster_id))
            if namespace:
                namespace["email"] = ns_values["email"]
                namespace["status"] = ns_values["status"]

    if not use_cache:
        # best-effort: a failed cache write must not break report generation
        try:
            with pickle_path.open("wb") as fd:
                pickle.dump(
                    {
                        "cluster_summaries": cluster_summaries,
                        "teams": teams,
                        "applications": applications,
                        "namespace_usage": namespace_usage,
                    },
                    fd,
                )
        except Exception as e:
            logger.error(f'Could not dump pickled cache data: {e}')

    write_report(output_path, start, notifications, cluster_summaries,
                 namespace_usage, applications, teams, node_label, links)

    return cluster_summaries
示例#4
0
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
    node_label,
):
    """Collect resource data from all clusters and write the HTML report.

    Aggregates per-pod cost/requests/usage into applications, namespaces
    and teams, optionally reusing a pickled data cache, and finally renders
    everything via write_report().  Returns the cluster summaries dict.
    """
    notifications = []

    output_path = Path(output_dir)

    if pricing_file:
        # override the built-in node pricing table with user-provided data
        pricing.NODE_COSTS_MONTHLY = pricing.regenerate_cost_dict(pricing_file)

    start = datetime.datetime.utcnow()

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # reuse previously collected data instead of querying all clusters
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]

    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
            node_label,
        )
        teams = {}

    # BUGFIX: these were previously initialized only in the non-cache branch,
    # so using the cache raised NameError for namespace_usage and
    # double-counted applications (loaded from the pickle AND re-aggregated
    # below).  Always rebuild both aggregations from the summaries.
    applications = {}
    namespace_usage = {}

    for cluster_id, summary in sorted(cluster_summaries.items()):
        # aggregate each pod into its application
        for _k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(
                    r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

        # aggregate the same pods per (namespace, cluster) pair
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = namespace["requests"].get(
                    r, 0) + pod["requests"][r]
                namespace["usage"][r] = namespace["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    def cluster_name(cluster_id):
        # sort key for a team's clusters; None for unknown cluster IDs
        # (hoisted out of the loop below: it does not depend on the team)
        try:
            return cluster_summaries[cluster_id]["cluster"].name
        except KeyError:
            return None

    for team in teams.values():
        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # propagate the (possibly registry-resolved) team back onto each pod;
    # every pod's application is guaranteed to exist in `applications`
    # because the dict was built from these same pods above
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications[pod["application"]]
            pod["team"] = app["team"]

    # enrich namespace aggregates with per-namespace metadata
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace = namespace_usage.get((ns, cluster_id))
            if namespace:
                namespace["email"] = ns_values["email"]
                namespace["status"] = ns_values["status"]

    if not use_cache:
        with pickle_path.open("wb") as fd:
            pickle.dump(
                {
                    "cluster_summaries": cluster_summaries,
                    "teams": teams,
                    "applications": applications,
                    "namespace_usage": namespace_usage,
                },
                fd,
            )

    write_report(output_path, start, notifications, cluster_summaries,
                 namespace_usage, applications, teams, node_label)

    return cluster_summaries