def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    alpha_ema,
    cluster_summaries,
    pricing_file,
    links_file,
    node_labels,
):
    """Collect data from all clusters and generate the HTML resource report.

    Aggregates per-pod requests/usage/cost (and recommendations) into
    per-application, per-component and per-namespace summaries, then hands
    everything to ``write_report``.  Returns the resulting
    ``cluster_summaries`` dict.

    The ``cluster_summaries`` parameter carries the previous run's summaries;
    it is forwarded to ``get_cluster_summaries`` together with ``alpha_ema``
    (presumably for exponential-moving-average smoothing across runs —
    confirm against ``get_cluster_summaries``).
    """
    notifications: List[tuple] = []

    # Optionally override the built-in node pricing data from a custom file.
    if pricing_file:
        pricing.regenerate_cost_dict(pricing_file)

    # Optional YAML file with link definitions used by the HTML templates.
    if links_file:
        with open(links_file, "rb") as fd:
            links = yaml.safe_load(fd)
    else:
        links = {}

    start = datetime.datetime.utcnow()

    out = OutputManager(Path(output_dir))
    # the data collection might take a long time, so first write index.html
    # to give users feedback that Kubernetes Resource Report has started
    # first copy CSS/JS/..
    out.copy_static_assets()
    write_loading_page(out)

    pickle_file_name = "dump.pickle"

    if use_cache and out.exists(pickle_file_name):
        # Reuse previously collected data.  Note: only cluster_summaries and
        # teams are restored here; applications/namespace_usage are always
        # recomputed from the summaries below.
        with out.open(pickle_file_name, "rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]
    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            alpha_ema,
            cluster_summaries,
            no_ingress_status,
            node_labels,
        )
        teams = {}

    applications: Dict[str, dict] = {}
    namespace_usage: Dict[tuple, dict] = {}

    # First aggregation pass: roll per-pod numbers up into one entry per
    # application (with a nested entry per component) and one entry per
    # (namespace, cluster) pair.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "components": {},
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            component = app["components"].get(
                pod["component"],
                {
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "clusters": set(),
                },
            )
            # Sum CPU/memory requests and usage into both the application
            # and its component entry.
            for r in "cpu", "memory":
                for key in "requests", "usage":
                    app[key][r] = app[key].get(r, 0) + pod.get(key, {}).get(r, 0)
                    component[key][r] = component[key].get(r, 0) + pod.get(key, {}).get(r, 0)
            aggregate_recommendation(pod, app)
            aggregate_recommendation(pod, component)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            # Last pod seen wins for the team assignment.
            app["team"] = pod["team"]

            component["cost"] += pod["cost"]
            component["slack_cost"] += pod.get("slack_cost", 0)
            component["pods"] += 1
            component["clusters"].add(cluster_id)

            app["components"][pod["component"]] = component
            applications[pod["application"]] = app

        # Second pass over the same pods: per-namespace aggregation.
        # ns_pod is the pod key; ns_pod[0] is the namespace part.
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "recommendation": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = namespace["requests"].get(r, 0) + pod["requests"][r]
                namespace["usage"][r] = namespace["usage"].get(r, 0) + pod.get("usage", {}).get(r, 0)
            aggregate_recommendation(pod, namespace)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    # Optionally enrich application IDs/teams from an external registry.
    if application_registry:
        resolve_application_ids(applications, application_registry)

    aggregate_by_team(applications, teams)

    for team in teams.values():

        # Sort each team's cluster IDs by human-readable cluster name.
        def cluster_name(cluster_id):
            try:
                return cluster_summaries[cluster_id]["cluster"].name
            except KeyError:
                return None

        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # Propagate the (possibly registry-resolved) team back onto each pod.
    for _cluster_id, summary in sorted(cluster_summaries.items()):
        for _k, pod in summary["pods"].items():
            app = applications[pod["application"]]
            pod["team"] = app["team"]

    # Copy namespace metadata (owner email, status) onto the usage entries.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace_ = namespace_usage.get((ns, cluster_id))
            if namespace_:
                namespace_["email"] = ns_values["email"]
                namespace_["status"] = ns_values["status"]

    if not use_cache:
        # Best effort: a failed cache dump only logs an error, the report
        # is still generated.
        try:
            with out.open(pickle_file_name, "wb") as fd:
                pickle.dump(
                    {
                        "cluster_summaries": cluster_summaries,
                        "teams": teams,
                        "applications": applications,
                        "namespace_usage": namespace_usage,
                    },
                    fd,
                )
        except Exception as e:
            logger.error(f"Could not dump pickled cache data: {e}")

    # NOTE(review): product, ecosystem and environment are not defined in
    # this function or its parameters — they must be module-level globals,
    # otherwise this call raises NameError.  Verify at module scope.
    write_report(
        out,
        start,
        notifications,
        cluster_summaries,
        namespace_usage,
        applications,
        product,
        ecosystem,
        environment,
        teams,
        node_labels,
        links,
        alpha_ema,
    )

    return cluster_summaries
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
):
    """Collect data from all clusters and write the full report output.

    Produces, inside ``output_dir``: TSV exports (clusters, ingresses, pods,
    slack), JSON metrics files, and Jinja2-rendered HTML pages, plus copied
    static assets.  Returns the ``cluster_summaries`` dict.
    """
    notifications = []

    output_path = Path(output_dir)

    # Optionally replace the built-in monthly node cost table.
    if pricing_file:
        pricing.NODE_COSTS_MONTHLY = pricing.regenerate_cost_dict(pricing_file)

    start = datetime.datetime.utcnow()

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # Restore the previous run's data instead of querying the clusters.
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]
        applications = data["applications"]
    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
        )
        teams = {}
        applications = {}

    # Cross-cluster totals, used by the template context below.
    total_allocatable = collections.defaultdict(int)
    total_requests = collections.defaultdict(int)
    total_user_requests = collections.defaultdict(int)

    for cluster_id, summary in sorted(cluster_summaries.items()):
        for r in "cpu", "memory":
            total_allocatable[r] += summary["allocatable"][r]
            total_requests[r] += summary["requests"][r]
            total_user_requests[r] += summary["user_requests"][r]

        # NOTE(review): these two values are not used in this loop (they are
        # recomputed in the slack TSV section below) — dead stores here.
        cost_per_cpu = summary["cost"] / summary["allocatable"]["cpu"]
        cost_per_memory = summary["cost"] / summary["allocatable"]["memory"]

        # Aggregate per-pod numbers into one entry per application.
        # NOTE(review): on the use_cache path `applications` is restored
        # already aggregated and this loop adds onto it again — verify
        # whether that double-counting is intended.
        for k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get("usage", {}).get(
                    r, 0
                )
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

    # Optionally resolve application IDs/teams via an external registry.
    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    # Propagate each application's team back onto its pods.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for k, pod in summary["pods"].items():
            app = applications.get(pod["application"])
            pod["team"] = app["team"]

    if not use_cache:
        # Persist collected data so a later run with --use-cache can skip
        # the (slow) cluster queries.
        with pickle_path.open("wb") as fd:
            pickle.dump(
                {
                    "cluster_summaries": cluster_summaries,
                    "teams": teams,
                    "applications": applications,
                },
                fd,
            )

    logger.info("Writing clusters.tsv..")
    with (output_path / "clusters.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        for cluster_id, summary in sorted(cluster_summaries.items()):
            worker_instance_type = set()
            kubelet_version = set()
            for node in summary["nodes"].values():
                if node["role"] == "worker":
                    worker_instance_type.add(node["instance_type"])
                kubelet_version.add(node["kubelet_version"])
            fields = [
                cluster_id,
                summary["cluster"].api_server_url,
                summary["master_nodes"],
                summary["worker_nodes"],
                ",".join(worker_instance_type),
                ",".join(kubelet_version),
            ]
            # CPU cores (2 decimals) and memory in MiB for each metric class.
            for x in ["capacity", "allocatable", "requests", "usage"]:
                fields += [
                    round(summary[x]["cpu"], 2),
                    int(summary[x]["memory"] / ONE_MEBI),
                ]
            fields += [round(summary["cost"], 2)]
            writer.writerow(fields)

    logger.info("Writing ingresses.tsv..")
    with (output_path / "ingresses.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        for cluster_id, summary in sorted(cluster_summaries.items()):
            for ingress in summary["ingresses"]:
                writer.writerow(
                    [cluster_id, summary["cluster"].api_server_url] + ingress
                )

    logger.info("Writing pods.tsv..")
    with (output_path / "pods.tsv").open("w") as csvfile:
        writer = csv.writer(csvfile, delimiter="\t")
        with (output_path / "slack.tsv").open("w") as csvfile2:
            slackwriter = csv.writer(csvfile2, delimiter="\t")
            for cluster_id, summary in sorted(cluster_summaries.items()):
                # Slack = requested minus actually used resources, keyed by
                # (namespace, application).
                cpu_slack = collections.Counter()
                memory_slack = collections.Counter()
                for k, pod in summary["pods"].items():
                    namespace, name = k
                    requests = pod["requests"]
                    # Fall back to the pod name minus its trailing suffix
                    # when no application label is set.
                    application = pod["application"] or name.rsplit("-", 1)[0]
                    usage = pod.get("usage", collections.defaultdict(float))
                    cpu_slack[(namespace, application)] += (
                        requests["cpu"] - usage["cpu"]
                    )
                    memory_slack[(namespace, application)] += (
                        requests["memory"] - usage["memory"]
                    )
                    writer.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            pod["application"],
                            requests["cpu"],
                            requests["memory"],
                            usage["cpu"],
                            usage["memory"],
                        ]
                    )
                # Cost rates derived from the cluster's total monthly cost
                # spread over its allocatable capacity.
                cost_per_cpu = summary["cost"] / summary["allocatable"]["cpu"]
                cost_per_memory = summary["cost"] / summary["allocatable"]["memory"]
                # Only the top-20 offenders per cluster are reported.
                for namespace_name, slack in cpu_slack.most_common(20):
                    namespace, name = namespace_name
                    slackwriter.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            "cpu",
                            "{:3.2f}".format(slack),
                            "${:.2f} potential monthly savings".format(
                                slack * cost_per_cpu
                            ),
                        ]
                    )
                for namespace_name, slack in memory_slack.most_common(20):
                    namespace, name = namespace_name
                    slackwriter.writerow(
                        [
                            cluster_id,
                            summary["cluster"].api_server_url,
                            namespace,
                            name,
                            "memory",
                            "{:6.0f}Mi".format(slack / ONE_MEBI),
                            "${:.2f} potential monthly savings".format(
                                slack * cost_per_memory
                            ),
                        ]
                    )

    # Set up the Jinja2 environment for the HTML pages.
    templates_path = Path(__file__).parent / "templates"
    env = Environment(
        loader=FileSystemLoader(str(templates_path)),
        autoescape=select_autoescape(["html", "xml"]),
    )
    env.filters["money"] = filters.money
    env.filters["cpu"] = filters.cpu
    env.filters["memory"] = filters.memory

    total_cost = sum([s["cost"] for s in cluster_summaries.values()])
    total_hourly_cost = total_cost / HOURS_PER_MONTH
    now = datetime.datetime.utcnow()
    # Shared template context for all pages.
    context = {
        "notifications": notifications,
        "cluster_summaries": cluster_summaries,
        "teams": teams,
        "applications": applications,
        "total_worker_nodes": sum(
            [s["worker_nodes"] for s in cluster_summaries.values()]
        ),
        "total_allocatable": total_allocatable,
        "total_requests": total_requests,
        "total_usage": {
            "cpu": sum(s["usage"]["cpu"] for s in cluster_summaries.values()),
            "memory": sum(s["usage"]["memory"] for s in cluster_summaries.values()),
        },
        "total_user_requests": total_user_requests,
        "total_pods": sum([len(s["pods"]) for s in cluster_summaries.values()]),
        "total_cost": total_cost,
        # Hourly cost is split 50/50 between CPU and memory; max(..., 1)
        # guards against division by zero when there are no user requests.
        "total_cost_per_user_request_hour": {
            "cpu": 0.5 * total_hourly_cost / max(total_user_requests["cpu"], 1),
            "memory": 0.5 * total_hourly_cost / max(
                total_user_requests["memory"] / ONE_GIBI, 1),
        },
        "total_slack_cost": sum([a["slack_cost"] for a in applications.values()]),
        "now": now,
        "duration": (now - start).total_seconds(),
        "version": __version__,
    }

    metrics = calculate_metrics(context)

    with (output_path / "metrics.json").open("w") as fd:
        json.dump(metrics, fd)

    # Top-level HTML pages.
    for page in ["index", "clusters", "ingresses", "teams", "applications", "pods"]:
        file_name = "{}.html".format(page)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template(file_name)
        context["page"] = page
        template.stream(**context).dump(str(output_path / file_name))

    # One detail page per cluster.
    for cluster_id, summary in cluster_summaries.items():
        page = "clusters"
        file_name = "cluster-{}.html".format(cluster_id)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template("cluster.html")
        context["page"] = page
        context["cluster_id"] = cluster_id
        context["summary"] = summary
        template.stream(**context).dump(str(output_path / file_name))

    # JSON-friendly dump of the summaries: tuple keys are joined with "/"
    # and the non-serializable 'cluster' object is dropped.
    with (output_path / "cluster-metrics.json").open("w") as fd:
        json.dump(
            {
                cluster_id: {
                    key: {
                        k if isinstance(k, str) else '/'.join(k): v
                        for k, v in value.items()
                    }
                    if hasattr(value, 'items')
                    else value
                    for key, value in summary.items()
                    if key != 'cluster'
                }
                for cluster_id, summary in cluster_summaries.items()
            },
            fd,
            default=json_default
        )

    # One detail page per team.
    for team_id, team in teams.items():
        page = "teams"
        file_name = "team-{}.html".format(team_id)
        logger.info("Generating {}..".format(file_name))
        template = env.get_template("team.html")
        context["page"] = page
        context["team_id"] = team_id
        context["team"] = team
        template.stream(**context).dump(str(output_path / file_name))

    with (output_path / "team-metrics.json").open("w") as fd:
        json.dump(
            {
                team_id: {
                    **team,
                    "application": {
                        app_id: app
                        for app_id, app in applications.items()
                        if app["team"] == team_id
                    }
                }
                for team_id, team in teams.items()
            },
            fd,
            default=json_default
        )

    with (output_path / "application-metrics.json").open("w") as fd:
        json.dump(applications, fd, default=json_default)

    # Copy static assets (JS/CSS/images) next to the generated pages.
    assets_path = output_path / "assets"
    assets_path.mkdir(exist_ok=True)

    assets_source_path = templates_path / "assets"

    for path in assets_source_path.iterdir():
        if path.match("*.js") or path.match("*.css") or path.match("*.png"):
            shutil.copy(str(path), str(assets_path / path.name))

    return cluster_summaries
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
    links_file,
    node_label,
):
    """Collect data from all clusters and generate the HTML resource report.

    Aggregates per-pod requests/usage/cost into per-application and
    per-namespace summaries and renders the report via ``write_report``.
    Returns the ``cluster_summaries`` dict.
    """
    notifications = []

    output_path = Path(output_dir)

    # Optionally override the built-in node pricing data from a custom file.
    if pricing_file:
        pricing.regenerate_cost_dict(pricing_file)

    # Optional YAML file with link definitions used by the HTML templates.
    if links_file:
        with open(links_file, 'rb') as fd:
            links = yaml.safe_load(fd)
    else:
        links = {}

    start = datetime.datetime.utcnow()

    # the data collection might take a long time, so first write index.html
    # to give users feedback that Kubernetes Resource Report has started
    # first copy CSS/JS/..
    copy_static_assets(output_path)
    write_loading_page(output_path)

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # Reuse previously collected data.  Only cluster_summaries and teams
        # are restored; applications/namespace_usage are recomputed below.
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]
    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
            node_label,
        )
        teams = {}

    applications = {}
    namespace_usage = {}

    # Roll per-pod numbers up into one entry per application and one entry
    # per (namespace, cluster) pair.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(
                    r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

        # ns_pod is the pod key; ns_pod[0] is the namespace part.
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = namespace["requests"].get(
                    r, 0) + pod["requests"][r]
                namespace["usage"][r] = namespace["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    # Optionally resolve application IDs/teams via an external registry.
    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    for team in teams.values():

        # Sort each team's cluster IDs by human-readable cluster name.
        def cluster_name(cluster_id):
            try:
                return cluster_summaries[cluster_id]["cluster"].name
            except KeyError:
                return None

        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # Propagate the (possibly registry-resolved) team back onto each pod.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for k, pod in summary["pods"].items():
            app = applications.get(pod["application"])
            pod["team"] = app["team"]

    # Copy namespace metadata (owner email, status) onto the usage entries.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace = namespace_usage.get((ns, cluster_id))
            if namespace:
                namespace["email"] = ns_values["email"]
                namespace["status"] = ns_values["status"]

    if not use_cache:
        # Best effort: a failed cache dump only logs an error, the report
        # is still generated.
        try:
            with pickle_path.open("wb") as fd:
                pickle.dump(
                    {
                        "cluster_summaries": cluster_summaries,
                        "teams": teams,
                        "applications": applications,
                        "namespace_usage": namespace_usage,
                    },
                    fd,
                )
        except Exception as e:
            logger.error(f'Could not dump pickled cache data: {e}')

    write_report(output_path, start, notifications, cluster_summaries,
                 namespace_usage, applications, teams, node_label, links)

    return cluster_summaries
def generate_report(
    clusters,
    cluster_registry,
    kubeconfig_path,
    kubeconfig_contexts: set,
    application_registry,
    use_cache,
    no_ingress_status,
    output_dir,
    system_namespaces,
    include_clusters,
    exclude_clusters,
    additional_cost_per_cluster,
    pricing_file,
    node_label,
):
    """Collect data from all clusters and generate the HTML resource report.

    Aggregates per-pod requests/usage/cost into per-application and
    per-namespace summaries and renders the report via ``write_report``.
    Returns the ``cluster_summaries`` dict.
    """
    notifications = []

    output_path = Path(output_dir)

    # Optionally replace the built-in monthly node cost table.
    if pricing_file:
        pricing.NODE_COSTS_MONTHLY = pricing.regenerate_cost_dict(pricing_file)

    start = datetime.datetime.utcnow()

    pickle_path = output_path / "dump.pickle"

    if use_cache and pickle_path.exists():
        # Restore the previous run's data instead of querying the clusters.
        with pickle_path.open("rb") as fd:
            data = pickle.load(fd)
        cluster_summaries = data["cluster_summaries"]
        teams = data["teams"]
        applications = data["applications"]
    else:
        cluster_summaries = get_cluster_summaries(
            clusters,
            cluster_registry,
            kubeconfig_path,
            kubeconfig_contexts,
            include_clusters,
            exclude_clusters,
            system_namespaces,
            notifications,
            additional_cost_per_cluster,
            no_ingress_status,
            node_label,
        )
        teams = {}
        applications = {}

    namespace_usage = {}

    # Roll per-pod numbers up into one entry per application and one entry
    # per (namespace, cluster) pair.
    # NOTE(review): on the use_cache path `applications` is restored already
    # aggregated and this loop adds onto it again — verify whether that
    # double-counting is intended.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for k, pod in summary["pods"].items():
            app = applications.get(
                pod["application"],
                {
                    "id": pod["application"],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "clusters": set(),
                    "team": "",
                    "active": None,
                },
            )
            for r in "cpu", "memory":
                app["requests"][r] = app["requests"].get(
                    r, 0) + pod["requests"][r]
                app["usage"][r] = app["usage"].get(r, 0) + pod.get(
                    "usage", {}).get(r, 0)
            app["cost"] += pod["cost"]
            app["slack_cost"] += pod.get("slack_cost", 0)
            app["pods"] += 1
            app["clusters"].add(cluster_id)
            applications[pod["application"]] = app

        # ns_pod is the pod key; ns_pod[0] is the namespace part.
        for ns_pod, pod in summary["pods"].items():
            namespace = namespace_usage.get(
                (ns_pod[0], cluster_id),
                {
                    "id": ns_pod[0],
                    "cost": 0,
                    "slack_cost": 0,
                    "pods": 0,
                    "requests": {},
                    "usage": {},
                    "cluster": "",
                    "email": "",
                    "status": "",
                },
            )
            for r in "cpu", "memory":
                namespace["requests"][r] = namespace["requests"].get(
                    r, 0) + pod["requests"][r]
                namespace["usage"][r] = namespace["usage"].get(r,
                    0) + pod.get(
                    "usage", {}).get(r, 0)
            namespace["cost"] += pod["cost"]
            namespace["slack_cost"] += pod.get("slack_cost", 0)
            namespace["pods"] += 1
            namespace["cluster"] = summary["cluster"]
            namespace_usage[(ns_pod[0], cluster_id)] = namespace

    # Optionally resolve application IDs/teams via an external registry.
    if application_registry:
        resolve_application_ids(applications, teams, application_registry)

    for team in teams.values():

        # Sort each team's cluster IDs by human-readable cluster name.
        def cluster_name(cluster_id):
            try:
                return cluster_summaries[cluster_id]["cluster"].name
            except KeyError:
                return None

        team["clusters"] = sorted(team["clusters"], key=cluster_name)

    # Propagate the (possibly registry-resolved) team back onto each pod.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for k, pod in summary["pods"].items():
            app = applications.get(pod["application"])
            pod["team"] = app["team"]

    # Copy namespace metadata (owner email, status) onto the usage entries.
    for cluster_id, summary in sorted(cluster_summaries.items()):
        for ns, ns_values in summary["namespaces"].items():
            namespace = namespace_usage.get((ns, cluster_id))
            if namespace:
                namespace["email"] = ns_values["email"]
                namespace["status"] = ns_values["status"]

    if not use_cache:
        # Persist collected data so a later run with --use-cache can skip
        # the (slow) cluster queries.
        with pickle_path.open("wb") as fd:
            pickle.dump(
                {
                    "cluster_summaries": cluster_summaries,
                    "teams": teams,
                    "applications": applications,
                    "namespace_usage": namespace_usage,
                },
                fd,
            )

    write_report(output_path, start, notifications, cluster_summaries,
                 namespace_usage, applications, teams, node_label)

    return cluster_summaries