def test_get_runs(mocker, kubernetes_api_client_node_port):
    """ApiClient.get_runs returns a future whose JSON payload round-trips.

    Kubernetes config loading and the thread pool are mocked out, so no
    cluster access happens; the mocked future resolves to a response whose
    ``json()`` yields the sentinel value ``"a"``.
    """
    mocker.patch("kubernetes.config.load_kube_config")
    pool_mock = mocker.patch("concurrent.futures.ThreadPoolExecutor")

    # Whatever get_runs submits, the resulting future's response JSON is "a".
    submitted_future = pool_mock.return_value.submit.return_value
    submitted_future.result.return_value.json.return_value = "a"

    client = ApiClient(in_cluster=False)
    runs_future = client.get_runs()

    assert runs_future is not None
    assert runs_future.result().json() == "a"
def status(name, dashboard_url):
    """Get the status of a benchmark run, or all runs if no name is given"""
    loaded = setup_client_from_config()
    client = ApiClient(in_cluster=False, url=dashboard_url, load_config=not loaded)

    runs = client.get_runs().result().json()

    if name is None:
        # List all runs, stripping internal fields not meant for display.
        # pop(..., None) instead of del: the API may omit these keys.
        for run in runs:
            run.pop("job_id", None)
            run.pop("job_metadata", None)
        click.echo(tabulate(runs, headers="keys"))
        return

    try:
        run = next(r for r in runs if r["name"] == name)
    except StopIteration:
        click.echo("Run not found")
        return

    run.pop("job_id", None)
    run.pop("job_metadata", None)
    click.echo(tabulate([run], headers="keys"))

    # Kick off both metric requests before resolving either future, so the
    # two dashboard calls run concurrently.
    loss = client.get_run_metrics(
        run["id"], metric_filter="val_global_loss @ 0", last_n=1)
    prec = client.get_run_metrics(
        run["id"], metric_filter="val_global_Prec@1 @ 0", last_n=1)

    loss = loss.result()
    prec = prec.result()

    if loss.status_code < 300 and "val_global_loss @ 0" in loss.json():
        val = loss.json()["val_global_loss @ 0"][0]
        click.echo("Current Global Loss: {0:.2f} ({1})".format(
            float(val["value"]), val["date"]))
    else:
        click.echo("No Validation Loss Data yet")

    if prec.status_code < 300 and "val_global_Prec@1 @ 0" in prec.json():
        val = prec.json()["val_global_Prec@1 @ 0"][0]
        click.echo("Current Global Precision: {0:.2f} ({1})".format(
            float(val["value"]), val["date"]))
    else:
        click.echo("No Validation Precision Data yet")
def download(name, output, dashboard_url):
    """Download the results of a benchmark run"""
    loaded = setup_client_from_config()
    client = ApiClient(in_cluster=False, url=dashboard_url, load_config=not loaded)

    runs = client.get_runs().result().json()

    # BUG FIX: a bare next() raised StopIteration for an unknown run name;
    # handle it like `status` and `delete` do.
    try:
        run = next(r for r in runs if r["name"] == name)
    except StopIteration:
        click.echo("Run not found")
        return

    ret = client.download_run_metrics(run["id"])

    with open(output, "wb") as f:
        f.write(ret.result().content)
def delete(name, dashboard_url):
    """Delete a benchmark run"""
    loaded = setup_client_from_config()
    client = ApiClient(in_cluster=False, url=dashboard_url, load_config=not loaded)

    runs = client.get_runs().result().json()

    try:
        run = next(r for r in runs if r["name"] == name)
    except StopIteration:
        click.echo("Run not found")
        return

    # NOTE: removed two dead `del run["job_id"]` / `del run["job_metadata"]`
    # statements — the dict is local and never displayed here, and the dels
    # could raise KeyError if the API omitted those fields.
    client.delete_run(run["id"])
def charts(folder, filter, dashboard_url):
    """Chart the results of benchmark runs

    Save generated charts in FOLDER
    """
    folder = Path(folder)
    if not folder.exists():
        folder.mkdir(parents=True)

    loaded = setup_client_from_config()
    client = ApiClient(in_cluster=False, url=dashboard_url, load_config=not loaded)

    runs = client.get_runs().result().json()
    runs = [r for r in runs if r["state"] == "finished"]

    if filter:
        runs = [r for r in runs if filter in r["name"]]

    options = {i: r for i, r in enumerate(runs, start=0)}

    if len(options) < 2:
        click.echo("At least two finished runs are needed to create a summary")
        return

    options["all"] = {"name": "*all runs*"}

    prompt = 'Select the runs to generate a summary for (e.g. "0 1 2"): \n\t{}'.format(
        "\n\t".join("{} [{}]".format(r["name"], i) for i, r in options.items()))

    choice = click.prompt(
        prompt,
        default=0,
        # BUG FIX: was click.Choice([options.keys()]) — a list containing one
        # dict_keys object, so no input could ever validate. Choice needs a
        # flat list of the key strings themselves.
        type=click.Choice([str(k) for k in options]),
        show_choices=False,
        value_proc=lambda x: runs
        if "all" in x else [options[int(i)] for i in x.split(" ")],
    )

    if len(choice) < 2:
        click.echo("At least two finished runs are needed to create a summary")
        return

    results = []

    def _get_metric(name, run):
        """Gets a metric from the dashboard."""
        name = "global_cum_{} @ 0".format(name)
        return float(
            client.get_run_metrics(run["id"], metric_filter=name, last_n=1)
            .result().json()[name][0]["value"])

    for run in choice:
        agg = _get_metric("agg", run)
        backprop = _get_metric("backprop", run)
        batch_load = _get_metric("batch_load", run)
        comp_loss = _get_metric("comp_loss", run)
        comp_metrics = _get_metric("comp_metrics", run)
        fwd_pass = _get_metric("fwd_pass", run)
        opt_step = _get_metric("opt_step", run)

        # With a single worker "agg" is local work (compute); with multiple
        # workers it is communication time.
        compute = (fwd_pass + comp_loss + backprop + opt_step +
                   (agg if run["num_workers"] == 1 else 0))
        communicate = agg if run["num_workers"] != 1 else 0

        results.append((
            run["name"],
            compute,
            communicate,
            comp_metrics,
            batch_load,
            str(run["num_workers"]),
        ))

    # BUG FIX: sorting on the *string* worker count put "16" before "2",
    # which also broke the speedup baseline below (combined[0] must belong
    # to the smallest worker count). Sort numerically.
    results = sorted(results, key=lambda x: int(x[5]))

    names, compute, communicate, metrics, batch_load, num_workers = zip(*results)

    width = 0.35

    fig, ax = plt.subplots()
    # NOTE(review): bars share the same x positions without `bottom=`, so
    # "Communication" is drawn over "Compute" rather than stacked — confirm
    # this is the intended rendering before changing it.
    ax.bar(num_workers, compute, width, label="Compute")
    ax.bar(num_workers, communicate, width, label="Communication")
    ax.set_ylabel("Time (s)")
    ax.set_title("Total time by number of workers")
    ax.legend()
    plt.savefig(folder / "total_time.png", dpi=150)

    fig, ax = plt.subplots()
    combined = [c + r for _, c, r, _, _, _ in results]
    speedup = [combined[0] / c for c in combined]
    ax.bar(num_workers, speedup, width)
    ax.set_ylabel("Speedup factor")
    ax.set_title("Speedup")
    plt.savefig(folder / "speedup.png", dpi=150)

    fig, ax = plt.subplots()
    ax.bar(num_workers, compute, width, label="Compute")
    ax.bar(num_workers, communicate, width, label="Communication")
    ax.bar(num_workers, metrics, width, label="Metrics Computation")
    ax.bar(num_workers, batch_load, width, label="Batch Loading")
    ax.set_ylabel("Time (s)")
    ax.set_title("Total time by number of workers")
    ax.legend()
    plt.savefig(folder / "time_for_all_phases.png", dpi=150)

    click.echo("Summary created in {}".format(folder))