def benchmark_sample(sample_method, imgs, config, batch_num, last_epoch):
    """Simulate benchmarking and sampling at the edge, returning a list of samples."""
    name, (sample_func, kwargs) = sample_method
    bench_file = (
        f"{config['output']}/{name}{batch_num}_benchmark_avg_1_{last_epoch}.csv"
    )

    if not os.path.exists(bench_file):
        results_df = bench.benchmark_avg(
            imgs,
            name,
            1,
            last_epoch,
            config["conf_check_num"],
            config,
        )

        bench.save_results(results_df, bench_file)

    # Create samples from the benchmark
    results, _ = rload.load_data(bench_file, by_actual=False)

    print(f"===== {name} ======")
    sample_files = sample.create_sample(results, config["bandwidth"],
                                        sample_func, **kwargs)

    return sample_files
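

# Illustrative sketch only: one way benchmark_sample() might be driven for a
# single batch. The method label "median_thresh", the sampling function
# sample.median_thresh_sample, and its keyword arguments are assumptions about
# the sample module's API, not confirmed names.
def _example_benchmark_sample(imgs, config):
    sample_method = ("median_thresh",
                     (sample.median_thresh_sample, {"thresh": 0.5}))  # hypothetical
    return benchmark_sample(sample_method, imgs, config,
                            batch_num=0, last_epoch=25)
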
def display_benchmark(file, config):
    """Print per-class metrics for a benchmark file and save them to a companion *_stats.csv."""
    results, _ = rload.load_data(
        file,
        by_actual=False,
        add_all=False,
        conf_thresh=config["pos_thres"],
    )

    df = pd.DataFrame(columns=[
        "Class", "N", "Prec", "Acc", "Recall", "Avg. Conf", "Conf Std"
    ]).set_index("Class")
    for result in results:
        df.loc[result.name] = [
            len(result),
            result.precision(),
            result.accuracy(),
            result.recall(),
            np.mean(result.get_confidences()),
            np.std(result.get_confidences(), ddof=1),
        ]

    df.loc["Overall"] = [
        df["N"].sum(), *df.loc[:, "Prec":"Conf Std"].mean(axis=0)
    ]
    print(df)
    df.to_csv(file[:-4] + "_stats.csv")
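

# Illustrative sketch only: display_benchmark() needs a benchmark CSV path and a
# config dict providing "pos_thres". The path and threshold below are
# hypothetical values, not taken from a real run.
def _example_display_benchmark():
    config = {"pos_thres": 0.5}
    display_benchmark("output/init0_benchmark_avg_1_25.csv", config)
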
def display_series(config, opt):
    """Aggregate per-epoch benchmark results for each test set and plot the chosen metric over time."""
    names = [
        "init",
        "sample",
        "all_iter",
        "all",
    ]
    epoch_splits = utils.get_epoch_splits(config, opt.prefix, True)
    names += [f"cur_iter{i}" for i in range(len(epoch_splits))]
    if opt.batch_test is not None:
        names.append("batch_test")

    results = list()

    out_folder = f"{config['output']}/{opt.prefix}-series"
    if opt.avg or opt.roll_avg:
        out_folder += "-roll-avg" if opt.roll_avg else "-avg"
    for name in names:
        is_baseline = opt.prefix == "init" or "baseline" in opt.prefix
        start_epoch = 1 if is_baseline else epoch_splits[0]
        for i in range(start_epoch, epoch_splits[-1], opt.delta):
            out_name = f"{out_folder}/{name}_{i}.csv"

            if not os.path.exists(out_name):
                print(f"Skipping epoch {i} due to missing benchmark")
                continue

            epoch_res, _ = rload.load_data(out_name,
                                           by_actual=False,
                                           conf_thresh=config["pos_thres"])
            new_row = {
                "test_set": name,
                "epoch": i,
                **get_avg_metric_dict(epoch_res)
            }
            results.append(new_row)

    results = pd.DataFrame(results)

    results.to_csv(f"{out_folder}/{opt.prefix}-series-stats.csv")

    xy_pairs = list()
    for name in names:
        if "cur_iter" in name:
            # Combine the current iteration sets into one line
            if name == "cur_iter0":
                filtered_data = results[results["test_set"].str.contains(
                    "cur_iter")]
                name = "cur_iter"
            else:
                continue
        else:
            filtered_data = results[results["test_set"] == name]
        xy_pairs.append(
            (filtered_data["epoch"], filtered_data[opt.metric], name))

    plot_multiline(xy_pairs,
                   xlab="Epoch",
                   ylab=f"Avg. {opt.metric}",
                   vert_lines=epoch_splits)
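

# Illustrative sketch only: display_series() reads config plus an argparse-style
# namespace. The attribute names below mirror what the function accesses
# (prefix, batch_test, avg, roll_avg, delta, metric); the values, including the
# metric key "prec", are assumptions.
def _example_display_series(config):
    from types import SimpleNamespace
    opt = SimpleNamespace(prefix="init", batch_test=None, avg=False,
                          roll_avg=False, delta=5, metric="prec")
    display_series(config, opt)
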
def tabulate_batch_samples(config,
                           prefix,
                           bench_suffix=None,
                           silent=False,
                           filter_samp=False):
    """Analyze accuracy/precision relationships and training duration
    for each batched sample using existing testing data."""
    bench_str = f"{config['output']}/{prefix}*_benchmark" + (bench_suffix or "")

    benchmarks = utils.sort_by_epoch(bench_str)
    checkpoints = utils.sort_by_epoch(f"{config['checkpoints']}/{prefix}*.pth")

    data = list()
    for i, benchmark in enumerate(benchmarks):
        kwargs = dict()
        if filter_samp and prefix != "init":
            sampled_imgs = glob(f"{config['output']}/{prefix}{i}_sample*")
            if len(sampled_imgs) == 0:
                continue
            kwargs["filter"] = sampled_imgs[0]
        results, _ = rload.load_data(
            benchmark,
            by_actual=False,
            add_all=False,
            conf_thresh=config["pos_thres"],
            **kwargs,
        )

        if i == len(benchmarks) - 1:
            train_len = utils.get_epoch(
                checkpoints[-1]) - utils.get_epoch(benchmark)
        else:
            train_len = utils.get_epoch(
                benchmarks[i + 1]) - utils.get_epoch(benchmark)

        new_row = {
            "batch": i,
            **get_avg_metric_dict(results),
            "epochs_trained": train_len,
        }

        data.append(new_row)
    data = pd.DataFrame(data)
    # data.set_index("batch")

    if not silent:
        print("=== Metrics on Batch ===")
        print(data.to_string(index=False))

    return data
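

# Illustrative sketch only: tabulate the batches benchmarked under a given
# prefix. The prefix "median_thresh" and the "_avg_*.csv" suffix pattern are
# assumptions about how benchmark files are named in config["output"].
def _example_tabulate_batch_samples(config):
    return tabulate_batch_samples(config, "median_thresh",
                                  bench_suffix="_avg_*.csv", filter_samp=True)
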
def visualize_conf(prefix, benchmark, filter_sample=False, pos_thres=0.5):
    """Write the precision distribution and confusion matrix for a benchmark and plot its confidence histograms."""
    kwargs = dict()
    if filter_sample:
        folder = "/".join(benchmark.split("/")[:-1])
        epoch = utils.get_epoch(benchmark)
        sampled_imgs = glob(f"{folder}/{prefix}*_sample_{epoch}.txt")[0]
        kwargs["filter"] = sampled_imgs

    results, conf_mat = rload.load_data(benchmark,
                                        by_actual=False,
                                        conf_thresh=pos_thres,
                                        **kwargs)

    results[-1].generate_prec_distrib(benchmark[:-4] + "_prec.csv")

    conf_mat_file = benchmark[:-4] + "_conf.csv"
    classes = [result.name for result in results]
    classes.remove("All")
    make_conf_matrix(conf_mat, classes, conf_mat_file)

    hist_filename = benchmark[:-4] + "_viz.pdf"
    make_conf_histogram(results, hist_filename)
    show_overall_hist(results)
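

# Illustrative sketch only: visualize a single benchmark file, restricted to the
# images sampled for that batch. The prefix and file name are hypothetical.
def _example_visualize_conf():
    visualize_conf("median_thresh",
                   "output/median_thresh0_benchmark_avg_1_25.csv",
                   filter_sample=True, pos_thres=0.5)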