def plot_runs(rnn_runs, feedforward_runs):
    feedforward_runs = group(feedforward_runs, ["layer_sizes"])
    rnn_runs = group(rnn_runs, ["tuple.mode"])

    plot_all("rnn", rnn_runs)
    plot_all("feedforward", feedforward_runs)

    print("********************************** all **********************************")
    plot_both(feedforward_runs["layer_sizes_2000,2000,2000,2000"],
              rnn_runs["tuple.mode_together"])
def plot(runs, baseline, fname: str):
    groups = {"runs": runs}
    stats = calc_stat(
        groups, lambda k: k.startswith("analysis_results/") and "/accuracy/" in k
        and "/train/" not in k)["runs"]

    baseline_stats = calc_stat(
        group(baseline, ["scan.train_split"]),
        lambda k: k.startswith("validation/") and "/accuracy/" in k)

    for k, s in stats.items():
        print(k)

    print("Baseline groups", baseline_stats.keys())

    means = {k: stats[v].get().mean for k, v in plots.items()}
    std = {k: stats[v].get().std for k, v in plots.items()}

    # e.g. validation/jump/accuracy/total
    for k, v in refs.items():
        print("----------------================---------------------")
        print(baseline_stats[f"scan.train_split_{v}"])

    ref_stats = {
        k: baseline_stats[f"scan.train_split_{v}"][f"validation/{v}/accuracy/total"].get()
        for k, v in refs.items()
    }
    ref_means = {k: v.mean for k, v in ref_stats.items()}
    ref_std = {k: v.std for k, v in ref_stats.items()}

    fig = plt.figure(figsize=[3, 1.5])
    plt.bar([2.25 * x for x in range(len(names))],
            [ref_means[n] * 100 for n in names],
            yerr=[ref_std[n] * 100 for n in names],
            align='center')
    plt.bar([2.25 * x + 1 for x in range(len(names))],
            [means[n] * 100 for n in names],
            yerr=[std[n] * 100 for n in names],
            align='center')

    plt.xticks([2.25 * x + 0.5 for x in range(len(names))], names)
    plt.ylabel("Test accuracy [\\%]")
    # plt.legend(["Before", "After"])
    fig.savefig(fname, bbox_inches='tight')
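# For reference, the key filters above operate on flat wandb summary-key strings, e.g.
# "validation/jump/accuracy/total" from the comment above. The helper below is a tiny
# illustrative check of the same predicates; the "analysis_results/..." keys are
# hypothetical examples, not taken from an actual run.


def _filter_example():
    def accept_analysis(k):
        return k.startswith("analysis_results/") and "/accuracy/" in k and "/train/" not in k

    def accept_baseline(k):
        return k.startswith("validation/") and "/accuracy/" in k

    print(accept_baseline("validation/jump/accuracy/total"))              # True
    print(accept_analysis("analysis_results/jump/accuracy/total"))        # True
    print(accept_analysis("analysis_results/jump/accuracy/train/total"))  # False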
]  # Force sans-serif math mode (for axes labels)
plt.rcParams['font.family'] = 'sans-serif'  # ... for regular text
plt.rcParams['font.sans-serif'] = 'Helvetica, Avant Garde, Computer Modern Sans serif'  # Choose a nice font here

TEST = False

api = wandb.Api()
runs = lib.get_runs(["addmul_feedforward_big", "addmul_rnn"])

BASE_DIR = "out/addmul_confusion_plot/download"
shutil.rmtree(BASE_DIR, ignore_errors=True)
os.makedirs(BASE_DIR, exist_ok=True)

runs = group(runs, ['layer_sizes', "task"])
print(runs.keys())


def draw_confusion(means: np.ndarray, std: np.ndarray):
    print("MEAN", means)
    figure = plt.figure(figsize=[2.5, 0.5])  # means.shape
    ax = plt.gca()
    im = plt.imshow(means,
                    interpolation='nearest',
                    cmap=plt.cm.viridis,
                    aspect='auto',
                    vmin=0,
                    vmax=100)

    x_marks = ["$+$", "$*$", "none"]
#!/usr/bin/env python3
import lib
from lib import StatTracker
from lib.common import group
import os
import matplotlib.pyplot as plt

runs = lib.get_runs([
    "addmul_feedforward", "addmul_feedforward_big", "addmul_feedforward_huge"
])

os.makedirs("out", exist_ok=True)

runs = group(runs, ["layer_sizes"])
print(runs)

all_stats = {}
for grp, runs in runs.items():
    print("----------------------------------- ", grp)
    for run in runs:
        tsum = 0
        ssum = 0
        # print(stats)
        for k, v in run.summary.items():
            kparts = k.split("/")
            if kparts[-1] != "n_1" or "/all/" in k or not k.startswith("mask_stat/"):
                continue
            print(k, v)
#!/usr/bin/env python3
import lib
from lib import StatTracker
from lib.common import group
import os
import matplotlib.pyplot as plt

runs = lib.get_runs(["addmul_feedforward_big", "addmul_rnn"])
runs = group(runs, ["layer_sizes", "task"])

all_stats = {}
for grp, rn in runs.items():
    if grp not in all_stats:
        all_stats[grp] = {}

    stats = all_stats[grp]
    for r in rn:
        for k, v in r.summary.items():
            if not k.startswith("mask_stat/") or "/n_" not in k:
                continue

            if k not in stats:
                stats[k] = StatTracker()
            stats[k].add(v)
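# Note: these scripts rely only on a small part of lib.StatTracker's interface:
# add() accumulates observations and get() returns an object exposing .mean and .std.
# Purely for illustration, a minimal stand-in with the same interface could look like
# the sketch below (names and implementation are assumptions, not the actual lib code).

from dataclasses import dataclass

import numpy as np


@dataclass
class _SimpleStat:
    mean: np.ndarray
    std: np.ndarray


class _SimpleStatTracker:
    """Illustrative sketch of the add()/get() interface used above."""

    def __init__(self):
        self.samples = []

    def add(self, value):
        # Accepts scalars or same-shaped arrays; they are stacked on get().
        self.samples.append(np.asarray(value, dtype=np.float64))

    def get(self) -> _SimpleStat:
        data = np.stack(self.samples)
        return _SimpleStat(mean=data.mean(axis=0), std=data.std(axis=0))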
print(f"Downloading run {i}, {r.name}, {r.id}") run_dir = os.path.join(WEIGHTS_DIR, r.name, r.id) if os.path.isdir(run_dir): continue for f in tqdm(r.files()): if "export/stage_final_masks/stage_0" not in f.name: continue dl_name = os.path.join(run_dir, f.name) os.makedirs(os.path.dirname(dl_name), exist_ok=True) f.download(root=run_dir, replace=True) N_POINTS = 500 runs = group(runs, ["task", 'layer_sizes', "tuple.mode"]) trackers: Dict[str, StatTracker] = {} trackers_all: Dict[str, StatTracker] = {} def add_tracker(trackers, name, data): if name not in trackers: trackers[name] = StatTracker() hist, _ = np.histogram(data, N_POINTS, [0, 1]) trackers[name].add(hist) human_readable_names = OrderedDict() human_readable_names[
#!/usr/bin/env python3
import lib
from lib import StatTracker
from lib.common import group
import os
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.pyplot as plt

runs = lib.get_runs(["addmul_ff_alpha_analysis"])
runs = group(runs, ["mask_loss_weight"])

sharing_stats = {}
accuracy_stats = {}

for grp, runs in runs.items():
    print("----------------------------------- ", grp)
    for run in runs:
        print("RUN ID", run.id)
        tsum = 0
        ssum = 0
        # print(stats)
        for k, v in run.summary.items():
            kparts = k.split("/")
            if kparts[-1] != "n_1" or "/all/" in k or not k.startswith("mask_stat/"):
                continue
            print(k, v)
    for t in range(2):
        this_rnn_stats = [rnn_stats[f"{plots[n]}{t}"].get() for n in names]
        means_rnn = [s.mean * 100 for s in this_rnn_stats]
        std_rnn = [s.std * 100 for s in this_rnn_stats]

        plt.bar([5.5 * r + 1 + t * 2.5 for r in range(len(names))],
                means_rnn,
                yerr=std_rnn,
                align='center')

    plt.xticks([5.5 * r + 1.75 for r in range(len(names))], names)
    plt.ylabel("Accuracy [\\%]")
    # plt.legend(["F1", "F2", "R1", "R2"], bbox_to_anchor=(1.1, 1.05))

    fname = f"{BASE_DIR}/tuple_performance.pdf"
    fig.axes[0].yaxis.set_label_coords(-0.12, 0.4)
    fig.savefig(fname, bbox_inches='tight', pad_inches=0.01)


rnn_runs = lib.get_runs(["tuple_rnn"])
feedforward_runs = lib.get_runs(["tuple_feedforward_big"])

feedforward_runs = group(feedforward_runs, ["layer_sizes"])
rnn_runs = group(rnn_runs, ["tuple.mode"])

plot_all("rnn", rnn_runs)
plot_all("feedforward", feedforward_runs)

plot_both(feedforward_runs["layer_sizes_2000,2000,2000,2000"],
          rnn_runs["tuple.mode_together"])
def do_plot(runs, prefix):
    runs = group(runs, ["layer_sizes", "task"])
    all_stats = {}

    download_dir = f"{prefix}/weights"

    @dataclass
    class Similarity:
        iou: Union[float, lib.StatTracker, lib.Stat]
        subsetness: Union[float, lib.StatTracker, lib.Stat]

    def calc_stats(run: str) -> Dict[str, Similarity]:
        base_dir = os.path.join(download_dir, run, "export/stage_final_masks")
        dir1 = f"{base_dir}/stage_1/"
        dir2 = f"{base_dir}/stage_2/"

        res = {}
        for f in os.listdir(dir1):
            assert f.endswith(".pth")
            m1 = (torch.load(os.path.join(dir1, f)) > 0)
            m2 = (torch.load(os.path.join(dir2, f)) > 0)

            n_min = min(m1.astype(np.int64).sum(), m2.astype(np.int64).sum())
            intersect = (m1 & m2).astype(np.int64).sum()
            union = (m1 | m2).astype(np.int64).sum()

            res[f[:-4]] = Similarity(intersect / union, intersect / n_min)
        return res

    for grp, rn in runs.items():
        if grp not in all_stats:
            all_stats[grp] = {}

        stats = all_stats[grp]
        for run in rn:
            for f in run.files(per_page=10000):
                if not f.name.startswith("export") or "/stage_final_masks" not in f.name:
                    continue

                fname = os.path.join(download_dir, run.id, f.name)
                if not os.path.isfile(fname):
                    print(fname)
                    target_dir = os.path.dirname(fname)
                    os.makedirs(target_dir, exist_ok=True)

                    print(f"Run {run.id}: downloading {fname}...")
                    f.download(root=os.path.join(download_dir, run.id), replace=True)

            for name, val in calc_stats(run.id).items():
                if name not in stats:
                    stats[name] = Similarity(lib.StatTracker(), lib.StatTracker())

                stats[name].iou.add(val.iou)
                stats[name].subsetness.add(val.subsetness)

        for v in stats.values():
            v.iou = v.iou.get()
            v.subsetness = v.subsetness.get()

    def friendly_name(name: str) -> str:
        if name.startswith("mask_"):
            name = name[5:]
        if name.endswith("_weight"):
            name = name[:-7]

        name = name.replace("_weight_", "_")
        name = name.replace("_cells_", "_")

        lparts = name.split("_")
        if lparts[0] == "layers" and lparts[1].isdecimal():
            name = f"layer {int(lparts[1]) + 1}"

        if name in ["output_projection", "layer 5"]:
            name = "output"

        return name.replace("_", "\\_")

    for grp, stats in all_stats.items():
        print("-------------------- GROUP --------", grp)
        print(stats.keys())

        fig = plt.figure(figsize=[4.5, 1.4])

        keys = list(sorted(stats.keys()))
        if keys[0].startswith("lstm_cells"):
            # Swap adjacent keys pairwise to reorder the LSTM cell entries for plotting.
            for i in range(1, len(keys), 2):
                keys[i], keys[i - 1] = keys[i - 1], keys[i]

        # print([friendly_name(k) for k in keys])
        names = [friendly_name(k) for k in keys]
        legend = ["IoU", "IoMin"]

        plt.bar([2.25 * x for x in range(len(names))],
                [stats[n].iou.mean for n in keys],
                yerr=[stats[n].iou.std for n in keys],
                align='center')
        plt.bar([2.25 * x + 1 for x in range(len(names))],
                [stats[n].subsetness.mean for n in keys],
                yerr=[stats[n].subsetness.std for n in keys],
                align='center')

        plt.xticks([2.25 * x + 0.5 for x in range(len(names))], names)
        plt.ylabel("Proportion")
        plt.ylim(0, 1)
        plt.legend(legend)

        f = f"{prefix}/{grp}.pdf"
        os.makedirs(os.path.dirname(f), exist_ok=True)
        fig.savefig(f, bbox_inches='tight')
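# The two similarity measures computed in calc_stats above are intersection-over-union
# (IoU) and intersection-over-minimum (IoMin, called "subsetness" in the code) of the
# binarized masks. The helper below is an illustrative sanity check of the same
# arithmetic on toy boolean masks; it is not part of the original pipeline.


def _similarity_example():
    import numpy as np

    m1 = np.array([True, True, False, False])
    m2 = np.array([True, False, True, False])

    intersect = (m1 & m2).astype(np.int64).sum()  # 1 shared position
    union = (m1 | m2).astype(np.int64).sum()      # 3 positions active in either mask
    n_min = min(m1.astype(np.int64).sum(), m2.astype(np.int64).sum())  # smaller mask: 2

    print("IoU:", intersect / union)    # 1/3
    print("IoMin:", intersect / n_min)  # 1/2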