def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict({
            "target_task": item["target_task"],
            "donor_task": item["donor_task"],
        })
        row_groups[group_key].append(item)

    header = [
        "task",
        "donor",
        "merged score",
        "stddev",
        "orig score",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["target_task"],
            hp["donor_task"],
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

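# The grouping above keys a defaultdict on a dict of hyperparameters, which
# requires a hashable dict. If `hashabledict` is not already provided by this
# module's imports, a minimal sketch of such a helper (assuming the
# hyperparameter values themselves are hashable) could look like this:
class hashabledict(dict):
    """Dict subclass usable as a dictionary key (sketch; values must be hashable)."""

    def __hash__(self):
        return hash(frozenset(self.items()))
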
def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    header = [
        "MNLI ckpt",
        "RTE ckpt",
        "merged RTE acc",
        "merged stddev",
        "orig RTE acc",
        "orig stddev",
        "MNLI body acc",
        "MNLI body stddev",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = [
            get_single_score(item["original_score"]) for item in row_items
        ]
        merged_scores = [
            get_single_score(item["merged_score"]) for item in row_items
        ]
        donor_body_scores = [
            get_single_score(item["donor_body_score"]) for item in row_items
        ]
        row = [
            hp["donor_ckpt_index"],
            hp["target_ckpt_index"],
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(donor_body_scores), round_digits),
            round(np.std(donor_body_scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
):
    items = result_utils.load_json(filepath)
    groups = collections.defaultdict(list)
    for item in items:
        hp = item["hyperparams"]
        groups[(hp["donor_fisher_examples"],
                hp["target_fisher_examples"])].append(item)

    merged_scores = {}
    for k, group_items in groups.items():
        scores = np.array(
            [get_single_score(item["merged_score"]) for item in group_items])
        mean = np.mean(scores)
        stddev = np.std(scores) if len(group_items) > 1 else None
        merged_scores[k] = render_score_fn(mean, stddev)

    all_donor_fisher_examples = sorted(set(k[0] for k in groups.keys()))
    all_target_fisher_examples = sorted(set(k[1] for k in groups.keys()))

    rows = [
        len(all_target_fisher_examples) * [""]
        for _ in all_donor_fisher_examples
    ]
    for col_idx, target_examples in enumerate(all_target_fisher_examples):
        for row_idx, donor_examples in enumerate(all_donor_fisher_examples):
            rows[row_idx][col_idx] = merged_scores[(donor_examples,
                                                    target_examples)]

    for row, examples in zip(rows, all_donor_fisher_examples):
        row.insert(0, str(examples))

    rows = [
        R"\toprule",
        [R"\textbf{Examples}"]
        + [str(examples) for examples in all_target_fisher_examples],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

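# The LaTeX builders format each cell through a `render_score_fn(mean, stddev,
# is_orig=False)` callable; `latex_render_score_subscript` is assumed to be
# defined elsewhere in this package. The function below is only a hypothetical
# renderer sketching the expected calling convention, not the project's actual
# implementation.
def _example_render_score_subscript(mean, stddev, is_orig=False):
    """Sketch only: renders e.g. '87.3' or '87.3$_{1.2}$'; the real helper may differ."""
    cell = f"{mean:.1f}" if stddev is None else f"{mean:.1f}$_{{{stddev:.1f}}}$"
    # Hypothetical choice: italicize unmerged (original) scores to set them apart.
    return rf"\textit{{{cell}}}" if is_orig else cell
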
def create_csv_table(filepath,
                     round_digits=1,
                     group_by_ckpt_index=True,
                     best_per_finetuned_model=False):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    if best_per_finetuned_model:
        new_row_groups = collections.defaultdict(list)
        for hp, row_items in row_groups.items():
            # TODO: get best original score as well
            best = max(row_items,
                       key=lambda r: get_single_score(r["merged_score"]))
            new_key = dict(hp)
            del new_key["train_run_uuid"]
            new_key = hashabledict(new_key)
            new_row_groups[new_key].append(best)
        row_groups = new_row_groups

    header = [
        "task",
        "task ckpt",
        "merged task f1",
        "stddev",
        "orig task f1",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["task"],
            hp["target_ckpt_index"] if group_by_ckpt_index else "-",
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    body = sorted(body, key=lambda r: r[:2])
    rows = [header] + body
    return result_utils.csv_to_str(rows)

def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
):
    if not isinstance(filepath, (list, tuple)):
        filepath = [filepath]

    items = []
    for fp in filepath:
        its = result_utils.load_json(fp)
        its = [_index_to_epoch(it, fp) for it in its]
        its = [
            it for it in its
            if it["hyperparams"]["target_epoch"].is_integer()
        ]
        items.extend(its)

    groups = collections.defaultdict(list)
    for item in items:
        hp = item["hyperparams"]
        groups[(hp["donor_epoch"], hp["target_epoch"])].append(item)

    original_scores = {}
    for target_epoch in range(1, 11):
        group_items = max(
            [
                group_items
                for (_, te), group_items in groups.items()
                if te == target_epoch
            ],
            key=len,
        )
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in group_items])
        mean = np.mean(og_scores)
        stddev = np.std(og_scores) if len(group_items) > 1 else None
        original_scores[target_epoch] = render_score_fn(mean, stddev)

    merged_scores = {}
    for k, group_items in groups.items():
        scores = np.array(
            [get_single_score(item["merged_score"]) for item in group_items])
        og_mean = np.mean(
            [get_single_score(item["original_score"]) for item in group_items])
        mean = np.mean(scores)
        stddev = np.std(scores) if len(group_items) > 1 else None
        merged_scores[k] = render_score_fn(mean - og_mean, stddev)

    rows = []
    for row_idx in range(8):
        donor_epoch = (row_idx + 1) / 2
        rows.append([str(donor_epoch)] + [
            merged_scores[(donor_epoch, float(target_epoch))]
            for target_epoch in range(1, 11)
        ])

    rows = [
        R"\toprule",
        [R"\textbf{Epoch}"]
        + [str(target_epoch) for target_epoch in range(1, 11)],
        R"\midrule",
        [R"\textit{Unmerged}"]
        + [original_scores[target_epoch] for target_epoch in range(1, 11)],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
    l2_coeffs=(0.0, 1e-6, 3e-4, 0.01, 0.1),
    coeff_to_pretty={
        0.0: "0",
        1e-6: "1e-6",
        3e-4: "3e-4",
        0.01: "1e-2",
        0.1: "1e-1",
    },
):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    def get_original_score(target_coeff):
        ret = {}
        for k, v in row_groups.items():
            if k["target_reg_strength"] != target_coeff:
                continue
            ret[k["donor_reg_strength"]] = v
        if not ret:
            # No runs for this target regularization strength; leave the cell blank.
            return ""
        ret_items = max(ret.values(), key=len)
        original_scores = np.array(
            [get_single_score(item["original_score"]) for item in ret_items])
        mean = np.mean(original_scores)
        stddev = np.std(original_scores) if len(ret_items) > 1 else None
        return render_score_fn(mean, stddev)

    rows = [len(l2_coeffs) * [""] for _ in l2_coeffs]
    for col_idx, target_coeff in enumerate(l2_coeffs):
        for row_idx, donor_coeff in enumerate(l2_coeffs):
            key = hashabledict({
                "target_reg_strength": target_coeff,
                "donor_reg_strength": donor_coeff,
            })
            row_items = row_groups[key]
            merged_scores = np.array(
                [get_single_score(item["merged_score"]) for item in row_items])
            mean = np.mean(merged_scores)
            stddev = np.std(merged_scores) if len(row_items) > 1 else None
            rows[row_idx][col_idx] = render_score_fn(mean, stddev)

    for row, coeff in zip(rows, l2_coeffs):
        row.insert(0, coeff_to_pretty[coeff])

    rows = [
        R"\toprule",
        [""] + [coeff_to_pretty[t] for t in l2_coeffs],
        R"\midrule",
        ["Original"] + [get_original_score(t) for t in l2_coeffs],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

def create_latex_table(  # noqa: C901
    filepath,
    render_score_fn=latex_render_score_subscript,
    target_task_order=result_utils.GLUE_TASKS_ORDER,
    donor_task_order=result_utils.GLUE_TASKS_ORDER,
    no_original_scores=False,
):
    if not isinstance(filepath, (list, tuple)):
        filepath = [filepath]

    items = []
    for fp in filepath:
        its = result_utils.load_json(fp)
        if "squad_donor" in fp:
            for it in its:
                it["donor_task"] = "squad2"
        items.extend(its)

    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict({
            "target_task": item["target_task"],
            "donor_task": item["donor_task"],
        })
        row_groups[group_key].append(item)

    def create_donor_to_merge_summary(target_task):
        ret = {}
        for k, v in row_groups.items():
            if k["target_task"] != target_task:
                continue
            ret[k["donor_task"]] = v
        ret2 = {}
        for donor_task, ret_items in ret.items():
            merged_scores = np.array(
                [get_single_score(item["merged_score"]) for item in ret_items])
            mean = np.mean(merged_scores)
            stddev = np.std(merged_scores) if len(ret_items) > 1 else None
            ret2[donor_task] = (mean, stddev)
        return ret2

    def get_original_task_summary(task):
        ret = {}
        for k, v in row_groups.items():
            if k["target_task"] != task:
                continue
            ret[k["donor_task"]] = v
        if not ret:
            return None, None
        ret_items = max(ret.values(), key=len)
        original_scores = np.array(
            [get_single_score(item["original_score"]) for item in ret_items])
        mean = np.mean(original_scores)
        stddev = np.std(original_scores) if len(ret_items) > 1 else None
        return mean, stddev

    rows = [len(target_task_order) * [""] for _ in donor_task_order]
    for col_idx, target_task in enumerate(target_task_order):
        donor_to_merge_summary = create_donor_to_merge_summary(target_task)
        for row_idx, donor_task in enumerate(donor_task_order):
            if donor_task == target_task and not no_original_scores:
                mean, stddev = get_original_task_summary(target_task)
                rows[row_idx][col_idx] = render_score_fn(mean, stddev, is_orig=True)
                continue
            if donor_task not in donor_to_merge_summary:
                continue
            mean, stddev = donor_to_merge_summary[donor_task]
            rows[row_idx][col_idx] = render_score_fn(mean, stddev)

    for row, task in zip(rows, donor_task_order):
        row.insert(0, result_utils.TASK_NICE_NAMES[task])

    rows = [
        R"\toprule",
        [R"\textbf{Task}"]
        + [result_utils.TASK_NICE_NAMES[t] for t in target_task_order],
        R"\midrule",
        *rows,
        R"\bottomrule",
    ]
    return result_utils.table_to_latex(rows)

def create_csv_table(filepath, round_digits=1):
    items = result_utils.load_json(filepath)
    row_groups = collections.defaultdict(list)
    for item in items:
        group_key = hashabledict(item["hyperparams"])
        row_groups[group_key].append(item)

    row_groups2 = collections.defaultdict(list)
    for hp, row_items in row_groups.items():
        best_og = max(
            row_items, key=lambda item: get_single_score(item["original_score"]))
        best_merged = max(
            row_items, key=lambda item: get_single_score(item["merged_score"]))
        hp = dict(hp)
        del hp["train_run_uuid"]
        group_key = hashabledict(hp)
        row_groups2[group_key].append({
            "original_score": best_og["original_score"],
            "merged_score": best_merged["merged_score"],
        })

    header = [
        "task",
        "mlm train ex",
        "mlm reg str",
        "merged task f1",
        "stddev",
        "orig task f1",
        "stddev",
        "mean boost",
        "stddev",
        "max boost",
        "min boost",
        "num trials",
    ]
    body = []
    for hp, row_items in row_groups2.items():
        og_scores = np.array(
            [get_single_score(item["original_score"]) for item in row_items])
        merged_scores = np.array(
            [get_single_score(item["merged_score"]) for item in row_items])
        row = [
            hp["task"],
            hp["pretrained_examples"],
            hp["pretrained_reg_strength"],
            #
            round(np.mean(merged_scores), round_digits),
            round(np.std(merged_scores), round_digits),
            #
            round(np.mean(og_scores), round_digits),
            round(np.std(og_scores), round_digits),
            #
            round(np.mean(merged_scores - og_scores), round_digits),
            round(np.std(merged_scores - og_scores), round_digits),
            #
            round(np.max(merged_scores - og_scores), round_digits),
            round(np.min(merged_scores - og_scores), round_digits),
            len(row_items),
        ]
        body.append(row)

    body = sorted(body, key=lambda r: r[:3])
    rows = [header] + body
    return result_utils.csv_to_str(rows)
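

# Hypothetical usage sketch: each builder reads a results JSON produced by the
# experiment scripts and returns a ready-to-paste table string. The path below
# is a placeholder, not a real artifact from this repo.
if __name__ == "__main__":
    print(create_csv_table("/path/to/merge_results.json", round_digits=2))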