def cv(task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]) -> None:
    """Run cross validation (for hyperparameter selection)."""
    logging.info("Running cross validation for {}, {}".format(task.name, variant.name))

    # Get data. CV uses train data only.
    train_data, _ = data.get(task)
    labels, y_np = train_data
    labels_np = np.array(labels)
    x_np = data.features(task, variant, labels)

    # Run k-fold cross validation.
    folder = model_selection.KFold(n_splits=5, shuffle=True)
    overall_y_hat = np.zeros_like(y_np)
    for i, (train_index, test_index) in enumerate(folder.split(x_np)):
        # logging.info("Fold {}".format(i))
        x_train, x_test = x_np[train_index], x_np[test_index]
        y_train, y_test = y_np[train_index], y_np[test_index]
        labels_train, labels_test = (labels_np[train_index], labels_np[test_index])
        centering = train(model, x_train, y_train, config)
        y_test_hat = test(model, x_test, y_test, centering, config)

        # Uncomment the next line to report on individual folds.
        # metrics.report(y_test_hat, y_test, labels_test, data.TASK_LABELS[task])

        # Save results into the overall aggregate.
        overall_y_hat[test_index] = y_test_hat

    # Report on overall results.
    metrics.report(overall_y_hat, y_np, labels, data.TASK_LABELS[task])

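# Illustrative, self-contained sketch (not from the original code) of the out-of-fold
# aggregation pattern that cv() relies on: each datum is predicted exactly once, by the
# fold whose test split it lands in, and the fold predictions are stitched back into a
# single array for one overall report. The helper name and the toy "predictor" (the
# thresholded training mean) are made up here and stand in for the real train()/test().
def _cv_aggregation_sketch() -> np.ndarray:
    x = np.arange(20).reshape(-1, 1).astype(float)
    y = (np.arange(20) % 2).astype(int)
    overall_y_hat = np.zeros_like(y)
    folder = model_selection.KFold(n_splits=5, shuffle=True, random_state=0)
    for train_index, test_index in folder.split(x):
        fold_pred = int(y[train_index].mean() >= 0.5)  # stand-in for train() + test()
        overall_y_hat[test_index] = fold_pred          # each test slot is filled exactly once
    return overall_y_hat
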
def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]], np.ndarray]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(task.name, variant.name))

    # Get data. Train+test uses both splits.
    train_data, test_data = data.get(task)

    # Train: deduplicate labels, keeping one y value per unique label.
    labels_train, y_train_np = train_data
    train_label_to_y = {}
    for label, y in zip(labels_train, y_train_np):
        train_label_to_y[label] = y
    labels_train_unique = sorted(set(labels_train))
    y_train = [train_label_to_y[label] for label in labels_train_unique]
    x_train_np = data.features(task, variant, labels_train_unique)
    centering = train(model, x_train_np, y_train, config)

    # Test: same deduplication.
    labels_test, y_test_np = test_data
    test_label_to_y = {}
    for label, y in zip(labels_test, y_test_np):
        test_label_to_y[label] = y
    labels_test_unique = sorted(set(labels_test))
    y_test = [test_label_to_y[label] for label in labels_test_unique]
    x_test_np = data.features(task, variant, labels_test_unique)
    y_test_hat = test(model, x_test_np, y_test, centering, config)

    return metrics.report(y_test_hat, y_test, labels_test_unique, data.TASK_LABELS[task])

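# A minimal sketch (not from the original code) of the label-deduplication step above,
# on toy data: keep one y value per unique label, ordered by sorted label. The helper
# name is made up for illustration.
def _dedup_sketch() -> List[int]:
    labels = ["cup", "ball", "cup"]
    ys = [1, 0, 1]
    label_to_y = dict(zip(labels, ys))
    labels_unique = sorted(set(labels))                    # ["ball", "cup"]
    return [label_to_y[label] for label in labels_unique]  # [0, 1]
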
def evaluate_round1() -> None:
    """This is run to score human annotations."""
    phase = "round1"
    lim = 50
    for task in Task:
        logging.info(task)

        gold_path = os.path.join(
            "data", "human", "{}-{}-gold.txt".format(TASK_MEDIUMHAND[task], phase))
        gold_data = get_gold(gold_path, lim=lim)

        label_path = os.path.join(
            "data", "human", "{}-{}-labels.txt".format(TASK_MEDIUMHAND[task], phase))
        labels = get_labels(label_path, lim=lim)

        ann_path = os.path.join(
            "data",
            "human",
            "{}-{}-annotations-first50.csv".format(TASK_MEDIUMHAND[task], phase),
        )
        ann_data = get_anns(ann_path, lim=lim)

        task_labels = data.TASK_LABELS[task]
        acc, micro_f1, macro_f1s, category_cms, per_datum = metrics.report(
            ann_data, gold_data, labels, task_labels)

def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]], np.ndarray]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(task.name, variant.name))

    # Get data. Train+test uses both splits.
    train_data, test_data = data.get(task)

    # Train.
    labels_train, y_train_np = train_data
    x_train_np = data.features(task, variant, labels_train)
    centering = train(model, x_train_np, y_train_np, config)

    # Test.
    labels_test, y_test_np = test_data
    x_test_np = data.features(task, variant, labels_test)
    y_test_hat = test(model, x_test_np, y_test_np, centering, config)

    return metrics.report(y_test_hat, y_test_np, labels_test, data.TASK_LABELS[task])

def main() -> None:
    # Settings. (One-time use, so no flags.)
    perdatum_path = "data/results/Bert-situated-AP-perdatum.txt"
    task = data.Task.Situated_AffordancesProperties

    # Load per-datum output.
    with open(perdatum_path, "r") as f:
        perdatum = util.str2np(f.read())

    # Get test data: labels and ground-truth y values.
    _, test_data = data.get(task)
    labels, y = test_data
    y = y.squeeze()

    # The per-datum value is (y_hat == y). We want to recover y_hat so we can pass it
    # back into metrics to easily re-compute everything we need. There is probably a
    # vectorized op that can do this, but oh well.
    y_hat = np.zeros_like(y)
    for i in range(len(y)):
        y_hat[i] = y[i] if perdatum[i] else 1 - y[i]

    # Sanity check.
    assert len(labels) == len(y_hat)
    assert len(y) == len(y_hat)

    _, _, _, category_cms, _ = metrics.report(y_hat, y, labels, data.TASK_LABELS[task])

    # Write out.
    task_short = data.TASK_SHORTHAND[task]
    for i in [0, 1]:
        # E.g., "O" for objects, "P" for properties.
        cat_short = task_short[i]
        out_path = "data/results/{}-{}-{}.txt".format("Bert", task_short, cat_short)
        print("Writing {} results to {}".format(cat_short, out_path))
        with open(out_path, "w") as f:
            for item, cm in category_cms[i]["per-item"].items():
                f.write("{} {}\n".format(item, util.np2str(cm)))

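# A minimal sketch (not from the original code) of the vectorized equivalent of the
# recovery loop in main() above: where the per-datum flag says the prediction was
# correct, keep y; otherwise flip the binary label. Shown on toy arrays; the helper
# name is made up for illustration.
def _recover_y_hat_sketch() -> np.ndarray:
    y = np.array([0, 1, 1, 0])
    perdatum = np.array([1, 0, 1, 1])                  # 1 means y_hat == y
    return np.where(perdatum.astype(bool), y, 1 - y)   # -> array([0, 0, 1, 0])
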
def baseline(
    func: Callable[[List[str], np.ndarray, List[str], Tuple[int, ...]], np.ndarray],
    name: str,
    shortname: str,
) -> str:
    # settings
    tasks = [
        (Task.Abstract_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsAffordances, ["object", "affordance"]),
        (Task.Situated_AffordancesProperties, ["affordance", "property"]),
    ]

    nums = []
    for task, mf1_labs in tasks:
        logging.info("Running {} baseline for {}".format(name, task.name))
        train_data, test_data = data.get(task)
        labels_train, y_train = train_data
        labels_test, y_test = test_data
        y_test_hat = func(labels_train, y_train, labels_test, y_test.shape)
        _, _, macro_f1s, _, per_datum = metrics.report(
            y_test_hat, y_test, labels_test, data.TASK_LABELS[task])
        for mf1_lab in mf1_labs:
            nums.append(macro_f1s[mf1_lab])

        # Write full results to file.
        path = os.path.join(
            "data",
            "results",
            "{}-{}-perdatum.txt".format(shortname, TASK_MEDIUMHAND[task]),
        )
        with open(path, "w") as f:
            f.write(util.np2str(per_datum) + "\n")

        logging.info("")

    return name + "," + ",".join(["{:.2f}".format(num) for num in nums])

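# A hedged example (not from the original code) of a callable with the signature that
# baseline() expects: a majority-class baseline that ignores the labels and predicts the
# most common training value for every test datum. The function name is made up for
# illustration; it assumes binary 0/1 targets (ties go to 1).
def majority_baseline(
    labels_train: List[str],
    y_train: np.ndarray,
    labels_test: List[str],
    shape: Tuple[int, ...],
) -> np.ndarray:
    majority = int(float(np.mean(y_train)) >= 0.5)
    return np.full(shape, majority, dtype=y_train.dtype)


# Example invocation (hypothetical names): baseline(majority_baseline, "Majority", "maj")
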
def epoch(
    loader: DataLoader,
    data_len: int,
    train: bool,
    split: str,
    global_i: int,
    text_only: bool = False,
) -> Tuple[
    Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]], np.ndarray],
    nn.Module,
]:
    """Runs one pass over loader. Returns the results of metrics.report(...) and the model."""
    model.train(train)
    labels: List[str] = []
    total_corr, total_loss, start_idx = 0, 0, 0
    epoch_y_hat = np.zeros(data_len, dtype=int)
    epoch_y = np.zeros(data_len, dtype=int)
    for batch_i, batch in enumerate(tqdm(loader, desc="Batch")):
        y = batch["y"].to(device, dtype=torch.half)
        input_ids = batch["input_ids"].to(device)
        if not text_only:
            input_images = batch["input_image"].to(device)
        labels += batch["label"]
        batch_size = len(y)

        # Forward (and backward, when training).
        if train:
            if not text_only:
                y_hat = model(text=input_ids, image=input_images)
            else:
                y_hat = model(text=input_ids)
            loss = loss_fn(y_hat, y)
            loss.backward()
            # Optimizer step before scheduler step (PyTorch >= 1.1 ordering).
            optimizer.step()
            scheduler.step()
            model.zero_grad()
            global_i += batch_size
        else:
            with torch.no_grad():
                if not text_only:
                    y_hat = model(text=input_ids, image=input_images)
                else:
                    y_hat = model(text=input_ids)
                loss = loss_fn(y_hat, y)

        # Threshold at 0.5 to get hard decisions, and track running stats.
        batch_decisions = torch.tensor(
            [int(value >= .5) for value in y_hat]).to(device)
        batch_corr = (batch_decisions == y).sum().item()
        total_corr += batch_corr
        total_loss += loss.item() * batch_size
        batch_acc = batch_corr / batch_size
        epoch_y_hat[start_idx:start_idx + batch_size] = (
            batch_decisions.int().cpu().numpy())
        epoch_y[start_idx:start_idx + batch_size] = (
            y.int().cpu().squeeze().numpy())

        # Viz per-batch stats for training only.
        if train:
            viz.add_scalar("Loss/{}".format(split), loss.item(), global_i)
            viz.add_scalar("Acc/{}".format(split), batch_acc, global_i)

        start_idx += batch_size

    # End of batches. Always print overall stats.
    avg_loss = total_loss / data_len
    overall_acc = total_corr / data_len
    print("Average {} loss: {}".format(split, avg_loss))
    print("{} accuracy: {}".format(split, overall_acc))

    # For eval only, viz overall loss and acc.
    if not train:
        viz.add_scalar("Loss/{}".format(split), avg_loss, global_i)
        viz.add_scalar("Acc/{}".format(split), overall_acc, global_i)

    # For both train and eval, compute overall stats.
    assert len(labels) == len(epoch_y_hat)
    # code.interact(local=dict(globals(), **locals()))
    metrics_results = metrics.report(epoch_y_hat, epoch_y, labels, TASK_LABELS[task])
    _, micro_f1, category_macro_f1s, _, _ = metrics_results
    viz.add_scalar("F1/{}/micro".format(split), micro_f1, global_i)
    for cat, macro_f1 in category_macro_f1s.items():
        viz.add_scalar("F1/{}/macro/{}".format(split, cat), macro_f1, global_i)
    viz.flush()

    return metrics_results, model

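# Illustrative, self-contained sketch (not from the original code) of the per-batch
# thresholding-at-0.5 and accuracy bookkeeping that epoch() does, shown here in
# vectorized form on toy tensors. The helper name is made up for illustration.
def _decision_accuracy_sketch() -> float:
    y_hat = torch.tensor([0.2, 0.7, 0.9, 0.4])
    y = torch.tensor([0.0, 1.0, 0.0, 0.0])
    decisions = (y_hat >= 0.5).int()               # tensor([0, 1, 1, 0])
    correct = (decisions == y.int()).sum().item()  # 3
    return correct / len(y)                        # 0.75
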
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--task",
        type=str,
        choices=TASK_REV_MEDIUMHAND.keys(),
        help="Name of task to run",
        required=True,
    )
    parser.add_argument("--epochs", type=int, default=5, help="How many epochs to run")
    parser.add_argument("--layer", type=int, default=12, help="Which bert layer to run")
    args = parser.parse_args()
    task = TASK_REV_MEDIUMHAND[args.task]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    initial_lr = 5e-5
    warmup_proportion = 0.1
    train_batch_size = 64
    test_batch_size = 96
    train_epochs = args.epochs

    print("Building model...")
    model = mlp(600, 0.0, 128, nn.ReLU, 0.0, 1)
    logging.info("Model:")
    logging.info(model)
    model.to(device)

    print("Loading training data")
    train_dataset = BertDataset(task, True)
    train_loader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8
    )

    print("Loading test data")
    test_dataset = BertDataset(task, False)
    test_loader = DataLoader(
        test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=8
    )

    # Loss and optimizer are created once, outside the batch loop.
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=initial_lr)

    # Training.
    model.train()
    for epoch_i in range(train_epochs):
        for batch_i, batch in enumerate(tqdm(train_loader, desc="Batch")):
            # Batch entries are already tensors; just cast and move to the device.
            x = batch["input_ids"].to(device).float()
            y = batch["y"].to(device).float()
            assert x.shape[0] == y.shape[0]

            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Testing: accumulate predictions, ground truth, and labels across batches.
    # (Assumes BertDataset batches include a "label" field, as in the epoch() loop above.)
    labels: List[str] = []
    y_test_batches: List[np.ndarray] = []
    y_test_hat_batches: List[np.ndarray] = []
    model.eval()
    with torch.no_grad():
        for batch_i, batch in enumerate(tqdm(test_loader, desc="Batch")):
            x = batch["input_ids"].to(device).float()
            y = batch["y"].to(device).float()
            labels += batch["label"]
            y_test_batches.append(y.int().cpu().numpy())
            y_test_hat_batches.append(model(x).round().int().cpu().numpy())

    y_test = np.concatenate(y_test_batches)
    y_test_hat = np.concatenate(y_test_hat_batches)
    metrics.report(y_test_hat, y_test, labels, data.TASK_LABELS[task])