Example #1
def cv(task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]) -> None:
    """Run cross validation (for hyperparameter selection)."""
    logging.info("Running cross validation for {}, {}".format(task.name, variant.name))
    # get data. cv uses train data only.
    train_data, _ = data.get(task)
    labels, y_np = train_data
    labels_np = np.array(labels)
    x_np = data.features(task, variant, labels)

    # run k-fold cross validation
    folder = model_selection.KFold(n_splits=5, shuffle=True)
    overall_y_hat = np.zeros_like(y_np)
    for i, (train_index, test_index) in enumerate(folder.split(x_np)):
        # logging.info("Fold {}".format(i))
        x_train, x_test = x_np[train_index], x_np[test_index]
        y_train, y_test = y_np[train_index], y_np[test_index]
        labels_train, labels_test = (labels_np[train_index], labels_np[test_index])

        centering = train(model, x_train, y_train, config)
        y_test_hat = test(model, x_test, y_test, centering, config)

        # Uncomment the next line to report on individual folds.
        # metrics.report(y_test_hat, y_test, labels_test, data.TASK_LABELS[task])

        # Save results into overall aggregate.
        overall_y_hat[test_index] = y_test_hat

    # Report on overall results.
    metrics.report(overall_y_hat, y_np, labels, data.TASK_LABELS[task])
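The docstring above notes that cv exists for hyperparameter selection. A minimal usage sketch, assuming task, variant, and model are built as in the other examples; the config keys below are made up for illustration and are not the repo's actual hyperparameters:

candidate_configs = [
    {"lr": 1e-3, "epochs": 10},  # hypothetical config keys, for illustration only
    {"lr": 1e-4, "epochs": 20},
]
for candidate in candidate_configs:
    # Each run logs a metrics.report(...) over the out-of-fold predictions.
    cv(task, variant, model, candidate)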
Example #2
def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]],
           np.ndarray]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(task.name, variant.name))
    # get data: both train and test splits.
    train_data, test_data = data.get(task)

    # train
    labels_train, y_train_np = train_data
    # Deduplicate labels, keeping one y value per unique label.
    train_label_to_y = {}
    for label, y in zip(labels_train, y_train_np):
        train_label_to_y[label] = y
    labels_train_unique = sorted(set(labels_train))
    y_train = [train_label_to_y[label] for label in labels_train_unique]
    x_train_np = data.features(task, variant, labels_train_unique)
    centering = train(model, x_train_np, y_train, config)

    # test
    labels_test, y_test_np = test_data
    # Same deduplication for the test split.
    test_label_to_y = {}
    for label, y in zip(labels_test, y_test_np):
        test_label_to_y[label] = y
    labels_test_unique = sorted(set(labels_test))
    y_test = [test_label_to_y[label] for label in labels_test_unique]
    x_test_np = data.features(task, variant, labels_test_unique)
    y_test_hat = test(model, x_test_np, y_test, centering, config)
    return metrics.report(y_test_hat, y_test, labels_test_unique, data.TASK_LABELS[task])
Example #3
def evaluate_round1() -> None:
    """This is run to score human annotations."""
    phase = "round1"
    lim = 50
    for task in Task:
        logging.info(task)
        gold_path = os.path.join(
            "data", "human", "{}-{}-gold.txt".format(TASK_MEDIUMHAND[task],
                                                     phase))
        gold_data = get_gold(gold_path, lim=lim)

        label_path = os.path.join(
            "data", "human", "{}-{}-labels.txt".format(TASK_MEDIUMHAND[task],
                                                       phase))
        labels = get_labels(label_path, lim=lim)

        ann_path = os.path.join(
            "data",
            "human",
            "{}-{}-annotations-first50.csv".format(TASK_MEDIUMHAND[task],
                                                   phase),
        )
        ann_data = get_anns(ann_path, lim=lim)

        task_labels = data.TASK_LABELS[task]

        acc, micro_f1, macro_f1s, category_cms, per_datum = metrics.report(
            ann_data, gold_data, labels, task_labels)
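get_gold and get_labels are defined elsewhere; a minimal sketch of one plausible shape for them, assuming one value per line and the same lim cutoff (hypothetical, not the project's actual helpers):

from typing import List

import numpy as np


def get_labels(path: str, lim: int) -> List[str]:
    # Read the first `lim` non-empty lines as label strings.
    with open(path, "r") as f:
        lines = [line.strip() for line in f if line.strip()]
    return lines[:lim]


def get_gold(path: str, lim: int) -> np.ndarray:
    # Read the first `lim` lines as integer gold decisions.
    with open(path, "r") as f:
        vals = [int(line.strip()) for line in f if line.strip()]
    return np.array(vals[:lim], dtype=int)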
Example #4
def train_and_test(
    task: Task, variant: Variant, model: nn.Module, config: Dict[str, Any]
) -> Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]],
           np.ndarray]:
    """Run a final train + test run over a task."""
    logging.info("Running train+test for {}, {}".format(
        task.name, variant.name))
    # get data: both train and test splits.
    train_data, test_data = data.get(task)

    # train
    labels_train, y_train_np = train_data
    x_train_np = data.features(task, variant, labels_train)
    centering = train(model, x_train_np, y_train_np, config)

    # test
    labels_test, y_test_np = test_data
    x_test_np = data.features(task, variant, labels_test)
    y_test_hat = test(model, x_test_np, y_test_np, centering, config)
    return metrics.report(y_test_hat, y_test_np, labels_test,
                          data.TASK_LABELS[task])
Example #5
def main() -> None:
    # settings. (one-time use, so no flags.)
    perdatum_path = "data/results/Bert-situated-AP-perdatum.txt"
    task = data.Task.Situated_AffordancesProperties

    # load per-datum output
    with open(perdatum_path, "r") as f:
        perdatum = util.str2np(f.read())

    # get test data: labels and groundtruth y-values
    _, test_data = data.get(task)
    labels, y = test_data
    y = y.squeeze()

    # per_datum stores (y_hat == y) for each example. Recover y_hat from it so we can
    # pass predictions back into metrics and easily re-compute everything we need.
    # (There is probably a vectorized op for this, but a loop is fine at this size.)
    y_hat = np.zeros_like(y)
    for i in range(len(y)):
        y_hat[i] = y[i] if perdatum[i] else 1 - y[i]

    # sanity check
    assert len(labels) == len(y_hat)
    assert len(y) == len(y_hat)

    _, _, _, category_cms, _ = metrics.report(y_hat, y, labels,
                                              data.TASK_LABELS[task])

    # write out
    task_short = data.TASK_SHORTHAND[task]
    for i in [0, 1]:
        # e.g., "O" for objects, "P" for properties
        cat_short = task_short[i]
        out_path = "data/results/{}-{}-{}.txt".format("Bert", task_short,
                                                      cat_short)
        print("Writing {} results to {}".format(cat_short, out_path))
        with open(out_path, "w") as f:
            for item, cm in category_cms[i]["per-item"].items():
                f.write("{} {}\n".format(item, util.np2str(cm)))
Example #6
def baseline(
    func: Callable[[List[str], np.ndarray, List[str], Tuple[int, ...]],
                   np.ndarray],
    name: str,
    shortname: str,
) -> str:
    # settings
    tasks = [
        (Task.Abstract_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsProperties, ["object", "property"]),
        (Task.Situated_ObjectsAffordances, ["object", "affordance"]),
        (Task.Situated_AffordancesProperties, ["affordance", "property"]),
    ]
    nums = []
    for task, mf1_labs in tasks:
        logging.info("Running {} baseline for {}".format(name, task.name))
        train_data, test_data = data.get(task)
        labels_train, y_train = train_data
        labels_test, y_test = test_data
        y_test_hat = func(labels_train, y_train, labels_test, y_test.shape)
        _, _, macro_f1s, _, per_datum = metrics.report(y_test_hat, y_test,
                                                       labels_test,
                                                       data.TASK_LABELS[task])
        for mf1_lab in mf1_labs:
            nums.append(macro_f1s[mf1_lab])

        # write full results to file
        path = os.path.join(
            "data",
            "results",
            "{}-{}-perdatum.txt".format(shortname, TASK_MEDIUMHAND[task]),
        )
        with open(path, "w") as f:
            f.write(util.np2str(per_datum) + "\n")

    logging.info("")
    return name + "," + ",".join(["{:.2f}".format(num) for num in nums])
Example #7
    def epoch(
        loader: DataLoader,
        data_len: int,
        train: bool,
        split: str,
        global_i: int,
        text_only: bool = False
    ) -> Tuple[Tuple[float, float, Dict[str, float], Dict[int, Dict[str, Any]],
                     np.ndarray], nn.Module]:
        """
        Returns the results of metrics.report(...) along with the model.
        """
        model.train(train)
        labels: List[str] = []
        total_corr, total_loss, start_idx = 0, 0, 0
        epoch_y_hat = np.zeros(data_len, dtype=int)
        epoch_y = np.zeros(data_len, dtype=int)

        for batch_i, batch in enumerate(tqdm(loader, desc="Batch")):
            y = batch["y"].to(device, dtype=torch.half)
            input_ids = batch["input_ids"].to(device)

            if not text_only:
                input_images = batch["input_image"].to(device)

            labels += batch["label"]
            batch_size = len(y)

            # fwd
            if train:
                if not text_only:
                    y_hat = model(text=input_ids, image=input_images)
                else:
                    y_hat = model(text=input_ids)

                loss = loss_fn(y_hat, y)
                loss.backward()
                optimizer.step()
                scheduler.step()  # step the LR schedule after the optimizer update
                model.zero_grad()
                global_i += batch_size
            else:
                with torch.no_grad():
                    if not text_only:
                        y_hat = model(text=input_ids, image=input_images)
                    else:
                        y_hat = model(text=input_ids)

                    loss = loss_fn(y_hat, y)

            batch_decisions = torch.tensor(
                [int(value >= .5) for value in y_hat]).to(device)
            batch_corr = (batch_decisions == y).sum().item()
            total_corr += batch_corr
            total_loss += loss.item() * batch_size
            batch_acc = batch_corr / batch_size

            epoch_y_hat[start_idx:start_idx +
                        batch_size] = (batch_decisions.int().cpu().numpy())
            epoch_y[start_idx:start_idx +
                    batch_size] = (y.int().cpu().squeeze().numpy())

            # viz per-batch stats for training only
            if train:
                viz.add_scalar("Loss/{}".format(split), loss.item(), global_i)
                viz.add_scalar("Acc/{}".format(split), batch_acc, global_i)

            start_idx += batch_size

        # end of epoch. always print overall stats.
        avg_loss = total_loss / data_len
        overall_acc = total_corr / data_len
        print("Average {} loss: {}".format(split, avg_loss))
        print("{} accuracy: {}".format(split, overall_acc))

        # for eval only, viz overall loss and acc
        if not train:
            viz.add_scalar("Loss/{}".format(split), avg_loss, global_i)
            viz.add_scalar("Acc/{}".format(split), overall_acc, global_i)

        # for both train and eval, compute overall stats.
        assert len(labels) == len(epoch_y_hat)
        # code.interact(local=dict(globals(), **locals()))
        metrics_results = metrics.report(epoch_y_hat, epoch_y, labels,
                                         TASK_LABELS[task])
        _, micro_f1, category_macro_f1s, _, _ = metrics_results
        viz.add_scalar("F1/{}/micro".format(split), micro_f1, global_i)
        for cat, macro_f1 in category_macro_f1s.items():
            viz.add_scalar("F1/{}/macro/{}".format(split, cat), macro_f1,
                           global_i)
        viz.flush()

        return metrics_results, model
Example #8
def main() -> None:
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--task",
        type=str,
        choices=TASK_REV_MEDIUMHAND.keys(),
        help="Name of task to run",
        required=True,
    )
    parser.add_argument("--epochs", type=int, default=5, help="How many epochs to run")
    parser.add_argument("--layer", type=int, default=12, help="Which bert layer to run")
    args = parser.parse_args()
    task = TASK_REV_MEDIUMHAND[args.task]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    initial_lr = 5e-5
    warmup_proportion = 0.1
    train_batch_size = 64
    test_batch_size = 96
    train_epochs = args.epochs

    print("Building model...")
    model = mlp(600, 0.0, 128, nn.ReLU, 0.0, 1)
    logging.info("Model:")
    logging.info(model)
    model.to(device)

    print("Loading traning data")
    train_dataset = BertDataset(task, True)
    train_loader = DataLoader(
        train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8
    )
    print("Loading test data")
    test_dataset = BertDataset(task, False)
    test_loader = DataLoader(
        test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=8
    )

    # Build the loss and optimizer once; they are shared across all batches.
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    # training
    model.train()
    for epoch_i in range(train_epochs):
        for batch_i, batch in enumerate(tqdm(train_loader, desc="Batch")):
            # Batches already hold tensors; move them to the device as floats.
            x = batch["input_ids"].to(device, dtype=torch.float)
            y = batch["y"].to(device, dtype=torch.float)
            assert x.shape[0] == y.shape[0]

            y_hat = model(x)
            loss = loss_fn(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # testing
    model.eval()
    labels: List[str] = []
    y_test_chunks = []
    y_test_hat_chunks = []
    for batch_i, batch in enumerate(tqdm(test_loader, desc="Batch")):
        x = batch["input_ids"].to(device, dtype=torch.float)
        y = batch["y"].to(device, dtype=torch.float)
        # Assumes the dataset also yields human-readable "label" strings,
        # as in the epoch() example above.
        labels += batch["label"]

        with torch.no_grad():
            y_hat = model(x).round().int().cpu()
        y_test_hat_chunks.append(y_hat)
        y_test_chunks.append(y.int().cpu())

    y_test = torch.cat(y_test_chunks).squeeze().numpy()
    y_test_hat = torch.cat(y_test_hat_chunks).squeeze().numpy()
    assert len(labels) == len(y_test_hat)
    metrics.report(y_test_hat, y_test, labels, data.TASK_LABELS[task])