def evaluate_accuracy(model, data, batch_size, gpu, debug=0):
    """Compute accuracy on `data`, printing precision, recall, and F1 for the positive (label 1) class."""
    n = float(len(data))
    correct = 0
    num_1s = 0
    correct_1 = 0
    false_negative = 0
    for batch in chunked_sorted(data, batch_size):
        batch_obj = Batch([x for x, y in batch], model.embeddings, to_cuda(gpu))
        gold = [y for x, y in batch]
        predicted = model.predict(batch_obj, debug)
        num_1s += predicted.count(1)
        correct += sum(1 for p, g in zip(predicted, gold) if p == g)
        correct_1 += sum(1 for p, g in zip(predicted, gold) if p == g and g == 1)
        false_negative += sum(1 for p, g in zip(predicted, gold) if p != g and g == 1)

    # guard against division by zero when there are no predicted or no gold positives
    precision = correct_1 / max(num_1s, 1)
    recall = correct_1 / max(false_negative + correct_1, 1)
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0

    print("num predicted 1s:", num_1s)
    print("num gold 1s:     ", sum(g == 1 for _, g in data))
    print("precision", precision)
    print("recall", recall)
    print("f1 score", f1)
    return correct / n
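# A minimal, self-contained sketch (not part of the original repo) of the same
# positive-class bookkeeping as `evaluate_accuracy` above, run on made-up label
# lists. The function name and the toy lists are hypothetical, for illustration only.
def _example_positive_class_metrics():
    predicted = [1, 0, 1, 1, 0, 0]  # toy model predictions
    gold = [1, 0, 0, 1, 1, 0]       # toy gold labels
    num_1s = predicted.count(1)     # predicted positives -> 3
    correct_1 = sum(1 for p, g in zip(predicted, gold) if p == g == 1)             # true positives -> 2
    false_negative = sum(1 for p, g in zip(predicted, gold) if p != g and g == 1)  # missed positives -> 1
    precision = correct_1 / max(num_1s, 1)                   # 2/3
    recall = correct_1 / max(false_negative + correct_1, 1)  # 2/3
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1    # (0.667, 0.667, 0.667), rounded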
def evaluate_accuracy(model, data, batch_size, gpu, debug=0):
    """Compute plain classification accuracy of `model` on `data` (no per-class metrics)."""
    n = float(len(data))
    correct = 0
    num_1s = 0
    for batch in chunked_sorted(data, batch_size):
        batch_obj = Batch([x for x, y in batch], model.embeddings, to_cuda(gpu))
        gold = [y for x, y in batch]
        predicted = model.predict(batch_obj, debug)
        num_1s += predicted.count(1)
        correct += sum(1 for p, g in zip(predicted, gold) if p == g)

    print("num predicted 1s:", num_1s)
    print("num gold 1s:     ", sum(g == 1 for _, g in data))
    return correct / n
def test_same_forward_for_diff_batches(self):
    """ Test that different batch sizes yield same `forward` results """
    # for each batch size, chunk data into batches, run model.forward,
    # then flatten results into a list (one NUM_CLASSES x 1 vec per doc).
    for model in self.models:
        forward_results = [
            [
                fwd
                for chunk in chunked_sorted(self.data, batch_size)
                for fwd in model.forward(
                    Batch([x for x, y in chunk], self.embeddings, to_cuda(GPU))
                ).data
            ]
            for batch_size in self.batch_sizes
        ]
        # transpose, so doc_forwards are all the diff batch sizes for a given doc
        for doc_forwards in zip(*forward_results):
            # make sure adjacent batch sizes predict the same probs
            for batch_size_a, batch_size_b in zip(doc_forwards, doc_forwards[1:]):
                for y in range(NUM_CLASSES):
                    self.assertAlmostEqual(batch_size_a[y], batch_size_b[y], places=4)
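# Illustrative sketch (not part of the test suite) of the `zip(*forward_results)` transpose
# used above: with two hypothetical batch sizes and three documents, each inner list holds
# one result per document, and transposing regroups all batch sizes' results per document.
def _example_transpose_forward_results():
    forward_results = [
        ["doc0@bs1", "doc1@bs1", "doc2@bs1"],  # toy results for batch size 1
        ["doc0@bs4", "doc1@bs4", "doc2@bs4"],  # toy results for batch size 4
    ]
    per_doc = list(zip(*forward_results))
    # per_doc == [("doc0@bs1", "doc0@bs4"), ("doc1@bs1", "doc1@bs4"), ("doc2@bs1", "doc2@bs4")]
    return per_doc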
def train(train_data,
          dev_data,
          model,
          num_classes,
          model_save_dir,
          num_iterations,
          model_file_prefix,
          learning_rate,
          batch_size,
          run_scheduler=False,
          gpu=False,
          clip=None,
          max_len=-1,
          debug=0,
          dropout=0,
          word_dropout=0,
          patience=1000):
    """ Train a model on all the given docs """
    optimizer = Adam(model.parameters(), lr=learning_rate)
    loss_function = NLLLoss(None, False)  # weight=None, size_average=False: sum loss over the batch
    enable_gradient_clipping(model, clip)

    if dropout:
        dropout = torch.nn.Dropout(dropout)
    else:
        dropout = None

    debug_print = int(100 / batch_size) + 1

    writer = None
    if model_save_dir is not None:
        writer = SummaryWriter(os.path.join(model_save_dir, "logs"))

    if run_scheduler:
        scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=10, verbose=True)

    best_dev_loss = 100000000
    best_dev_loss_index = -1
    best_dev_acc = -1
    start_time = monotonic()

    for it in range(num_iterations):
        np.random.shuffle(train_data)

        loss = 0.0
        i = 0
        for batch in shuffled_chunked_sorted(train_data, batch_size):
            batch_obj = Batch([x[0] for x in batch], model.embeddings, to_cuda(gpu),
                              word_dropout, max_len)
            gold = [x[1] for x in batch]
            loss += torch.sum(
                train_batch(model, batch_obj, num_classes, gold, optimizer,
                            loss_function, gpu, debug, dropout))

            if i % debug_print == (debug_print - 1):
                print(".", end="", flush=True)
            i += 1

        if writer is not None:
            for name, param in model.named_parameters():
                writer.add_scalar("parameter_mean/" + name, param.data.mean(), it)
                writer.add_scalar("parameter_std/" + name, param.data.std(), it)
                if param.grad is not None:
                    writer.add_scalar("gradient_mean/" + name, param.grad.data.mean(), it)
                    writer.add_scalar("gradient_std/" + name, param.grad.data.std(), it)

            writer.add_scalar("loss/loss_train", loss, it)

        dev_loss = 0.0
        i = 0
        for batch in chunked_sorted(dev_data, batch_size):
            batch_obj = Batch([x[0] for x in batch], model.embeddings, to_cuda(gpu))
            gold = [x[1] for x in batch]
            dev_loss += torch.sum(
                compute_loss(model, batch_obj, num_classes, gold, loss_function,
                             gpu, debug).data)

            if i % debug_print == (debug_print - 1):
                print(".", end="", flush=True)
            i += 1

        if writer is not None:
            writer.add_scalar("loss/loss_dev", dev_loss, it)
        print("\n")

        finish_iter_time = monotonic()
        train_acc = evaluate_accuracy(model, train_data[:1000], batch_size, gpu)
        dev_acc = evaluate_accuracy(model, dev_data, batch_size, gpu)

        print(
            "iteration: {:>7,} train time: {:>9,.3f}m, eval time: {:>9,.3f}m "
            "train loss: {:>12,.3f} train_acc: {:>8,.3f}% "
            "dev loss: {:>12,.3f} dev_acc: {:>8,.3f}%".format(
                it,
                (finish_iter_time - start_time) / 60,
                (monotonic() - finish_iter_time) / 60,
                loss / len(train_data),
                train_acc * 100,
                dev_loss / len(dev_data),
                dev_acc * 100))

        if dev_loss < best_dev_loss:
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                print("New best acc!")
            print("New best dev!")
            best_dev_loss = dev_loss
            best_dev_loss_index = 0
            if model_save_dir is not None:
                model_save_file = os.path.join(model_save_dir,
                                               "{}_{}.pth".format(model_file_prefix, it))
                print("saving model to", model_save_file)
                torch.save(model.state_dict(), model_save_file)
        else:
            best_dev_loss_index += 1
            if best_dev_loss_index == patience:
                print("Reached", patience, "iterations without improving dev loss. Breaking")
                break

        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            print("New best acc!")
            if model_save_dir is not None:
                model_save_file = os.path.join(model_save_dir,
                                               "{}_{}.pth".format(model_file_prefix, it))
                print("saving model to", model_save_file)
                torch.save(model.state_dict(), model_save_file)

        if run_scheduler:
            scheduler.step(dev_loss)

    return model
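# Standalone sketch (hypothetical helper, not from the repo) of the patience-based early
# stopping used in `train` above: the no-improvement counter resets whenever dev loss
# improves, and training stops once `patience` consecutive iterations fail to improve it.
def _example_patience_early_stopping(dev_losses, patience=3):
    best_dev_loss = float("inf")
    best_dev_loss_index = -1
    for it, dev_loss in enumerate(dev_losses):
        if dev_loss < best_dev_loss:
            best_dev_loss = dev_loss
            best_dev_loss_index = 0   # reset the no-improvement counter
        else:
            best_dev_loss_index += 1
            if best_dev_loss_index == patience:
                return it             # stop at this iteration
    return len(dev_losses) - 1

# e.g. _example_patience_early_stopping([5.0, 4.0, 4.2, 4.1, 4.3], patience=3) -> 4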