def train(model: nn.Module,
          scheduler,
          optimizer,
          images,
          datasets,
          n_epoches: int,
          batch_size: int,
          eval_valid_freq: int = 1,
          eval_test_freq: int = 3,
          device=None):

    log_dir = inc_folder_no(ROOT_DIR / "runs" / "s02_exp" / "run_")
    writer = SummaryWriter(log_dir=log_dir)
    global_step = 0
    model.train()

    valid_res = {
        'loss': float('nan'),
        'nt_accuracy': float('nan'),
        'nbtn_accuracy': float('nan'),
        'nrow_accuracy': float('nan')
    }
    test_res = {
        'loss': float('nan'),
        'nt_accuracy': float('nan'),
        'nbtn_accuracy': float('nan'),
        'nrow_accuracy': float('nan')
    }
    best_performance = 0.0

    train_examples, valid_examples, test_examples = datasets

    try:
        total_batches = math.ceil(
            sum(len(x) for x in train_examples) / batch_size) * n_epoches
        with tqdm(range(n_epoches), desc='epoch') as epoches, \
                tqdm(total=total_batches, desc='training') as pbar:
            for i in epoches:
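                # NOTE: stepping the scheduler at the start of each epoch
                # follows the pre-1.1 PyTorch convention; PyTorch >= 1.1
                # expects scheduler.step() to run after optimizer.step()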
                scheduler.step()
                for bimgs, bx, bnx, bxlen, bnt, bnbtn, bnrow in iter_batch(
                        batch_size,
                        images,
                        train_examples,
                        shuffle=True,
                        device=device):
                    pbar.update()
                    global_step += 1

                    model.zero_grad()
                    bnt_pred, bnts_pred, bnbtn_pred, bnrow_pred = model(
                        bimgs, bx, bxlen)

                    bnt_acc = (torch.argmax(
                        bnt_pred, dim=1) == bnt).float().mean().item()
                    bnbtn_acc = (torch.argmax(
                        bnbtn_pred, dim=1) == bnbtn).float().mean().item()
                    bnrow_acc = (torch.argmax(
                        bnrow_pred, dim=1) == bnrow).float().mean().item()

                    loss, sublosses = model.loss_func(bnts_pred, bnx, bnt_pred,
                                                      bnt, bnbtn_pred, bnbtn,
                                                      bnrow_pred, bnrow)
                    loss.backward()
                    optimizer.step()

                    writer.add_scalar('train/total_loss', loss.item(),
                                      global_step)
                    for j, subloss in enumerate(sublosses):
                        writer.add_scalar(f'train/loss_{j}', subloss.item(),
                                          global_step)
                    writer.add_scalar('train/nt_accuracy', bnt_acc,
                                      global_step)
                    writer.add_scalar('train/nbtn_accuracy', bnbtn_acc,
                                      global_step)
                    writer.add_scalar('train/nrow_accuracy', bnrow_acc,
                                      global_step)

                    pbar.set_postfix(loss=f"{loss:.5f}",
                                     nt_accuracy=f"{bnt_acc:.5f}",
                                     nbtn_accuracy=f"{bnbtn_acc:.5f}",
                                     nrow_accuracy=f"{bnrow_acc:.5f}",
                                     **{
                                         f"loss_{j}": f"{subloss.item():.5f}"
                                         for j, subloss in enumerate(sublosses)
                                     })

                if (i + 1) % eval_valid_freq == 0:
                    valid_res = eval(model, images, valid_examples, device)
                    for k, v in valid_res.items():
                        writer.add_scalar(f'valid/{k}', v, global_step)

                    if valid_res['nt_accuracy'] > best_performance:
                        best_performance = valid_res['nt_accuracy']
                        torch.save(model, log_dir + f"/model.{i}.bin")

                if (i + 1) % eval_test_freq == 0:
                    test_res = eval(model, images, test_examples, device)
                    for k, v in test_res.items():
                        writer.add_scalar(f'test/{k}', v, global_step)

                epoches.set_postfix(
                    v_l=f'{valid_res["loss"]:.5f}',
                    v_nt_a=f'{valid_res["nt_accuracy"]:.5f}',
                    v_nbtn_a=f'{valid_res["nbtn_accuracy"]:.5f}',
                    v_nrow_a=f'{valid_res["nrow_accuracy"]:.5f}',
                    t_l=f'{test_res["loss"]:.5f}',
                    t_nt_a=f'{test_res["nt_accuracy"]:.5f}',
                    t_nbtn_a=f'{test_res["nbtn_accuracy"]:.5f}',
                    t_nrow_a=f'{test_res["nrow_accuracy"]:.5f}',
                )
    finally:
        writer.close()
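These train functions lean on a handful of helpers that do not appear on this page. One of them, inc_folder_no, evidently allocates the next free auto-numbered run directory and returns it as a string (the result is later concatenated with +). A minimal sketch, assuming exactly that behavior:

from pathlib import Path

def inc_folder_no(prefix) -> str:
    """Return the first non-existing '<prefix><n>' directory, creating it."""
    prefix = Path(prefix)
    n = 0
    while (prefix.parent / f"{prefix.name}{n}").exists():
        n += 1
    folder = prefix.parent / f"{prefix.name}{n}"
    folder.mkdir(parents=True)
    return str(folder)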
Example #2
def train(model: nn.Module,
          scheduler,
          optimizer,
          images,
          datasets,
          n_epoches: int,
          batch_size: int,
          eval_valid_freq: int = 1,
          eval_test_freq: int = 3,
          device=None):

    log_dir = inc_folder_no(ROOT_DIR / "runs" / "s04_exp" / "run_")
    writer = SummaryWriter(log_dir=log_dir)
    global_step = 0
    model.train()

    valid_res = {'loss': float('nan'), 'accuracy': float('nan')}
    test_res = {'loss': float('nan'), 'accuracy': float('nan')}
    best_performance = 0.0

    train_examples, valid_examples, test_examples = datasets

    try:
        total_batches = math.ceil(
            sum(len(x) for x in train_examples) / batch_size) * n_epoches
        with tqdm(range(n_epoches), desc='epoch') as epoches, \
                tqdm(total=total_batches, desc='training') as pbar:
            for i in epoches:
                scheduler.step()
                for bimgs, bx, bnts, bxlen, bnt, bnbtn, bnrow in iter_batch(
                        batch_size,
                        images,
                        train_examples,
                        shuffle=True,
                        device=device):
                    pbar.update()
                    global_step += 1

                    model.zero_grad()
                    bnts_pred = model(bimgs, bx, bxlen)

                    loss, mask, btokens = padded_aware_nllloss(bnts_pred, bnts)
                    accuracy = ((torch.argmax(bnts_pred, dim=1)
                                 == bnts.view(-1)).float() *
                                mask).sum().item() / btokens

                    loss.backward()
                    optimizer.step()

                    writer.add_scalar('train/loss', loss.item(), global_step)
                    writer.add_scalar('train/accuracy', accuracy, global_step)

                    pbar.set_postfix(loss=f"{loss:.5f}",
                                     accuracy=f"{accuracy:.5f}")

                if (i + 1) % eval_valid_freq == 0:
                    valid_res = eval(model, images, valid_examples, device)
                    for k, v in valid_res.items():
                        writer.add_scalar(f'valid/{k}', v, global_step)

                    if valid_res['accuracy'] > best_performance:
                        best_performance = valid_res['accuracy']
                        torch.save(model, log_dir + f"/model.{i}.bin")

                if (i + 1) % eval_test_freq == 0:
                    test_res = eval(model, images, test_examples, device)
                    for k, v in test_res.items():
                        writer.add_scalar(f'test/{k}', v, global_step)

                epoches.set_postfix(
                    v_l=f'{valid_res["loss"]:.5f}',
                    v_a=f'{valid_res["accuracy"]:.5f}',
                    t_l=f'{test_res["loss"]:.5f}',
                    t_a=f'{test_res["accuracy"]:.5f}',
                )
    finally:
        writer.close()
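Example #2 calls padded_aware_nllloss, which is also not defined on this page. From the call site it returns a scalar loss, a flat 0/1 float mask over the flattened targets, and the count of real (non-padded) tokens. A plausible reconstruction, assuming the model emits log-probabilities and that the padding id is 0:

import torch.nn.functional as F

def padded_aware_nllloss(logprobs, targets, pad_idx=0):
    """NLL loss over flattened (batch * time, n_classes) log-probs,
    ignoring padded positions. Returns (loss, mask, n_real_tokens)."""
    targets = targets.view(-1)
    mask = (targets != pad_idx).float()
    per_token = F.nll_loss(logprobs, targets, reduction='none')
    n_tokens = mask.sum().item()
    loss = (per_token * mask).sum() / n_tokens
    return loss, mask, n_tokens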
Example #3
def train(model: nn.Module,
          loss_func,
          scheduler,
          optimizer,
          images,
          datasets,
          n_epoches: int,
          batch_size: int,
          eval_valid_freq: int = 1,
          eval_test_freq: int = 3,
          device=None):
    histories = {'train': [], 'valid': [], 'test': []}

    log_dir = inc_folder_no(ROOT_DIR / "runs" / "s02_exp_")
    writer = SummaryWriter(log_dir=log_dir)
    global_step = 0
    model.train()

    valid_res = {'loss': float('nan'), 'accuracy': float('nan')}
    test_res = {'loss': float('nan'), 'accuracy': float('nan')}
    best_performance = 0.0

    train_examples, valid_examples, test_examples = datasets

    try:
        total_batches = math.ceil(
            sum(len(x) for x in train_examples) / batch_size) * n_epoches
        with tqdm(range(n_epoches), desc='epoch') as epoches, \
                tqdm(total=total_batches, desc='training') as pbar:
            for i in epoches:
                scheduler.step()
                for bimgs, bx, by, bxlen in iter_batch(batch_size,
                                                       images,
                                                       train_examples,
                                                       shuffle=True,
                                                       device=device):
                    pbar.update()
                    global_step += 1

                    model.zero_grad()
                    bypred = model(bimgs, bx, bxlen)
                    accuracy = (torch.argmax(
                        bypred, dim=1) == by).float().mean().item()
                    loss = loss_func(bypred, by)
                    loss.backward()
                    optimizer.step()

                    loss_val = loss.item()  # plain float; frees the graph
                    histories['train'].append((loss_val, accuracy))
                    writer.add_scalar('train/loss', loss_val, global_step)
                    writer.add_scalar('train/accuracy', accuracy, global_step)
                    pbar.set_postfix(train_loss=f"{loss_val:.5f}",
                                     train_accuracy=f"{accuracy:.5f}")

                if (i + 1) % eval_valid_freq == 0:
                    valid_res = eval(model, loss_func, images, valid_examples,
                                     device)
                    writer.add_scalar('valid/loss', valid_res['loss'],
                                      global_step)
                    writer.add_scalar('valid/accuracy', valid_res['accuracy'],
                                      global_step)
                    histories['valid'].append(valid_res)

                    if valid_res['accuracy'] > best_performance:
                        best_performance = valid_res['accuracy']
                        torch.save(model, log_dir + f"/model.{i}.bin")

                if (i + 1) % eval_test_freq == 0:
                    test_res = eval(model, loss_func, images, test_examples,
                                    device)
                    writer.add_scalar('test/loss', test_res['loss'],
                                      global_step)
                    writer.add_scalar('test/accuracy', test_res['accuracy'],
                                      global_step)
                    histories['test'].append(test_res)

                epoches.set_postfix(
                    valid_l=f'{valid_res["loss"]:.5f}',
                    valid_a=f'{valid_res["accuracy"]:.5f}',
                    test_l=f'{test_res["loss"]:.5f}',
                    test_a=f'{test_res["accuracy"]:.5f}',
                )
    finally:
        writer.close()
    return histories
Example #4
def train(model: nn.Module,
          loss_func,
          scheduler,
          optimizer,
          images,
          datasets,
          n_epoches: int,
          batch_size: int,
          clip_grad_val: float,
          eval_batch_size: int = 500,
          eval_valid_freq: int = 1,
          eval_test_freq: int = 3,
          device=None,
          exp_dir: str = "exp"):
    log_dir = inc_folder_no(ROOT_DIR / "runs" / exp_dir / "run_")
    print("log_dir:", log_dir)
    writer = SummaryWriter(log_dir=log_dir)
    global_step = 0
    model.train()

    valid_res = {
        'loss': AverageMeter(),
        'top_1_acc': AverageMeter(),
        'top_3_acc': AverageMeter(),
        'top_5_acc': AverageMeter()
    }
    test_res = {
        'loss': AverageMeter(),
        'top_1_acc': AverageMeter(),
        'top_3_acc': AverageMeter(),
        'top_5_acc': AverageMeter()
    }
    best_performance = 0.0

    train_examples, valid_examples, test_examples = datasets

    try:
        with tqdm(range(n_epoches), desc='epoch---') as epoches, tqdm(
                total=math.ceil(len(train_examples) / batch_size) * n_epoches,
                desc='training-') as pbar:
            for i in epoches:
                scheduler.step()
                batch_loss = AverageMeter()
                batch_top_1_acc = AverageMeter()
                batch_top_3_acc = AverageMeter()

                for bimgs, bx, bnx, bxlen, sorted_idx in iter_batch(
                        batch_size,
                        images,
                        train_examples,
                        shuffle=True,
                        device=device):
                    pbar.update()
                    global_step += 1

                    model.zero_grad()

                    bnx_pred = model(bimgs, bx, bxlen)
                    # keep only the non-padded timesteps; using .data works
                    # across PyTorch versions, whereas two-element unpacking
                    # of a PackedSequence fails on recent releases
                    bnx_pred = pack_padded_sequence(bnx_pred, bxlen,
                                                    batch_first=True).data
                    bnx = pack_padded_sequence(bnx, bxlen,
                                               batch_first=True).data

                    loss = loss_func(bnx_pred, bnx)
                    loss.backward()
                    # clip to guard against exploding gradients
                    clip_grad_value_(model.parameters(), clip_grad_val)
                    optimizer.step()

                    loss = float(loss)
                    top_k_acc = seq_accuracy(bnx_pred, bnx, [1, 3])

                    writer.add_scalar('train/loss', loss, global_step)
                    writer.add_scalar('train/top_1_acc', top_k_acc[0],
                                      global_step)
                    writer.add_scalar('train/top_3_acc', top_k_acc[1],
                                      global_step)

                    n_tokens = bnx.shape[0]
                    batch_loss.update(loss, n_tokens)
                    batch_top_1_acc.update(top_k_acc[0], n_tokens)
                    batch_top_3_acc.update(top_k_acc[1], n_tokens)

                    pbar.set_postfix(loss=f"{loss:.5f}",
                                     top_1_acc=f"{top_k_acc[0]:.5f}",
                                     top_3_acc=f"{top_k_acc[1]:.5f}")

                if (i + 1) % eval_valid_freq == 0:
                    valid_res = evaluate(model,
                                         loss_func,
                                         images,
                                         valid_examples,
                                         device,
                                         batch_size=eval_batch_size)
                    for k, v in valid_res.items():
                        writer.add_scalar(f'valid/{k}', v.avg, i)

                    if valid_res['top_1_acc'].avg > best_performance:
                        best_performance = valid_res['top_1_acc'].avg
                        torch.save(
                            {
                                "epoch": i,
                                "model": model.state_dict(),
                                "optimizer": optimizer.state_dict(),
                            }, log_dir + "/model.bin")

                if (i + 1) % eval_test_freq == 0:
                    test_res = evaluate(model,
                                        loss_func,
                                        images,
                                        test_examples,
                                        device,
                                        batch_size=eval_batch_size)
                    for k, v in test_res.items():
                        writer.add_scalar(f'test/{k}', v.avg, i)

                epoches.set_postfix(
                    b_l=f'{batch_loss.avg:.5f}',
                    b_a1=f'{batch_top_1_acc.avg:.5f}',
                    b_a3=f'{batch_top_3_acc.avg:.5f}',
                    v_l=f'{valid_res["loss"].avg:.5f}',
                    v_a1=f'{valid_res["top_1_acc"].avg:.5f}',
                    v_a3=f'{valid_res["top_3_acc"].avg:.5f}',
                    v_a5=f'{valid_res["top_5_acc"].avg:.5f}',
                    t_l=f'{test_res["loss"].avg:.5f}',
                    t_a1=f'{test_res["top_1_acc"].avg:.5f}',
                    t_a3=f'{test_res["top_3_acc"].avg:.5f}',
                    t_a5=f'{test_res["top_5_acc"].avg:.5f}',
                )
    finally:
        writer.close()
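Example #4 additionally relies on AverageMeter and seq_accuracy, neither of which is shown. The first is the usual running-average bookkeeping class; the second is reconstructed here as top-k accuracy over the packed token predictions. Both are sketches inferred from the call sites, not the original code:

import torch

class AverageMeter:
    """Track a running sum, sample count, and average."""

    def __init__(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def seq_accuracy(pred, target, ks):
    """Top-k accuracies for (n_tokens, n_classes) predictions."""
    _, top = pred.topk(max(ks), dim=1)    # (n_tokens, max_k) class indices
    correct = top.eq(target.view(-1, 1))  # broadcast against true labels
    return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]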
Example #5
def train(model: nn.Module,
          loss_func1,
          loss_func2,
          scheduler,
          optimizer,
          datasets,
          n_epoches: int,
          batch_size: int,
          device=None):
    task1_histories = {'train': [], 'valid': [], 'test': []}
    task2_histories = {'train': [], 'valid': [], 'test': []}

    train_X, train_y1, train_y2 = datasets['train']
    valid_X, valid_y1, valid_y2 = datasets['valid']
    test_X, test_y1, test_y2 = datasets['test']

    writer = SummaryWriter(log_dir=inc_folder_no(ROOT_DIR / "runs" /
                                                 "s01_exp_"))
    global_step = 0

    try:
        with tqdm(range(n_epoches), desc='epoch') as epoches, tqdm(
                total=math.ceil(len(train_X) / batch_size) * n_epoches,
                desc='training') as pbar:
            for i in epoches:
                scheduler.step()
                for bx, by1, by2 in iter_batch(batch_size,
                                               train_X,
                                               train_y1,
                                               train_y2,
                                               shuffle=True,
                                               device=device):
                    pbar.update()

                    global_step += 1

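                    # one backward/step per task: update on task 1 first,
                    # then zero the grads and update on task 2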
                    model.zero_grad()
                    by1_pred = model.forward_task1(bx)
                    loss1 = loss_func1(by1_pred, by1)
                    loss1.backward()
                    optimizer.step()

                    model.zero_grad()
                    by2_pred = model.forward_task2(bx)
                    loss2 = loss_func2(by2_pred, by2)
                    loss2.backward()
                    optimizer.step()

                    # log plain floats so the autograd graphs are freed
                    loss1, loss2 = loss1.item(), loss2.item()
                    task1_histories['train'].append(loss1)
                    task2_histories['train'].append(loss2)
                    writer.add_scalar('train/loss1', loss1, global_step)
                    writer.add_scalar('train/loss2', loss2, global_step)

                    pbar.set_postfix(train_loss1=f"{loss1:.5f}",
                                     train_loss2=f'{loss2:.5f}')

                valid_res = eval(model, loss_func1, loss_func2, valid_X,
                                 valid_y1, valid_y2, device)

                writer.add_scalar('valid/loss1', valid_res['task1_loss'],
                                  global_step)
                writer.add_scalar('valid/loss2', valid_res['task2_loss'],
                                  global_step)
                writer.add_scalar('valid/accuracies1',
                                  valid_res['task1_accuracies'], global_step)
                writer.add_scalar('valid/accuracies2',
                                  valid_res['task2_accuracies'], global_step)

                test_res = eval(model, loss_func1, loss_func2, test_X, test_y1,
                                test_y2, device)

                writer.add_scalar('test/loss1', test_res['task1_loss'],
                                  global_step)
                writer.add_scalar('test/loss2', test_res['task2_loss'],
                                  global_step)
                writer.add_scalar('test/accuracies1',
                                  test_res['task1_accuracies'], global_step)
                writer.add_scalar('test/accuracies2',
                                  test_res['task2_accuracies'], global_step)

                task1_histories['valid'].append(valid_res)
                task2_histories['valid'].append(valid_res)
                task1_histories['test'].append(test_res)
                task2_histories['test'].append(test_res)

                epoches.set_postfix(
                    valid_l1=f'{valid_res["task1_loss"]:.5f}',
                    valid_l2=f'{valid_res["task2_loss"]:.5f}',
                    valid_a1=f'{valid_res["task1_accuracies"]:.5f}',
                    valid_a2=f'{valid_res["task2_accuracies"]:.5f}',
                    test_l1=f'{test_res["task1_loss"]:.5f}',
                    test_l2=f'{test_res["task2_loss"]:.5f}',
                    test_a1=f'{test_res["task1_accuracies"]:.5f}',
                    test_a2=f'{test_res["task2_accuracies"]:.5f}',
                )
    finally:
        writer.close()
    return task1_histories, task2_histories
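The iter_batch consumed by Example #5 takes plain tensors rather than the (images, examples) pair used by the earlier examples, so it is simple to sketch. This version is a guess based only on the call site (positional tensors, shuffle, device):

import torch

def iter_batch(batch_size, *tensors, shuffle=False, device=None):
    """Yield aligned mini-batches from equal-length tensors."""
    n = len(tensors[0])
    order = torch.randperm(n) if shuffle else torch.arange(n)
    for start in range(0, n, batch_size):
        sel = order[start:start + batch_size]
        batch = tuple(t[sel] for t in tensors)
        if device is not None:
            batch = tuple(t.to(device) for t in batch)
        yield batch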