Example #1
def train(train_loader, valid_loader, model, w_optim, lr, epoch, sample,
          net_crit, train_meter):

    cur_step = epoch * len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)

    model.train()
    train_meter.iter_tic()
    scaler = torch.cuda.amp.GradScaler() if cfg.SEARCH.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None

    for step, (trn_X, trn_y) in enumerate(train_loader):
        trn_X, trn_y = trn_X.cuda(), trn_y.cuda()
        # phase 1. child network step (w)
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass under autocast
                preds = model(trn_X, sample)
                # Compute the loss under autocast
                loss = net_crit(preds, trn_y)
            # Perform the backward pass and optimizer step with the scaler
            w_optim.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(w_optim)
            # Update the scale for the next iteration
            scaler.update()
        else:
            preds = model(trn_X, sample)
            # Compute the loss
            loss = net_crit(preds, trn_y)
            # Perform the backward pass
            w_optim.zero_grad()
            loss.backward()
            # gradient clipping
            nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
            # Update the parameters
            w_optim.step()

        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, trn_y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = trn_X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(epoch, step)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1

    # Log epoch stats
    train_meter.log_epoch_stats(epoch)
    train_meter.reset()
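
A note on Example #1: gradient clipping (cfg.OPTIM.GRAD_CLIP) is applied only in the non-AMP branch. If clipping is also wanted under AMP, the gradients have to be unscaled first. The snippet below is a minimal sketch of that pattern using the same names (loss, model, w_optim, scaler, cfg) as the example; it follows the standard torch.cuda.amp recipe rather than the project's own code.

# Sketch: gradient clipping together with a GradScaler (assumes the names from Example #1).
w_optim.zero_grad()
scaler.scale(loss).backward()
# Unscale the gradients in place so they can be clipped at their true magnitude
scaler.unscale_(w_optim)
nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
# step() skips the parameter update automatically if any gradient is inf/NaN
scaler.step(w_optim)
scaler.update()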
Example #2
def train_epoch(train_loader, model, loss_fun, optimizer, train_meter,
                cur_epoch):
    """Performs one epoch of training."""
    # Shuffle the data
    loader.shuffle(train_loader, cur_epoch)
    # Update the learning rate
    lr = optim.get_epoch_lr(cur_epoch)
    optim.set_lr(optimizer, lr)
    # Enable training mode
    model.train()
    train_meter.iter_tic()
    # Create a GradScaler for AMP; autocast is only available in newer PyTorch versions
    scaler = torch.cuda.amp.GradScaler() if cfg.TRAIN.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None
    for cur_iter, (inputs, labels) in enumerate(train_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # using AMP
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass under autocast
                preds = model(inputs)
                # Compute the loss under autocast
                loss = loss_fun(preds, labels)
            # Perform the backward pass and optimizer step with the scaler
            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            # Update the scale for the next iteration
            scaler.update()
        else:
            preds = model(inputs)
            # Compute the loss
            loss = loss_fun(preds, labels)
            # Perform the backward pass
            optimizer.zero_grad()
            loss.backward()
            # Update the parameters
            optimizer.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5])
        # Combine the stats across the GPUs (no reduction if 1 GPU used)
        loss, top1_err, top5_err = dist.scaled_all_reduce(
            [loss, top1_err, top5_err])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = inputs.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
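
Example #2 follows a pycls-style per-epoch interface: the caller is expected to drive it once per epoch, with loader.shuffle and the LR schedule handled inside. A hedged sketch of such a driver is shown below; build_train_loader, build_model, build_optimizer and save_checkpoint are hypothetical stand-ins for the project's real constructors, and the TrainMeter signature is assumed.

# Hypothetical driver loop around train_epoch (Example #2).
train_loader = build_train_loader(cfg)               # placeholder constructor
model = build_model(cfg).cuda()                      # placeholder constructor
loss_fun = torch.nn.CrossEntropyLoss().cuda()
optimizer = build_optimizer(cfg, model)              # placeholder constructor
train_meter = meters.TrainMeter(len(train_loader))   # assumed signature
for cur_epoch in range(cfg.OPTIM.MAX_EPOCH):
    train_epoch(train_loader, model, loss_fun, optimizer, train_meter, cur_epoch)
    save_checkpoint(model, optimizer, cur_epoch)      # placeholder saver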
Example #3
def train_epoch(train_loader, model, optimizer, criterion, cur_epoch,
                train_meter):

    # TODO: DALI backend support
    # if config.data_loader_type == 'DALI':
    #     len_train_loader = get_train_loader_len(config.dataset.lower(), config.batch_size, is_train=True)
    # else:
    #     len_train_loader = len(train_loader)
    model.train()
    train_meter.iter_tic()
    cur_step = cur_epoch * len(train_loader)
    cur_lr = optimizer.param_groups[0]['lr']
    writer.add_scalar('train/lr', cur_lr, cur_step)

    # TODO: DALI backend support
    # if config.data_loader_type == 'DALI':
    #     for cur_iter, data in enumerate(train_loader):
    #         X = data[0]["data"].cuda(non_blocking=True)
    #         y = data[0]["label"].squeeze().long().cuda(non_blocking=True)
    #         if config.cutout_length > 0:
    #             X = cutout_batch(X, config.cutout_length)
    #         train_iter(X, y)
    #         cur_step += 1
    #     train_loader.reset()
    for cur_iter, (X, y) in enumerate(train_loader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        optimizer.zero_grad()
        logits, aux_logits = model(X)
        loss = criterion(logits, y)
        if cfg.TRAIN.AUX_WEIGHT > 0.:
            loss += cfg.TRAIN.AUX_WEIGHT * criterion(aux_logits, y)
        loss.backward()

        # gradient clipping
        nn.utils.clip_grad_norm_(model.parameters(), cfg.OPTIM.GRAD_CLIP)
        optimizer.step()

        top1_err, top5_err = meters.topk_errors(logits, y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, cur_lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
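
Every example converts logits to error rates via meters.topk_errors, whose implementation is not shown here. The helper below is one plausible implementation of that interface (per-batch top-k error in percent, returned as 0-dim tensors so .item() works as in the examples); it is an assumption, not the project's code.

def topk_errors(preds, labels, ks):
    """Plausible stand-in for meters.topk_errors: top-k error (%) per mini-batch."""
    # Indices of the max(ks) largest logits per sample, shape (batch, max_k)
    _, top_idx = preds.topk(max(ks), dim=1, largest=True, sorted=True)
    # Compare against the ground-truth labels broadcast along the k dimension
    correct = top_idx.eq(labels.view(-1, 1))
    errors = []
    for k in ks:
        num_correct = correct[:, :k].any(dim=1).float().sum()
        errors.append((1.0 - num_correct / labels.size(0)) * 100.0)
    return errors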
Example #4
@torch.no_grad()
def test_epoch(
    test_loader,
    model,
    test_meter,
    cur_epoch,
    sample,
    tensorboard_writer=None,
):
    """Evaluates the model on the test set."""
    # Enable eval mode
    model.eval()
    test_meter.iter_tic()

    for cur_iter, (inputs, labels) in enumerate(test_loader):
        # Transfer the data to the current GPU device
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        # using AMP
        if cfg.SEARCH.AMP and hasattr(torch.cuda.amp, 'autocast'):
            with torch.cuda.amp.autocast():
                # Compute the predictions
                preds = model(inputs, sample)
        else:
            # Compute the predictions
            preds = model(inputs, sample)
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, labels, [1, 5])
        # Combine the errors across the GPUs  (no reduction if 1 GPU used)
        # top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        top1_err, top5_err = top1_err.item(), top5_err.item()
        test_meter.iter_toc()
        # Update and log stats
        test_meter.update_stats(top1_err, top5_err,
                                inputs.size(0) * cfg.NUM_GPUS)
        test_meter.log_iter_stats(cur_epoch, cur_iter)
        test_meter.iter_tic()
    top1_err = test_meter.mb_top1_err.get_win_median()
    if tensorboard_writer is not None:
        tensorboard_writer.add_scalar('val/top1_error',
                                      test_meter.mb_top1_err.get_win_median(),
                                      cur_epoch)
        tensorboard_writer.add_scalar('val/top5_error',
                                      test_meter.mb_top5_err.get_win_median(),
                                      cur_epoch)
    # Log epoch stats
    test_meter.log_epoch_stats(cur_epoch)
    test_meter.reset()
    return top1_err
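
test_epoch returns the windowed-median top-1 error, so the caller can track the best-performing epoch. Below is a hedged sketch of that usage, reusing the train function from Example #1; save_checkpoint is a hypothetical helper and the learning rate is kept fixed for brevity.

# Hypothetical search loop combining Example #1 and Example #4.
best_err = float('inf')
for cur_epoch in range(cfg.OPTIM.MAX_EPOCH):
    train(train_loader, valid_loader, model, w_optim, lr, cur_epoch, sample,
          net_crit, train_meter)
    top1_err = test_epoch(test_loader, model, test_meter, cur_epoch, sample,
                          tensorboard_writer=writer)
    if top1_err < best_err:
        best_err = top1_err
        save_checkpoint(model, cur_epoch)  # placeholder for the real saver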
Example #5
@torch.no_grad()
def valid_epoch(valid_loader, model, criterion, cur_epoch, cur_step,
                valid_meter):
    model.eval()
    valid_meter.iter_tic()
    for cur_iter, (X, y) in enumerate(valid_loader):
        X, y = X.to(device, non_blocking=True), y.to(device, non_blocking=True)
        logits, _ = model(X)
        loss = criterion(logits, y)
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(logits, y, [1, 5])
        # Combine the errors across the GPUs  (no reduction if 1 GPU used)
        # NOTE: this reduction was previously disabled.
        top1_err, top5_err = dist.scaled_all_reduce([top1_err, top5_err])
        # Copy the errors from GPU to CPU (sync point)
        top1_err, top5_err = top1_err.item(), top5_err.item()
        valid_meter.iter_toc()
        # Update and log stats
        valid_meter.update_stats(top1_err, top5_err, X.size(0) * cfg.NUM_GPUS)
        valid_meter.log_iter_stats(cur_epoch, cur_iter)
        valid_meter.iter_tic()
    top1_err = valid_meter.mb_top1_err.get_win_median()
    valid_meter.log_epoch_stats(cur_epoch)
    valid_meter.reset()
    return top1_err
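
Example #5 re-enables dist.scaled_all_reduce, which in pycls-style code averages a list of tensors across processes and is effectively a no-op on a single GPU. The helper below illustrates the semantics assumed by the examples using torch.distributed directly; it is an approximation, not the project's implementation.

import torch.distributed as torch_dist

def scaled_all_reduce(tensors):
    """Approximate semantics of dist.scaled_all_reduce: sum each tensor
    across processes, then divide by the world size (no-op for one process)."""
    world_size = torch_dist.get_world_size() if torch_dist.is_initialized() else 1
    if world_size == 1:
        return tensors
    for t in tensors:
        torch_dist.all_reduce(t, op=torch_dist.ReduceOp.SUM)
    return [t / world_size for t in tensors]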
Example #6
def train_epoch(train_loader, valid_loader, model, architect, loss_fun,
                w_optimizer, alpha_optimizer, lr, train_meter, cur_epoch):
    model.train()
    train_meter.iter_tic()
    cur_step = cur_epoch * len(train_loader)
    writer.add_scalar('train/lr', lr, cur_step)
    # Create a GradScaler for AMP; autocast is only available in newer PyTorch versions
    scaler = torch.cuda.amp.GradScaler() if cfg.SEARCH.AMP and hasattr(
        torch.cuda.amp, 'autocast') else None
    valid_loader_iter = iter(valid_loader)
    for cur_iter, (trn_X, trn_y) in enumerate(train_loader):
        try:
            (val_X, val_y) = next(valid_loader_iter)
        except StopIteration:
            valid_loader_iter = iter(valid_loader)
            (val_X, val_y) = next(valid_loader_iter)
        # Transfer the data to the current GPU device
        trn_X, trn_y = trn_X.cuda(), trn_y.cuda(non_blocking=True)
        val_X, val_y = val_X.cuda(), val_y.cuda(non_blocking=True)
        # phase 2. architect step (alpha)
        alpha_optimizer.zero_grad()
        architect.unrolled_backward(trn_X,
                                    trn_y,
                                    val_X,
                                    val_y,
                                    lr,
                                    w_optimizer,
                                    unrolled=cfg.DARTS.SECOND)
        alpha_optimizer.step()

        # phase 1. child network step (w)
        if scaler is not None:
            with torch.cuda.amp.autocast():
                # Perform the forward pass under autocast
                preds = model(trn_X)
                # Compute the loss under autocast
                loss = loss_fun(preds, trn_y)
            # Perform the backward pass and optimizer step with the scaler
            w_optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(w_optimizer)
            # Update the scale for the next iteration
            scaler.update()
        else:
            preds = model(trn_X)
            # Compute the loss
            loss = loss_fun(preds, trn_y)
            # Perform the backward pass
            w_optimizer.zero_grad()
            loss.backward()
            # gradient clipping
            nn.utils.clip_grad_norm_(model.weights(), cfg.OPTIM.GRAD_CLIP)
            # Update the parameters
            w_optimizer.step()
        # Compute the errors
        top1_err, top5_err = meters.topk_errors(preds, trn_y, [1, 5])
        # Copy the stats from GPU to CPU (sync point)
        loss = loss.item()
        top1_err, top5_err = top1_err.item(), top5_err.item()
        train_meter.iter_toc()
        # Update and log stats
        mb_size = trn_X.size(0) * cfg.NUM_GPUS
        train_meter.update_stats(top1_err, top5_err, loss, lr, mb_size)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        # write to tensorboard
        writer.add_scalar('train/loss', loss, cur_step)
        writer.add_scalar('train/top1_error', top1_err, cur_step)
        writer.add_scalar('train/top5_error', top5_err, cur_step)
        cur_step += 1
    # Log epoch stats
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
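
Example #6 interleaves the DARTS architecture step (alpha) and the weight step (w) inside a single loop, so the caller has to supply two optimizers, the current weight learning rate, and an architect object. The wiring below is a hedged sketch of that outer loop; model.alphas(), the Architect constructor, and the cfg.DARTS.* hyperparameters are assumptions based on common DARTS implementations, not the project's verified API.

# Hypothetical outer loop for Example #6 (DARTS-style bilevel search).
w_optimizer = torch.optim.SGD(model.weights(), lr=cfg.OPTIM.BASE_LR,
                              momentum=cfg.OPTIM.MOMENTUM,
                              weight_decay=cfg.OPTIM.WEIGHT_DECAY)
alpha_optimizer = torch.optim.Adam(model.alphas(), lr=cfg.DARTS.ALPHA_LR,      # assumed names
                                   betas=(0.5, 0.999),
                                   weight_decay=cfg.DARTS.ALPHA_WEIGHT_DECAY)  # assumed name
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    w_optimizer, T_max=cfg.OPTIM.MAX_EPOCH)
architect = Architect(model, cfg.OPTIM.MOMENTUM, cfg.OPTIM.WEIGHT_DECAY)  # assumed constructor
for cur_epoch in range(cfg.OPTIM.MAX_EPOCH):
    lr = lr_scheduler.get_last_lr()[0]
    train_epoch(train_loader, valid_loader, model, architect, loss_fun,
                w_optimizer, alpha_optimizer, lr, train_meter, cur_epoch)
    lr_scheduler.step()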