def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(len(val_loader), [batch_time, losses, top1, top5],
                             prefix='Test: ')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            if args.gpu is not None:
                images = images.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg, top5.avg
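# The functions in this file lean on AverageMeter / ProgressMeter / accuracy helpers
# in the style of the PyTorch ImageNet reference example. They are not defined in this
# section; the sketch below is an assumption of their common form, given for context only.
# Some variants construct the meters differently (e.g. AverageMeter(10) as a moving-average
# window in the distributed loop further down), which this sketch does not cover.
class AverageMeter(object):
    """Tracks the current value, running sum, count and average of a metric."""
    def __init__(self, name='', fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


class ProgressMeter(object):
    """Prints the tracked meters for a given batch index."""
    def __init__(self, num_batches, meters, prefix=''):
        self.num_batches, self.meters, self.prefix = num_batches, meters, prefix

    def display(self, batch):
        entries = [self.prefix + '[{}/{}]'.format(batch, self.num_batches)]
        entries += ['{} {:.3f} ({:.3f})'.format(m.name, m.val, m.avg) for m in self.meters]
        print('\t'.join(entries))


def accuracy(output, target, topk=(1,)):
    """Computes the top-k accuracy (in percent) for the specified values of k."""
    with torch.no_grad():
        maxk = max(topk)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / target.size(0)))
        return res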
def validate(val_loader, model, criterion, args):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    if args.track_correct:
        corr_dict = {'correct': []}

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)

            if args.loss_func == 'l2':
                zero_mat = np.zeros((len(target), args.num_classes), dtype=int)
                zero_mat[list(range(len(target))), target] = 1
                targetl2 = torch.from_numpy(zero_mat).float()
                targetl2 = targetl2.cuda(args.gpu, non_blocking=True)

            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            if args.loss_func == 'l2':
                loss = criterion(output, targetl2)
            else:
                loss = criterion(output, target)

            # record correctly classified examples
            if args.track_correct:
                correct = accuracy(output, target, topk=(1, 1), track=True)
                corr_dict['correct'] += [(i * args.batch_size) + idx
                                         for idx, is_corr in enumerate(correct)
                                         if is_corr]

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 1))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(val_loader), batch_time=batch_time,
                          loss=losses, top1=top1, top5=top5))

    # Record the indices of the correctly classified images
    if args.track_correct:
        fname, ext = str(args.outpath).split('.')
        corrfile = fname + '_corr.json'
        with open(corrfile, 'w') as f:
            json.dump(corr_dict, f, indent=2)
        return

    return top1.avg, top5.avg
def train(train_loader, model, criterion, optimizer, epoch, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_inverse_sqrt_lr:
            assert not (args.max_lr_adjusting_epoch > 0), \
                "lr scheduler crash, step and inverse sqrt lr can't be mutually set"
            for cur_p, param_group in enumerate(optimizer.param_groups):
                d_rate = (args.initial_lr_decay[cur_p]['decay']
                          if args.initial_lr_decay else 512)
                base_lr = (args.initial_lr[cur_p]['lr']
                           if args.initial_lr else args.lr)
                lr = base_lr / np.sqrt(1 + (epoch * len(train_loader) + i) / d_rate)
                param_group['lr'] = lr

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)

        if args.loss_func == 'l2':
            zero_mat = np.zeros((len(target), args.num_classes), dtype=int)
            zero_mat[list(range(len(target))), target] = 1
            targetl2 = torch.from_numpy(zero_mat).float()
            targetl2 = targetl2.cuda(args.gpu, non_blocking=True)

        target = target.cuda(args.gpu, non_blocking=True)

        # for LBFGS
        if args.use_LBFGS:
            def closure():
                optimizer.zero_grad()
                output = model(input)
                loss = criterion(output, target)
                loss.backward()
                return loss

            optimizer.step(closure)
        else:
            # compute output
            output = model(input)
            if args.loss_func == 'l2':
                loss = criterion(output, targetl2)
            else:
                loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))
            top5.update(acc5[0], input.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float,
          device: torch.device, writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.size(0)

        # augment inputs with noise
        noise = torch.randn_like(inputs, device=device) * noise_sd

        logits = model(inputs)
        logits_n = model(inputs + noise)

        loss_xent = criterion(logits, targets)
        stab = _cross_entropy(logits_n, logits)
        loss = loss_xent + args.lbd * stab

        acc1, acc5 = accuracy(logits_n, targets, topk=(1, 5))
        losses.update(loss_xent.item(), batch_size)
        losses_reg.update(stab.item(), batch_size)
        top1.update(acc1.item(), batch_size)
        top5.update(acc5.item(), batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('loss/stability', losses_reg.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
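# The stability term above uses a _cross_entropy helper that is not defined in this
# section. A plausible minimal sketch (an assumption, not necessarily this codebase's
# exact definition) is the soft-label cross-entropy of the noisy logits against the
# clean predictions, averaged over the batch:
import torch.nn.functional as F

def _cross_entropy(input_logits, target_logits):
    """Cross-entropy of input_logits against the soft targets softmax(target_logits)."""
    targets = F.softmax(target_logits, dim=1)
    return -(targets * F.log_softmax(input_logits, dim=1)).sum(1).mean()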
def train(loader: DataLoader, model: torch.nn.Module, criterion, optimizer: Optimizer, epoch: int, noise_sd: float): """ Function to do one training epoch :param loader:DataLoader: dataloader (train) :param model:torch.nn.Module: the classifer being trained :param criterion: the loss function :param optimizer:Optimizer: the optimizer used during trainined :param epoch:int: the current epoch number (for logging) :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input """ batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to train mode model.train() for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # augment inputs with noise inputs = inputs + torch.randn_like(inputs, device='cuda') * noise_sd # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # compute gradient and do SGD step optimizer.zero_grad() loss.backward() optimizer.step() # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Epoch: [{0}][{1}/{2}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch, i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, device, print_freq=100,
          display=True):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.to(device)
        targets = targets.to(device)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0 and display:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg, top5.avg)
def train(loader: DataLoader, denoiser: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float,
          classifier: torch.nn.Module = None):
    """
    Train the denoiser for one epoch.

    :param loader:DataLoader: training dataloader
    :param denoiser:torch.nn.Module: the denoiser being trained
    :param criterion: loss function
    :param optimizer:Optimizer: optimizer used during training
    :param epoch:int: the current epoch (for logging)
    :param noise_sd:float: the std-dev of the Gaussian noise perturbation of the input
    :param classifier:torch.nn.Module=None: a frozen classifier attached to the denoiser
        (required for the classification/stability objectives); None for the denoising objective
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    end = time.time()

    # switch to train mode
    denoiser.train()
    if classifier:
        classifier.eval()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()

        # augment inputs with noise
        noise = torch.randn_like(inputs, device='cuda') * noise_sd

        # compute output
        outputs = denoiser(inputs + noise)
        if classifier:
            outputs = classifier(outputs)

        if isinstance(criterion, MSELoss):
            loss = criterion(outputs, inputs)
        elif isinstance(criterion, CrossEntropyLoss):
            if args.objective == 'stability':
                with torch.no_grad():
                    targets = classifier(inputs)
                    targets = targets.argmax(1).detach().clone()
            loss = criterion(outputs, targets)

        # record loss
        losses.update(loss.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses))

    return losses.avg
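# Hypothetical usage sketch (not from this codebase) of how the denoiser training above
# might be wired up for its two objective families. The criterion instance selects the
# branch taken inside the loop; train_loader, denoiser, frozen_classifier and epoch are
# placeholder names, and args.objective follows the function above.
from torch.nn import MSELoss, CrossEntropyLoss
from torch.optim import SGD

optimizer = SGD(denoiser.parameters(), lr=1e-3, momentum=0.9)

# denoising objective: reconstruct the clean input, no classifier needed
criterion = MSELoss(reduction='mean').cuda()
train(train_loader, denoiser, criterion, optimizer, epoch, noise_sd=0.25)

# stability objective: match the frozen classifier's clean-input predictions
criterion = CrossEntropyLoss().cuda()
train(train_loader, denoiser, criterion, optimizer, epoch, noise_sd=0.25,
      classifier=frozen_classifier)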
def train_or_eval(train, gpu, loader, model, criterion, optimizer, args, hyper, epoch):
    phase = "train" if train else "test"
    model.train() if train else model.eval()

    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Accuracy1", ":6.2f")
    top5 = AverageMeter("Accuracy5", ":6.2f")
    prefix = "Epoch:[{}]".format(epoch + 1) if train else "Test: "
    progress = ProgressMeter(len(loader), [losses, top1, top5], prefix=prefix)

    if args.prof:
        print("Profiling started")
        torch.cuda.cudart().cudaProfilerStart()

    t_init = time.time()
    prefetcher = data_prefetcher(loader)

    with torch.set_grad_enabled(mode=train):
        for i, (images, target) in enumerate(prefetcher):
            niter = epoch * len(loader) + i

            if args.prof:
                torch.cuda.nvtx.range_push("Prof start iteration {}".format(i))

            if args.prof:
                torch.cuda.nvtx.range_push("forward")
            output = model(images)
            if args.prof:
                torch.cuda.nvtx.range_pop()

            loss = criterion(output, target)

            if train:
                lr = adjust_learning_rate(optimizer, niter, hyper, len(loader))
                optimizer.zero_grad()

                if args.prof:
                    torch.cuda.nvtx.range_push("backward")
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
                if args.prof:
                    torch.cuda.nvtx.range_pop()

                if args.prof:
                    torch.cuda.nvtx.range_push("optimizer step")
                optimizer.step()
                if args.prof:
                    torch.cuda.nvtx.range_pop()

            distributed = args.gpu is None
            publish_stats = (not distributed or gpu == 0) and i % 100 == 0

            if not train or publish_stats:
                acc1, acc5 = accuracy(output.detach(), target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

            if publish_stats:
                progress.display(i)

            if train and publish_stats:
                args.writer.add_scalar("Loss/{}".format(phase), loss.item(), niter)
                args.writer.add_scalar("Accuracy/{}".format(phase), acc1, niter)
                args.writer.add_scalar("Loss/Accuracy", acc1, lr * 10000)

            if args.prof:
                torch.cuda.nvtx.range_pop()

            if args.prof and i == 20:
                break

    if args.prof:
        print("Profiling stopped")
        torch.cuda.cudart().cudaProfilerStop()

    print("Total {} epoch time: {}".format(phase, HTIME(time.time() - t_init)))
    return top1.avg
def test(loader: DataLoader, model: torch.nn.Module, criterion, noise_sd: float,
         attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    top1_normal = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()
    requires_grad_(model, False)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd
            noisy_inputs = inputs + noise

            # compute output
            if args.adv_training:
                normal_outputs = model(noisy_inputs)
                acc1_normal, _ = accuracy(normal_outputs, targets, topk=(1, 5))
                top1_normal.update(acc1_normal.item(), inputs.size(0))

                with torch.enable_grad():
                    inputs = attacker.attack(model, inputs, targets, noise=noise)
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs = inputs + noise

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))

    if args.adv_training:
        return (losses.avg, top1.avg, top1_normal.avg)
    else:
        return (losses.avg, top1.avg, None)
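# Several of the adversarial-training routines here toggle gradient tracking with a
# requires_grad_(model, flag) helper that is not defined in this section. A minimal
# sketch of its presumed behavior:
def requires_grad_(model: torch.nn.Module, requires_grad: bool) -> None:
    """Set requires_grad on every parameter of the model."""
    for param in model.parameters():
        param.requires_grad_(requires_grad)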
def test(loader: DataLoader, model: torch.nn.Module, criterion, epoch: int, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    m = Bernoulli(torch.tensor([args.calibrated_alpha]).cuda())

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # make MNIST binary
            if args.dataset == 'mnist':
                inputs = (inputs > 0.5).type(torch.cuda.FloatTensor)

            # augment inputs with noise
            if args.perturb == 'bernoulli':
                mask = m.sample(inputs.shape).squeeze(-1)
                # make sure that the value is normalized
                rand_inputs = torch.randint_like(
                    inputs, low=0, high=args.K + 1, device='cuda') / float(args.K)
                inputs = inputs * mask + rand_inputs * (1 - mask)
            elif args.perturb == 'gaussian':
                inputs = inputs + torch.randn_like(inputs, device='cuda') * args.sigma

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i + 1, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))

    print('* Epoch: [{0}] Test: \t'
          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
          'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
          'Acc@5 {top5.val:.3f} ({top5.avg:.3f})\n'.format(
              epoch, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float,
          attacker: Attacker = None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = get_minibatches(batch, args.num_noise_vec)
        noisy_inputs_list = []

        for inputs, targets in mini_batches:
            inputs = inputs.cuda()
            targets = targets.cuda()
            inputs = inputs.repeat((1, args.num_noise_vec, 1, 1)).view(batch[0].shape)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device='cuda') * noise_sd

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model, inputs, targets, noise=noise,
                                         num_noise_vectors=args.num_noise_vec,
                                         no_grad=args.no_grad_attack)
                model.train()
                requires_grad_(model, True)

            if args.train_multi_noise:
                noisy_inputs = inputs + noise
                targets = targets.unsqueeze(1).repeat(
                    1, args.num_noise_vec).reshape(-1, 1).squeeze()

                outputs = model(noisy_inputs)
                loss = criterion(outputs, targets)

                acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
                losses.update(loss.item(), noisy_inputs.size(0))
                top1.update(acc1.item(), noisy_inputs.size(0))
                top5.update(acc5.item(), noisy_inputs.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            else:
                inputs = inputs[::args.num_noise_vec]  # subsample the samples
                noise = noise[::args.num_noise_vec]
                # noise = torch.randn_like(inputs, device='cuda') * noise_sd
                noisy_inputs_list.append(inputs + noise)

        if not args.train_multi_noise:
            noisy_inputs = torch.cat(noisy_inputs_list)
            targets = batch[1].cuda()
            assert len(targets) == len(noisy_inputs)

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), noisy_inputs.size(0))
            top1.update(acc1.item(), noisy_inputs.size(0))
            top5.update(acc5.item(), noisy_inputs.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)
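# The multi-noise training loops in this file split each batch into args.num_noise_vec
# chunks via helpers named get_minibatches / _chunk_minibatch, which are not shown here.
# A minimal sketch of the assumed behavior (equal contiguous slices of the batch):
def get_minibatches(batch, num_batches):
    """Yield num_batches equal slices of an (inputs, targets) batch."""
    X, y = batch
    batch_size = len(X) // num_batches
    for i in range(num_batches):
        yield (X[i * batch_size:(i + 1) * batch_size],
               y[i * batch_size:(i + 1) * batch_size])

# _chunk_minibatch, used by the consistency-training variants further below, is assumed
# to behave the same way.
_chunk_minibatch = get_minibatches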
def train(args):
    args.print_freq = 100
    args.gpu = None

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # load data
    train_dl, valid_dl, test_dl = load_imagenet(args)

    # define model
    model = torchvision.models.resnet50(pretrained=False)
    # multiple gpus
    model = torch.nn.DataParallel(model).cuda()

    loss_fn = torch.nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=0.9,
                                weight_decay=args.weight_decay)

    model_dir = gen_model_dir(args)
    model_dir.mkdir(parents=True, exist_ok=True)

    torch.backends.cudnn.benchmark = True
    best_acc1 = 0

    for epoch in range(args.n_epochs):
        adjust_learning_rate(optimizer, epoch, args)

        batch_time = AverageMeter('Time', ':6.3f')
        data_time = AverageMeter('Data', ':6.3f')
        losses = AverageMeter('Loss', ':.4e')
        top1 = AverageMeter('Acc@1', ':6.2f')
        top5 = AverageMeter('Acc@5', ':6.2f')
        progress = ProgressMeter(len(train_dl),
                                 [batch_time, data_time, losses, top1, top5],
                                 prefix="Epoch: [{}]".format(epoch))

        # switch to train mode
        model.train()

        end = time.time()
        for batch_idx, (images, target) in enumerate(train_dl):
            # measure data loading time
            data_time.update(time.time() - end)

            # if args.gpu is not None:
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = loss_fn(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % args.print_freq == 0:
                progress.display(batch_idx)

        # evaluate on validation set
        acc1 = validate(valid_dl, model, loss_fn, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        torch.save(
            {
                'epoch': epoch + 1,
                'model_weight': model.state_dict(),
                'heldout_best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, Path(model_dir, 'model'))
        if is_best:
            shutil.copyfile(Path(model_dir, 'model'), Path(model_dir, 'model_best'))

    # load best model
    with open(Path(model_dir, "model_best"), 'rb') as f:
        params = torch.load(f)
    model.load_state_dict(params['model_weight'])

    # test
    model.eval()
    # evaluate on test set
    acc_test = validate(test_dl, model, loss_fn, args)

    print('epoch: {}, val acc: {:.4f}, test acc: {:.4f}'.format(
        params["epoch"], params["heldout_best_acc1"], acc_test))

    with open(Path(model_dir, "res.json"), 'w') as fp:
        json.dump(
            {
                'epoch': params["epoch"],
                'heldout_best_acc1': params["heldout_best_acc1"].item(),
                'test_best_acc1': acc_test.item(),
            }, fp)
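# The driver above calls adjust_learning_rate(optimizer, epoch, args), which is not
# defined in this section. A common choice, and an assumption here, is the step decay
# used by the PyTorch ImageNet example: divide the base learning rate by 10 every 30 epochs.
def adjust_learning_rate(optimizer, epoch, args):
    """Decay the learning rate by a factor of 10 every 30 epochs (assumed schedule)."""
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr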
def test(loader: DataLoader, model: torch.nn.Module, criterion, noise_sd: float): """ Function to evaluate the trained model :param loader:DataLoader: dataloader (train) :param model:torch.nn.Module: the classifer being evaluated :param criterion: the loss function :param noise_sd:float: the std-dev of the Guassian noise perturbation of the input """ batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() end = time.time() # switch to eval mode model.eval() with torch.no_grad(): for i, (inputs, targets) in enumerate(loader): # measure data loading time data_time.update(time.time() - end) inputs = inputs.cuda() targets = targets.cuda() # augment inputs with noise inputs = inputs + torch.randn_like(inputs, device='cuda') * noise_sd # compute output outputs = model(inputs) loss = criterion(outputs, targets) # measure accuracy and record loss acc1, acc5 = accuracy(outputs, targets, topk=(1, 5)) losses.update(loss.item(), inputs.size(0)) top1.update(acc1.item(), inputs.size(0)) top5.update(acc5.item(), inputs.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if i % args.print_freq == 0: print('Test: [{0}/{1}]\t' 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format( i, len(loader), batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()

        model.eval()
        roa = ROA(model, 32)
        adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,
                                               iterations, ROAwidth, ROAheight,
                                               skip_in_x, skip_in_y, potential_nums)
        imshow(args.outdir, adv_inputs)

        # compute output
        model.train()
        outputs = model(adv_inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float,
          attacker: Attacker, device: torch.device, writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            batch_size = inputs.size(0)

            noises = [torch.randn_like(inputs, device=device) * noise_sd
                      for _ in range(args.num_noise_vec)]

            if args.adv_training:
                requires_grad_(model, False)
                model.eval()
                inputs = attacker.attack(model, inputs, targets, noises=noises)
                model.train()
                requires_grad_(model, True)

            # augment inputs with noise
            inputs_c = torch.cat([inputs + noise for noise in noises], dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)
            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            loss_con = consistency_loss(logits_chunk, args.lbd, args.eta)
            loss = loss_xent + loss_con

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(loss_con.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    writer.add_scalar('loss/train', losses.avg, epoch)
    writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
    writer.add_scalar('batch_time', batch_time.avg, epoch)
    writer.add_scalar('accuracy/train@1', top1.avg, epoch)
    writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
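# consistency_loss above is not defined in this section. Judging from the inline
# computation in the transformer-based training variant further below, a plausible
# sketch (an assumption, not the repository's exact definition) is a KL term pulling
# each noisy prediction toward the mean prediction plus an entropy term on the mean,
# weighted by lbd and eta respectively:
import torch
import torch.nn.functional as F

def consistency_loss(logits_chunk, lbd, eta):
    """lbd * mean_i KL(p_avg || p_i) + eta * H(p_avg), averaged over the batch."""
    softmax = [F.softmax(logits, dim=1) for logits in logits_chunk]
    avg_softmax = sum(softmax) / len(softmax)

    # KL(p_avg || p_i) for each noise sample i; F.kl_div expects log-probs as input
    kl = sum(F.kl_div(F.log_softmax(logits, dim=1), avg_softmax, reduction='none').sum(1)
             for logits in logits_chunk) / len(logits_chunk)
    entropy = -(avg_softmax * torch.log(avg_softmax.clamp(min=1e-12))).sum(1)
    return (lbd * kl + eta * entropy).mean()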
def test(loader: DataLoader, model: torch.nn.Module, criterion):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    learning_rate = args.attlr
    iterations = args.attiters
    ROAwidth = args.ROAwidth
    ROAheight = args.ROAheight
    skip_in_x = args.skip_in_x
    skip_in_y = args.skip_in_y
    potential_nums = args.potential_nums

    # switch to eval mode
    model.eval()
    roa = ROA(model, 32)

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            with torch.set_grad_enabled(True):
                adv_inputs = roa.gradient_based_search(inputs, targets, learning_rate,
                                                       iterations, ROAwidth, ROAheight,
                                                       skip_in_x, skip_in_y, potential_nums)

            # compute output
            outputs = model(adv_inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, transformer: AbstractTransformer,
          writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_reg = AverageMeter()
    confidence = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            targets = targets.cuda()
            batch_size = inputs.size(0)

            noised_inputs = [transformer.process(inputs).cuda()
                             for _ in range(args.num_noise_vec)]

            # augment inputs with noise
            inputs_c = torch.cat(noised_inputs, dim=0)
            targets_c = targets.repeat(args.num_noise_vec)

            logits = model(inputs_c)
            loss_xent = criterion(logits, targets_c)

            logits_chunk = torch.chunk(logits, args.num_noise_vec, dim=0)
            softmax = [F.softmax(logit, dim=1) for logit in logits_chunk]
            avg_softmax = sum(softmax) / args.num_noise_vec

            consistency = [kl_div(logit, avg_softmax, reduction='none').sum(1)
                           + _entropy(avg_softmax, reduction='none')
                           for logit in logits_chunk]
            consistency = sum(consistency) / args.num_noise_vec
            consistency = consistency.mean()

            loss = loss_xent + args.lbd * consistency

            avg_confidence = -F.nll_loss(avg_softmax, targets)

            acc1, acc5 = accuracy(logits, targets_c, topk=(1, 5))
            losses.update(loss_xent.item(), batch_size)
            losses_reg.update(consistency.item(), batch_size)
            confidence.update(avg_confidence.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

        if args.print_step:
            writer.add_scalar(f'epoch/{epoch}/loss/train', losses.avg, i)
            writer.add_scalar(f'epoch/{epoch}/loss/consistency', losses_reg.avg, i)
            writer.add_scalar(f'epoch/{epoch}/loss/avg_confidence', confidence.avg, i)
            writer.add_scalar(f'epoch/{epoch}/batch_time', batch_time.avg, i)
            writer.add_scalar(f'epoch/{epoch}/accuracy/train@1', top1.avg, i)
            writer.add_scalar(f'epoch/{epoch}/accuracy/train@5', top5.avg, i)

    writer.add_scalar('loss/train', losses.avg, epoch)
    writer.add_scalar('loss/consistency', losses_reg.avg, epoch)
    writer.add_scalar('loss/avg_confidence', confidence.avg, epoch)
    writer.add_scalar('batch_time', batch_time.avg, epoch)
    writer.add_scalar('accuracy/train@1', top1.avg, epoch)
    writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
def test_with_classifier(loader: DataLoader, denoiser: torch.nn.Module, criterion,
                         noise_sd: float, print_freq: int, classifier: torch.nn.Module):
    """
    Test the classification performance of a denoiser when attached to a given classifier.

    :param loader:DataLoader: test dataloader
    :param denoiser:torch.nn.Module: the denoiser
    :param criterion: the loss function (e.g. CE)
    :param noise_sd:float: the std-dev of the Gaussian noise perturbation of the input
    :param print_freq:int: the frequency of logging
    :param classifier:torch.nn.Module: the classifier to which the denoiser is attached
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    classifier.eval()
    if denoiser:
        denoiser.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            inputs = inputs.cuda()
            targets = targets.cuda()

            # augment inputs with noise
            inputs = inputs + torch.randn_like(inputs, device='cuda') * noise_sd

            if denoiser is not None:
                inputs = denoiser(inputs)

            # compute output
            outputs = classifier(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)
def test(loader, model, criterion, epoch, transformer: AbstractTransformer,
         writer=None, print_freq=10):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (inputs, targets) in enumerate(loader):
            # measure data loading time
            data_time.update(time.time() - end)

            targets = targets.cuda()

            # augment inputs with noise
            inputs = transformer.process(inputs).cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(acc1.item(), inputs.size(0))
            top5.update(acc5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.avg:.3f}\t'
                      'Data {data_time.avg:.3f}\t'
                      'Loss {loss.avg:.4f}\t'
                      'Acc@1 {top1.avg:.3f}\t'
                      'Acc@5 {top5.avg:.3f}'.format(
                          i, len(loader), batch_time=batch_time,
                          data_time=data_time, loss=losses, top1=top1, top5=top5))

    if writer:
        writer.add_scalar('loss/test', losses.avg, epoch)
        writer.add_scalar('accuracy/test@1', top1.avg, epoch)
        writer.add_scalar('accuracy/test@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
def get_teacher_intermediates(teacher_model, train_loader, layers_to_replace):
    if args.max_batches is not None:
        num_batches = args.max_batches
    else:
        num_batches = len(train_loader)

    # define hook to capture intermediate inputs/activations and intermediate outputs
    # serialize outputs
    # keep dictionary to io objects for different layers
    teacher_inputs = {}
    teacher_outputs = {}
    teacher_input_size = {}
    teacher_output_size = {}

    # store ids of modules to names -- can't figure out
    # other way to get same name of module within hook
    name_map = {}
    for name, module in teacher_model.named_modules():
        name_map[id(module)] = name

    batch_size = args.batch_size

    # use memory mapping to manage large activations
    def make_mmap_file(path, input_size):
        view_size = torch.Size([num_batches * args.batch_size]) + input_size
        # shared needs to be true for file to be created
        return torch.from_file(path, size=int(np.prod(view_size)),
                               shared=True).view(view_size)

    batch_idx = 0
    data_idx = 0

    # TODO: store only inputs or outputs (otherwise we may be storing duplicate info
    # if we already stored neighboring layer)
    # won't cause answers to be wrong, but could be wasteful
    def hook(module, input, output):
        current_batch_size = output.size(0)
        mod_id = id(module)
        input_size = get_size(input)
        output_size = get_size(output)

        if mod_id not in teacher_inputs:
            teacher_inputs[mod_id] = make_mmap_file(
                f'{args.output_dir}/{name_map[mod_id]}_input.pt', input_size)
            teacher_outputs[mod_id] = make_mmap_file(
                f'{args.output_dir}/{name_map[mod_id]}_output.pt', output_size)

        if mod_id not in teacher_input_size:
            teacher_input_size[mod_id] = input_size
            teacher_output_size[mod_id] = output_size

        # save inputs to memory mapped files
        # TODO: input always a length-1 tuple?
        teacher_inputs[mod_id][data_idx:data_idx + current_batch_size] = input[0].cpu().detach()
        teacher_outputs[mod_id][data_idx:data_idx + current_batch_size] = output.cpu().detach()

    teacher_model.eval()
    for name, module in teacher_model.named_modules():
        if name in layers_to_replace:
            module.register_forward_hook(hook)

    batch_time = AverageMeter()
    prefetcher = data_prefetcher(train_loader)
    input, _ = prefetcher.next()

    end = time.time()
    while input is not None:
        input = input.cuda()
        teacher_model(input)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % args.print_freq == 0:
            logger.info(
                'Batch:{0}/{1}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                    batch_idx, num_batches, batch_time=batch_time))

        batch_idx += 1
        data_idx += input.size(0)  # for variable size batches

        if args.max_batches is not None and batch_idx == args.max_batches:
            break

        input, _ = prefetcher.next()

    logging.info("Computed teacher intermediates. ")

    # write sizes to disk for easy loading of memory maps at a later time
    for layer_id in teacher_inputs:
        layer_name = name_map[layer_id]
        # write sizes
        with open(f'{args.output_dir}/{layer_name}_input_sz.pt', 'wb') as f:
            input_size = torch.Size([data_idx]) + teacher_input_size[layer_id]
            torch.save(input_size, f)
        with open(f'{args.output_dir}/{layer_name}_output_sz.pt', 'wb') as f:
            output_size = torch.Size([data_idx]) + teacher_output_size[layer_id]
            torch.save(output_size, f)
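# A hedged sketch (not part of this codebase) of how the memory-mapped activations
# written above could be loaded back later. It mirrors make_mmap_file and the saved
# *_sz.pt size files; load_teacher_intermediate, output_dir and layer_name are
# hypothetical names introduced here for illustration.
import numpy as np
import torch

def load_teacher_intermediate(output_dir, layer_name, kind='input'):
    """Re-open a memory-mapped activation file written by get_teacher_intermediates."""
    size = torch.load(f'{output_dir}/{layer_name}_{kind}_sz.pt')  # a torch.Size
    flat = torch.from_file(f'{output_dir}/{layer_name}_{kind}.pt',
                           size=int(np.prod(size)), shared=False)
    return flat.view(size)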
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float,
          attacker: Attacker, device: torch.device, writer=None):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()
    requires_grad_(model, True)

    for i, batch in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        mini_batches = _chunk_minibatch(batch, args.num_noise_vec)
        for inputs, targets in mini_batches:
            inputs, targets = inputs.to(device), targets.to(device)
            inputs = inputs.repeat(
                (1, args.num_noise_vec, 1, 1)).reshape(-1, *batch[0].shape[1:])
            batch_size = inputs.size(0)

            # augment inputs with noise
            noise = torch.randn_like(inputs, device=device) * noise_sd

            requires_grad_(model, False)
            model.eval()
            inputs = attacker.attack(model, inputs, targets, noise=noise,
                                     num_noise_vectors=args.num_noise_vec,
                                     no_grad=args.no_grad_attack)
            model.train()
            requires_grad_(model, True)

            noisy_inputs = inputs + noise
            targets = targets.unsqueeze(1).repeat(
                1, args.num_noise_vec).reshape(-1, 1).squeeze()

            outputs = model(noisy_inputs)
            loss = criterion(outputs, targets)

            acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(acc1.item(), batch_size)
            top5.update(acc5.item(), batch_size)

            # compute gradient and do SGD step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.avg:.3f}\t'
                  'Data {data_time.avg:.3f}\t'
                  'Loss {loss.avg:.4f}\t'
                  'Acc@1 {top1.avg:.3f}\t'
                  'Acc@5 {top5.avg:.3f}'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    if writer:
        writer.add_scalar('loss/train', losses.avg, epoch)
        writer.add_scalar('batch_time', batch_time.avg, epoch)
        writer.add_scalar('accuracy/train@1', top1.avg, epoch)
        writer.add_scalar('accuracy/train@5', top5.avg, epoch)

    return (losses.avg, top1.avg)
def train(train_loader, val_loader, model, criterion, optimizer, param_copy):
    global args, rank, world_size, best_prec1, dataset_len

    # moving average batch time, data loading time, loss
    batch_time = AverageMeter(10)
    data_time = AverageMeter(10)
    losses = AverageMeter(10)

    model.train()

    end = time.time()
    curr_step = 0

    momentum_buffer = []
    for master_p in param_copy:
        momentum_buffer.append(torch.zeros_like(master_p))

    for i, (input, target) in enumerate(train_loader):
        curr_step += 1
        if curr_step > args.max_iter:
            break

        current_lr = adjust_learning_rate(optimizer, curr_step)

        target = target.cuda()
        input = input.cuda()
        data_time.update(time.time() - end)
        end = time.time()

        output = model(input)

        # loss divided by world_size
        loss = criterion(output, target) / world_size
        if rank == 0:
            print("loss:", loss)

        # average loss
        reduced_loss = loss.data.clone()
        if args.dist == 1:
            dist.all_reduce(reduced_loss)
        losses.update(reduced_loss.item())

        # average gradient
        optimizer.zero_grad()
        model.zero_grad()
        loss.backward()
        if args.dist == 1:
            sum_gradients(model)

        for param_1, param_2 in zip(param_copy, list(model.parameters())):
            param_1.backward(param_2.grad.float())

        for idx, master_p in enumerate(param_copy):
            if master_p.grad is not None:
                update = master_p.grad
                local_lr = master_p.norm(2) / \
                    (master_p.grad.norm(2) + args.weight_decay * master_p.norm(2))
                momentum_buffer[idx] = args.momentum * momentum_buffer[idx] \
                    + current_lr * local_lr \
                    * (master_p.grad + args.weight_decay * master_p)
                update = momentum_buffer[idx]
                master_p.data.copy_(master_p - update)

        for param, copy_param in zip(model.parameters(), param_copy):
            param.data.copy_(copy_param.data)

        batch_time.update(time.time() - end)
        end = time.time()

        if curr_step % args.val_freq == 0 and curr_step != 0:
            if rank == 0:
                print('Iter: [{}/{}]\nTime {batch_time.val:.3f} ({batch_time.avg:.3f})\n'
                      .format(curr_step, args.max_iter, batch_time=batch_time))
            val_loss, prec1, prec5 = validate(val_loader, model, criterion)

    if rank == 0:
        print('Iter: [{}/{}]\n'.format(curr_step, args.max_iter))
    val_loss, prec1, prec5 = validate(val_loader, model, criterion)
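# sum_gradients above is assumed to all-reduce the model gradients across workers when
# args.dist == 1 (the loss is already divided by world_size, so the sum yields the mean).
# A minimal sketch of that helper, not the repository's exact code:
import torch.distributed as dist

def sum_gradients(model):
    """All-reduce (sum) every parameter gradient across the process group."""
    for param in model.parameters():
        if param.grad is not None:
            dist.all_reduce(param.grad.data)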
def train(loader: DataLoader, model: torch.nn.Module, criterion,
          optimizer: Optimizer, epoch: int, noise_sd: float):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    # switch to train mode
    model.train()

    for i, (inputs, targets) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        inputs = inputs.cuda()
        targets = targets.cuda()

        # augment inputs with noise
        inputs = inputs + randgn_like(inputs, p=args.p, device='cuda') * noise_sd
        if args.scale_down != 1:
            inputs = torch.nn.functional.interpolate(inputs, scale_factor=args.scale_down)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(outputs, targets, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(acc1.item(), inputs.size(0))
        top5.update(acc5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Acc@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(loader), batch_time=batch_time,
                      data_time=data_time, loss=losses, top1=top1, top5=top5))

    return (losses.avg, top1.avg)