def train(train_loader, model, optimizer, lr_scheduler, epoch):
    """Run one training epoch with a per-iteration LR schedule.

    The scheduler is stepped every batch and its returned rate is pushed
    into all optimizer param groups. When ``args.sparsity`` is non-zero an
    L1 penalty is added to the BatchNorm scale gradients before stepping.
    Returns the average training loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.train()  # enable dropout / BN running-stat updates

    tic = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # time spent waiting on the data pipeline
        data_time.update(time.time() - tic)

        labels = labels.cuda(non_blocking=True)
        images = images.cuda()

        logits = model(images)
        loss = criterion(logits, labels)

        prec1, prec5 = accuracy(logits, labels, topk=(1, 5))
        batch_size = images.size(0)
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # adjust lr: scheduler yields the new rate each iteration
        new_lr = lr_scheduler.step()
        for group in optimizer.param_groups:
            group["lr"] = new_lr

        # impose L1 penalty to BN factors (network-slimming style)
        if args.sparsity != 0:
            for module in model.modules():
                if isinstance(module, nn.BatchNorm2d):
                    module.weight.grad.data.add_(
                        args.sparsity * torch.sign(module.weight.data))  # L1

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        lr = optimizer.param_groups[0]["lr"]
        if step % args.print_freq == 0:
            logger.info('Epoch[{0}/{1}] Iter[{2}/{3}]\t'
                        'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'Train Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                        'LR: {lr:.4f}'.format(
                            epoch, args.epochs, step, len(train_loader),
                            batch_time=batch_time, data_time=data_time,
                            loss=losses, top1=top1, top5=top5, lr=lr))
    return losses.avg
def train(train_loader, model, optimizer, epoch):
    """Train ``model`` for one epoch with a fixed optimizer learning rate.

    Unlike the scheduler-driven variant, the LR is only read (for logging),
    never modified. Returns the average training loss over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (data, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        target = target.cuda(non_blocking=True)
        data = data.cuda()

        output = model(data)
        loss = criterion(output, target)

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), data.size(0))
        top1.update(acc1.item(), data.size(0))
        top5.update(acc5.item(), data.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        lr = optimizer.param_groups[0]["lr"]
        # FIX: log every args.print_freq iterations like the other
        # train/validate loops in this file, instead of a hard-coded 20
        if i % args.print_freq == 0:
            logger.info(
                'Epoch[{0}/{1}] Iter[{2}/{3}]\t'
                'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Train Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                'LR: {lr:.4f}'.format(epoch, args.epochs, i, len(train_loader),
                                      batch_time=batch_time, data_time=data_time,
                                      loss=losses, top1=top1, top5=top5, lr=lr))
    return losses.avg
def validate(val_loader, model, epoch):
    """Evaluate ``model`` on ``val_loader`` (DALI pipeline or DataLoader).

    Returns ``(top1.avg, top5.avg)``. ``epoch`` is accepted for signature
    symmetry with train() but is not used.
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.use_dali:
        # NOTE(review): 100 looks like a hard-coded validation batch size;
        # consider deriving it from the loader/args as the DALI train loop
        # does — TODO confirm against the pipeline configuration.
        val_loader_len = int(val_loader._size / 100)
    else:
        val_loader_len = len(val_loader)

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, data in enumerate(val_loader):
            if args.use_dali:
                # each DALI shard yields a dict; concatenate shards into one
                # batch (renamed comprehension var so it no longer shadows i)
                target = torch.cat([d["label"].to(torch.device('cuda:0'))
                                    for d in data], dim=0)
                data = torch.cat([d["data"].to(torch.device('cuda:0'))
                                  for d in data], dim=0)
                target = target.cuda(non_blocking=True).squeeze().long()
            else:
                data, target = data
                data = data.cuda()
                target = target.cuda(non_blocking=True)
            # FIX: removed a redundant second `data = data.cuda()` here —
            # both branches above already place `data` on the GPU

            # compute output
            output = model(data)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), data.size(0))
            top1.update(acc1.item(), data.size(0))
            top5.update(acc5.item(), data.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                logger.info('Test: [{0}/{1}]\t'
                            'Test Loss {loss.val:.3f} (avg={loss.avg:.3f})\t'
                            'Prec@1 {top1.val:.3f} (avg={top1.avg:.3f})\t'
                            'Prec@5 {top5.val:.3f} (avg={top5.avg:.3f})'.format(
                                i, val_loader_len, loss=losses, top1=top1, top5=top5))

    logger.info(' * Prec@1 {top1.avg:.5f} Prec@5 {top5.avg:.5f}'
                .format(top1=top1, top5=top5))

    if args.use_dali:
        # DALI iterators need an explicit reset before re-iteration
        val_loader.reset()
    return top1.avg, top5.avg
def validate(val_loader, model, epoch):
    """Evaluate ``model`` on ``val_loader``; returns (top-1 avg, top-5 avg).

    ``epoch`` is unused; it is kept for signature symmetry with train().
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()  # switch to evaluate mode

    with torch.no_grad():
        tic = time.time()
        for step, (images, labels) in enumerate(val_loader):
            labels = labels.cuda(non_blocking=True)
            images = images.cuda()

            # forward pass only — gradients disabled above
            logits = model(images)
            loss = criterion(logits, labels)

            # record loss / accuracy weighted by batch size
            prec1, prec5 = accuracy(logits, labels, topk=(1, 5))
            n = images.size(0)
            losses.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            # wall time per batch
            batch_time.update(time.time() - tic)
            tic = time.time()

            if step % args.print_freq == 0:
                logger.info(
                    'Test: [{0}/{1}]\t'
                    'Test Loss {loss.val:.3f} (avg={loss.avg:.3f})\t'
                    'Prec@1 {top1.val:.3f} (avg={top1.avg:.3f})\t'
                    'Prec@5 {top5.val:.3f} (avg={top5.avg:.3f})'.format(
                        step, len(val_loader), loss=losses, top1=top1, top5=top5))

    logger.info(' * Prec@1 {top1.avg:.5f} Prec@5 {top5.avg:.5f}'.format(
        top1=top1, top5=top5))
    return top1.avg, top5.avg
def train(train_loader, model, optimizer, lr_scheduler, epoch):
    """Train for one epoch over ``train_loader`` (DALI pipeline or plain
    DataLoader), stepping ``lr_scheduler`` every iteration.

    Returns the average training loss for the epoch.
    """
    batch_time = AverageMeter()  # wall time per iteration
    data_time = AverageMeter()   # time spent waiting on data loading
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    if args.use_dali:
        # DALI iterators expose the dataset size via `_size` instead of
        # __len__, so the iteration count is derived from the batch size
        train_loader_len = int(np.ceil(train_loader._size/args.batch_size))
    else:
        train_loader_len = len(train_loader)

    # switch to train mode
    model.train()

    end = time.time()
    for i, data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_dali:
            # Each element of `data` is one pipeline shard (a dict with
            # "data"/"label" keys); shards are concatenated into one batch.
            # NOTE(review): the comprehension variable `i` shadows the
            # enumerate index only inside the comprehension scope (Python 3),
            # so the outer `i` is unaffected — still worth renaming.
            target = torch.cat([i["label"].to(torch.device('cuda:0')) for i in data], dim=0)
            data = torch.cat([i["data"].to(torch.device('cuda:0')) for i in data], dim=0)
            target = target.cuda().squeeze().long()
        else:
            data, target = data
            data = data.cuda()
            target = target.cuda()

        output = model(data)
        loss = criterion(output, target)

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), data.size(0))
        top1.update(acc1.item(), data.size(0))
        top5.update(acc5.item(), data.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()

        # per-iteration LR schedule: the scheduler returns the new rate,
        # which is written into every param group before stepping
        lr = lr_scheduler.step()
        for pg in optimizer.param_groups:
            pg["lr"] = lr

        # impose L1 penalty to BN factors
        if args.sparsity != 0:
            for m in model.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.weight.grad.data.add_(args.sparsity*torch.sign(m.weight.data))  # L1

        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        lr = optimizer.param_groups[0]["lr"]
        if i % args.print_freq == 0:
            logger.info('Epoch[{0}/{1}] Iter[{2}/{3}]\t'
                        'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Data Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                        'Train Loss {loss.val:.3f} ({loss.avg:.3f})\t'
                        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})\t'
                        'LR: {lr:.4f}'.format(
                            epoch, args.epochs, i, train_loader_len,
                            batch_time=batch_time, data_time=data_time,
                            loss=losses, top1=top1, top5=top5, lr=lr))

    if args.use_dali:
        # DALI iterators need an explicit reset before the next epoch
        train_loader.reset()
    return losses.avg