def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = F.nll_loss(output, target)
        # Note: scale the loss so small fp16 gradients do not underflow.
        loss = loss * args.scale_factor
        # Zero the fp16 model gradients before backward; the optimizer holds
        # the fp32 master copies (param_copy), whose gradients are overwritten
        # by set_grad below, so they do not need a separate zero_grad().
        model.zero_grad()
        loss.backward()
        # Copy the fp16 gradients into the fp32 master parameters.
        set_grad(param_copy, list(model.parameters()))
        # Undo the loss scaling on the master gradients before stepping.
        if args.scale_factor != 1:
            for param in param_copy:
                param.grad.data = param.grad.data / args.scale_factor
        optimizer.step()
        # Copy the updated fp32 master weights back into the fp16 model.
        params = list(model.parameters())
        for i in range(len(params)):
            params[i].data.copy_(param_copy[i].data)
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / args.scale_factor))  # report the unscaled loss
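The loop above relies on two pieces that are defined elsewhere in the script and are not shown here: `param_copy`, an fp32 master copy of the model's parameters that the optimizer actually updates, and `set_grad`, which copies the model's fp16 gradients into that master copy. The sketch below is only an assumption of how they might look; the helper name `make_master_params` and the `args.lr` / `args.momentum` flags are hypothetical.

import torch
import torch.optim as optim

def make_master_params(model):
    # fp32 copies of the (possibly fp16) model parameters; the optimizer
    # is constructed over these copies rather than over the model itself.
    master = [p.detach().clone().float() for p in model.parameters()]
    for p in master:
        p.requires_grad = True
    return master

def set_grad(master_params, model_params):
    # Copy gradients from the fp16 model parameters into the fp32 masters.
    for master, model_p in zip(master_params, model_params):
        if master.grad is None:
            master.grad = torch.empty_like(master.data)
        master.grad.data.copy_(model_p.grad.data)

# Possible wiring before calling train():
# param_copy = make_master_params(model)
# optimizer = optim.SGD(param_copy, lr=args.lr, momentum=args.momentum)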
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()
    end = time.time()

    prefetcher = data_prefetcher(train_loader, prefetch=True)
    input, target = prefetcher.next()
    i = -1
    while input is not None:
        i += 1
        # When profiling, stop after a fixed number of iterations.
        if args.prof and i > 200:
            break

        # measure data loading time
        data_time.update(time.time() - end)

        input_var = Variable(input)
        target_var = Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if args.distributed:
            # Average the metrics across all workers before logging.
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        losses.update(to_python_float(reduced_loss), input.size(0))
        top1.update(to_python_float(prec1), input.size(0))
        top5.update(to_python_float(prec5), input.size(0))

        # compute gradient and do SGD step
        if args.fp16:
            # Scale the loss so small fp16 gradients do not underflow;
            # the scaling is undone on the fp32 master gradients below.
            loss = loss * args.loss_scale
            model.zero_grad()
            loss.backward()
            # Copy the fp16 gradients into the fp32 master parameters.
            set_grad(param_copy, list(model.parameters()))
            if args.loss_scale != 1:
                for param in param_copy:
                    param.grad.data = param.grad.data / args.loss_scale
            optimizer.step()
            # Copy the updated fp32 master weights back into the fp16 model.
            copy_in_params(model, param_copy)
            torch.cuda.synchronize()
        else:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        input, target = prefetcher.next()

        if args.rank == 0 and i % args.print_freq == 0 and i > 1:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch, i, len(train_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, top1=top1, top5=top5))
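Besides `param_copy` and `set_grad`, this version assumes `copy_in_params`, which writes the updated fp32 master weights back into the fp16 model after each step, plus `reduce_tensor` and `to_python_float` for logging in the distributed case. A minimal sketch under those assumptions follows; it presumes `torch.distributed` has been initialized and that a script-level `args.world_size` is available, neither of which is shown in the snippet above.

import torch
import torch.distributed as dist

def copy_in_params(model, master_params):
    # Write the updated fp32 master weights back into the model parameters.
    for model_p, master in zip(model.parameters(), master_params):
        model_p.data.copy_(master.data)

def reduce_tensor(tensor):
    # Sum a tensor across all workers, then divide by the world size so
    # every rank logs the same averaged value.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= args.world_size
    return rt

def to_python_float(t):
    # Handle both 0-dim tensors and 1-element tensors.
    return t.item() if hasattr(t, 'item') else t[0]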