def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()

    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda()
        target = target.cuda(non_blocking=True)
        with torch.no_grad():
            logits, _ = model(input)
            loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            end_time = time.time()
            if step == 0:
                duration = 0
                start_time = time.time()
            else:
                duration = end_time - start_time
                start_time = time.time()
            logging.info('VALID Step: %03d Objs: %e R1: %f R5: %f Duration: %ds',
                         step, objs.avg, top1.avg, top5.avg, duration)

    return top1.avg, top5.avg, objs.avg

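# All of these routines lean on utils.AverageMeter and utils.accuracy, which
# are not shown here. The sketch below is one common implementation that is
# consistent with the call sites in this file (bare and name/format
# constructors, tuple or int topk); the actual utils module may differ.

class AverageMeter:
    """Tracks the running average of a metric."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.cnt = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt


def accuracy(output, target, topk=(1,)):
    """Top-k precision in percent; returns a list for tuple topk and a single
    1-element tensor when topk is an int (both conventions appear here)."""
    single = isinstance(topk, int)
    ks = (topk,) if single else topk
    maxk = max(ks)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = [correct[:k].reshape(-1).float().sum(0, keepdim=True)
           .mul_(100.0 / batch_size) for k in ks]
    return res[0] if single else res
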
def train(train_queue, model, criterion, optimizer):
    global global_step
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.train()

    for step, (inp, target) in enumerate(train_queue):
        global_step += 1
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        model.zero_grad()
        logits = model(inp)
        loss = criterion(logits, target)
        loss.backward()
        if args.grad_clip > 0:
            nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = inp.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        writer_tr.add_scalar('loss', loss.item(), global_step)
        writer_tr.add_scalar('acc', prec1.item(), global_step)
        # Cross-entropy split into a log-sum-exp term and a max-minus-target
        # term; see the identity checked below.
        decomp_loss_1 = torch.mean(torch.log(torch.sum(
            torch.exp(logits - torch.max(logits, 1)[0].view(-1, 1)), 1))).item()
        writer_tr.add_scalar('decomp_loss_lgsmex', decomp_loss_1, global_step)
        decomp_loss_2 = torch.mean(
            torch.max(logits, 1)[0] - logits[range(len(inp)), target]).item()
        writer_tr.add_scalar('decomp_loss_max', decomp_loss_2, global_step)

        if (step + 1) % args.report_freq == 0:
            logging.info('Train Step: %03d Objs: %e Acc: %.2f',
                         step + 1, objs.avg, top1.avg)

    return top1.avg, objs.avg

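# The two scalars logged above decompose the mean cross-entropy: with
# m = max(logits) per row, logsumexp(z) = m + log(sum(exp(z - m))), so
# decomp_loss_lgsmex + decomp_loss_max equals criterion(logits, target)
# when criterion is nn.CrossEntropyLoss with mean reduction. A quick check:

import torch
import torch.nn.functional as F

z = torch.randn(8, 10)
t = torch.randint(0, 10, (8,))
m = z.max(1)[0]
part1 = torch.log(torch.exp(z - m.view(-1, 1)).sum(1)).mean()
part2 = (m - z[torch.arange(8), t]).mean()
assert torch.allclose(part1 + part2, F.cross_entropy(z, t), atol=1e-5)
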
def train(train_queue, model, criterion, optimizer):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.train()

    for step, (inp, target) in enumerate(train_queue):
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        model.zero_grad()
        logits = model(inp)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = inp.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        if (step + 1) % args.report_freq == 0:
            print('Train Step: {:3d} Objs: {:.4f} Acc: {:.2f}'.format(
                step + 1, objs.avg, top1.avg))

    return top1.avg, objs.avg

def infer(test_queue, model, criterion, depth_norm):
    objs = utils.AverageMeter()
    objs_ = utils.AverageMeter()
    model.eval()
    targets = []
    predicteds = []

    # torch.no_grad() added for evaluation; the deprecated Variable wrappers
    # from the original are dropped (they are no-ops in PyTorch >= 0.4).
    with torch.no_grad():
        for step, (input, target) in enumerate(test_queue):
            input = input.float().cuda()
            target = target.float().cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(torch.squeeze(logits), target)
            # Same loss on a x10-rescaled copy, logged alongside the raw value.
            loss_ = criterion(torch.squeeze(logits) * 10, target * 10)
            predicteds.extend(torch.squeeze(logits / depth_norm).cpu().tolist())
            targets.extend((target / depth_norm).cpu().tolist())

            n = input.size(0)
            objs.update(loss.item(), n)
            objs_.update(loss_.item(), n)

            if step % args.report_freq == 0:
                logging.info('test %03d %e %e', step, objs.avg, objs_.avg)

    return objs.avg, targets, predicteds

def train(train_queue, model, criterion, optimizer):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()

    for step, (x, target) in enumerate(train_queue):
        x = x.cuda()
        target = target.cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(x)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = x.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg

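# A typical driver for these train/infer pairs (a sketch, not taken from the
# repository: `args`, `train_queue`, `valid_queue`, the model, and a cosine
# `scheduler` are assumed to be built in main(), as in DARTS-style code;
# the three-value unpack matches the infer variants that also return top-5):

for epoch in range(args.epochs):
    logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
    train_acc, train_obj = train(train_queue, model, criterion, optimizer)
    scheduler.step()
    valid_acc, valid_top5, valid_obj = infer(valid_queue, model, criterion)
    logging.info('train_acc %f valid_acc %f', train_acc, valid_acc)
    utils.save(model, os.path.join(args.save, 'weights.pt'))  # assuming a utils.save helper
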
def train(train_queue, model, criterion, optimizer):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        model.zero_grad()
        # Sample a fresh set of shared architecture weights for this step.
        model.generate_share_alphas()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        if args.grad_clip > 0:
            nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

        if (step + 1) % args.report_freq == 0:
            logging.info('Train Step: %03d Objs: %e Acc: %.2f',
                         step + 1, objs.avg, top1.avg)

    return top1.avg, objs.avg

def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()

    with torch.no_grad():
        for step, (data, target) in enumerate(valid_queue):
            data = data.cuda()
            target = target.cuda()

            logits, _ = model(data)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = data.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f',
                             step, objs.avg, top1.avg, top5.avg)

    return top1.avg, top5.avg, objs.avg

def infer(valid_queue, model, alpha, criterion):
    """Run model in eval only mode."""
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    # model.eval()

    with torch.no_grad():
        for step, (data, target) in enumerate(valid_queue):
            n = data.size(0)
            data = data.cuda()
            target = target.cuda()
            weights = alpha(data.size(0))
            logits = model(data, weights)
            loss = criterion(logits, target)

            # Calculate the accuracy.
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)

            if step % args.report_freq == 0 or step == len(valid_queue) - 1:
                objs_avg = utils.reduce_tensor(objs.avg, args.world_size)
                top1_avg = utils.reduce_tensor(top1.avg, args.world_size)
                top5_avg = utils.reduce_tensor(top5.avg, args.world_size)
                logging.info('valid %03d %e %f %f',
                             step, objs_avg, top1_avg, top5_avg)

    return top1_avg, objs_avg

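# The distributed variants here assume utils.reduce_tensor; a minimal sketch
# of the usual implementation (all-reduce the metric across ranks, then
# average by world size) follows. The actual utils module may differ.

import torch.distributed as dist

def reduce_tensor(tensor, world_size):
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt
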
def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.eval()

    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            if args.gpu != -1:
                input = input.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            logits = model(input)
            loss = criterion(logits, target)

            prec1 = utils.accuracy(logits, target, topk=(1,))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1[0].item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f', step, objs.avg, top1.avg)

    return top1.avg, objs.avg

def validate(epoch, val_loader, model, criterion, args):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')

    # switch to evaluation mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda()
            target = target.cuda()

            # compute output
            logits = model(images)
            loss = criterion(logits, target)

            # measure accuracy and record loss
            pred1, pred5 = utils.accuracy(logits, target, topk=(1, 5))
            n = images.size(0)
            losses.update(loss.item(), n)
            top1.update(pred1[0], n)
            top5.update(pred5[0], n)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    logger.info(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg

def eval(self, valid_queue):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    # top5 = utils.AverageMeter()
    self.model.eval()

    for step, (x, y) in enumerate(valid_queue):
        x, y = x.to(self.device), y.to(self.device)
        logits = self.model(x)
        loss = self.criterion(logits, y)

        prec1 = utils.accuracy(logits, y, topk=1)
        n = x.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        # top5.update(prec5.item(), n)

        if step % 100 == 0:
            logging.info('valid %03d %e %f', step, objs.avg, top1.avg)
            print('valid: {} loss: {} acc: {}'.format(step, objs.avg, top1.avg))

    return top1.avg, objs.avg

def train(epoch, train_loader, model, criterion, optimizer, scheduler):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    data_time = utils.AverageMeter('Data', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')
    # progress = utils.ProgressMeter(
    #     len(train_loader), [batch_time, data_time, losses, top1, top5],
    #     prefix="Epoch: [{}]".format(epoch))

    model.train()
    end = time.time()

    # Per-epoch LR schedule, stepped once at the start of the epoch.
    scheduler.step()
    for param_group in optimizer.param_groups:
        cur_lr = param_group['lr']
    logger.info('learning_rate: ' + str(cur_lr))

    num_iter = len(train_loader)
    for i, (images, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        images = images.cuda()
        target = target.cuda()

        # compute output
        logits = model(images)
        loss = criterion(logits, target)

        # measure accuracy and record loss
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = images.size(0)
        losses.update(loss.item(), n)  # accumulated loss
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            logger.info('Epoch[{0}]({1}/{2}): '
                        'Loss {loss.avg:.4f} '
                        'Prec@1(1,5) {top1.avg:.2f}, {top5.avg:.2f}'.format(
                            epoch, i, num_iter,
                            loss=losses, top1=top1, top5=top5))
            # progress.display(i)

    return losses.avg, top1.avg, top5.avg

def train(train_queue, model, criterion, optimizer, epoch, init_lr,
          warmup_epochs, global_step):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()

    for step, (data, target) in enumerate(train_queue):
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()

        # Change lr: linear warmup, scaling from ~0 to init_lr over the
        # warmup epochs.
        if epoch < warmup_epochs:
            len_epoch = len(train_queue)
            scale = float(1 + step + epoch * len_epoch) / (warmup_epochs * len_epoch)
            lr = init_lr * scale
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

        # Forward.
        optimizer.zero_grad()
        logits, logits_aux = model(data)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux

        # Backward and step.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Calculate the accuracy.
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        reduced_loss = utils.reduce_tensor(loss.data, args.world_size)
        prec1 = utils.reduce_tensor(prec1, args.world_size)
        prec5 = utils.reduce_tensor(prec5, args.world_size)
        objs.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(prec1), n)
        top5.update(to_python_float(prec5), n)

        if step % args.report_freq == 0:
            current_lr = optimizer.param_groups[0]['lr']
            logging.info('train %03d %e %f %f lr: %e',
                         step, objs.avg, top1.avg, top5.avg, current_lr)
            writer.add_scalar('train/loss', objs.avg, global_step)
            writer.add_scalar('train/acc_top1', top1.avg, global_step)
            writer.add_scalar('train/acc_top5', top5.avg, global_step)
            writer.add_scalar('train/lr',
                              optimizer.state_dict()['param_groups'][0]['lr'],
                              global_step)
        global_step += 1

    return top1.avg, objs.avg, global_step

def validate(epoch, val_loader, model, criterion, args):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')

    if args.use_dali:
        num_iter = val_loader._size // args.batch_size
    else:
        num_iter = len(val_loader)

    model.eval()
    with torch.no_grad():
        end = time.time()
        if args.use_dali:
            for batch_idx, batch_data in enumerate(val_loader):
                images = batch_data[0]['data'].cuda()
                targets = batch_data[0]['label'].squeeze().long().cuda()

                # compute output
                logits = model(images)
                loss = criterion(logits, targets)

                # measure accuracy and record loss
                pred1, pred5 = utils.accuracy(logits, targets, topk=(1, 5))
                n = images.size(0)
                losses.update(loss.item(), n)
                top1.update(pred1[0], n)
                top5.update(pred5[0], n)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
        else:
            for batch_idx, (images, targets) in enumerate(val_loader):
                images = images.cuda()
                targets = targets.cuda()

                # compute output
                logits = model(images)
                loss = criterion(logits, targets)

                # measure accuracy and record loss
                pred1, pred5 = utils.accuracy(logits, targets, topk=(1, 5))
                n = images.size(0)
                losses.update(loss.item(), n)
                top1.update(pred1[0], n)
                top5.update(pred5[0], n)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

    logger.info(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'.format(
        top1=top1, top5=top5))
    return losses.avg, top1.avg, top5.avg

def train(train_queue, valid_iter, model, architect, criterion, optimizer, lr, loggers):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    batches = len(train_queue)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        model.tick(1 / batches)
        n = input.size(0)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue without replacement
        input_search, target_search = next(valid_iter)
        input_search = input_search.cuda(non_blocking=True)
        target_search = target_search.cuda(non_blocking=True)

        valid_loss = architect.step(input, target, input_search, target_search,
                                    lr, optimizer, unrolled=args.unrolled)
        utils.log_loss(loggers["val"], valid_loss, None, model.clock)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        model.mask_alphas()
        model.track_FI()
        model.update_history()

        prec1 = utils.accuracy(logits, target, topk=(1,))
        objs.update(loss.item(), n)
        top1.update(prec1[0].item(), n)
        utils.log_loss(loggers["train"], loss.item(), prec1[0].item(), model.clock)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f', step, objs.avg, top1.avg)
        if (step + 1) % args.admm_freq == 0:
            # periodic ADMM auxiliary/dual variable updates
            model.update_Z()
            model.update_U()

    return top1.avg, objs.avg

def train(train_queue, valid_queue, model, architect, criterion, optimizer, loggers):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    valid_iter = iter(valid_queue)
    batches = len(train_queue)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        model.tick(1 / batches)
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(valid_iter)
        input_search = input_search.cuda(non_blocking=True)
        target_search = target_search.cuda(non_blocking=True)

        architect.step(input_search, target_search)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)

        # Squared L2 norm of the full weight gradient (a Fisher-information proxy).
        model.FI_hist.append(torch.norm(torch.stack([
            torch.norm(p.grad.detach(), 2.0).cuda()
            for p in model.parameters() if p.grad is not None
        ]), 2.0) ** 2)
        if len(model.batchstep) > 0:
            model.batchstep.append(model.batchstep[-1] + 1 / batches)
        else:
            model.batchstep.append(0.0)

        optimizer.step()

        prec1 = utils.accuracy(logits, target, topk=(1,))
        objs.update(loss.item(), n)
        top1.update(prec1[0].item(), n)
        utils.log_loss(loggers["train"], loss.item(), prec1[0].item(), model.clock)
        model.update_history()

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f', step, objs.avg, top1.avg)

    return top1.avg, objs.avg

def train(epoch, train_loader, model, criterion, optimizer):
    batch_time = utils.AverageMeter('Time', ':6.3f')
    data_time = utils.AverageMeter('Data', ':6.3f')
    losses = utils.AverageMeter('Loss', ':.4e')
    top1 = utils.AverageMeter('Acc@1', ':6.2f')
    top5 = utils.AverageMeter('Acc@5', ':6.2f')

    model.train()
    end = time.time()
    # scheduler.step()
    num_iter = train_loader._size // args.batch_size

    for batch_idx, batch_data in enumerate(train_loader):
        images = batch_data[0]['data'].cuda()
        targets = batch_data[0]['label'].squeeze().long().cuda()
        data_time.update(time.time() - end)
        adjust_learning_rate(optimizer, epoch, batch_idx, num_iter)

        # compute output
        logits = model(images)
        loss = criterion(logits, targets)

        # measure accuracy and record loss
        prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
        n = images.size(0)
        losses.update(loss.item(), n)  # accumulated loss
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % print_freq == 0:
            logger.info('Epoch[{0}]({1}/{2}): '
                        'Loss {loss.avg:.4f} '
                        'Prec@1(1,5) {top1.avg:.2f}, {top5.avg:.2f}'.format(
                            epoch, batch_idx, num_iter,
                            loss=losses, top1=top1, top5=top5))

    return losses.avg, top1.avg, top5.avg

def train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, loggers):
    objs = utils.AverageMeter()
    valid_iter = iter(valid_queue)
    batches = len(train_queue)

    for step, (input, target) in enumerate(train_queue):
        model.train()
        n = input.size(0)
        model.tick(1 / batches)
        input = input.float().cuda(non_blocking=True)
        target = target.float().cuda(non_blocking=True)

        input_search, target_search = next(valid_iter)
        input_search = input_search.float().cuda(non_blocking=True)
        target_search = target_search.float().cuda(non_blocking=True)

        valid_loss = architect.step(input, target, input_search, target_search,
                                    lr, optimizer, unrolled=args.unrolled)
        utils.log_loss(loggers["val"], valid_loss.item(), None, model.clock)

        optimizer.zero_grad()
        logits = model(input)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        objs.update(loss.item(), n)
        utils.log_loss(loggers["train"], loss.item(), None, model.clock)

        if step % args.report_freq == 0:
            logging.info('train %03d %e', step, objs.avg)

    return objs.avg

def train(train_queue, model, criterion, optimizer, train_logger):
    objs = utils.AverageMeter()
    model.train()
    batches = len(train_queue)

    for step, (input, target) in enumerate(train_queue):
        input = input.float().cuda()
        target = target.float().cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(torch.squeeze(logits), target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        n = input.size(0)
        objs.update(loss.item(), n)
        utils.log_loss(train_logger, loss.item(), None, 1 / batches)

        if step % args.report_freq == 0:
            logging.info('train %03d %e', step, objs.avg)

    return objs.avg

def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()

    if TORCH_VERSION.startswith('1'):
        with torch.no_grad():
            for step, (input, target) in enumerate(valid_queue):
                input = input.to(device)
                target = target.to(device)

                logits, _ = model(input)
                loss = criterion(logits, target)

                prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % args.report_freq == 0:
                    logging.info('valid %03d %e %f %f',
                                 step, objs.avg, top1.avg, top5.avg)
                if args.debug:
                    break
    else:
        # Legacy (pre-1.0) path. The original `.cuda(async=True)` no longer
        # parses on Python 3.7+, where `async` is a keyword;
        # `non_blocking=True` is its replacement from PyTorch 0.4 onward.
        for step, (input, target) in enumerate(valid_queue):
            input = Variable(input, volatile=True).cuda()
            target = Variable(target, volatile=True).cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data[0], n)
            top1.update(prec1.data[0], n)
            top5.update(prec5.data[0], n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f',
                             step, objs.avg, top1.avg, top5.avg)
            if args.debug:
                break

    return top1.avg, objs.avg

def train(train_queue, model, criterion, optimizer):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        if TORCH_VERSION in ['1.0.1', '1.1.0']:
            input = input.to(device)
            target = target.to(device)
        else:
            # `.cuda(async=True)` in the original no longer parses on
            # Python 3.7+ (`async` is a keyword); `non_blocking=True` is the
            # PyTorch >= 0.4 replacement.
            input = Variable(input).cuda()
            target = Variable(target).cuda(non_blocking=True)

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        if TORCH_VERSION.startswith('1'):
            nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        else:
            nn.utils.clip_grad_norm(model.parameters(), args.grad_clip)
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        if TORCH_VERSION.startswith('1'):
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)
        else:
            objs.update(loss.data[0], n)
            top1.update(prec1.data[0], n)
            top5.update(prec5.data[0], n)

        if step % args.report_freq == 0:
            logging.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if args.debug:
            break

    return top1.avg, objs.avg

def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.eval()

    for step, (inp, target) in enumerate(valid_queue):
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        with torch.no_grad():
            logits = model(inp)
            loss = criterion(logits, target)

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = inp.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

    return top1.avg, objs.avg

def train(self, train_queue, valid_queue, lr):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    # top5 = utils.AverageMeter()

    for step, (x, y) in enumerate(train_queue):
        self.model.train()
        n = x.size(0)
        x, y = x.to(self.device), y.to(self.device)

        # get a random mini batch from the search queue with replacement
        x_s, y_s = next(iter(valid_queue))
        x_s, y_s = x_s.to(self.device), y_s.to(self.device)

        self.architect.step(x, y, x_s, y_s, lr, self.optimizer,
                            unrolled=self.unrolled)

        self.optimizer.zero_grad()
        logits = self.model(x)
        loss = self.criterion(logits, y)
        loss.backward()
        nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip)
        self.optimizer.step()

        prec1 = utils.accuracy(logits, y, topk=1)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        # top5.update(prec5.item(), n)

        if step % 100 == 0:
            logging.info('train %03d %e %f', step, objs.avg, top1.avg)
            print('train: {} loss: {} acc: {}'.format(step, objs.avg, top1.avg))

    return top1.avg, objs.avg

def train_init(train_queue, model, alpha, criterion, optimizer, weight_params):
    """Update network weights on train set and architecture on val set."""
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()

    for step, (data, target) in enumerate(train_queue):
        model.train()
        n = data.size(0)

        # Update network weights using the train set.
        data = data.cuda()
        target = target.cuda()
        weights = alpha(data.size(0))
        # Detached copy so this step trains only the network weights.
        weights_no_grad = alpha.module.clone_weights(weights)

        optimizer.zero_grad()
        logits = model(data, weights_no_grad)
        loss = criterion(logits, target)
        # Zero-weighted sum over all parameters keeps every parameter in the
        # graph (e.g. so distributed gradient reduction sees them all).
        dummy = sum([torch.sum(param) for param in model.parameters()])
        loss += dummy * 0.
        loss.backward()
        nn.utils.clip_grad_norm_(weight_params, args.grad_clip)
        optimizer.step()

        # Calculate the accuracy.
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        if (step + 1) % args.report_freq == 0 or step == len(train_queue) - 1:
            objs_avg = utils.reduce_tensor(objs.avg, args.world_size)
            top1_avg = utils.reduce_tensor(top1.avg, args.world_size)
            top5_avg = utils.reduce_tensor(top5.avg, args.world_size)
            logging.info('train_init %03d %e %f %f',
                         step, objs_avg, top1_avg, top5_avg)

    return top1_avg, objs_avg

def infer(test_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    model.eval()

    # torch.no_grad() added for evaluation; the deprecated Variable wrappers
    # from the original are no-ops in PyTorch >= 0.4 and are dropped.
    with torch.no_grad():
        for step, (input, target) in enumerate(test_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1 = utils.accuracy(logits, target, topk=(1,))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1[0].item(), n)

            if step % args.report_freq == 0:
                logging.info('test %03d %e %f', step, objs.avg, top1.avg)

    return top1.avg, objs.avg

def train(train_queue, model, criterion, optimizer):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    batch_time = utils.AverageMeter()
    model.train()

    for step, (input, target) in enumerate(train_queue):
        target = target.cuda(non_blocking=True)
        input = input.cuda(non_blocking=True)
        b_start = time.time()

        optimizer.zero_grad()
        logits, logits_aux = model(input)
        loss = criterion(logits, target)
        if args.auxiliary:
            loss_aux = criterion(logits_aux, target)
            loss += args.auxiliary_weight * loss_aux
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()
        batch_time.update(time.time() - b_start)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            end_time = time.time()
            if step == 0:
                duration = 0
                start_time = time.time()
            else:
                duration = end_time - start_time
                start_time = time.time()
            logging.info('TRAIN Step: %03d Objs: %e R1: %f R5: %f '
                         'Duration: %ds BTime: %.3fs',
                         step, objs.avg, top1.avg, top5.avg,
                         duration, batch_time.avg)

    return top1.avg, objs.avg

def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()

    def valid_generator():
        for x, t in valid_queue:
            yield x, t

    valid_gen = valid_generator()

    # for step, (input, target) in enumerate(valid_queue):
    step = 0
    for input, target in valid_gen:
        step += 1
        if not args.disable_cuda:
            input = input.cuda()
            # `async=True` in the original predates Python 3.7, where `async`
            # became a keyword; `non_blocking=True` is the modern equivalent.
            target = target.cuda(non_blocking=True)

        logits = model(input, 0)
        loss = criterion(logits, target)

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)

        if step % args.report_freq == 0:
            logging.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
        if args.debug:
            break

    return top1.avg, objs.avg

def train(train_queue, model, criterion, optimizer, global_step):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()

    for step, (data, target) in enumerate(train_queue):
        n = data.size(0)
        data = data.cuda()
        target = target.cuda()

        # Forward.
        optimizer.zero_grad()
        logits = model(data)
        loss = criterion(logits, target)

        # Backward and step.
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip)
        optimizer.step()

        # Calculate the accuracy.
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        reduced_loss = utils.reduce_tensor(loss.data, args.world_size)
        prec1 = utils.reduce_tensor(prec1, args.world_size)
        prec5 = utils.reduce_tensor(prec5, args.world_size)
        objs.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(prec1), n)
        top5.update(to_python_float(prec5), n)

        if (step + 1) % args.report_freq == 0:
            current_lr = optimizer.param_groups[0]['lr']
            logging.info('train %03d %e %f %f lr: %e',
                         step, objs.avg, top1.avg, top5.avg, current_lr)
        global_step += 1

    return top1.avg, top5.avg, objs.avg, global_step

def infer(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.eval()

    # `volatile=True` and `.data[0]` in the original are pre-0.4 idioms;
    # torch.no_grad() and .item() are their modern equivalents.
    with torch.no_grad():
        for step, (input, target) in enumerate(valid_queue):
            input = input.cuda()
            target = target.cuda(non_blocking=True)

            logits, _ = model(input)
            loss = criterion(logits, target)

            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                logging.info('valid %03d %e %f %f',
                             step, objs.avg, top1.avg, top5.avg)

    return top1.avg, top5.avg, objs.avg

def infer(valid_queue, model, criterion, use_fly_bn=True):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    # With use_fly_bn the model stays in train mode, so BatchNorm uses
    # on-the-fly batch statistics instead of its running averages.
    if not use_fly_bn:
        model.eval()
    else:
        model.train()

    for step, (input, target) in enumerate(valid_queue):
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        with torch.no_grad():
            logits = model(input)
            loss = criterion(logits, target)

        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)

    logging.info('Valid Stats --- Objs: %e Acc: %f', objs.avg, top1.avg)
    return top1.avg, objs.avg