def train_fn(self, optimizer, criterion, loader, device, train=True):
    """Run one pass over ``loader``; update weights only when ``train`` is True.

    :param optimizer: optimization algorithm
    :param criterion: loss function
    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: if True run a training pass (backprop + optimizer step);
        if False only evaluate, with gradients disabled
    :return: (accuracy, loss) on the data
    """
    score = AvgrageMeter()
    objs = AvgrageMeter()
    # BUG FIX: the ``train`` flag used to be ignored — the method always
    # back-propagated and stepped the optimizer even on a test loader.
    # Honor it here; the default (True) preserves the old behavior.
    self.train(train)
    t = tqdm(loader)
    for images, labels in t:
        images = images.to(device)
        labels = labels.to(device)
        if train:
            optimizer.zero_grad()
            logits = self(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
        else:
            with torch.no_grad():  # evaluation only: no gradient bookkeeping
                logits = self(images)
                loss = criterion(logits, labels)
        acc, _ = accuracy(logits, labels, topk=(1, 5))
        n = images.size(0)
        objs.update(loss.item(), n)
        score.update(acc.item(), n)
        t.set_description('(=> Training) Loss: {:.4f}'.format(objs.avg))
    return score.avg, objs.avg
def infer(val_dataprovider, model, criterion, fair_arc_list, val_iters, archloader):
    """Evaluate ``model`` using the first architecture in ``fair_arc_list``.

    :param val_dataprovider: provider yielding (image, target) via ``.next()``
    :param model: supernet accepting (input, architecture-string)
    :param criterion: loss function
    :param fair_arc_list: candidate architectures; only ``fair_arc_list[0]`` is tested
    :param val_iters: number of validation batches to run
    :param archloader: helper that converts an arch list to the model's string form
    :return: (average top-1 accuracy, average loss)
    """
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    model.eval()
    now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    print('{} |=> Test rng = {}'.format(now, fair_arc_list[0]))
    with torch.no_grad():
        for step in range(val_iters):
            t0 = time.time()
            image, target = val_dataprovider.next()
            datatime = time.time() - t0
            # torch.autograd.Variable is a deprecated no-op since PyTorch 0.4;
            # move the tensors to the GPU directly.
            image = image.cuda()
            target = target.cuda()
            logits = model(image, archloader.convert_list_arc_str(fair_arc_list[0]))
            loss = criterion(logits, target)
            prec1, _ = accuracy(logits, target, topk=(1, 5))
            n = image.size(0)
            # Use .item() directly; the .data attribute bypasses autograd checks.
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
    now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    # Reports the last step's index and data-loading time only.
    print(
        '{} |=> valid: step={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}'.
        format(now, step, objs.avg, top1.avg, datatime))
    return top1.avg, objs.avg
def train(train_dataloader, val_dataloader, optimizer, scheduler, model, archloader, criterion, args, seed, epoch, writer=None):
    """Train the supernet for one epoch with SPOS-style single-path sampling.

    :param train_dataloader: training batches
    :param val_dataloader: unused here; kept for a uniform trainer signature
    :param optimizer: optimizer for the supernet weights
    :param scheduler: LR scheduler (only queried for the current LR)
    :param model: supernet accepting (input, architecture-list)
    :param archloader: provides SPOS-like random architecture samples
    :param criterion: loss function
    :param args: namespace providing epochs/gpu/nprocs/local_rank/batch_size
    :param seed: RNG seed (unused in the loop; kept for signature parity)
    :param epoch: current epoch index (0-based)
    :param writer: optional tensorboard ``SummaryWriter``
    """
    losses_, top1_, top5_ = AvgrageMeter(), AvgrageMeter(), AvgrageMeter()
    model.train()
    train_loader = tqdm(train_dataloader)
    train_loader.set_description('[%s%04d/%04d %s%f]' % (
        'Epoch:', epoch + 1, args.epochs, 'lr:', scheduler.get_last_lr()[0]))
    for step, (image, target) in enumerate(train_loader):
        n = image.size(0)
        image = Variable(image, requires_grad=False).cuda(
            args.gpu, non_blocking=True)
        target = Variable(target, requires_grad=False).cuda(
            args.gpu, non_blocking=True)
        # Single-path fair sampling, e.g. a per-layer channel-count list like
        # [16, 16, ..., 32, ..., 64, ...]; the trailing element is dropped below.
        spos_arc_list = archloader.generate_spos_like_batch().tolist()
        optimizer.zero_grad()
        logits = model(image, spos_arc_list[:-1])
        loss = criterion(logits, target)
        prec1, prec5 = accuracy(logits, target, topk=(1, 5))
        if torch.cuda.device_count() > 1:
            # Average loss/metrics across distributed workers before logging.
            torch.distributed.barrier()
            loss = reduce_mean(loss, args.nprocs)
            prec1 = reduce_mean(prec1, args.nprocs)
            prec5 = reduce_mean(prec5, args.nprocs)
        loss.backward()
        optimizer.step()
        losses_.update(loss.item(), n)
        top1_.update(prec1.item(), n)
        # BUG FIX: the top-5 meter was previously fed prec1, so the reported
        # train_acc5 duplicated train_acc1.
        top5_.update(prec5.item(), n)
        postfix = {'train_loss': '%.6f' % losses_.avg,
                   'train_acc1': '%.6f' % top1_.avg,
                   'train_acc5': '%.6f' % top5_.avg}
        train_loader.set_postfix(log=postfix)
        if args.local_rank == 0 and step % 10 == 0 and writer is not None:
            # One shared x-coordinate; the three scalars previously computed it
            # three slightly different (though numerically equal) ways.
            global_step = step + len(train_dataloader) * epoch * args.batch_size
            writer.add_scalar("Train/loss", losses_.avg, global_step)
            writer.add_scalar("Train/acc1", top1_.avg, global_step)
            writer.add_scalar("Train/acc5", top5_.avg, global_step)
def train(train_dataprovider, val_dataprovider, optimizer, scheduler, model, archloader, criterion, args, val_iters, seed):
    """Train the supernet with strict fairness sampling.

    Each step accumulates gradients from every architecture in a fair batch
    before a single optimizer update, so every candidate op is trained equally.

    :param train_dataprovider: provider yielding (image, target) via ``.next()``
    :param val_dataprovider: validation provider, forwarded to ``infer``
    :param optimizer: supernet optimizer
    :param scheduler: LR scheduler, stepped once per outer iteration
    :param model: (distributed) supernet; ``model.module`` is used for eval
    :param archloader: fair-batch architecture sampler/converter
    :param criterion: loss function
    :param args: namespace with total_iters/gpu/grad_clip/report_freq/local_rank
    :param val_iters: number of validation batches per evaluation
    :param seed: RNG seed, echoed in the log lines
    """
    objs, top1 = AvgrageMeter(), AvgrageMeter()
    # Pre-materialize .grad buffers so cross-arch gradient accumulation is
    # well defined from the very first backward pass.
    for p in model.parameters():
        p.grad = torch.zeros_like(p)
    for step in range(args.total_iters):
        model.train()
        t0 = time.time()
        image, target = train_dataprovider.next()
        datatime = time.time() - t0
        n = image.size(0)
        optimizer.zero_grad()
        image = Variable(image, requires_grad=False).cuda(args.gpu)
        target = Variable(target, requires_grad=False).cuda(args.gpu)
        # Fair sampling: one row per candidate architecture (each layer's ops
        # shuffled so every op appears exactly once per fair batch).
        fair_arc_list = archloader.generate_niu_fair_batch()
        for arc in fair_arc_list:
            logits = model(image, archloader.convert_list_arc_str(arc))
            loss = criterion(logits, target)
            loss.backward()  # gradients accumulate across architectures
        nn.utils.clip_grad_value_(model.parameters(), args.grad_clip)
        optimizer.step()
        scheduler.step()
        # NOTE(review): metrics below reflect only the LAST architecture of
        # the fair batch — confirm that is intended before relying on them.
        prec1, _ = accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), n)
        top1.update(prec1.item(), n)
        if step % args.report_freq == 0 and args.local_rank == 0:
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            # get_last_lr() instead of the deprecated get_lr(), which returns
            # internally-scaled values when called mid-schedule.
            print(
                '{} |=> train: {} / {}, lr={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}, seed={}'
                .format(now, step, args.total_iters, scheduler.get_last_lr()[0],
                        objs.avg, top1.avg, float(datatime), seed))
    if args.local_rank == 0:
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print('{} |=> Test rng = {}'.format(now, fair_arc_list[0]))
        infer(val_dataprovider, model.module, criterion,
              fair_arc_list, val_iters, archloader)
def train(train_dataprovider, val_dataprovider, optimizer, scheduler, model, archloader, criterion, args, val_iters, seed, writer=None):
    """Fairness-sampled supernet training with periodic validation and logging.

    :param train_dataprovider: provider yielding (image, target) via ``.next()``
    :param val_dataprovider: validation provider, forwarded to ``infer``
    :param optimizer: supernet optimizer
    :param scheduler: LR scheduler, stepped once per outer iteration
    :param model: (distributed) supernet; ``model.module`` is used for eval
    :param archloader: fair-batch architecture sampler/converter
    :param criterion: loss function
    :param args: namespace with total_iters/gpu/grad_clip/world_size/
        report_freq/local_rank/exp
    :param val_iters: number of validation batches per evaluation
    :param seed: RNG seed, echoed in the log lines
    :param writer: optional tensorboard ``SummaryWriter``
    """
    objs, top1 = AvgrageMeter(), AvgrageMeter()
    # Pre-materialize .grad buffers so cross-arch gradient accumulation is
    # well defined from the very first backward pass.
    for p in model.parameters():
        p.grad = torch.zeros_like(p)
    for step in range(args.total_iters):
        model.train()
        t0 = time.time()
        image, target = train_dataprovider.next()
        datatime = time.time() - t0
        n = image.size(0)
        optimizer.zero_grad()
        image = Variable(image, requires_grad=False).cuda(args.gpu)
        target = Variable(target, requires_grad=False).cuda(args.gpu)
        # Fair sampling: accumulate gradients from every arch in the batch.
        fair_arc_list = archloader.generate_niu_fair_batch()
        for arc in fair_arc_list:
            logits = model(image, archloader.convert_list_arc_str(arc))
            loss = criterion(logits, target)
            # All-reduced copy for logging; the local loss drives backward.
            loss_reduce = reduce_tensor(loss, 0, args.world_size)
            loss.backward()
        nn.utils.clip_grad_value_(model.parameters(), args.grad_clip)
        optimizer.step()
        scheduler.step()
        # NOTE(review): metrics below reflect only the LAST architecture of
        # the fair batch — confirm that is intended before relying on them.
        prec1, _ = accuracy(logits, target, topk=(1, 5))
        objs.update(loss_reduce.item(), n)
        top1.update(prec1.item(), n)
        if step % args.report_freq == 0 and args.local_rank == 0:
            now = time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
            # get_last_lr() instead of the deprecated get_lr().
            print('{} |=> train: {} / {}, lr={}, loss={:.2f}, acc={:.2f}, datatime={:.2f}, seed={}'
                  .format(now, step, args.total_iters, scheduler.get_last_lr()[0],
                          objs.avg, top1.avg, float(datatime), seed))
        if args.local_rank == 0 and step % 5 == 0 and writer is not None:
            writer.add_scalar("Train/loss", objs.avg, step)
            writer.add_scalar("Train/acc1", top1.avg, step)
        if args.local_rank == 0 and step % args.report_freq == 0:
            # BUG FIX: infer() takes (val_dataprovider, model, criterion,
            # fair_arc_list, val_iters, archloader); the previous call passed a
            # stray leading train_dataprovider, raising a TypeError at the
            # first report step.
            top1_val, objs_val = infer(val_dataprovider, model.module, criterion,
                                       fair_arc_list, val_iters, archloader)
            if writer is not None:
                writer.add_scalar("Val/loss", objs_val, step)
                writer.add_scalar("Val/acc1", top1_val, step)
            save_checkpoint({'state_dict': model.state_dict(), }, step, args.exp)
def validate(model, device, args, *, all_iters=None, arch_loader=None):
    """Evaluate a dictionary of candidate architectures on the validation set.

    :param model: supernet accepting (input, architecture)
    :param device: torch device the batches are moved to
    :param args: namespace providing loss_function, val_loader and batch_size
    :param all_iters: global training iteration, used as the tensorboard x-base
    :param arch_loader: source of the {key: {"arch": ...}} candidates (required)
    """
    assert arch_loader is not None
    objs = AvgrageMeter()
    top1 = AvgrageMeter()
    top5 = AvgrageMeter()
    loss_function = args.loss_function
    val_loader = args.val_loader
    model.eval()
    t1 = time.time()
    result_dict = {}
    arch_dict = arch_loader.get_part_dict()
    with torch.no_grad():
        for ii, (key, value) in enumerate(arch_dict.items()):
            for data, target in val_loader:
                target = target.type(torch.LongTensor)
                data, target = data.to(device), target.to(device)
                output = model(data, value["arch"])
                loss = loss_function(output, target)
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                n = data.size(0)
                objs.update(loss.item(), n)
                top1.update(acc1.item(), n)
                top5.update(acc5.item(), n)
            # BUG FIX: `if ii % 5:` fired on every arch EXCEPT multiples of 5,
            # inverting the "report every 5th" intent used elsewhere in this
            # file (`step % args.report_freq == 0`).
            if ii % 5 == 0:
                logging.info("validate acc:{:.6f} iter:{}".format(
                    top1.avg / 100, ii))
                # loss/acc here are from the LAST validation batch of this arch.
                x_coord = all_iters * len(val_loader) * args.batch_size + ii
                writer.add_scalar("Val/Loss", loss.item(), x_coord)
                writer.add_scalar("Val/acc1", acc1.item(), x_coord)
                writer.add_scalar("Val/acc5", acc5.item(), x_coord)
            # NOTE(review): meters are never reset between archs, so this is a
            # running average over all archs evaluated so far, not a per-arch
            # score — confirm that is intended.
            result_dict[key] = top1.avg
    logInfo = 'TEST Iter {}: loss = {:.6f},\t'.format(all_iters, objs.avg) + \
              'Top-1 acc = {:.6f},\t'.format(top1.avg) + \
              'Top-5 acc = {:.6f},\t'.format(top5.avg) + \
              'val_time = {:.6f}'.format(time.time() - t1)
    logging.info(logInfo)
    logging.info("RESULTS")
    for ii, (key, value) in enumerate(result_dict.items()):
        logging.info("{: ^10} \t {:.6f}".format(key, value))
        if ii > 10:
            break
    logging.info("E N D")
def eval_fn(self, loader, device, train=False):
    """Evaluate the model and report its top-1 accuracy.

    :param loader: data loader for either training or testing set
    :param device: torch device
    :param train: boolean to indicate if training or test set is used
    :return: accuracy on the data
    """
    meter = AvgrageMeter()
    self.eval()
    progress = tqdm(loader)
    with torch.no_grad():  # inference only — skip gradient bookkeeping
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            predictions = self(batch_images)
            top1, _ = accuracy(predictions, batch_labels, topk=(1, 5))
            meter.update(top1.item(), batch_images.size(0))
            progress.set_description('(=> Test) Score: {:.4f}'.format(meter.avg))
    return meter.avg