def train(epoch):
    train_sampler.set_epoch(epoch)
    model.train()
    losses = AverageMeter()
    top1 = AverageMeter()
    global best_pred, acclist_train
    for batch_idx, (data, target) in enumerate(train_loader):
        scheduler(optimizer, batch_idx, epoch, best_pred)
        if not args.mixup:
            data, target = data.cuda(args.gpu), target.cuda(args.gpu)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if not args.mixup:
            acc1 = accuracy_multilabel(output, target)
            top1.update(acc1, data.size(0))
        losses.update(loss.item(), data.size(0))
        if batch_idx % 100 == 0 and args.gpu == 0:
            if args.mixup:
                print('Batch: %d| Loss: %.3f' % (batch_idx, losses.avg))
            else:
                print('Batch: %d| Loss: %.3f | Top1: %.3f' % (batch_idx, losses.avg, top1.avg))
    acclist_train += [top1.avg]
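# AverageMeter is used throughout these scripts but never defined here. A
# minimal sketch, assuming the standard running-average helper from the
# PyTorch ImageNet example (the names and behavior are assumptions, not the
# project's actual implementation):
class AverageMeter(object):
    """Tracks a running sum, count, and average of a scalar metric."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        # val is the per-batch value; n is the batch size it was averaged over
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count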
def validate(epoch):
    model.eval()
    top1 = AverageMeter()
    top5 = AverageMeter()
    global best_pred, acclist_train, acclist_val
    is_best = False
    for batch_idx, (data, target) in enumerate(val_loader):
        data, target = data.cuda(args.gpu), target.cuda(args.gpu)
        with torch.no_grad():
            output = model(data)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], data.size(0))
            top5.update(acc5[0], data.size(0))
    # sum the per-process counters across all GPUs
    sum1, cnt1, sum5, cnt5 = torch_dist_sum(args.gpu, top1.sum, top1.count, top5.sum, top5.count)
    if args.gpu == 0:
        top1_acc = sum(sum1) / sum(cnt1)
        top5_acc = sum(sum5) / sum(cnt5)
        print('Validation: Top1: %.3f | Top5: %.3f' % (top1_acc, top5_acc))
        # save checkpoint
        acclist_val += [top1_acc]
        if top1_acc > best_pred:
            best_pred = top1_acc
            is_best = True
        encoding.utils.save_checkpoint({
            'epoch': epoch,
            'state_dict': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_pred': best_pred,
            'acclist_train': acclist_train,
            'acclist_val': acclist_val,
        }, args=args, is_best=is_best)
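# torch_dist_sum above collects each process's sums and counts so that rank 0
# can compute global accuracy; its usage (sum(sum1) / sum(cnt1)) implies it
# returns the per-rank values for each input. A minimal sketch of what such a
# helper might look like, assuming torch.distributed is initialized; this is
# an illustration, not the library's actual function:
import torch
import torch.distributed as dist

def torch_dist_sum_sketch(gpu, *values):
    world_size = dist.get_world_size()
    out = []
    for v in values:
        t = torch.tensor([float(v)], device='cuda:{}'.format(gpu))
        # gather this value from every rank so each output is a per-rank list
        gathered = [torch.zeros_like(t) for _ in range(world_size)]
        dist.all_gather(gathered, t)
        out.append([g.item() for g in gathered])
    return out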
def validate():
    model.eval()
    top1 = AverageMeter()
    for batch_idx, (data, target) in enumerate(val_loader):
        data, target = data.cuda(gpu), target.cuda(gpu)
        with torch.no_grad():
            output = model(data)
            acc1 = accuracy(output, target, topk=(1,))
            top1.update(acc1[0], data.size(0))
    return top1.avg
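# accuracy(output, target, topk=...) is also undefined in these snippets. A
# minimal sketch, assuming the standard top-k helper from the PyTorch ImageNet
# example (an assumption about this repo's helper, not a verbatim copy):
def accuracy_sketch(output, target, topk=(1,)):
    """Return a list of top-k accuracies (in %) for each k in topk."""
    maxk = max(topk)
    batch_size = target.size(0)
    # indices of the maxk highest logits per sample, transposed to (maxk, batch)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res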
def validate(epoch):
    model.eval()
    top1 = AverageMeter()
    top5 = AverageMeter()
    global best_pred, acclist_train, acclist_val
    is_best = False
    correct, total = 0, 0
    for batch_idx, (data, target) in enumerate(val_loader):
        data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], data.size(0))
            top5.update(acc5[0], data.size(0))
            # raw correct/total bookkeeping as a cross-check on the meters
            pred = output.data.max(1)[1]
            correct += pred.eq(target.data).cpu().sum().item()
            total += target.size(0)
    # Single-GPU variant: the torch_dist_sum cross-process reduction used in
    # the distributed version is not needed here.
    top1_acc = top1.avg
    top5_acc = top5.avg
    print("Validation correct:%d total:%d" % (correct, total))
    print('Valid set, Accuracy: %.3f' % (correct / total))
    print('Validation: Top1: %.3f | Top5: %.3f' % (top1_acc, top5_acc))
    # save checkpoint
    acclist_val += [top1_acc]
    if top1_acc > best_pred:
        best_pred = top1_acc
        is_best = True
    encoding.utils.save_checkpoint({
        'epoch': epoch,
        'state_dict': model.module.state_dict(),
        'optimizer': optimizer.state_dict(),
        'best_pred': best_pred,
        'acclist_train': acclist_train,
        'acclist_val': acclist_val,
    }, args=args, is_best=is_best)
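# encoding.utils.save_checkpoint belongs to the PyTorch-Encoding library and
# its internals are not shown here. A minimal sketch of equivalent
# checkpointing logic (the helper name and file layout are assumptions):
import os
import shutil
import torch

def save_checkpoint_sketch(state, directory, is_best):
    os.makedirs(directory, exist_ok=True)
    filename = os.path.join(directory, 'checkpoint.pth.tar')
    torch.save(state, filename)  # always keep the latest epoch
    if is_best:
        # keep a separate copy of the best-scoring weights
        shutil.copyfile(filename, os.path.join(directory, 'model_best.pth.tar'))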
def train(epoch):
    # train_sampler.set_epoch(epoch)  # only needed with a DistributedSampler
    model.train()
    train_loss, correct, total = 0, 0, 0
    top1 = AverageMeter()
    global best_pred, acclist_train
    for batch_idx, (data, target) in enumerate(train_loader):
        scheduler(optimizer, batch_idx, epoch, best_pred)
        if not args.mixup:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output, embed = model(data)
        loss1 = criterion(output, target)
        loss2 = criterion_triplet(embed, target)
        loss = loss1 + 0.1 * loss2
        loss.backward()
        optimizer.step()
        # alternative bookkeeping: running loss plus raw correct/total counts
        train_loss += loss.item()
        pred = output.data.max(1)[1]
        correct += pred.eq(target.data).cpu().sum().item()
        total += target.size(0)
        if not args.mixup:
            acc1 = accuracy(output, target, topk=(1,))
            top1.update(acc1[0], data.size(0))
        if batch_idx % 100 == 0:
            if args.mixup:
                print('Batch: %d| Loss: %.3f' % (batch_idx, train_loss / (batch_idx + 1)))
            else:
                print('Batch: %d| Loss: %.3f | Top1: %.3f' % (batch_idx, train_loss / (batch_idx + 1), top1.avg))
    print('Train set, Accuracy: ({:.0f}%)\n'.format(100. * correct / total))
    print('Top1: %.3f' % top1.avg)
    acclist_train += [top1.avg]
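# criterion_triplet above takes an embedding batch plus the class labels,
# which suggests a batch-hard triplet loss. A minimal sketch under that
# assumption (the margin and mining strategy are illustrative, not this
# project's actual settings):
import torch

def batch_hard_triplet_loss_sketch(embed, target, margin=0.3):
    # pairwise Euclidean distances between all embeddings in the batch
    dmat = torch.cdist(embed, embed, p=2)
    same = target.unsqueeze(0) == target.unsqueeze(1)  # (B, B) same-label mask
    # hardest positive: farthest sample sharing the anchor's label
    pos_dist = dmat.masked_fill(~same, 0).max(dim=1).values
    # hardest negative: closest sample with a different label
    neg_dist = dmat.masked_fill(same, float('inf')).min(dim=1).values
    return torch.clamp(pos_dist - neg_dist + margin, min=0).mean()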
def train(epoch):
    train_sampler.set_epoch(epoch)
    model.train()
    losses = AverageMeter()
    top1 = AverageMeter()
    global best_pred, acclist_train
    tic = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        scheduler(optimizer, batch_idx, epoch, best_pred)
        if not args.mixup:
            data, target = data.cuda(args.gpu), target.cuda(args.gpu)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        if args.amp:
            # NVIDIA apex mixed precision: scale the loss before backward
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        optimizer.step()
        if not args.mixup:
            acc1 = accuracy(output, target, topk=(1,))
            top1.update(acc1[0], data.size(0))
        losses.update(loss.item(), data.size(0))
        if batch_idx % 100 == 0 and args.gpu == 0:
            iter_per_sec = 100.0 / (time.time() - tic) if batch_idx != 0 else 1.0 / (time.time() - tic)
            tic = time.time()
            if args.mixup:
                print('Epoch: {}, Iter: {}, Speed: {:.3f} iter/sec, Train loss: {:.3f}'.format(
                    epoch, batch_idx, iter_per_sec, losses.avg))
            else:
                print('Epoch: {}, Iter: {}, Speed: {:.3f} iter/sec, Top1: {:.3f}'.format(
                    epoch, batch_idx, iter_per_sec, top1.avg))
    acclist_train += [top1.avg]
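# The AMP branch above uses NVIDIA apex (amp.scale_loss). For reference, the
# same step written against PyTorch's built-in torch.cuda.amp would look
# roughly like this (a sketch of the native API, not this script's code):
import torch

scaler = torch.cuda.amp.GradScaler()  # created once, outside the training loop

def amp_step_sketch(model, data, target, criterion, optimizer):
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():  # run the forward pass in mixed precision
        output = model(data)
        loss = criterion(output, target)
    scaler.scale(loss).backward()    # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)           # unscales gradients, then optimizer.step()
    scaler.update()
    return loss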
def validate(auto_policy):
    model.eval()
    top1 = AverageMeter()
    _, transform_val = get_transform(args.dataset, args.base_size, args.crop_size)
    if auto_policy is not None:
        transform_val.transforms.insert(0, Augmentation(auto_policy))
    valset.transform = transform_val
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    for batch_idx, (data, target) in enumerate(val_loader):
        data, target = data.cuda(gpu), target.cuda(gpu)
        with torch.no_grad():
            output = model(data)
            acc1 = accuracy(output, target, topk=(1,))
            top1.update(acc1[0], data.size(0))
    return top1.avg
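# Augmentation(auto_policy) above wraps a searched auto-augment policy so it
# can be prepended to a torchvision transform pipeline. A minimal sketch of
# such a wrapper; the structure of the policy object (sub-policies of
# (op_fn, probability, magnitude) triples) is an assumption:
import random

class AugmentationSketch(object):
    def __init__(self, policy):
        self.policy = policy

    def __call__(self, img):
        # pick one sub-policy per image and apply its ops stochastically
        for op_fn, prob, magnitude in random.choice(self.policy):
            if random.random() < prob:
                img = op_fn(img, magnitude)
        return img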
def main():
    # init the args
    args = Options().parse()
    args.cuda = not args.no_cuda and torch.cuda.is_available()
    print(args)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
    # init dataloader
    _, transform_val = encoding.transforms.get_transform(
        args.dataset, args.base_size, args.crop_size)
    valset = encoding.datasets.get_dataset(
        args.dataset,
        root=os.path.expanduser('~/.encoding/data'),
        transform=transform_val,
        train=False,
        download=True)
    val_loader = torch.utils.data.DataLoader(
        valset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True if args.cuda else False)
    # init the model
    model_kwargs = {'pretrained': True}
    if args.rectify:
        model_kwargs['rectified_conv'] = True
        model_kwargs['rectify_avg'] = args.rectify_avg
    model = encoding.models.get_model(args.model, **model_kwargs)
    print(model)
    if args.cuda:
        model.cuda()
        # Please use CUDA_VISIBLE_DEVICES to control the number of gpus
        model = nn.DataParallel(model)
    # checkpoint
    if args.verify:
        if os.path.isfile(args.verify):
            print("=> loading checkpoint '{}'".format(args.verify))
            model.module.load_state_dict(torch.load(args.verify))
        else:
            raise RuntimeError("=> no verify checkpoint found at '{}'".format(args.verify))
    elif args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.module.load_state_dict(checkpoint['state_dict'])
        else:
            raise RuntimeError("=> no resume checkpoint found at '{}'".format(args.resume))
    model.eval()
    top1 = AverageMeter()
    top5 = AverageMeter()
    tbar = tqdm(val_loader, desc='\r')
    for batch_idx, (data, target) in enumerate(tbar):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], data.size(0))
            top5.update(acc5[0], data.size(0))
        tbar.set_description('Top1: %.3f | Top5: %.3f' % (top1.avg, top5.avg))
    print('Top1 Acc: %.3f | Top5 Acc: %.3f ' % (top1.avg, top5.avg))
    if args.export:
        torch.save(model.module.state_dict(), args.export + '.pth')
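# Standard entry point so the verification script can be run directly. The
# flag spellings in the example invocation are assumptions inferred from the
# args attributes used above, not confirmed CLI options:
#   CUDA_VISIBLE_DEVICES=0,1 python verify.py --dataset imagenet \
#       --model resnest50 --verify resnest50.pth
if __name__ == '__main__':
    main()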