def validate(net, path, image_size, data_loader, batch_size=100, device='cuda:0',
             compute_loss=False):
    """Evaluate ``net`` on ``data_loader`` and return its top-1 accuracy.

    The loader's ``dataset.transform`` is overwritten in place with a
    resize -> center-crop -> to-tensor -> normalize pipeline, where the
    resize target is ``ceil(image_size / 0.875)`` and the crop size is
    ``image_size``. ``cudnn.benchmark`` is also switched on.

    Args:
        net: Model to evaluate. It is wrapped in ``torch.nn.DataParallel``
            when ``device`` contains ``'cuda'``; otherwise it is only moved
            to ``device``.
        path: Not used by this function; kept for call compatibility.
        image_size: Side length of the center crop fed to the model.
        data_loader: Iterable of ``(images, labels)`` batches whose
            ``dataset.transform`` attribute is replaced.
        batch_size: Not used by this function; kept for call compatibility.
        device: Device string that model and batches are moved to.
        compute_loss: When true, a cross-entropy loss is computed for each
            batch and accumulated in the loss meter. The default (False)
            keeps the previous behavior, in which the loss meter is never
            updated and the reported ``loss`` is whatever a fresh
            ``AverageMeter`` holds. NOTE(review): the loss was originally
            disabled with a remark about "abnormal labels"; enable this only
            when labels are valid class indices for ``CrossEntropyLoss``.

    Returns:
        The average top-1 accuracy accumulated over all batches
        (``top1.avg``).
    """
    if 'cuda' in device:
        net = torch.nn.DataParallel(net).to(device)
    else:
        net = net.to(device)

    data_loader.dataset.transform = transforms.Compose([
        transforms.Resize(int(math.ceil(image_size / 0.875))),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        ),
    ])

    cudnn.benchmark = True
    # Only build the criterion when it is going to be used.
    criterion = nn.CrossEntropyLoss().to(device) if compute_loss else None

    # The model is already on ``device`` at this point, so no second move.
    net.eval()

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    with torch.no_grad():
        with tqdm(total=len(data_loader), desc='Validate') as t:
            for images, labels in data_loader:
                images, labels = images.to(device), labels.to(device)
                # compute output
                output = net(images)
                if compute_loss:
                    loss = criterion(output, labels)
                    losses.update(loss.item(), images.size(0))
                # measure accuracy
                acc1, acc5 = accuracy(output, labels, topk=(1, 5))
                top1.update(acc1[0].item(), images.size(0))
                top5.update(acc5[0].item(), images.size(0))
                t.set_postfix({
                    'loss': losses.avg,
                    'top1': top1.avg,
                    'top5': top5.avg,
                    'img_size': images.size(2),
                })
                t.update(1)

    print('Results: loss=%.5f,\t top1=%.1f,\t top5=%.1f' % (losses.avg, top1.avg, top5.avg))
    return top1.avg
def validate(self, epoch=0, is_test=True, run_str='', net=None, data_loader=None, no_logs=False):
    """Run one evaluation pass and return ``(loss, top1, top5)`` averages.

    Args:
        epoch: Zero-based epoch index; shown as ``epoch + 1`` in the
            progress-bar title.
        is_test: When no ``data_loader`` is given, selects
            ``self.run_config.test_loader`` (truthy) or
            ``self.run_config.valid_loader`` (falsy).
        run_str: Extra text appended to the progress-bar title.
        net: Model to evaluate; defaults to ``self.net``. It is wrapped in
            ``nn.DataParallel`` unless it already is one.
        data_loader: Iterable of ``(images, labels)`` batches.
        no_logs: Passed to ``tqdm`` as ``disable``.

    Returns:
        Tuple ``(losses.avg, top1.avg, top5.avg)`` of the three meters.
    """
    model = self.net if net is None else net
    if not isinstance(model, nn.DataParallel):
        model = nn.DataParallel(model)

    if data_loader is None:
        data_loader = self.run_config.test_loader if is_test else self.run_config.valid_loader

    model.eval()

    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()

    with torch.no_grad():
        bar_title = 'Validate Epoch #{} {}'.format(epoch + 1, run_str)
        with tqdm(total=len(data_loader), desc=bar_title, disable=no_logs) as progress:
            for images, labels in data_loader:
                images, labels = images.to(self.device), labels.to(self.device)

                # Forward pass and evaluation loss.
                logits = model(images)
                batch_loss = self.test_criterion(logits, labels)

                # Accumulate loss and top-k accuracy, weighted by batch size.
                top1_acc, top5_acc = accuracy(logits, labels, topk=(1, 5))
                loss_meter.update(batch_loss.item(), images.size(0))
                top1_meter.update(top1_acc[0].item(), images.size(0))
                top5_meter.update(top5_acc[0].item(), images.size(0))

                stats = {
                    'loss': loss_meter.avg,
                    'top1': top1_meter.avg,
                    'top5': top5_meter.avg,
                    'img_size': images.size(2),
                }
                progress.set_postfix(stats)
                progress.update(1)

    return loss_meter.avg, top1_meter.avg, top5_meter.avg
def validate(self, epoch=0, is_test=True, run_str='', net=None, data_loader=None, no_logs=False):
    """Run one evaluation pass using ``DistributedMetric`` accumulators.

    Args:
        epoch: Zero-based epoch index; shown as ``epoch + 1`` in the
            progress-bar title.
        is_test: When no ``data_loader`` is given, selects
            ``self.run_config.test_loader`` (truthy) or
            ``self.run_config.valid_loader`` (falsy).
        run_str: Extra text appended to the progress-bar title.
        net: Model to evaluate; defaults to ``self.net``.
        data_loader: Iterable of ``(images, labels)`` batches.
        no_logs: The progress bar is disabled when this is truthy or when
            ``self.is_root`` is falsy.

    Returns:
        Tuple of ``.avg.item()`` for the ``val_loss``, ``val_top1`` and
        ``val_top5`` metrics, in that order.
    """
    model = self.net if net is None else net

    if data_loader is None:
        data_loader = self.run_config.test_loader if is_test else self.run_config.valid_loader

    model.eval()

    loss_metric = DistributedMetric('val_loss')
    top1_metric = DistributedMetric('val_top1')
    top5_metric = DistributedMetric('val_top5')

    with torch.no_grad():
        bar_title = 'Validate Epoch #{} {}'.format(epoch + 1, run_str)
        with tqdm(total=len(data_loader), desc=bar_title,
                  disable=no_logs or not self.is_root) as progress:
            for images, labels in data_loader:
                images, labels = images.cuda(), labels.cuda()

                # Forward pass and evaluation loss.
                logits = model(images)
                batch_loss = self.test_criterion(logits, labels)

                # Metrics receive tensors (no .item()) plus the batch size.
                top1_acc, top5_acc = accuracy(logits, labels, topk=(1, 5))
                loss_metric.update(batch_loss, images.size(0))
                top1_metric.update(top1_acc[0], images.size(0))
                top5_metric.update(top5_acc[0], images.size(0))

                stats = {
                    'loss': loss_metric.avg.item(),
                    'top1': top1_metric.avg.item(),
                    'top5': top5_metric.avg.item(),
                    'img_size': images.size(2),
                }
                progress.set_postfix(stats)
                progress.update(1)

    return loss_metric.avg.item(), top1_metric.avg.item(), top5_metric.avg.item()
# Evaluation section of the script: runs `net` over `data_loader` on the
# default CUDA device and prints average loss and top-1/top-5 accuracy.
# `net`, `data_loader`, `args` and `image_size` must already be defined
# before this point.
criterion = nn.CrossEntropyLoss().cuda()

net.eval()

# Accumulators for loss and accuracy; each update passes the batch size as
# the second argument (presumably used as the averaging weight - confirm
# against AverageMeter).
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()

with torch.no_grad():
    with tqdm(total=len(data_loader), desc='Validate') as t:
        for i, (images, labels) in enumerate(data_loader):
            images, labels = images.cuda(), labels.cuda()
            # compute output
            output = net(images)
            loss = criterion(output, labels)
            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0].item(), images.size(0))
            top5.update(acc5[0].item(), images.size(0))
            # Show the running averages and the size of dimension 2 of the
            # current image batch in the progress bar.
            t.set_postfix({
                'loss': losses.avg,
                'top1': top1.avg,
                'top5': top5.avg,
                'img_size': images.size(2),
            })
            t.update(1)

# Final summary for the evaluated network.
print('Test OFA specialized net <%s> with image size %d:' % (args.net, image_size))
print('Results: loss=%.5f,\t top1=%.1f,\t top5=%.1f' % (losses.avg, top1.avg, top5.avg))
def train_one_epoch(run_manager, args, epoch, warmup_epochs=0, warmup_lr=0):
    """Train ``run_manager.net`` for one epoch, sampling several subnets per batch.

    For every batch, ``args.dynamic_batch_size`` subnets are sampled via
    ``sample_active_subnet()``. Each one is run forward and backward on the
    same batch, so gradients accumulate, and a single optimizer step follows.

    Args:
        run_manager: Object providing ``net``, ``run_config``, ``optimizer``,
            ``train_criterion`` and ``is_root``.
        args: Settings object; reads ``kd_ratio``, ``kd_type``,
            ``teacher_model``, ``dynamic_batch_size`` and
            ``independent_distributed_sampling``.
        epoch: Zero-based epoch index.
        warmup_epochs: Epochs below this index use
            ``warmup_adjust_learning_rate``; later ones use
            ``adjust_learning_rate`` with ``epoch - warmup_epochs``.
        warmup_lr: Forwarded to ``warmup_adjust_learning_rate``.

    Returns:
        Tuple of ``.avg.item()`` for the ``train_loss``, ``train_top1`` and
        ``train_top5`` metrics, in that order.
    """
    dynamic_net = run_manager.net

    # switch to train mode
    dynamic_net.train()
    run_manager.run_config.train_loader.sampler.set_epoch(epoch)
    MyRandomResizedCrop.EPOCH = epoch

    num_batches = len(run_manager.run_config.train_loader)

    data_time = AverageMeter()
    loss_metric = DistributedMetric('train_loss')
    top1_metric = DistributedMetric('train_top1')
    top5_metric = DistributedMetric('train_top5')

    bar_title = 'Train Epoch #{}'.format(epoch + 1)
    with tqdm(total=num_batches, desc=bar_title, disable=not run_manager.is_root) as progress:
        tick = time.time()
        for step, (images, labels) in enumerate(run_manager.run_config.train_loader):
            data_time.update(time.time() - tick)

            # Learning-rate schedule: warm-up phase first, regular afterwards.
            if epoch < warmup_epochs:
                new_lr = run_manager.run_config.warmup_adjust_learning_rate(
                    run_manager.optimizer, warmup_epochs * num_batches, num_batches,
                    epoch, step, warmup_lr,
                )
            else:
                new_lr = run_manager.run_config.adjust_learning_rate(
                    run_manager.optimizer, epoch - warmup_epochs, step, num_batches)

            images, labels = images.cuda(), labels.cuda()

            # Soft targets from the teacher, computed without gradients.
            # The teacher is put in train mode before its forward pass.
            if args.kd_ratio > 0:
                args.teacher_model.train()
                with torch.no_grad():
                    soft_logits = args.teacher_model(images).detach()
                    soft_label = F.softmax(soft_logits, dim=1)

            # clear gradients
            run_manager.optimizer.zero_grad()

            subnet_losses = []
            subnet_top1 = []
            subnet_top5 = []
            subnet_str = ''
            for subnet_idx in range(args.dynamic_batch_size):
                # Seed the RNG before sampling: process id plus wall-clock
                # time, or an integer built from the global step and the
                # subnet index.
                if args.independent_distributed_sampling:
                    subnet_seed = os.getpid() + time.time()
                else:
                    subnet_seed = int(
                        '%d%.3d%.3d' % (epoch * num_batches + step, subnet_idx, 0))
                random.seed(subnet_seed)
                subnet_settings = dynamic_net.sample_active_subnet()

                # Human-readable summary of the sampled configuration.
                described = []
                for key, val in subnet_settings.items():
                    shown = '%.1f' % subset_mean(val, 0) if isinstance(val, list) else val
                    described.append('%s_%s' % (key, shown))
                subnet_str += '%d: ' % subnet_idx + ','.join(described) + ' || '

                output = run_manager.net(images)
                if args.kd_ratio == 0:
                    loss = run_manager.train_criterion(output, labels)
                    loss_type = 'ce'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + run_manager.train_criterion(
                        output, labels)
                    # Rescale the combined loss by 2 / (kd_ratio + 1).
                    loss = loss * (2 / (args.kd_ratio + 1))
                    loss_type = '%.1fkd-%s & ce' % (args.kd_ratio, args.kd_type)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, labels, topk=(1, 5))
                subnet_losses.append(loss)
                subnet_top1.append(acc1[0])
                subnet_top5.append(acc5[0])

                loss.backward()

            run_manager.optimizer.step()

            loss_metric.update(list_mean(subnet_losses), images.size(0))
            top1_metric.update(list_mean(subnet_top1), images.size(0))
            top5_metric.update(list_mean(subnet_top5), images.size(0))

            stats = {
                'loss': loss_metric.avg.item(),
                'top1': top1_metric.avg.item(),
                'top5': top5_metric.avg.item(),
                'R': images.size(2),
                'lr': new_lr,
                'loss_type': loss_type,
                'seed': str(subnet_seed),
                'str': subnet_str,
                'data_time': data_time.avg,
            }
            progress.set_postfix(stats)
            progress.update(1)
            tick = time.time()

    return loss_metric.avg.item(), top1_metric.avg.item(), top5_metric.avg.item()
def update_metric(self, metric_dict, output, labels):
    """Update the ``'top1'`` and ``'top5'`` entries of ``metric_dict``.

    Top-1 and top-5 accuracy of ``output`` against ``labels`` are computed
    with ``accuracy``. Each is passed to the matching meter as a Python
    scalar, together with ``output.size(0)``.
    """
    top1_acc, top5_acc = accuracy(output, labels, topk=(1, 5))
    for key, acc in (('top1', top1_acc), ('top5', top5_acc)):
        metric_dict[key].update(acc[0].item(), output.size(0))
def train_one_epoch(self, args, epoch, warmup_epochs=5, warmup_lr=0):
    """Train ``self.net`` for one epoch using ``DistributedMetric`` accumulators.

    Args:
        args: Settings object; reads ``teacher_model``, ``kd_type`` and
            ``kd_ratio``. When ``teacher_model`` is not None, a
            distillation term weighted by ``kd_ratio`` is added to the
            training criterion.
        epoch: Zero-based epoch index; also passed to the train sampler's
            ``set_epoch`` and stored in ``MyRandomResizedCrop.EPOCH``.
        warmup_epochs: Epochs below this index use
            ``warmup_adjust_learning_rate``; later ones use
            ``adjust_learning_rate`` with ``epoch - warmup_epochs``.
        warmup_lr: Forwarded to ``warmup_adjust_learning_rate``.

    Returns:
        Tuple of ``.avg.item()`` for the ``train_loss``, ``train_top1`` and
        ``train_top5`` metrics, in that order.
    """
    self.net.train()
    self.run_config.train_loader.sampler.set_epoch(epoch)
    MyRandomResizedCrop.EPOCH = epoch

    num_batches = len(self.run_config.train_loader)

    loss_metric = DistributedMetric('train_loss')
    top1_metric = DistributedMetric('train_top1')
    top5_metric = DistributedMetric('train_top5')
    data_time = AverageMeter()

    bar_title = 'Train Epoch #{}'.format(epoch + 1)
    with tqdm(total=num_batches, desc=bar_title, disable=not self.is_root) as progress:
        tick = time.time()
        for step, (images, labels) in enumerate(self.run_config.train_loader):
            data_time.update(time.time() - tick)

            # Learning-rate schedule: warm-up phase first, regular afterwards.
            if epoch < warmup_epochs:
                new_lr = self.run_config.warmup_adjust_learning_rate(
                    self.optimizer, warmup_epochs * num_batches, num_batches,
                    epoch, step, warmup_lr,
                )
            else:
                new_lr = self.run_config.adjust_learning_rate(
                    self.optimizer, epoch - warmup_epochs, step, num_batches)

            images, labels = images.cuda(), labels.cuda()

            # Soft targets from the teacher, computed without gradients.
            # The teacher is put in train mode before its forward pass.
            if args.teacher_model is not None:
                args.teacher_model.train()
                with torch.no_grad():
                    soft_logits = args.teacher_model(images).detach()
                    soft_label = F.softmax(soft_logits, dim=1)

            # Forward pass and loss.
            output = self.net(images)
            if args.teacher_model is not None:
                if args.kd_type == 'ce':
                    kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                else:
                    kd_loss = F.mse_loss(output, soft_logits)
                loss = args.kd_ratio * kd_loss + self.train_criterion(output, labels)
                loss_type = '%.1fkd-%s & ce' % (args.kd_ratio, args.kd_type)
            else:
                loss = self.train_criterion(output, labels)
                loss_type = 'ce'

            # Parameter update.
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Metrics receive tensors (no .item()) plus the batch size.
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))
            loss_metric.update(loss, images.size(0))
            top1_metric.update(acc1[0], images.size(0))
            top5_metric.update(acc5[0], images.size(0))

            stats = {
                'loss': loss_metric.avg.item(),
                'top1': top1_metric.avg.item(),
                'top5': top5_metric.avg.item(),
                'img_size': images.size(2),
                'lr': new_lr,
                'loss_type': loss_type,
                'data_time': data_time.avg,
            }
            progress.set_postfix(stats)
            progress.update(1)
            tick = time.time()

    return loss_metric.avg.item(), top1_metric.avg.item(), top5_metric.avg.item()
def train_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
    """Train ``self.net`` for one epoch and return ``(loss, top1, top5)`` averages.

    When ``self.network`` is a ``torchvision.models.Inception3``, the model
    is expected to return ``(output, aux_outputs)`` and the loss is
    ``criterion(output) + 0.4 * criterion(aux_outputs)``. When
    ``args.teacher_model`` is not None, a distillation term weighted by
    ``args.kd_ratio`` is added to that loss. If ``self.mix_prec`` is not
    None, the backward pass goes through ``apex.amp.scale_loss``.

    Args:
        args: Settings object; reads ``teacher_model``, ``kd_type`` and
            ``kd_ratio``.
        epoch: Zero-based epoch index.
        warmup_epochs: Epochs below this index use
            ``warmup_adjust_learning_rate``; later ones use
            ``adjust_learning_rate`` with ``epoch - warmup_epochs``.
        warmup_lr: Forwarded to ``warmup_adjust_learning_rate``.

    Returns:
        Tuple ``(losses.avg, top1.avg, top5.avg)`` of the three meters.
    """
    # switch to train mode
    self.net.train()

    num_batches = len(self.run_config.train_loader)

    loss_meter = AverageMeter()
    top1_meter = AverageMeter()
    top5_meter = AverageMeter()
    data_time = AverageMeter()

    with tqdm(total=num_batches, desc='Train Epoch #{}'.format(epoch + 1)) as progress:
        tick = time.time()
        for step, (images, labels) in enumerate(self.run_config.train_loader):
            data_time.update(time.time() - tick)

            # Learning-rate schedule: warm-up phase first, regular afterwards.
            if epoch < warmup_epochs:
                new_lr = self.run_config.warmup_adjust_learning_rate(
                    self.optimizer, warmup_epochs * num_batches, num_batches,
                    epoch, step, warmup_lr,
                )
            else:
                new_lr = self.run_config.adjust_learning_rate(
                    self.optimizer, epoch - warmup_epochs, step, num_batches)

            images, labels = images.to(self.device), labels.to(self.device)

            # Soft targets from the teacher, computed without gradients.
            # The teacher is put in train mode before its forward pass.
            if args.teacher_model is not None:
                args.teacher_model.train()
                with torch.no_grad():
                    soft_logits = args.teacher_model(images).detach()
                    soft_label = F.softmax(soft_logits, dim=1)

            # Forward pass; Inception3 additionally yields an auxiliary head.
            if isinstance(self.network, torchvision.models.Inception3):
                output, aux_outputs = self.net(images)
                main_loss = self.train_criterion(output, labels)
                aux_loss = self.train_criterion(aux_outputs, labels)
                loss = main_loss + 0.4 * aux_loss
            else:
                output = self.net(images)
                loss = self.train_criterion(output, labels)

            if args.teacher_model is not None:
                if args.kd_type == 'ce':
                    kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                else:
                    kd_loss = F.mse_loss(output, soft_logits)
                loss = args.kd_ratio * kd_loss + loss
                loss_type = '%.1fkd-%s & ce' % (args.kd_ratio, args.kd_type)
            else:
                loss_type = 'ce'

            # compute gradient and do SGD step
            self.net.zero_grad()  # or self.optimizer.zero_grad()
            if self.mix_prec is None:
                loss.backward()
            else:
                from apex import amp
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            self.optimizer.step()

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))
            loss_meter.update(loss.item(), images.size(0))
            top1_meter.update(acc1[0].item(), images.size(0))
            top5_meter.update(acc5[0].item(), images.size(0))

            stats = {
                'loss': loss_meter.avg,
                'top1': top1_meter.avg,
                'top5': top5_meter.avg,
                'img_size': images.size(2),
                'lr': new_lr,
                'loss_type': loss_type,
                'data_time': data_time.avg,
            }
            progress.set_postfix(stats)
            progress.update(1)
            tick = time.time()

    return loss_meter.avg, top1_meter.avg, top5_meter.avg