import os
import shutil
import time

import torch

from util import targets_to_one_hot

# AverageMeter, accuracy, and tensor2array are assumed to be provided by the
# repo's utility modules alongside targets_to_one_hot; they are referenced
# below but not defined in this file.


def eval_net(net, args):
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()

    # Default the mode flags before reading them.
    if 'validating' not in args:
        args.validating = False
    if 'testing' not in args:
        args.testing = False
    if args.validating:
        print('Validating at epoch {}'.format(args.epoch + 1))
    if args.testing:
        print('Testing at epoch {}'.format(args.epoch + 1))

    net.eval()
    total_time = 0
    end_time = time.time()

    for batch_idx, (inputs, targets) in enumerate(args.data_loader):
        with torch.no_grad():
            if args.use_gpu:
                targets = targets.cuda()
            outputs = net(inputs)
            if isinstance(outputs, list):  # some nets return intermediate maps
                outputs = outputs[-1]

            if args.loss == 'CE':
                loss = args.criterion(outputs, targets)
            elif args.loss == 'L2':
                targets_one_hot = targets_to_one_hot(targets, args.num_outputs)
                loss = args.criterion(outputs, targets_one_hot) * args.num_outputs * 0.5

            losses.update(loss.item(), inputs.size(0))
            prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
            top1.update(prec1[0].item(), inputs.size(0))
            top5.update(prec5[0].item(), inputs.size(0))

            total_time += (time.time() - end_time)
            end_time = time.time()

            if args.msg:
                print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%'
                      % (losses.avg, top1.avg, top5.avg))

    if args.testing:
        args.test_losses.append(losses.avg)
        args.test_accuracies.append(top1.avg)
        args.test_epoch_logger.log({
            'epoch': (args.epoch + 1),
            'loss': losses.avg,
            'top1': top1.avg,
            'top5': top5.avg,
            'time': total_time,
        })
    if args.validating:
        args.valid_losses.append(losses.avg)
        args.valid_accuracies.append(top1.avg)
        args.valid_epoch_logger.log({
            'epoch': (args.epoch + 1),
            'loss': losses.avg,
            'top1': top1.avg,
            'top5': top5.avg,
            'time': total_time,
        })

    # Save checkpoint.
    is_best = (top1.avg > args.best_acc)
    if is_best:
        args.best_acc = top1.avg
    states = {
        'state_dict': net.module.state_dict() if hasattr(net, 'module') else net.state_dict(),
        'epoch': args.epoch + 1,
        'arch': args.arch,
        'best_acc': args.best_acc,
        'train_losses': args.train_losses,
        'optimizer': args.current_optimizer.state_dict(),
    }
    if 'acc' in args:
        states['acc'] = top1.avg
    if 'valid_losses' in args:
        states['valid_losses'] = args.valid_losses
    if 'test_losses' in args:
        states['test_losses'] = args.test_losses
    if args.checkpoint_epoch > 0:
        if not os.path.isdir(args.checkpoint_path):
            os.mkdir(args.checkpoint_path)
        save_file_path = os.path.join(args.checkpoint_path, 'checkpoint.pth.tar')
        torch.save(states, save_file_path)
        if is_best:
            shutil.copyfile(save_file_path,
                            os.path.join(args.checkpoint_path, 'model_best.pth.tar'))

    print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%, elapsed time: %3.0f seconds. Best Acc: %.3f%%'
          % (losses.avg, top1.avg, top5.avg, total_time, args.best_acc))
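# eval_net and train_net rely on AverageMeter and accuracy, which this file
# does not define. The sketches below are minimal stand-ins inferred only from
# the call sites above (accuracy must return one shape-[1] tensor per k so
# that prec1[0].item() works); the repo's real util versions may differ.


class AverageMeter(object):
    """Running mean: .avg is the weighted mean of all values passed to update."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Top-k precision (in percent) of `output` logits against class `target`."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()  # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res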
def train_net(net, args):
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    print('training at epoch {}'.format(args.epoch + 1))

    net.train()
    total_time = 0
    data_time = 0
    optimizer = args.current_optimizer
    end_time = time.time()

    for batch_idx, (inputs, targets) in enumerate(args.data_loader):
        # Forward pass.
        if args.use_gpu:
            targets = targets.cuda()
        data_time += (time.time() - end_time)  # data-loading time

        outputs = net(inputs)
        if isinstance(outputs, list):  # some nets return intermediate maps
            outputs = outputs[-1]

        if args.loss == 'CE':
            loss = args.criterion(outputs, targets)
        elif args.loss == 'L2':
            targets_one_hot = targets_to_one_hot(targets, args.num_outputs)
            loss = args.criterion(outputs, targets_one_hot) * args.num_outputs * 0.5

        losses.update(loss.item(), inputs.size(0))
        prec1, prec5 = accuracy(outputs.data, targets, topk=(1, 5))
        top1.update(prec1[0].item(), inputs.size(0))
        top5.update(prec5[0].item(), inputs.size(0))

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()
        if args.lr_scheduler == 'cosine':
            args.current_scheduler.step()

        if args.tensorboard and args.logger_n_iter % args.print_freq == 0:
            args.writer.add_scalar('loss', loss.item(), args.logger_n_iter)
        args.logger_n_iter += 1
        optimizer.zero_grad()  # flush gradients before the next iteration

        total_time += (time.time() - end_time)
        end_time = time.time()

        if args.msg:
            print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%'
                  % (losses.avg, top1.avg, top5.avg))

        args.train_batch_logger.log({
            'epoch': (args.epoch + 1),
            'batch': batch_idx + 1,
            'loss': losses.avg,
            'top1': top1.avg,
            'top5': top5.avg,
            'time': total_time,
        })

    args.train_epoch_logger.log({
        'epoch': (args.epoch + 1),
        'loss': losses.avg,
        'top1': top1.avg,
        'top5': top5.avg,
        'time': total_time,
    })
    print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%, elapsed time: %3.0f seconds.'
          % (losses.avg, top1.avg, top5.avg, total_time))
    args.train_accuracies.append(top1.avg)
    args.train_losses.append(losses.avg)
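# targets_to_one_hot is imported from util but not shown in this file. Under
# the assumption that it produces the (batch, num_outputs) float matrix the
# L2-loss branch compares against, a plausible minimal implementation looks
# like this (hypothetical name, to avoid shadowing the real import):


def _targets_to_one_hot_sketch(targets, num_outputs):
    # One row per sample: 1.0 at the sample's class index, 0.0 elsewhere.
    one_hot = torch.zeros(targets.size(0), num_outputs, device=targets.device)
    one_hot.scatter_(1, targets.view(-1, 1), 1.0)
    return one_hot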
# Second definition of train_net; in Python this later definition overrides
# the one above. This variant takes an `opts` namespace and additionally logs
# image and histogram summaries of the intermediate maps to tensorboard.
def train_net(net, opts):
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses = AverageMeter()
    print('training at epoch {}'.format(opts.epoch + 1))

    net.train()
    total_time = 0
    data_time = 0
    optimizer = opts.current_optimizer
    end_time = time.time_ns()

    for batch_idx, (inputs, targets) in enumerate(opts.data_loader):
        # Forward pass.
        if opts.use_gpu:
            targets = targets.cuda()
        # Data-loading time in seconds; time_ns is used consistently here.
        data_time += (time.time_ns() - end_time) / 1e9

        outputs = net(inputs)
        maps = None
        if isinstance(outputs, list):  # keep intermediate maps for logging
            maps = outputs
            outputs = outputs[-1]

        if opts.loss == 'CE':
            loss = opts.criterion(outputs, targets)
        elif opts.loss == 'L2':
            targets_one_hot = targets_to_one_hot(targets, opts.num_outputs)
            loss = opts.criterion(outputs, targets_one_hot)

        losses.update(loss.item(), inputs.size(0))
        prec1, prec5 = accuracy(outputs.data, targets, topk=(1, 5))
        top1.update(prec1[0].item(), inputs.size(0))
        top5.update(prec5[0].item(), inputs.size(0))

        # Backward pass and parameter update; the scheduler steps after the
        # optimizer, as PyTorch >= 1.1 expects.
        loss.backward()
        optimizer.step()
        if opts.lr_scheduler == 'cosine':
            opts.current_scheduler.step()

        if opts.tensorboard and (maps is not None) and batch_idx % opts.viz_T == 0:
            # Log the first sample's intermediate maps (image summary).
            for l in range(len(maps) - 1):
                c, h, w = maps[l][0].shape
                tmp = (maps[l][0].detach().permute([1, 0, 2])
                       .contiguous().view(h, w * c).cpu())
                opts.writer.image_summary(
                    'train maps {}'.format(l),
                    [tensor2array(tmp, max_value=None, colormap='bone')],
                    opts.logger_n_iter)
            # Log values and gradients of the parameters (histogram summary).
            for tag, value in net.named_parameters():
                tag = tag.replace('.', '/')
                opts.writer.histo_summary(tag, value.data.cpu().numpy(),
                                          opts.logger_n_iter)
                if hasattr(value.grad, 'data'):
                    opts.writer.histo_summary(tag + '/grad',
                                              value.grad.data.cpu().numpy(),
                                              opts.logger_n_iter)

        if opts.tensorboardX and opts.logger_n_iter % opts.viz_T == 0:
            opts.writer.add_scalar('loss', loss.item(), opts.logger_n_iter)
        opts.logger_n_iter += 1
        optimizer.zero_grad()  # flush gradients before the next iteration

        total_time += (time.time_ns() - end_time) / 1e9
        end_time = time.time_ns()

        if opts.msg:
            print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%'
                  % (losses.avg, top1.avg, top5.avg))

        opts.train_batch_logger.log({
            'epoch': (opts.epoch + 1),
            'batch': batch_idx + 1,
            'loss': losses.avg,
            'top1': top1.avg,
            'top5': top5.avg,
            'time': total_time,
        })

    opts.train_epoch_logger.log({
        'epoch': (opts.epoch + 1),
        'loss': losses.avg,
        'top1': top1.avg,
        'top5': top5.avg,
        'time': total_time,
    })
    print('Loss: %.3f | top1: %.3f%%, top5: %.3f%%, elapsed time: %3.0f seconds.'
          % (losses.avg, top1.avg, top5.avg, total_time))
    opts.train_accuracies.append(top1.avg)
    opts.train_losses.append(losses.avg)
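# A hypothetical driver sketch showing the wiring these functions expect. The
# attribute names below are taken from the reads in train_net/eval_net; the
# _ListLogger stand-in and every default value are illustrative assumptions,
# not the repo's actual setup (which presumably builds the namespace from
# argparse plus its own csv loggers). Note that the second train_net
# definition above is the one in effect.


def _example_epoch_loop(net, train_loader, val_loader, num_epochs=1):
    import argparse

    import torch.nn as nn
    import torch.optim as optim

    class _ListLogger(object):
        """Stand-in for the repo's batch/epoch loggers: keeps rows in memory."""

        def __init__(self):
            self.rows = []

        def log(self, row):
            self.rows.append(row)

    args = argparse.Namespace(
        use_gpu=False,  # the repo presumably handles device placement itself
        loss='CE', criterion=nn.CrossEntropyLoss(), num_outputs=10,
        msg=False, tensorboard=False, tensorboardX=False, viz_T=100,
        logger_n_iter=0, lr_scheduler='none', arch='demo', best_acc=0.0,
        checkpoint_epoch=0, checkpoint_path='./checkpoints',
        train_losses=[], train_accuracies=[],
        valid_losses=[], valid_accuracies=[],
        train_batch_logger=_ListLogger(), train_epoch_logger=_ListLogger(),
        valid_epoch_logger=_ListLogger(),
        current_optimizer=optim.SGD(net.parameters(), lr=0.1),
    )
    for epoch in range(num_epochs):
        args.epoch = epoch
        args.data_loader = train_loader
        train_net(net, args)           # one training epoch
        args.data_loader = val_loader
        args.validating = True         # eval_net defaults 'testing' to False
        eval_net(net, args)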