def train(train_loader, model, criterion, optimizer, epoch, log,
          attacker=None, adv_train=False):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    losses_adv = AverageMeter()
    top1_adv = AverageMeter()
    top5_adv = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            # the copy will be asynchronous with respect to the host
            target = target.cuda(non_blocking=True)
            input = input.cuda()

        # compute output for the clean data input
        output = model(input)
        loss = criterion(output, target)
        pred_target = output.max(1, keepdim=True)[1].squeeze(-1)

        # perturbed data inference
        if adv_train and (attacker is not None):
            model_cp = copy.deepcopy(model)
            perturbed_data = attacker.attack_method(model_cp, input, pred_target)
            output_adv = model(perturbed_data)
            loss_adv = criterion(output_adv, target)
            loss = 0.5 * loss + 0.5 * loss_adv

            prec1_adv, prec5_adv = accuracy(output_adv.data, target, topk=(1, 5))
            losses_adv.update(loss_adv.item(), input.size(0))
            top1_adv.update(prec1_adv.item(), input.size(0))
            top5_adv.update(prec5_adv.item(), input.size(0))

        # measure accuracy and record the total loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                    epoch, i, len(train_loader),
                    batch_time=batch_time, data_time=data_time,
                    loss=losses, top1=top1, top5=top5) + time_string(), log)

    print_log(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)
    print_log(
        '  **Adversarial Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1_adv, top5=top5_adv, error1=100 - top1_adv.avg), log)

    return top1.avg, losses.avg, top1_adv.avg, losses_adv.avg
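# The adversarial train() above only relies on attacker.attack_method(model, input, target)
# returning a perturbed copy of the input batch; the Attack class itself is defined
# elsewhere in the repo. The helper below is a minimal, hypothetical PGD-style sketch of
# such a method (the function name and the epsilon/step/iteration defaults are
# illustrative assumptions, not the repo's actual implementation or settings).
def _pgd_attack_sketch(model, data, target, epsilon=0.031, step=0.01, iters=7):
    perturbed = data.clone().detach()
    for _ in range(iters):
        perturbed.requires_grad_(True)
        loss = torch.nn.functional.cross_entropy(model(perturbed), target)
        grad = torch.autograd.grad(loss, perturbed)[0]
        # signed gradient ascent step, then project back into the epsilon-ball
        # around the clean input and the valid [0, 1] image range
        perturbed = perturbed.detach() + step * grad.sign()
        perturbed = torch.min(torch.max(perturbed, data - epsilon), data + epsilon)
        perturbed = torch.clamp(perturbed, 0.0, 1.0).detach()
    return perturbed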
def train(train_loader, model, criterion, optimizer, epoch, log):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            # the copy will be asynchronous with respect to the host
            target = target.cuda(non_blocking=True)
            input = input.cuda()

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print_log(
                '  Epoch: [{:03d}][{:03d}/{:03d}]   '
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
                'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
                'Loss {loss.val:.4f} ({loss.avg:.4f})   '
                'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
                'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
                    epoch, i, len(train_loader),
                    batch_time=batch_time, data_time=data_time,
                    loss=losses, top1=top1, top5=top5) + time_string(), log)

    print_log(
        '  **Train** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'
        .format(top1=top1, top5=top5, error1=100 - top1.avg), log)

    return top1.avg, losses.avg
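# Both train() variants above assume AverageMeter and accuracy helpers in the style of
# the official PyTorch ImageNet example, imported from the repo's utilities. The sketches
# below (with underscore names so they do not shadow the real helpers) show one common
# implementation of what those utilities are assumed to provide.
class _AverageMeterSketch(object):
    """Tracks the current value, running sum, count, and average of a metric."""

    def __init__(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def _accuracy_sketch(output, target, topk=(1,)):
    """Computes precision@k (in percent) for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res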
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path,
                     'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()),
              log)

    # Init the tensorboard path and writer
    tb_path = os.path.join(args.save_path, 'tb_log')
    writer = SummaryWriter(tb_path)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    # mean and standard deviation used for normalization
    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    elif args.dataset == 'svhn':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'mnist':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'imagenet':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    # The current data pre-processing does not include the normalization
    imagenet_train_transform = [
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ]
    imagenet_test_transform = [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()
    ]
    normal_train_transform = [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor()
    ]
    normal_test_transform = [transforms.ToTensor()]

    # if not performing adversarial training or evaluation, append the
    # normalization back to the pre-processing
    if not (args.adv_train or args.adv_eval):
        imagenet_train_transform.append(transforms.Normalize(mean, std))
        imagenet_test_transform.append(transforms.Normalize(mean, std))
        normal_train_transform.append(transforms.Normalize(mean, std))
        normal_test_transform.append(transforms.Normalize(mean, std))

    if args.dataset == 'imagenet':
        train_transform = transforms.Compose(imagenet_train_transform)
        test_transform = transforms.Compose(imagenet_test_transform)
    else:
        train_transform = transforms.Compose(normal_train_transform)
        test_transform = transforms.Compose(normal_test_transform)

    if args.dataset == 'mnist':
        train_data = dset.MNIST(args.data_path, train=True,
                                transform=train_transform, download=True)
        test_data = dset.MNIST(args.data_path, train=False,
                               transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True,
                                  transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False,
                                 transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True,
                                   transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False,
                                  transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train',
                               transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test',
                              transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train',
                                transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test',
                               transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        train_dir = os.path.join(args.data_path, 'train')
        test_dir = os.path.join(args.data_path, 'val')
        train_data = dset.ImageFolder(train_dir, transform=train_transform)
        test_data = dset.ImageFolder(test_dir, transform=test_transform)
        num_classes = 1000
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)

    # Init model, criterion, and optimizer
    net_c = models.__dict__[args.arch](num_classes)

    # For the adversarial case, prepend a normalization layer to the network
    if args.adv_train or args.adv_eval:
        if not args.input_noise:
            net = torch.nn.Sequential(Normalize_layer(mean, std), net_c)
        else:
            net = torch.nn.Sequential(noise_Normalize_layer(mean, std), net_c)
    else:
        net = net_c

    print_log("=> network :\n {}".format(net), log)

    if args.use_cuda:
        if args.ngpu > 1:
            net = torch.nn.DataParallel(net, device_ids=list(range(args.ngpu)))

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    # separate the model parameters so that the trainable noise scaling
    # coefficient is free from the weight penalty (weight decay)
    normal_param = [
        param for name, param in net.named_parameters()
        if 'alpha_' not in name
    ]  # parameters that do not contain the noise scaling coefficient
    alpha_param = [
        param for name, param in net.named_parameters() if 'alpha_' in name
    ]

    if args.optimizer == "SGD":
        print("using SGD as optimizer")
        optimizer = torch.optim.SGD(
            [{'params': normal_param},
             {'params': alpha_param, 'weight_decay': 0}],
            lr=state['learning_rate'],
            momentum=state['momentum'],
            weight_decay=state['decay'],
            nesterov=True)
    elif args.optimizer == "Adam":
        print("using Adam as optimizer")
        optimizer = torch.optim.Adam(
            [{'params': normal_param},
             {'params': alpha_param, 'weight_decay': 0}],
            lr=state['learning_rate'],
            weight_decay=state['decay'])
    elif args.optimizer == "RMSprop":
        print("using RMSprop as optimizer")
        optimizer = torch.optim.RMSprop(
            [{'params': normal_param},
             {'params': alpha_param, 'weight_decay': 0}],
            lr=state['learning_rate'],
            alpha=0.99,
            eps=1e-08,
            weight_decay=0,
            momentum=0)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)  # count number of epochs

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            if not args.fine_tune:
                args.start_epoch = checkpoint['epoch']
                recorder = checkpoint['recorder']
                optimizer.load_state_dict(checkpoint['optimizer'])

            state_tmp = net.state_dict()
            if 'state_dict' in checkpoint.keys():
                state_tmp.update(checkpoint['state_dict'])
            else:
                state_tmp.update(checkpoint)

            net.load_state_dict(state_tmp)

            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, args.start_epoch), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log(
            "=> do not use any checkpoint for {} model".format(args.arch),
            log)

    # initialize the attacker object
    model_attack = Attack(dataloader=train_loader,
                          attack_method='pgd',
                          epsilon=0.031)

    if args.evaluate:
        validate(test_loader,
                 net,
                 criterion,
                 log,
                 attacker=model_attack,
                 adv_eval=args.adv_eval)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()

    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate, current_momentum = adjust_learning_rate(
            optimizer, epoch, args.gammas, args.schedule)

        # Display simulation time
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.4f}][M={:1.2f}]'.
            format(time_string(), epoch, args.epochs, need_time,
                   current_learning_rate, current_momentum) +
            ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False),
                100 - recorder.max_accuracy(False)), log)

        # delay the adversarial training until after the preset number of epochs
        start_adv_train = False
        if epoch >= args.epoch_delay:
            start_adv_train = args.adv_train

        train_acc, train_los, train_adv_acc, train_adv_los = train(
            train_loader,
            net,
            criterion,
            optimizer,
            epoch,
            log,
            attacker=model_attack,
            adv_train=start_adv_train)

        # evaluate on validation set
        val_acc, val_los, val_pgd_acc, val_pgd_los, val_fgsm_acc, val_fgsm_los = validate(
            test_loader,
            net,
            criterion,
            log,
            attacker=model_attack,
            adv_eval=args.adv_eval)

        recorder.update(epoch, train_los, train_acc, val_los, val_acc)
        is_best = val_acc >= recorder.max_accuracy(False)

        if args.model_only:
            checkpoint_state = {'state_dict': net.state_dict()}
        else:
            checkpoint_state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }

        save_checkpoint(checkpoint_state, is_best, args.save_path,
                        'checkpoint.pth.tar', log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

        # save additional accuracy log for plotting
        accuracy_logger(base_dir=args.save_path,
                        epoch=epoch,
                        train_accuracy=train_acc,
                        test_accuracy=val_acc)

        # ============ TensorBoard logging ============#
        # Log the gradient distributions
        for name, param in net.named_parameters():
            name = name.replace('.', '/')
            writer.add_histogram(name,
                                 param.clone().cpu().data.numpy(),
                                 epoch + 1,
                                 bins='tensorflow')
            if param.grad is not None:
                writer.add_histogram(name + '/grad',
                                     param.grad.clone().cpu().data.numpy(),
                                     epoch + 1,
                                     bins='tensorflow')

        # Log the noise scaling coefficient (alpha) distributions
        for name, module in net.named_modules():
            name = name.replace('.', '/')
            class_name = str(module.__class__).split('.')[-1].split("'")[0]

            if hasattr(module, 'alpha_w') and module.alpha_w is not None:
                if module.pni == 'layerwise':
                    writer.add_scalar(name + '/alpha/',
                                      module.alpha_w.clone().item(),
                                      epoch + 1)
                elif module.pni == 'channelwise':
                    writer.add_histogram(
                        name + '/alpha/',
                        module.alpha_w.clone().cpu().data.numpy(),
                        epoch + 1,
                        bins='tensorflow')

        writer.add_scalar('loss/train_loss', train_los, epoch + 1)
        writer.add_scalar('loss/test_loss', val_los, epoch + 1)
        writer.add_scalar('accuracy/train_accuracy', train_acc, epoch + 1)
        writer.add_scalar('accuracy/test_accuracy', val_acc, epoch + 1)

        if args.adv_train:
            writer.add_scalar('loss/adv_train_loss', train_adv_los, epoch + 1)
            writer.add_scalar('accuracy/adv_train_accuracy', train_adv_acc,
                              epoch + 1)

        if args.adv_eval:
            writer.add_scalar('loss/pgd_test_loss', val_pgd_los, epoch + 1)
            writer.add_scalar('accuracy/pgd_test_accuracy', val_pgd_acc,
                              epoch + 1)
            writer.add_scalar('loss/fgsm_test_loss', val_fgsm_los, epoch + 1)
            writer.add_scalar('accuracy/fgsm_test_accuracy', val_fgsm_acc,
                              epoch + 1)
        # ============ TensorBoard logging ============#

    log.close()
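# In the adversarial main() above, normalization is removed from the data transforms and
# instead prepended to the network as Normalize_layer(mean, std) (or its noisy variant),
# so the attacker perturbs raw [0, 1] inputs while the backbone still sees normalized
# data. Those layers are defined elsewhere in the repo; the class below is a minimal,
# hypothetical sketch of the idea, not the repo's implementation.
class _NormalizeLayerSketch(torch.nn.Module):
    """Per-channel input normalization as a module (illustrative only)."""

    def __init__(self, mean, std):
        super().__init__()
        # register as buffers so they follow .cuda()/.to() but are not trained
        self.register_buffer('mean', torch.tensor(mean).view(1, -1, 1, 1))
        self.register_buffer('std', torch.tensor(std).view(1, -1, 1, 1))

    def forward(self, x):
        return (x - self.mean) / self.std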
def main():
    # Init logger
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path,
                     'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()),
              log)
    print_log("Weight Decay: {}".format(args.decay), log)

    # Init the tensorboard path and writer
    tb_path = os.path.join(args.save_path, 'tb_log')
    logger = Logger(tb_path)
    writer = SummaryWriter(tb_path)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    elif args.dataset == 'svhn':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'mnist':
        mean = [0.5, 0.5, 0.5]
        std = [0.5, 0.5, 0.5]
    elif args.dataset == 'imagenet':
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
    elif args.dataset == 'yawnDD':
        print('YawnDD dataset!')
    elif args.dataset == 'eyeclosure':
        print('eyeclosure dataset!')
    else:
        assert False, "Unknown dataset : {}".format(args.dataset)

    if args.dataset == 'imagenet':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=(0.2, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        # here the 'test' split is actually the validation dataset
        test_transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
    elif args.dataset not in ['yawnDD', 'eyeclosure']:
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        test_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])

    if args.dataset == 'mnist':
        train_data = dset.MNIST(args.data_path, train=True,
                                transform=train_transform, download=True)
        test_data = dset.MNIST(args.data_path, train=False,
                               transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True,
                                  transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False,
                                 transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True,
                                   transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False,
                                  transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train',
                               transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test',
                              transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train',
                                transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test',
                               transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        train_dir = os.path.join(args.data_path, 'train')
        test_dir = os.path.join(args.data_path, 'val')
        train_data = dset.ImageFolder(train_dir, transform=train_transform)
        test_data = dset.ImageFolder(test_dir, transform=test_transform)
        num_classes = 1000
    elif args.dataset == 'yawnDD':
        dataset = torch.load('./yawning_dataset/yawnDD_image.pt') / 255
        target = torch.load('./yawning_dataset/yawnDD_label.pt').long() / 255
        dataset = dataset.view(dataset.size(0), dataset.size(3),
                               dataset.size(2), dataset.size(2))

        # 80/20 train/test split
        train_dataset = dataset[:int(0.8 * dataset.size(0))]
        train_target = target[:int(0.8 * dataset.size(0))]
        test_dataset = dataset[-int(0.2 * dataset.size(0)):]
        test_target = target[-int(0.2 * dataset.size(0)):]

        train_data = torch.utils.data.TensorDataset(train_dataset,
                                                    train_target)
        test_data = torch.utils.data.TensorDataset(test_dataset, test_target)
        num_classes = 2
    elif args.dataset == 'eyeclosure':
        train_dataset = torch.load('./eyeclosure/eyeclosure_train_data.pt')
        train_label = torch.load(
            './eyeclosure/eyeclosure_train_label.pt').long()
        test_dataset = torch.load('./eyeclosure/eyeclosure_test_data.pt')
        test_label = torch.load('./eyeclosure/eyeclosure_test_label.pt').long()

        train_dataset = train_dataset.view(train_dataset.size(0),
                                           train_dataset.size(3),
                                           train_dataset.size(2),
                                           train_dataset.size(2))
        test_dataset = test_dataset.view(test_dataset.size(0),
                                         test_dataset.size(3),
                                         test_dataset.size(2),
                                         test_dataset.size(2))

        train_data = torch.utils.data.TensorDataset(train_dataset, train_label)
        test_data = torch.utils.data.TensorDataset(test_dataset, test_label)
        num_classes = 2
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.workers,
                                              pin_memory=True)

    print_log("=> creating model '{}'".format(args.arch), log)

    # Init model, criterion, and optimizer
    net = models.__dict__[args.arch](num_classes)
    print_log("=> network :\n {}".format(net), log)

    if args.use_cuda:
        if args.ngpu > 1:
            # net = torch.nn.DataParallel(net, device_ids=[1, 2])
            net = torch.nn.DataParallel(net)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    if args.optimizer == "SGD":
        print("using SGD as optimizer")
        optimizer = torch.optim.SGD(filter(lambda param: param.requires_grad,
                                           net.parameters()),
                                    lr=state['learning_rate'],
                                    momentum=state['momentum'],
                                    weight_decay=state['decay'],
                                    nesterov=True)
    elif args.optimizer == "Adam":
        print("using Adam as optimizer")
        optimizer = torch.optim.Adam(filter(lambda param: param.requires_grad,
                                            net.parameters()),
                                     lr=state['learning_rate'],
                                     weight_decay=state['decay'])
    elif args.optimizer == "YF":
        print("using YellowFin as optimizer")
        optimizer = YFOptimizer(filter(lambda param: param.requires_grad,
                                       net.parameters()),
                                lr=state['learning_rate'],
                                mu=state['momentum'],
                                weight_decay=state['decay'])
    elif args.optimizer == "RMSprop":
        print("using RMSprop as optimizer")
        optimizer = torch.optim.RMSprop(filter(
            lambda param: param.requires_grad, net.parameters()),
                                        lr=state['learning_rate'],
                                        alpha=0.99,
                                        eps=1e-08,
                                        weight_decay=0,
                                        momentum=0)

    if args.use_cuda:
        net.cuda()
        criterion.cuda()

    recorder = RecorderMeter(args.epochs)  # count number of epochs

    # list the trainable parameter names
    for name, value in net.named_parameters():
        print(name)

    # optionally resume from a checkpoint
    if args.resume:
        new_state_dict = OrderedDict()
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            if not args.fine_tune:
                args.start_epoch = checkpoint['epoch']
                recorder = checkpoint['recorder']
                optimizer.load_state_dict(checkpoint['optimizer'])

            # copy the checkpoint weights; keys are kept as-is (use k[7:] to
            # strip a leading 'module.' prefix from DataParallel checkpoints)
            if 'state_dict' in checkpoint.keys():
                src_state = checkpoint['state_dict']
            else:
                print('loading from a .pth file instead of a .tar checkpoint')
                src_state = checkpoint
            for k, v in src_state.items():
                name = k
                new_state_dict[name] = v

            state_tmp = net.state_dict()
            state_tmp.update(new_state_dict)
            net.load_state_dict(state_tmp)

            print_log(
                "=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, args.start_epoch), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume),
                      log)
    else:
        print_log(
            "=> do not use any checkpoint for {} model".format(args.arch),
            log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()

    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate, current_momentum = adjust_learning_rate(
            optimizer, epoch, args.gammas, args.schedule)

        # Display simulation time
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log(
            '\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [LR={:6.5f}][M={:1.2f}]'.
            format(time_string(), epoch, args.epochs, need_time,
                   current_learning_rate, current_momentum) +
            ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False),
                100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer,
                                     epoch, log)

        # evaluate on validation set
        val_acc, val_los = validate(test_loader, net, criterion, log)

        is_best = val_acc > recorder.max_accuracy(istrain=False)
        recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        if args.model_only:
            checkpoint_state = {'state_dict': net.state_dict()}
        else:
            checkpoint_state = {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': net.state_dict(),
                'recorder': recorder,
                'optimizer': optimizer.state_dict(),
            }

        save_checkpoint(checkpoint_state, is_best, args.save_path,
                        'checkpoint.pth.tar', log)

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

    log.close()