def train_Epoch(args, state_info, Train_loader, Test_loader):  # all
    start_time = time.time()
    best_prec_result = torch.tensor(0, dtype=torch.float32)
    mode = args.model
    utils.default_model_dir = os.path.join(args.dir, mode)
    start_epoch = 0
    checkpoint = None
    checkpoint = utils.load_checkpoint(utils.default_model_dir)

    if not checkpoint:
        args.last_epoch = -1
        state_info.learning_scheduler_init(args, mode)
    else:
        print("loading {}/{}".format(utils.default_model_dir, "checkpoint_best.pth.tar"))
        state_info.load_state_dict(checkpoint, mode)
        state_info.learning_scheduler_init(args, mode)

    utils.default_model_dir = os.path.join(utils.default_model_dir, "cls")

    for epoch in range(0, args.epoch):

        epoch_result = train(args, state_info, Train_loader, Test_loader, epoch)

        if epoch_result > best_prec_result:
            best_prec_result = epoch_result
            utils.save_state_checkpoint(state_info, best_prec_result, epoch,
                                        'checkpoint_best.pth.tar', utils.default_model_dir)
            print('save..')

        if args.use_switch and epoch % args.iter == args.iter - 1:
            utils.switching_learning(state_info.model.module)
            print('learning Gate')

            epoch_result = train(args, state_info, Train_loader, Test_loader, epoch)

            if epoch_result > best_prec_result:
                best_prec_result = epoch_result
                utils.save_state_checkpoint(state_info, best_prec_result, epoch,
                                            'checkpoint_best.pth.tar', utils.default_model_dir)
                print('save..')

            utils.switching_learning(state_info.model.module)
            print('learning Base')

        state_info.lr_model.step()
        utils.print_log('')

    now = time.gmtime(time.time() - start_time)
    utils.print_log('Best Prec : {:.4f}'.format(best_prec_result.item()))
    utils.print_log('{} hours {} mins {} secs for training'.format(
        now.tm_hour, now.tm_min, now.tm_sec))
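# utils.switching_learning is called above to alternate between 'learning Gate' and
# 'learning Base', but its body is not shown in this excerpt. The sketch below is a
# hypothetical minimal implementation, assuming each gated block exposes a boolean
# `switch` attribute that selects whether its gate or its base weights are being
# trained; the real utils module may work differently.
def switching_learning(module):
    """Recursively flip the gate/base training switch on every gated block."""
    for child in module.children():
        if hasattr(child, 'switch'):
            child.switch = not child.switch   # toggle which branch is being trained
        switching_learning(child)             # recurse into nested blocks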
def main(model_dir, model, dataset):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    # model = model
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if not checkpoint:
        pass
    else:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    for epoch in range(start_epoch, 350):
        # step learning-rate schedule: 0.1 -> 0.01 at epoch 150 -> 0.001 at epoch 250
        if epoch < 150:
            learning_rate = lr
        elif epoch < 250:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        # train the base weights, then switch to the gates for one pass and switch back
        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')

        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)

        utils.switching_learning(model.module)
        print('switching_learning to Base')

        if epoch % 5 == 0:
            model_filename = 'checkpoint_%03d.pth.tar' % epoch
            utils.save_checkpoint({
                'epoch': epoch,
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, model_filename, model_dir)

    now = time.gmtime(time.time() - start_time)

    weight_extract(model, optimizer, criterion, train_loader, epoch)
    utils.conv_weight_L1_printing(model.module)

    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
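# The epoch-based step schedule inlined in main() above (0.1 until epoch 150, 0.01
# until epoch 250, 0.001 afterwards) can be read as the small helper below. This is
# only an equivalent restatement for clarity, not a function from the original file.
def step_lr(base_lr, epoch):
    if epoch < 150:
        return base_lr
    elif epoch < 250:
        return base_lr * 0.1
    return base_lr * 0.01

# e.g. step_lr(0.1, 0) == 0.1, step_lr(0.1, 200) == 0.01, step_lr(0.1, 300) == 0.001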
def main(model_dir, model, dataset, layer_name, layer_n):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    # model = model
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if not checkpoint:
        pass
    else:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    for epoch in range(start_epoch, 165):  # change 165
        # step learning-rate schedule: 0.1 -> 0.01 at epoch 80 -> 0.001 at epoch 120
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')

        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)

        utils.switching_learning(model.module)
        print('switching_learning to Base')

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        utils.save_checkpoint({
            'epoch': epoch,
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, model_filename, model_dir)

    now = time.gmtime(time.time() - start_time)

    # Collect per-class gate statistics, then prune layers one at a time, starting
    # from the layer with the smallest average gate activation.
    weight_extract(model, optimizer, criterion, train_loader, epoch)
    class_counter, class_weight_sum, class_average, total_average = utils.load_gate_csv()

    _, index = torch.sort(total_average)        # index[0]: smallest average ... index[-1]: biggest
    layer_name = utils.make_layer_name(layer_n)  # names layer0 .. layer{n-1}; overwrites the argument

    for i in index:
        # Prune the block named layer_name[i]: find 'layer' + str(i) inside
        # model.module, set its gate self.z to 0, then re-evaluate the model.
        # TODO: also handle pruning several layers at once ('layerN and layerN-M').
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, epoch, True)

    # utils.conv_weight_L1_printing(model.module)

    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
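# utils.weight_pruning_by_name is used above but not defined in this excerpt. Based
# on the pruning comments in main() ("find 'layer' + str(index[0]) from model.module
# and change self.z to 0"), a minimal sketch could look like the following; the
# attribute name `z` and the lookup by module name are assumptions.
def weight_pruning_by_name(module, name):
    """Locate the block called `name` under `module` and zero its gate z."""
    for child_name, child in module.named_modules():
        if child_name == name and hasattr(child, 'z'):
            if torch.is_tensor(child.z):
                with torch.no_grad():
                    child.z.zero_()   # disable this layer's gate in place
            else:
                child.z = 0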
def main():
    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    args.distributed = args.world_size > 1

    # create model
    print("=> creating model '{}'".format(args.arch))
    model = vision_model.resnet50()
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
    #     num_workers=args.workers, pin_memory=True, sampler=train_sampler)
    train_loader = imagenet_seq.data.Loader(
        'train', batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, cuda=True)

    # val_loader = torch.utils.data.DataLoader(
    #     datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Resize(256),
    #         transforms.CenterCrop(224),
    #         transforms.ToTensor(),
    #         normalize,
    #     ])),
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True)
    val_loader = imagenet_seq.data.Loader(
        'val', batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, cuda=True)

    if args.evaluate:
        validate(val_loader, model, criterion, is_main=True)
        return

    utils.init_learning(model.module)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, is_main=True)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, is_main=True)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)

        # if epoch % 3 == 2:
        # for i in range(3):

        # switch to training the gates for one pass
        utils.switching_learning(model.module)
        train(train_loader, model, criterion, optimizer, epoch, is_main=False)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, is_main=False)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)

        # switch back to training the base weights
        utils.switching_learning(model.module)
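# adjust_learning_rate is called in the ImageNet main() above but not defined in this
# excerpt. The sketch below follows the standard PyTorch ImageNet example recipe that
# this script appears to be modeled on (decay the initial LR by 10x every 30 epochs);
# the schedule actually used here may differ.
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr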