import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import utils  # repo-local helpers: data loaders, checkpointing, gate utilities

# train / test / weight_extract / weight_extract_test are defined elsewhere in this repo.


def main(model_dir, model, dataset):
    utils.default_model_dir = model_dir
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    # Resume from the latest checkpoint in model_dir, if one exists.
    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    for epoch in range(start_epoch, 165):
        # Step-decay schedule: 0.1 until epoch 80, 0.01 until 120, then 0.001.
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        train(model, optimizer, criterion, train_loader, epoch)
        test(model, criterion, test_loader, epoch)

        # Checkpoint every 5 epochs and report conv-weight L1 norms.
        if epoch % 5 == 0:
            model_filename = 'checkpoint_%03d.pth.tar' % epoch
            utils.save_checkpoint({
                'epoch': epoch,
                'model': model,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, model_filename, model_dir)
            utils.conv_weight_L1_printing(model.module)

    now = time.gmtime(time.time() - start_time)
    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
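# The train/test loops called above live elsewhere in the repo; the sketch
# below is a minimal, assumed implementation matching the signatures used
# here (the loss handling and the printed metric are assumptions, not the
# repo's actual code).
def train(model, optimizer, criterion, train_loader, epoch):
    model.train()
    for data, target in train_loader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()


def test(model, criterion, test_loader, epoch):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.cuda(), target.cuda()
            output = model(data)
            correct += (output.argmax(dim=1) == target).sum().item()
            total += target.size(0)
    print('epoch {}: test accuracy {:.2f}%'.format(epoch, 100.0 * correct / total))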
def main(model_dir, model, dataset):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    # Restore the trained weights; this variant only evaluates, it does not train.
    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)
    weight_extract_test(model, criterion, test_loader)
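# utils.load_checkpoint / utils.save_checkpoint are repo helpers; a plausible
# minimal version, assuming they simply wrap torch.save / torch.load and that
# checkpoints follow the 'checkpoint_%03d.pth.tar' naming used above (the
# directory layout is an assumption).
import glob
import os


def save_checkpoint(state, filename, model_dir):
    os.makedirs(model_dir, exist_ok=True)
    torch.save(state, os.path.join(model_dir, filename))


def load_checkpoint(model_dir):
    # Return the newest checkpoint in model_dir, or None when starting fresh.
    files = sorted(glob.glob(os.path.join(model_dir, 'checkpoint_*.pth.tar')))
    return torch.load(files[-1]) if files else None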
def main(model_dir, model, dataset, layer_n):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    for epoch in range(start_epoch, 165):  # change 165 to adjust total epochs
        if epoch < 80:
            learning_rate = lr
        elif epoch < 120:
            learning_rate = lr * 0.1
        else:
            learning_rate = lr * 0.01
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

        # Alternate between training the conv weights and the gate parameters.
        train(model, optimizer, criterion, train_loader, epoch, True)
        test(model, criterion, test_loader, epoch, True)

        utils.switching_learning(model.module)
        print('switching_learning to Gate')

        train(model, optimizer, criterion, train_loader, epoch, False)
        test(model, criterion, test_loader, epoch, False)

        utils.switching_learning(model.module)
        print('switching_learning back from Gate')

        model_filename = 'checkpoint_%03d.pth.tar' % epoch
        utils.save_checkpoint({
            'epoch': epoch,
            'model': model,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, model_filename, model_dir)

    now = time.gmtime(time.time() - start_time)

    # Rank layers by their average gate weight, then prune weakest-first.
    weight_extract(model, optimizer, criterion, train_loader, epoch)
    class_counter, class_weight_sum, class_average, total_average = utils.load_gate_csv()
    _, index = torch.sort(total_average)  # index[0] is the weakest layer, index[-1] the strongest
    layer_name = utils.make_layer_name(layer_n)

    for i in index:
        # Prune the block layer_name[i]: find 'layer' + str(i) in model.module
        # and set its gate scale self.z to 0, then re-test.
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, epoch, True)

    # TODO: prune groups of blocks at once ('layerN and layerN-M') instead of one at a time.
    # utils.conv_weight_L1_printing(model.module)
    print('{} hours {} mins {} secs for training'.format(now.tm_hour, now.tm_min, now.tm_sec))
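# utils.weight_pruning_by_name is the key pruning step; one way it could work,
# following the inline comments above (zero the gate scale self.z of the named
# block). The traversal and attribute name are assumptions based on those
# comments, not the repo's actual implementation.
def weight_pruning_by_name(module, target_name):
    for name, child in module.named_modules():
        if name == target_name and hasattr(child, 'z'):
            child.z = 0  # the block's output is scaled by z, so this disables it
            return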
def main(model_dir, model, dataset, layer_n, reversed=False):
    utils.default_model_dir = model_dir
    utils.c = None
    utils.str_w = ''
    lr = 0.1
    start_time = time.time()

    if dataset == 'cifar10':
        train_loader, test_loader = utils.cifar10_loader()
    elif dataset == 'cifar100':
        train_loader, test_loader = utils.cifar100_loader()

    if torch.cuda.is_available():
        # os.environ["CUDA_VISIBLE_DEVICES"] = '0'
        print("USE", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model).cuda()
        cudnn.benchmark = True
    else:
        print("NO GPU -_-;")

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss().cuda()

    start_epoch = 0
    checkpoint = utils.load_checkpoint(model_dir)
    if checkpoint:
        start_epoch = checkpoint['epoch'] + 1
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])

    utils.init_learning(model.module)

    # The alternating train/gate loop from the previous variant is disabled here:
    # this script assumes a fully trained checkpoint and only extracts gate
    # weights, then prunes layer by layer.
    utils.del_csv_weight_for_test()
    weight_extract(model, optimizer, criterion, train_loader, 160)
    class_counter, class_weight_sum, class_average, total_average = utils.load_gate_csv()

    # Prune weakest-first by default; reversed=True prunes strongest-first.
    if reversed:
        _, index = torch.sort(total_average, descending=True)
    else:
        _, index = torch.sort(total_average)
    layer_name = utils.make_layer_name(layer_n)

    for i in index:
        # Prune the block layer_name[i] and re-test after each removal.
        utils.weight_pruning_by_name(model.module, layer_name[i])
        test(model, criterion, test_loader, 160, True)
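# The only difference between the two pruning orders above is the sort
# direction: torch.sort returns (values, indices) in ascending order by
# default, so descending=True flips weakest-first into strongest-first.
# A quick check with made-up gate averages:
avg = torch.tensor([0.3, 0.9, 0.1])
_, weakest_first = torch.sort(avg)                      # indices: tensor([2, 0, 1])
_, strongest_first = torch.sort(avg, descending=True)   # indices: tensor([1, 0, 2])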