import functools

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Project-local modules assumed by this excerpt (not shown here):
# resnet_cifar, resnet, SmallCifar10Net, SmallCifar100Net, and the
# pruning utilities rpgp_target, gngtrain, ResNetAdapter.


def model_construct(dataset_name):
    """Return (model, model_name) for the given dataset."""
    if dataset_name == 'cifar10':
        return resnet_cifar.resnet20_cifar(), "resnet20"
    elif dataset_name == 'cifar100':
        return resnet_cifar.resnet20_cifar(), "resnet20"
    elif dataset_name == 'imagenet':
        return resnet.resnet18(), "resnet18"
    elif dataset_name == 'mnist':
        return MNISTNet(), "MNISTNet"
    raise ValueError('unknown dataset: ' + dataset_name)  # was an implicit None
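# `MNISTNet` is referenced by both factories but not defined in this excerpt;
# a minimal stand-in consistent with its usage (purely illustrative, not the
# source's architecture):
class MNISTNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc = nn.Linear(64 * 7 * 7, 10)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # 1x28x28 -> 32x14x14
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # 32x14x14 -> 64x7x7
        return self.fc(x.flatten(1))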
def model_construct(dataset_name):
    """Return (model, model_name); this variant covers additional datasets."""
    if dataset_name == 'cifar10':
        return resnet_cifar.resnet20_cifar(), 'resnet20'
    elif dataset_name == 'imagenet':
        return resnet.resnet50(), 'resnet50'
    elif dataset_name == 'imagenette':
        return resnet.resnet18(), 'resnet18'
    elif dataset_name == 'imagewoof':
        return SmallCifar10Net(), 'SmallCifar10Net'
    elif dataset_name == 'cifar100':
        return SmallCifar100Net(), 'SmallCifar100Net'
    elif dataset_name == 'mnist':
        return MNISTNet(), 'MNISTNet'
    raise ValueError('unknown dataset: ' + dataset_name)
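# Usage sketch for model_construct (our illustration; the device placement
# below is not part of the original factories):
def demo_model_construct(dataset_name='cifar10'):
    model, model_name = model_construct(dataset_name)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return model.to(device), model_name  # e.g. ('resnet20' for 'cifar10')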
def main():
    cuda = torch.device("cuda")
    lr = 0.1
    momentum = 0.9
    batch_size = 128
    test_batch_size = 128

    # Standard CIFAR-10 augmentation (pad-and-crop plus horizontal flip),
    # normalized with the per-channel CIFAR-10 mean and std.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.447],
                             std=[0.247, 0.243, 0.262]),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.447],
                             std=[0.247, 0.243, 0.262]),
    ])
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                              shuffle=True, num_workers=0)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                           download=True, transform=transform_test)
    # shuffle=False: there is no reason to shuffle the test set.
    testloader = torch.utils.data.DataLoader(testset, batch_size=test_batch_size,
                                             shuffle=False, num_workers=0)

    # model = alexnet(True).to(cuda)
    # model = models.resnet18().to(cuda)
    model = resnet_cifar.resnet20_cifar().to(cuda)

    epochs = 10
    t_prune_rate = 0.5   # target prune rate
    remove_ratio = 0.5
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[160, 240],
                                               gamma=0.1)
    # Pruned weights are re-initialized to zero.
    zero_initializer = functools.partial(torch.nn.init.constant_, val=0)
    rpgp_target(epochs, t_prune_rate, remove_ratio, testloader, gngtrain,
                cuda=cuda, model=model, train_loader=trainloader, train_ratio=1,
                initializer_fn=zero_initializer, optimizer=optimizer, logger=None,
                model_adapter=ResNetAdapter(), is_expo=True, is_break=False,
                scheduler=scheduler,
                final_fn=[20, 0.01])  # final_fn = [fine-tune epochs, fine-tune learning rate]
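# A minimal evaluation sketch for the loaders built in main() above; the
# `evaluate` helper is ours, not part of the source pipeline:
def evaluate(model, loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            _, predicted = model(inputs).max(1)
            correct += predicted.eq(targets).sum().item()
            total += targets.size(0)
    return correct / total  # top-1 accuracy in [0, 1]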
import os

import numpy as np
import torch.backends.cudnn as cudnn
# SummaryWriter may come from tensorboardX in older setups.
from torch.utils.tensorboard import SummaryWriter


def main(dataset=args.dataset, opt='sgd', lr=0.01, r=1, momentum=0.9,
         augment=True, beta2=args.beta2, epsilon=args.epsilon,
         partial=args.partial, weight_decay=args.weight_decay,
         amsgrad=args.amsgrad, transformer=args.transformer,
         grad_transf='square', smooth=5, hist=False):
    global args
    global best_acc

    # Training settings
    log_interval = args.log_interval
    folder = str(dataset) + '_' + str(args.NNtype) + '_batch_size_' + str(args.b)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Data loading: CIFAR statistics given as raw pixel values scaled to [0, 1].
    print('==> Preparing data..')
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    if augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    # Loader settings shared by all DataLoaders below.
    kwargs = {'num_workers': 1, 'pin_memory': True}
    if dataset == 'CIFAR10':
        trainset = torchvision.datasets.CIFAR10(root='../data', train=True,
                                                download=True, transform=transform_train)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.b,
                                                  shuffle=True, **kwargs)
        testset = torchvision.datasets.CIFAR10(root='../data', train=False,
                                               download=True, transform=transform_test)
        testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                                 shuffle=False, **kwargs)
    elif dataset == 'CIFAR100':  # not working yet
        trainset = torchvision.datasets.CIFAR100(root='../data/CIFAR100', train=True,
                                                 download=True, transform=transform_train)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.b,
                                                  shuffle=True, **kwargs)
        testset = torchvision.datasets.CIFAR100(root='../data/CIFAR100', train=False,
                                                download=True, transform=transform_test)
        testloader = torch.utils.data.DataLoader(testset, batch_size=200,
                                                 shuffle=False, **kwargs)

    # ================================================================== #
    #                              Model                                 #
    # ================================================================== #
    print('==> Building model..' + args.NNtype)
    if args.NNtype == 'DenseNet_BC_100_12':
        net = dn.DenseNet3(100, 10, 12, reduction=0.5, bottleneck=True, dropRate=0)
    elif args.NNtype == 'ResNet20':
        net = resnet_cifar.resnet20_cifar()
    elif args.NNtype == 'ResNet56':
        net = resnet_cifar.resnet56_cifar()
    elif args.NNtype == 'ResNet18':  # for ImageNet in the original paper
        net = ResNet18()
    elif args.NNtype == 'DenseNet4':  # for ImageNet in the original paper
        net = densenet_cifar()
    if device == 'cuda':
        net = net.cuda()
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True

    # ================================================================== #
    #                        Loss and optimizer                          #
    # ================================================================== #
    criterion = nn.CrossEntropyLoss()
    eps = epsilon
    if opt == "Sadam":
        if transformer == 'softplus':
            optimizer = Sadam.Sadam(net.parameters(), lr=lr, eps=eps, betas=(0.9, beta2),
                                    partial=partial, weight_decay=weight_decay,
                                    amsgrad=amsgrad, transformer=transformer,
                                    grad_transf=grad_transf, smooth=smooth, hist=hist)
            run_name = ("Sadam_lr" + str(lr) + '_beta2_' + str(beta2) + '_eps_' + str(eps)
                        + '_' + str(args.reduceLRtype) + '_wd_' + str(weight_decay)
                        + '_amsgrad_' + str(amsgrad) + str(transformer) + str(grad_transf)
                        + '_smth_' + str(int(smooth)) + '_' + str(r))
        else:
            optimizer = Sadam.Sadam(net.parameters(), lr=lr, eps=eps, betas=(0.9, beta2),
                                    partial=partial, weight_decay=weight_decay,
                                    amsgrad=amsgrad, transformer=transformer,
                                    grad_transf=grad_transf, hist=hist)
            run_name = ("Padam_lr" + str(lr) + '_beta2_' + str(beta2) + '_eps_' + str(eps)
                        + '_' + str(args.reduceLRtype) + '_wd_' + str(weight_decay)
                        + '_partial_' + str(partial) + '_amsgrad_' + str(amsgrad)
                        + str(transformer) + str(grad_transf) + '_' + str(r))
    elif opt == "sgd":
        optimizer = SGD_modified.SGD(net.parameters(), lr=lr, momentum=momentum,
                                     weight_decay=weight_decay, hist=hist)
        run_name = (str(opt) + "_lr" + str(lr) + "_mom" + str(momentum)
                    + '_' + str(args.reduceLRtype) + '_wd_' + str(weight_decay)
                    + '_' + str(r))
    elif opt == "adabound":
        optimizer = adabound.AdaBound(net.parameters(), lr=lr,
                                      weight_decay=weight_decay, amsbound=amsgrad)
        run_name = (str(opt) + "_lr" + str(lr) + '_' + str(args.reduceLRtype)
                    + '_wd_' + str(weight_decay) + '_amsgrad_' + str(amsgrad)
                    + '_' + str(r))

    # One run name serves as both the TensorBoard folder and the log-file prefix.
    folder_name = '../logs/' + folder + '/' + run_name
    file_name = folder_name

    # Skip configurations that have already finished.
    if os.path.isfile(file_name + "_MaxAccuracy"):
        print(file_name + "_MaxAccuracy has finished")
        return

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
    logger = SummaryWriter(folder_name)

    # Write CSV headers for the percentile and loss logs.
    file_1 = open(file_name + "_percentile.header", "w")
str("_percentile.header"), "w") file_1.write( 'epoch,iteration,1%,2.5%,5%,10%,25%,50%,75%,90%,95%,97.5%,99%,min,max,mean,sigma\n' ) file_1.close() file_2 = open(file_name + str("_loss.header"), "w") file_2.write( 'epoch,iteration,training_loss,training_accuracy,testing_loss,testing_accuracy\n' ) file_2.close() maxAccuracy = 0 if args.resume: if os.path.isfile(folder_name + '/ckpt.t7'): print('=> loading checkpoint "{}"'.format(folder_name + '/ckpt.t7')) checkpoint = torch.load(folder_name + '/ckpt.t7') args.start_epoch = checkpoint['epoch'] best_acc = checkpoint['best_acc'] net.load_state_dict(checkpoint['net']) optimizer.load_state_dict(checkpoint['optimizer']) print("=> loaded checkpoint '{}' (epoch {})".format( folder_name + '/ckpt.t7', checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(folder_name + '/ckpt.t7')) # ================================================================== # # train model and testing error # # ================================================================== # for epoch in range(args.start_epoch, args.epochs): print('\nEpoch: %d' % epoch) net.train() for batch_idx, (inputs, targets) in enumerate(trainloader): step = epoch * len(trainloader) + batch_idx if device == 'cuda': inputs, targets = inputs.cuda(), targets.cuda() optimizer.zero_grad() outputs = net(inputs) loss = criterion(outputs, targets) loss.backward() denom_info = optimizer.step() if (step + 1) % log_interval == 0: _, predicted = outputs.max(1) accuracy = predicted.eq(targets).sum().item() / targets.size(0) # ================================================================== # # Tensorboard Logging # # ================================================================== # if denom_info and denom_info['m_v_eta']: # 2. Log values and gradients of the parameters (histogram summary) for tag, value in denom_info.items(): info_array = torch.cat(value, dim=0).cpu().numpy() logger.add_histogram(tag, info_array, step + 1) #pdb.set_trace() temp = np.append( np.array([epoch + 1, step + 1]), np.percentile( info_array, [1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99])) temp = np.append( temp, np.array([np.min(info_array), np.max(info_array)])) mean = np.mean(info_array) sigma = np.std(info_array) temp = np.append(temp, np.array([mean, sigma])) #logger.add_histogram(tag+'/grad', value.grad.data.cpu().numpy(), step+1) if tag == "denom": if 'denom_array' in locals(): denom_array = np.vstack((denom_array, temp)) else: denom_array = temp elif tag == "denom_inv": if 'denom_inv_array' in locals(): denom_inv_array = np.vstack( (denom_inv_array, temp)) else: denom_inv_array = temp elif tag == "m_v_eta": if 'm_v_eta_array' in locals(): m_v_eta_array = np.vstack( (m_v_eta_array, temp)) else: m_v_eta_array = temp training_loss, training_accuracy = train_(trainloader, net, device, criterion) testing_loss, testing_accuracy = test(testloader, net, device, criterion, epoch, folder_name, optimizer) if testing_accuracy > maxAccuracy: maxAccuracy = testing_accuracy info = { "training_loss": training_loss, 'training_accuracy': training_accuracy, 'testing_loss': testing_loss, 'testing_accuracy': testing_accuracy } if 'loss_info_array' in locals(): loss_info_array = np.vstack( (loss_info_array, np.array([ epoch + 1, (epoch + 1) * len(trainloader), training_loss, training_accuracy, testing_loss, testing_accuracy ]))) else: loss_info_array = np.array([ epoch + 1, (epoch + 1) * len(trainloader), training_loss, training_accuracy, testing_loss, testing_accuracy ]) for tag, value in info.items(): logger.add_scalar(tag, 
        logger.add_scalar('lr', optimizer.param_groups[0]['lr'], epoch + 1)

        if args.reduceLRtype == 'ReduceLROnPlateauMax':
            scheduler.step(testing_accuracy)
        elif args.reduceLRtype == 'manual0':
            # Manual step schedule: divide the learning rate by 10 at
            # epochs 150 and 225.
            if epoch < 150:
                optimizer.param_groups[0]['lr'] = lr
            elif epoch < 225:
                optimizer.param_groups[0]['lr'] = lr * 0.1
            else:
                optimizer.param_groups[0]['lr'] = lr * 0.01

    # Persist the per-step percentile logs and the per-epoch loss curve.
    if 'denom_array' in locals():
        np.savetxt(file_name + "_denom_percentile.info", denom_array, delimiter=",")
        np.savetxt(file_name + "_denom_inv_percentile.info", denom_inv_array,
                   delimiter=",")
    if 'm_v_eta_array' in locals():
        np.savetxt(file_name + "_m_v_eta_percentile.info", m_v_eta_array,
                   delimiter=",")
    np.savetxt(file_name + "_loss.info", loss_info_array, delimiter=",")

    # Marker file: its existence lets a rerun skip this configuration.
    file = open(file_name + "_MaxAccuracy", "w")
    file.write(str(maxAccuracy) + '\n')
    file.close()
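# The `.info` files written above are headerless CSV (the column names live in
# the companion `.header` files); a quick offline look, sketched with pandas
# (our suggestion; the path below is a placeholder, not a real run):
import pandas as pd

cols = ['epoch', 'iteration', 'p1', 'p2.5', 'p5', 'p10', 'p25', 'p50',
        'p75', 'p90', 'p95', 'p97.5', 'p99', 'min', 'max', 'mean', 'sigma']
df = pd.read_csv('../logs/<run>/<run_name>_denom_percentile.info', names=cols)
print(df[['epoch', 'p50', 'mean', 'sigma']].tail())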