def __init__(self, network, depth, dataset, model=None):
    self._network = network
    self._depth = depth
    self._dataset = dataset
    self.model = model
    self.masks = None
    if self.model is None:
        self.model = get_network(network, depth, dataset)
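# --- Hedged sketch, not part of the original class. ---
# The calling code below registers a {module: 0/1 tensor} mask dict on this wrapper
# via `register_mask`. A minimal illustration of what such a mask does to the weights
# is shown here; the helper name `apply_masks` is hypothetical and only meant to make
# the mask convention concrete.
import torch
import torch.nn as nn


def apply_masks(model, masks):
    """Zero out pruned weights in place; `masks` maps Conv2d/Linear module -> 0/1 tensor."""
    with torch.no_grad():
        for module in model.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)) and module in masks:
                module.weight.mul_(masks[module].to(module.weight.device))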
def init_network(config, logger, device, imagenet=True):
    net = get_network(network=config.network, depth=config.depth, dataset=config.dataset)
    if imagenet:
        if config.network == "resnet" or config.network == "resnet_bottle":
            if config.depth == 18:
                net = resnet18(pretrained=True)
            elif config.depth == 50:
                net = resnet50(pretrained=True)
        bottleneck_net = BottleneckResNetImagenet
    else:
        print('==> Loading checkpoint from %s.' % config.load_checkpoint)
        logger.info('==> Loading checkpoint from %s.' % config.load_checkpoint)
        checkpoint = torch.load(config.load_checkpoint)
        if checkpoint.get('args', None) is not None:
            args = checkpoint['args']
            print('** [%s-%s%d] Acc: %.2f%%, Epoch: %d, Loss: %.4f' %
                  (args.dataset, args.network, args.depth,
                   checkpoint['acc'], checkpoint['epoch'], checkpoint['loss']))
            logger.info('** [%s-%s%d] Acc: %.2f%%, Epoch: %d, Loss: %.4f' %
                        (args.dataset, args.network, args.depth,
                         checkpoint['acc'], checkpoint['epoch'], checkpoint['loss']))
        state_dict = checkpoint['net'] if checkpoint.get('net', None) is not None else checkpoint['state_dict']
        # Strip the 'module.' prefix left over from DataParallel checkpoints.
        for key in list(state_dict.keys()):
            if key.startswith('module'):
                state_dict[key[7:]] = state_dict[key]
                state_dict.pop(key)
        net.load_state_dict(state_dict)
        bottleneck_net = get_bottleneck_builder(config.network)

    if config.data_distributed:
        net = nn.parallel.DistributedDataParallel(net.cuda(),
                                                  device_ids=[config.local_rank],
                                                  output_device=config.local_rank)
        return net, bottleneck_net
    else:
        net = nn.DataParallel(net)
        return net.to(device), bottleneck_net
def init_network(config, logger, device):
    net = get_network(network=config.network, depth=config.depth, dataset=config.dataset)
    print('==> Loading checkpoint from %s.' % config.checkpoint)
    logger.info('==> Loading checkpoint from %s.' % config.checkpoint)
    checkpoint = torch.load(config.checkpoint)
    if checkpoint.get('args', None) is not None:
        args = checkpoint['args']
        print('** [%s-%s%d] Acc: %.2f%%, Epoch: %d, Loss: %.4f' %
              (args.dataset, args.network, args.depth,
               checkpoint['acc'], checkpoint['epoch'], checkpoint['loss']))
        logger.info('** [%s-%s%d] Acc: %.2f%%, Epoch: %d, Loss: %.4f' %
                    (args.dataset, args.network, args.depth,
                     checkpoint['acc'], checkpoint['epoch'], checkpoint['loss']))
    state_dict = checkpoint['net'] if checkpoint.get('net', None) is not None else checkpoint['state_dict']
    net.load_state_dict(state_dict)
    bottleneck_net = get_bottleneck_builder(config.network)
    return net.to(device), bottleneck_net
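# --- Hedged usage sketch, not part of the original file. ---
# The function above only reads config.network, config.depth, config.dataset and
# config.checkpoint, so a minimal invocation could look like this. The checkpoint
# path and the network/depth values are placeholders, not values from the repo.
import logging
from types import SimpleNamespace
import torch

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    cfg = SimpleNamespace(network='resnet', depth=32, dataset='cifar10',
                          checkpoint='checkpoint/pretrained.pth.tar')  # placeholder path
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net, bottleneck_builder = init_network(cfg, logging.getLogger(__name__), device)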
def main(config, args):
    # init logger
    classes = {
        'cifar10': 10,
        'cifar100': 100,
        'mnist': 10,
        'tiny_imagenet': 200
    }
    logger, writer = init_logger(config, args)
    best_acc_vec = []
    test_acc_vec_vec = []

    for n_runs in range(1):
        if args.sigma_w2 is not None and n_runs != 0:
            break
        # build model
        model = get_network(config.network, config.depth, config.dataset,
                            use_bn=config.get('use_bn', args.bn),
                            scaled=args.scaled_init,
                            act=args.act)
        mask = None
        mb = ModelBase(config.network, config.depth, config.dataset, model)
        mb.cuda()
        if mask is not None:
            mb.register_mask(mask)
            ratio_vec_ = print_mask_information(mb, logger)

        # preprocessing
        # ====================================== get dataloader ======================================
        trainloader, testloader = get_dataloader(config.dataset, config.batch_size, 256, 4)
        # ====================================== fetch configs ======================================
        ckpt_path = config.checkpoint_dir
        num_iterations = config.iterations
        if args.target_ratio is None:
            target_ratio = config.target_ratio
        else:
            target_ratio = args.target_ratio
        normalize = config.normalize
        # ====================================== fetch exception ======================================
        exception = get_exception_layers(mb.model, str_to_list(config.exception, ',', int))
        logger.info('Exception: ')
        for idx, m in enumerate(exception):
            logger.info('  (%d) %s' % (idx, m))

        # ====================================== fetch training schemes ======================================
        ratio = 1 - (1 - target_ratio) ** (1.0 / num_iterations)
        learning_rates = str_to_list(config.learning_rate, ',', float)
        weight_decays = str_to_list(config.weight_decay, ',', float)
        training_epochs = str_to_list(config.epoch, ',', int)
        logger.info('Normalize: %s, Total iteration: %d, Target ratio: %.2f, Iter ratio %.4f.' %
                    (normalize, num_iterations, target_ratio, ratio))
        logger.info('Basic Settings: ')
        for idx in range(len(learning_rates)):
            logger.info('  %d: LR: %.5f, WD: %.5f, Epochs: %d' %
                        (idx, learning_rates[idx], weight_decays[idx], training_epochs[idx]))

        # ====================================== start pruning ======================================
        iteration = 0
        for _ in range(1):
            logger.info('** Target ratio: %.4f, iter ratio: %.4f, iteration: %d/%d.' %
                        (target_ratio, ratio, 1, num_iterations))

            # mb.model.apply(weights_init)
            print('#' * 40)
            print('USING {} INIT SCHEME'.format(args.init))
            print('#' * 40)
            if args.init == 'kaiming_xavier':
                mb.model.apply(weights_init_kaiming_xavier)
            elif args.init == 'kaiming':
                if args.act == 'relu' or args.act == 'elu':
                    mb.model.apply(weights_init_kaiming_relu)
                elif args.act == 'tanh':
                    mb.model.apply(weights_init_kaiming_tanh)
            elif args.init == 'xavier':
                mb.model.apply(weights_init_xavier)
            elif args.init == 'EOC':
                mb.model.apply(weights_init_EOC)
            elif args.init == 'ordered':
                def weights_init_ord(m):
                    if isinstance(m, nn.Conv2d):
                        ord_weights(m.weight, sigma_w2=args.sigma_w2)
                        if m.bias is not None:
                            ord_bias(m.bias)
                    elif isinstance(m, nn.Linear):
                        ord_weights(m.weight, sigma_w2=args.sigma_w2)
                        if m.bias is not None:
                            ord_bias(m.bias)
                    elif isinstance(m, nn.BatchNorm2d):
                        # Note that BN's running_var/mean are
                        # already initialized to 1 and 0 respectively.
                        if m.weight is not None:
                            m.weight.data.fill_(1.0)
                        if m.bias is not None:
                            m.bias.data.zero_()

                mb.model.apply(weights_init_ord)
            else:
                raise NotImplementedError

            print("=> Applying weight initialization(%s)." % config.get('init_method', 'kaiming'))
            print("Iteration of: %d/%d" % (iteration, num_iterations))

            if config.pruner == 'SNIP':
                print('=> Using SNIP')
                masks, scaled_masks = SNIP(mb.model, ratio, trainloader, 'cuda',
                                           num_classes=classes[config.dataset],
                                           samples_per_class=config.samples_per_class,
                                           num_iters=config.get('num_iters', 1),
                                           scaled_init=args.scaled_init)
            elif config.pruner == 'GraSP':
                print('=> Using GraSP')
                masks, scaled_masks = GraSP(mb.model, ratio, trainloader, 'cuda',
                                            num_classes=classes[config.dataset],
                                            samples_per_class=config.samples_per_class,
                                            num_iters=config.get('num_iters', 1),
                                            scaled_init=args.scaled_init)
            iteration = 0

            ################################################################################
            _masks = None
            _masks_scaled = None
            if not args.bn:
                # build model that has the same weights as the pruned network but with BN now!
                model2 = get_network(config.network, config.depth, config.dataset,
                                     use_bn=config.get('use_bn', True),
                                     scaled=args.scaled_init,
                                     act=args.act)
                weights_temp = []
                for layer_old in mb.model.modules():
                    if isinstance(layer_old, nn.Conv2d) or isinstance(layer_old, nn.Linear):
                        weights_temp.append(layer_old.weight)
                idx = 0
                for layer_new in model2.modules():
                    if isinstance(layer_new, nn.Conv2d) or isinstance(layer_new, nn.Linear):
                        layer_new.weight.data = weights_temp[idx]
                        idx += 1
                # Creating a base model with BN included now
                mb = ModelBase(config.network, config.depth, config.dataset, model2)
                mb.cuda()

                _masks = dict()
                _masks_scaled = dict()
                layer_keys_new = []
                for layer in mb.model.modules():
                    if isinstance(layer, nn.Conv2d) or isinstance(layer, nn.Linear):
                        layer_keys_new.append(layer)
                for new_keys, old_keys in zip(layer_keys_new, masks.keys()):
                    _masks[new_keys] = masks[old_keys]
                    if args.scaled_init:
                        _masks_scaled[new_keys] = scaled_masks[old_keys]
            ################################################################################

            if _masks is None:
                _masks = masks
                _masks_scaled = scaled_masks

            # ========== register mask ==================
            mb.register_mask(_masks)
            # ========== debugging ==================
            if args.scaled_init:
                if config.network == 'vgg':
                    print('scaling VGG')
                    mb.scaling_weights(_masks_scaled)

            # ========== save pruned network ============
            logger.info('Saving..')
            state = {
                'net': mb.model,
                'acc': -1,
                'epoch': -1,
                'args': config,
                'mask': mb.masks,
                'ratio': mb.get_ratio_at_each_layer()
            }
            path = os.path.join(ckpt_path, 'prune_%s_%s%s_r%s_it%d.pth.tar' %
                                (config.dataset, config.network, config.depth, target_ratio, iteration))
            torch.save(state, path)

            # ========== print pruning details ============
            logger.info('**[%d] Mask and training setting: ' % iteration)
            ratio_vec_ = print_mask_information(mb, logger)
            logger.info('  LR: %.5f, WD: %.5f, Epochs: %d' %
                        (learning_rates[iteration], weight_decays[iteration], training_epochs[iteration]))

            results_path = config.summary_dir + args.init + '_sp' + str(args.target_ratio).replace('.', '_')
            if args.scaled_init:
                results_path += '_scaled'
            if args.bn:
                results_path += '_bn'
            if args.sigma_w2 is not None and args.init == 'ordered':
                results_path += '_sgw2{}'.format(args.sigma_w2).replace('.', '_')
            results_path += '_' + args.act + '_' + str(config.depth)

            print('saving the ratios')
            print(results_path)
            if not os.path.isdir(results_path):
                os.mkdir(results_path)
            np.save(results_path + '/ratios_pruned{}'.format(args.seed_tiny), np.array(ratio_vec_))

            # if args.sigma_w2 is not None:
            #     break

            # ========== finetuning =======================
            best_acc, test_acc_vec = train_once(mb=mb,
                                                net=mb.model,
                                                trainloader=trainloader,
                                                testloader=testloader,
                                                writer=writer,
                                                config=config,
                                                ckpt_path=ckpt_path,
                                                learning_rate=learning_rates[iteration],
                                                weight_decay=weight_decays[iteration],
                                                num_epochs=training_epochs[iteration],
                                                iteration=iteration,
                                                logger=logger,
                                                args=args)

            best_acc_vec.append(best_acc)
            test_acc_vec_vec.append(test_acc_vec)

            np.save(results_path + '/best_acc{}'.format(args.seed_tiny), np.array(best_acc_vec))
            np.save(results_path + '/test_acc{}'.format(args.seed_tiny), np.array(test_acc_vec_vec))
def main(config, args):
    # init logger
    classes = {
        'cifar10': 10,
        'cifar100': 100,
        'mnist': 10,
        'tiny_imagenet': 200,
        'imagenet': 1000
    }
    logger, writer = init_logger(config)

    # build model
    # model = models.__dict__[config.network]()
    model = get_network(config.network, config.depth, config.dataset,
                        use_bn=config.get('use_bn', args.bn),
                        scaled=args.scaled_init,
                        act=args.act)
    mb = ModelBase(config.network, config.depth, config.dataset, model)
    mb.cuda()

    # preprocessing
    # ====================================== fetch configs ======================================
    ckpt_path = config.checkpoint_dir
    num_iterations = config.iterations
    if args.target_ratio is None:
        target_ratio = config.target_ratio
    else:
        target_ratio = args.target_ratio
    normalize = config.normalize
    # ====================================== fetch exception ======================================
    exception = get_exception_layers(mb.model, str_to_list(config.exception, ',', int))
    logger.info('Exception: ')
    for idx, m in enumerate(exception):
        logger.info('  (%d) %s' % (idx, m))

    # ====================================== fetch training schemes ======================================
    ratio = 1 - (1 - target_ratio) ** (1.0 / num_iterations)
    learning_rates = str_to_list(config.learning_rate, ',', float)
    weight_decays = str_to_list(config.weight_decay, ',', float)
    training_epochs = str_to_list(config.epoch, ',', int)
    logger.info('Normalize: %s, Total iteration: %d, Target ratio: %.2f, Iter ratio %.4f.' %
                (normalize, num_iterations, target_ratio, ratio))
    logger.info('Basic Settings: ')
    for idx in range(len(learning_rates)):
        logger.info('  %d: LR: %.5f, WD: %.5f, Epochs: %d' %
                    (idx, learning_rates[idx], weight_decays[idx], training_epochs[idx]))

    # ====================================== get dataloader ======================================
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        config.traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=config.batch_size,
                                              shuffle=True,
                                              num_workers=16,
                                              pin_memory=False,
                                              sampler=None)

    # ====================================== start pruning ======================================
    for iteration in range(num_iterations):
        logger.info('** Target ratio: %.4f, iter ratio: %.4f, iteration: %d/%d.' %
                    (target_ratio, ratio, iteration, num_iterations))
        assert num_iterations == 1

        print("=> Applying weight initialization.")
        mb.model.apply(weights_init_kaiming_xavier)
        print("=> Applying weight initialization(%s)." % config.get('init_method', 'kaiming'))
        print("Iteration of: %d/%d" % (iteration, num_iterations))

        if config.pruner == 'SNIP':
            print('=> Using SNIP')
            masks, scaled_masks = SNIP(mb.model, ratio, trainloader, 'cuda',
                                       num_classes=classes[config.dataset],
                                       samples_per_class=config.samples_per_class,
                                       num_iters=config.get('num_iters', 1),
                                       scaled_init=False)
        elif config.pruner == 'GraSP':
            print('=> Using GraSP')
            masks = GraSP(mb.model, ratio, trainloader, 'cuda',
                          num_classes=classes[config.dataset],
                          samples_per_class=config.samples_per_class,
                          num_iters=config.get('num_iters', 1))

        # ========== register mask ==================
        mb.masks = masks
        # ========== save pruned network ============
        logger.info('Saving..')
        state = {
            'net': mb.model,
            'acc': -1,
            'epoch': -1,
            'args': config,
            'mask': mb.masks,
            'ratio': mb.get_ratio_at_each_layer()
        }
        path = os.path.join(ckpt_path, 'prune_%s_%s%s_r%s_it%d.pth.tar' %
                            (config.dataset, config.network, config.depth, target_ratio, iteration))
        torch.save(state, path)
        # ========== print pruning details ============
        logger.info('**[%d] Mask and training setting: ' % iteration)
        print_mask_information(mb, logger)
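# --- Hedged sketch, not part of the original script: reading back the pruning
# checkpoint saved above. The keys mirror the `state` dict passed to torch.save;
# the function name and the path argument are illustrative placeholders.
import torch


def load_pruned_checkpoint(path, device='cuda'):
    checkpoint = torch.load(path, map_location=device)
    net = checkpoint['net']            # the pruned model object saved above
    masks = checkpoint['mask']         # {module: 0/1 tensor} pruning masks
    per_layer_ratio = checkpoint['ratio']
    return net, masks, per_layer_ratio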
args = parser.parse_args()

# init model
nc = {
    'cifar10': 10,
    'cifar100': 100,
    'mnist': 10,
    'fashion-mnist': 10
}
num_classes = nc[args.dataset]
net = get_network(args.network,
                  # depth=args.depth,
                  num_classes=num_classes,
                  growthRate=args.growthRate,
                  compressionRate=args.compressionRate,
                  widen_factor=args.widen_factor,
                  dropRate=args.dropRate,
                  base_width=args.base_width,
                  cardinality=args.cardinality)
print(net)
optim_name = args.optimizer.lower()
net = net.to(args.device)
net = extend(net)

module_names = ''
if hasattr(net, 'features'):
    module_names = 'features'
elif hasattr(net, 'children'):
    module_names = 'children'
parser.add_argument('--weight_decay', default=3e-3, type=float)
parser.add_argument('--batch_size', default=128, type=int)
parser.add_argument('--network', default='vgg', type=str)
parser.add_argument('--depth', default=19, type=int)
parser.add_argument('--dataset', default='cifar10', type=str)
parser.add_argument('--epoch', default=150, type=int)
parser.add_argument('--decay_every', default=60, type=int)
parser.add_argument('--decay_ratio', default=0.1, type=float)
parser.add_argument('--device', default='cuda', type=str)
parser.add_argument('--resume', '-r', action='store_true')
parser.add_argument('--load_path', default='', type=str)
parser.add_argument('--log_dir', default='runs/pretrain', type=str)
args = parser.parse_args()

# init model
net = get_network(network=args.network, depth=args.depth, dataset=args.dataset)
net = net.to(args.device)

# init dataloader
trainloader, testloader = get_dataloader(dataset=args.dataset,
                                         train_batch_size=args.batch_size,
                                         test_batch_size=256)

# init optimizer and lr scheduler
optimizer = optim.SGD(net.parameters(),
                      lr=args.learning_rate,
                      momentum=0.9,
                      weight_decay=args.weight_decay)
lr_schedule = {
    0: args.learning_rate,
    int(args.epoch * 0.5): args.learning_rate * 0.1,
def main(config):
    # init logger
    classes = {
        'cifar10': 10,
        'cifar100': 100,
        'mnist': 10,
        'tiny_imagenet': 200
    }
    logger, writer = init_logger(config)

    # build model
    model = get_network(config.network, config.depth, config.dataset,
                        use_bn=config.get('use_bn', True))
    mask = None
    mb = ModelBase(config.network, config.depth, config.dataset, model)
    mb.cuda()
    if mask is not None:
        mb.register_mask(mask)
        print_mask_information(mb, logger)

    # preprocessing
    # ====================================== get dataloader ======================================
    trainloader, testloader = get_dataloader(config.dataset, config.batch_size, 256, 4,
                                             root='/home/wzn/PycharmProjects/GraSP/data')
    # ====================================== fetch configs ======================================
    ckpt_path = config.checkpoint_dir
    num_iterations = config.iterations
    target_ratio = config.target_ratio
    normalize = config.normalize
    # ====================================== fetch exception ======================================
    # exception = get_exception_layers(mb.model, str_to_list(config.exception, ',', int))
    # logger.info('Exception: ')
    #
    # for idx, m in enumerate(exception):
    #     logger.info('  (%d) %s' % (idx, m))

    # ====================================== fetch training schemes ======================================
    ratio = 1 - (1 - target_ratio) ** (1.0 / num_iterations)
    learning_rates = str_to_list(config.learning_rate, ',', float)
    weight_decays = str_to_list(config.weight_decay, ',', float)
    training_epochs = str_to_list(config.epoch, ',', int)
    logger.info('Normalize: %s, Total iteration: %d, Target ratio: %.2f, Iter ratio %.4f.' %
                (normalize, num_iterations, target_ratio, ratio))
    logger.info('Basic Settings: ')
    for idx in range(len(learning_rates)):
        logger.info('  %d: LR: %.5f, WD: %.5f, Epochs: %d' %
                    (idx, learning_rates[idx], weight_decays[idx], training_epochs[idx]))

    # ====================================== start pruning ======================================
    iteration = 0
    for _ in range(1):
        # logger.info('** Target ratio: %.4f, iter ratio: %.4f, iteration: %d/%d.' %
        #             (target_ratio, ratio, 1, num_iterations))
        mb.model.apply(weights_init)
        print("=> Applying weight initialization(%s)." % config.get('init_method', 'kaiming'))
        # print("Iteration of: %d/%d" % (iteration, num_iterations))

        # masks = GraSP(mb.model, ratio, trainloader, 'cuda',
        #               num_classes=classes[config.dataset],
        #               samples_per_class=config.samples_per_class,
        #               num_iters=config.get('num_iters', 1))
        # iteration = 0
        # print('=> Using GraSP')
        # # ========== register mask ==================
        # mb.register_mask(masks)
        # # ========== save pruned network ============
        # logger.info('Saving..')
        # state = {
        #     'net': mb.model,
        #     'acc': -1,
        #     'epoch': -1,
        #     'args': config,
        #     'mask': mb.masks,
        #     'ratio': mb.get_ratio_at_each_layer()
        # }
        # path = os.path.join(ckpt_path, 'prune_%s_%s%s_r%s_it%d.pth.tar' %
        #                     (config.dataset, config.network, config.depth,
        #                      config.target_ratio, iteration))
        # torch.save(state, path)
        # # ========== print pruning details ============
        # logger.info('**[%d] Mask and training setting: ' % iteration)
        # print_mask_information(mb, logger)
        # logger.info('  LR: %.5f, WD: %.5f, Epochs: %d' %
        #             (learning_rates[iteration], weight_decays[iteration], training_epochs[iteration]))

        # ========== finetuning =======================
        train_once(mb=mb,
                   net=mb.model,
                   trainloader=trainloader,
                   testloader=testloader,
                   writer=writer,
                   config=config,
                   ckpt_path=ckpt_path,
                   learning_rate=learning_rates[iteration],
                   weight_decay=weight_decays[iteration],
                   num_epochs=training_epochs[iteration],
                   iteration=iteration,
                   logger=logger)
parser.add_argument('--TInv', default=100, type=int)
parser.add_argument('--prefix', default=None, type=str)
args = parser.parse_args()

# init model
nc = {
    'cifar10': 10,
    'cifar100': 100
}
num_classes = nc[args.dataset]
net = get_network(args.network,
                  depth=args.depth,
                  num_classes=num_classes,
                  growthRate=args.growthRate,
                  compressionRate=args.compressionRate,
                  widen_factor=args.widen_factor,
                  dropRate=args.dropRate)
net = net.to(args.device)

# init dataloader
trainloader, testloader = get_dataloader(dataset=args.dataset,
                                         train_batch_size=args.batch_size,
                                         test_batch_size=256)

# init optimizer and lr scheduler
optim_name = args.optimizer.lower()
tag = optim_name
if optim_name == 'sgd':
    optimizer = optim.SGD(net.parameters(),
# main script
#
# get command-line arguments
args = get_args()

# set random seed for reproducibility
torch.manual_seed(args.seed)

# init model
num_classes = {
    'cifar10': 10,
    'cifar100': 100
}
net = get_network(args.network,
                  depth=args.depth,
                  num_classes=num_classes[args.dataset],
                  growthRate=args.growthRate,
                  compressionRate=args.compressionRate,
                  widen_factor=args.widen_factor,
                  dropRate=args.dropRate,
                  hidden_dim=args.hidden_dim).to(args.device)

# init dataloader
trainloader, testloader = get_dataloader(dataset=args.dataset,
                                         train_batch_size=args.batch_size,
                                         test_batch_size=256)

# init optimizer
optim_name = args.optimizer.lower()
tag = optim_name
optimizer = get_optimizer(optim_name, net, args)

# init lr scheduler
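# --- Hedged sketch, not part of the original script. ---
# The earlier pretraining script builds an epoch -> learning-rate dict
# (`lr_schedule`). One minimal way to consume such a dict once per epoch is shown
# below; the function name is illustrative and may differ from the repo's own helper.
def adjust_learning_rate(optimizer, epoch, lr_schedule):
    """Set the learning rate of every param group when `epoch` hits a milestone."""
    if epoch in lr_schedule:
        for group in optimizer.param_groups:
            group['lr'] = lr_schedule[epoch]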
parser.add_argument('--decay_ratio', default=0.1, type=float)
parser.add_argument('--device', default=0, type=int)
parser.add_argument('--resume', '-r', action='store_true')
parser.add_argument('--load_path', default='', type=str)
parser.add_argument('--log_dir', default='runs/pretrain', type=str)
parser.add_argument('--rank-scale', default=0.0, type=float)
parser.add_argument('--wd2fd', action='store_true')
parser.add_argument('--spectral', action='store_true')
parser.add_argument('--kaiming', action='store_true')
parser.add_argument('--target-ratio', default=0.0, type=float)
parser.add_argument('--auto-resume', action='store_true')
args = parser.parse_args()

# init model
net = get_network(network=args.network, depth=args.depth, dataset=args.dataset,
                  kaiming=args.kaiming)
origpar = parameter_count(net)
print('Original weight count:', origpar)

if args.rank_scale or args.target_ratio:
    if args.network == 'vgg':
        names = [
            str(i) for i, child in enumerate(net.feature)
            if i and type(child) == nn.Conv2d
        ]
        denoms = [
            child.out_channels * child.kernel_size[0] * child.kernel_size[1]
            for child in net.feature
            if type(child) == nn.Conv2d
        ]


def compress(model, rank_scale, spectral=False, kaiming=False):