def test_prune(self):
    ori_sparsity = UnstructuredPruner.total_sparse(self.net)
    ori_threshold = self.pruner.threshold
    self.pruner.step()
    self.net(
        paddle.to_tensor(
            np.random.uniform(0, 1, [16, 3, 32, 32]), dtype='float32'))
    cur_sparsity = UnstructuredPruner.total_sparse(self.net)
    cur_threshold = self.pruner.threshold

    print("Original threshold: {}".format(ori_threshold))
    print("Current threshold: {}".format(cur_threshold))
    print("Original sparsity: {}".format(ori_sparsity))
    print("Current sparsity: {}".format(cur_sparsity))
    self.assertLessEqual(ori_threshold, cur_threshold)
    self.assertGreaterEqual(cur_sparsity, ori_sparsity)

    self.pruner.update_params()
    self.assertEqual(cur_sparsity, UnstructuredPruner.total_sparse(self.net))
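# Note on the assertions above: in this version of the API,
# UnstructuredPruner.total_sparse reports sparsity, i.e. the fraction of
# zero-valued weight entries, so it grows as pruning proceeds, and it is
# unchanged by update_params(), which only applies the already-computed
# masks. A minimal numpy sketch of that quantity (an illustration with a
# hypothetical helper, not PaddleSlim's implementation):
import numpy as np

def fraction_of_zeros(param_arrays):
    """param_arrays: a list of numpy arrays holding the model's weights."""
    total = sum(p.size for p in param_arrays)
    zeros = sum(int(np.sum(p == 0)) for p in param_arrays)
    return zeros / float(total)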
def test_unstructured_prune_conv1x1(self):
    print(self.pruner.skip_params)
    print(self.pruner_conv1x1.skip_params)
    self.assertTrue(
        len(self.pruner.skip_params) < len(self.pruner_conv1x1.skip_params))
    self.pruner_conv1x1.step()
    self.pruner_conv1x1.update_params()
    cur_sparsity = UnstructuredPruner.total_sparse_conv1x1(self.net_conv1x1)
    self.assertTrue(abs(cur_sparsity - 0.55) < 0.01)
def create_unstructured_pruner(model, args, configs=None):
    if configs is None:
        return UnstructuredPruner(
            model,
            mode=args.pruning_mode,
            ratio=args.ratio,
            threshold=args.threshold,
            prune_params_type=args.prune_params_type,
            local_sparsity=args.local_sparsity)
    else:
        return GMPUnstructuredPruner(
            model,
            ratio=args.ratio,
            prune_params_type=args.prune_params_type,
            local_sparsity=args.local_sparsity,
            configs=configs)
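# A minimal usage sketch for the factory above. The argument values are
# hypothetical placeholders chosen only to match the attributes the function
# reads (args.pruning_mode, args.ratio, ...); they are not recommended
# settings, and `model` stands for any paddle.nn.Layer.
import argparse

model = mobilenet_v1(num_classes=10, pretrained=False)
args = argparse.Namespace(
    pruning_mode='ratio',
    ratio=0.55,
    threshold=0.01,
    prune_params_type=None,
    local_sparsity=False)

# Without configs, a plain UnstructuredPruner is returned.
pruner = create_unstructured_pruner(model, args)

# Passing a configs dict switches the factory to GMPUnstructuredPruner; the
# keys mirror the ones assembled in the GMP training script below.
gmp_configs = {
    'stable_iterations': 0,
    'pruning_iterations': 4500,
    'tunning_iterations': 4500,
    'resume_iteration': 0,
    'pruning_steps': 100,
    'initial_ratio': 0.15,
}
gmp_pruner = create_unstructured_pruner(model, args, configs=gmp_configs)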
class TestUnstructuredPruner(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestUnstructuredPruner, self).__init__(*args, **kwargs)
        paddle.disable_static()
        self._gen_model()

    def _gen_model(self):
        self.net = mobilenet_v1(num_classes=10, pretrained=False)
        self.pruner = UnstructuredPruner(
            self.net, mode='ratio', ratio=0.98, threshold=0.0)

    def test_prune(self):
        ori_density = UnstructuredPruner.total_sparse(self.net)
        ori_threshold = self.pruner.threshold
        self.pruner.step()
        self.net(
            paddle.to_tensor(
                np.random.uniform(0, 1, [16, 3, 32, 32]), dtype='float32'))
        cur_density = UnstructuredPruner.total_sparse(self.net)
        cur_threshold = self.pruner.threshold

        print("Original threshold: {}".format(ori_threshold))
        print("Current threshold: {}".format(cur_threshold))
        print("Original density: {}".format(ori_density))
        print("Current density: {}".format(cur_density))
        self.assertLessEqual(ori_threshold, cur_threshold)
        self.assertLessEqual(cur_density, ori_density)

        self.pruner.update_params()
        self.assertEqual(cur_density,
                         UnstructuredPruner.total_sparse(self.net))

    def test_summarize_weights(self):
        max_value = -float("inf")
        threshold = self.pruner.summarize_weights(self.net, 1.0)
        for name, sub_layer in self.net.named_sublayers():
            if not self.pruner._should_prune_layer(sub_layer):
                continue
            for param in sub_layer.parameters(include_sublayers=False):
                max_value = max(
                    max_value,
                    np.max(np.abs(np.array(param.value().get_tensor()))))
        print("The returned threshold is {}.".format(threshold))
        print("The max_value is {}.".format(max_value))
        self.assertEqual(max_value, threshold)
def _gen_model(self):
    self.net = mobilenet_v1(num_classes=10, pretrained=False)
    self.net_conv1x1 = mobilenet_v1(num_classes=10, pretrained=False)
    self.net_mxn = mobilenet_v1(num_classes=10, pretrained=False)
    self.pruner = UnstructuredPruner(
        self.net, mode='ratio', ratio=0.55, local_sparsity=True)
    self.pruner_conv1x1 = UnstructuredPruner(
        self.net_conv1x1,
        mode='ratio',
        ratio=0.55,
        prune_params_type='conv1x1_only',
        local_sparsity=False)
    self.pruner_mxn = UnstructuredPruner(
        self.net_mxn,
        mode='ratio',
        ratio=0.55,
        local_sparsity=True,
        sparse_block=[2, 1])
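# Reading of the three pruner configurations above (my interpretation of the
# flags; see the PaddleSlim docs for the authoritative semantics):
# - local_sparsity=True asks for the 0.55 ratio to hold per parameter tensor,
#   while local_sparsity=False enforces it globally across all prunable
#   weights, so individual tensors may deviate from 0.55.
# - prune_params_type='conv1x1_only' restricts pruning to 1x1 convolution
#   kernels, which is why pruner_conv1x1 skips more parameters than pruner
#   (asserted in test_unstructured_prune_conv1x1 above).
# - sparse_block=[2, 1] prunes weights in aligned 2x1 blocks rather than as
#   individual elements.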
def compress(args):
    shuffle = True
    if args.ce_test:
        # set seed
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        args.num_workers = 0
        shuffle = False

    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', backend='cv2', transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', backend='cv2', transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        drop_last=True)
    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=place,
        batch_sampler=batch_sampler,
        return_list=True,
        num_workers=args.num_workers,
        use_shared_memory=True)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    if args.checkpoint is not None and args.last_epoch > -1:
        if args.checkpoint.endswith('pdparams'):
            args.checkpoint = args.checkpoint[:-9]
        if args.checkpoint.endswith('pdopt'):
            args.checkpoint = args.checkpoint[:-6]
        model.set_state_dict(paddle.load(args.checkpoint + ".pdparams"))
        opt.set_state_dict(paddle.load(args.checkpoint + ".pdopt"))
    elif args.pretrained_model is not None:
        if args.pretrained_model.endswith('pdparams'):
            args.pretrained_model = args.pretrained_model[:-9]
        if args.pretrained_model.endswith('pdopt'):
            args.pretrained_model = args.pretrained_model[:-6]
        model.set_state_dict(paddle.load(args.pretrained_model + ".pdparams"))

    if args.pruning_strategy == 'gmp':
        # GMP pruner step 0: define configs. No need to do this if you are
        # not using 'gmp'.
        configs = {
            'stable_iterations': args.stable_epochs * step_per_epoch,
            'pruning_iterations': args.pruning_epochs * step_per_epoch,
            'tunning_iterations': args.tunning_epochs * step_per_epoch,
            'resume_iteration': (args.last_epoch + 1) * step_per_epoch,
            'pruning_steps': args.pruning_steps,
            'initial_ratio': args.initial_ratio,
        }
    else:
        configs = None

    # GMP pruner step 1: initialize a pruner object.
    pruner = create_unstructured_pruner(model, args, configs=configs)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # GMP pruner step 2: step() to update ratios and other internal
            # states of the pruner.
            pruner.step()

            train_run_cost += time.time() - train_start
            total_samples += args.batch_size
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) /
                        args.log_period, total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    for i in range(args.last_epoch + 1, args.num_epochs):
        train(i)
        # GMP pruner step 3: update params before summarizing sparsity,
        # saving the model, or evaluating.
        pruner.update_params()

        if (i + 1) % args.test_period == 0:
            _logger.info(
                "The current sparsity of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "model.pdopt"))
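# Intuition for the GMP configs assembled above (a sketch of the schedule as
# I read it, not PaddleSlim's exact formula): training first runs untouched
# for `stable_iterations`; the sparsity ratio is then raised from
# `initial_ratio` towards args.ratio in `pruning_steps` discrete updates
# spread across `pruning_iterations` (one update roughly every
# pruning_iterations / pruning_steps calls to pruner.step()); finally the
# target ratio is held fixed for `tunning_iterations` of fine-tuning.
# `resume_iteration` tells the pruner where in this schedule to restart when
# training resumes from a checkpoint.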
def compress(args):
    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', backend='cv2', transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', backend='cv2', transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(
        train_dataset,
        places=place,
        batch_sampler=batch_sampler,
        return_list=True,
        num_workers=args.num_workers,
        use_shared_memory=True)
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    if args.pretrained_model is not None:
        model.set_state_dict(paddle.load(args.pretrained_model))

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()
        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            pruner.step()

            train_run_cost += time.time() - train_start
            total_samples += args.batch_size * ParallelEnv().nranks
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) /
                        args.log_period, total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    pruner = UnstructuredPruner(
        model,
        mode=args.pruning_mode,
        ratio=args.ratio,
        threshold=args.threshold)

    for i in range(args.resume_epoch + 1, args.num_epochs):
        train(i)
        if (i + 1) % args.test_period == 0:
            pruner.update_params()
            _logger.info(
                "The current density of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model-pruned.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "opt-pruned.pdopt"))
def compress(args):
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(
            mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', backend='cv2', transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=places,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size,
        shuffle=False,
        use_shared_memory=True)

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    model.set_state_dict(paddle.load(args.pruned_model))
    _logger.info("The current sparsity of the pruned model is: {}%".format(
        round(100 * UnstructuredPruner.total_sparse(model), 2)))
    test(0)
def runTest(self):
    with fluid.unique_name.guard():
        net = paddle.vision.models.LeNet()
        optimizer = paddle.optimizer.Adam(
            learning_rate=0.001, parameters=net.parameters())
        inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
        labels = [Input([None, 1], 'int64', name='label')]

        pruner = UnstructuredPruner(net, mode='ratio', ratio=0.55)

        net.train()
        self._update_masks(pruner, 0.0)
        pruner.update_params()
        self._update_masks(pruner, 1.0)
        pruner.set_static_masks()
        sparsity_0 = UnstructuredPruner.total_sparse(net)
        for i, data in enumerate(self.train_loader):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            logits = net(x_data)
            loss = F.cross_entropy(logits, y_data)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            if i == 10:
                break

        sparsity_1 = UnstructuredPruner.total_sparse(net)
        pruner.update_params()
        sparsity_2 = UnstructuredPruner.total_sparse(net)
        print(sparsity_0, sparsity_1, sparsity_2)
        self.assertEqual(sparsity_0, 1.0)
        self.assertEqual(sparsity_2, 1.0)
        self.assertLess(sparsity_1, 1.0)
def test_block_prune_mxn(self):
    self.pruner_mxn.step()
    self.pruner_mxn.update_params()
    cur_sparsity = UnstructuredPruner.total_sparse(self.net_mxn)
    self.assertTrue(abs(cur_sparsity - 0.55) < 0.01)