    def test_prune(self):
        ori_sparsity = UnstructuredPruner.total_sparse(self.net)
        ori_threshold = self.pruner.threshold
        self.pruner.step()
        self.net(
            paddle.to_tensor(np.random.uniform(0, 1, [16, 3, 32, 32]),
                             dtype='float32'))
        cur_sparsity = UnstructuredPruner.total_sparse(self.net)
        cur_threshold = self.pruner.threshold
        print("Original threshold: {}".format(ori_threshold))
        print("Current threshold: {}".format(cur_threshold))
        print("Original sparsity: {}".format(ori_sparsity))
        print("Current sparsity: {}".format(cur_sparsity))
        self.assertLessEqual(ori_threshold, cur_threshold)
        self.assertGreaterEqual(cur_sparsity, ori_sparsity)

        self.pruner.update_params()
        self.assertEqual(cur_sparsity,
                         UnstructuredPruner.total_sparse(self.net))
    def test_unstructured_prune_conv1x1(self):
        print(self.pruner.skip_params)
        print(self.pruner_conv1x1.skip_params)
        self.assertTrue(
            len(self.pruner.skip_params) < len(self.pruner_conv1x1.skip_params))
        self.pruner_conv1x1.step()
        self.pruner_conv1x1.update_params()
        cur_sparsity = UnstructuredPruner.total_sparse_conv1x1(
            self.net_conv1x1)
        self.assertTrue(abs(cur_sparsity - 0.55) < 0.01)
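The contrast above rests on prune_params_type='conv1x1_only' restricting pruning to 1x1 convolutions, which is why that pruner reports more skip_params and why the matching metric is total_sparse_conv1x1. A minimal sketch of the assumed relationship between the two metrics, inferred from this test rather than from the pruner's source:

# Hedged sketch: assumed meaning of the two sparsity metrics, applied to a
# hypothetical net_conv1x1 pruned at ratio=0.55 as configured above.
overall = UnstructuredPruner.total_sparse(net_conv1x1)
conv1x1_only = UnstructuredPruner.total_sparse_conv1x1(net_conv1x1)
# conv1x1_only should land near 0.55, while overall stays lower because the
# layers listed in skip_params remain dense.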
Example #3
def create_unstructured_pruner(model, args, configs=None):
    if configs is None:
        return UnstructuredPruner(model,
                                  mode=args.pruning_mode,
                                  ratio=args.ratio,
                                  threshold=args.threshold,
                                  prune_params_type=args.prune_params_type,
                                  local_sparsity=args.local_sparsity)
    else:
        return GMPUnstructuredPruner(model,
                                     ratio=args.ratio,
                                     prune_params_type=args.prune_params_type,
                                     local_sparsity=args.local_sparsity,
                                     configs=configs)
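A minimal sketch of driving this factory, assuming an argparse.Namespace carrying the attributes the factory reads (the field values are illustrative, not the original script's defaults). Passing a configs dict is what flips the return type to GMPUnstructuredPruner:

from argparse import Namespace
from paddle.vision.models import mobilenet_v1

args = Namespace(pruning_mode='ratio', ratio=0.55, threshold=0.01,
                 prune_params_type=None, local_sparsity=False)

model = mobilenet_v1(num_classes=10, pretrained=False)
pruner = create_unstructured_pruner(model, args)  # -> UnstructuredPruner

# Hypothetical GMP schedule; the keys mirror the configs dict built in the
# 'gmp' branch of the training script further below.
gmp_configs = {
    'stable_iterations': 0,
    'pruning_iterations': 4000,
    'tunning_iterations': 4000,
    'resume_iteration': 0,
    'pruning_steps': 100,
    'initial_ratio': 0.15,
}
gmp_model = mobilenet_v1(num_classes=10, pretrained=False)
gmp_pruner = create_unstructured_pruner(gmp_model, args,
                                        configs=gmp_configs)  # -> GMPUnstructuredPruner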
Example #4
class TestUnstructuredPruner(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super(TestUnstructuredPruner, self).__init__(*args, **kwargs)
        paddle.disable_static()
        self._gen_model()

    def _gen_model(self):
        self.net = mobilenet_v1(num_classes=10, pretrained=False)
        self.pruner = UnstructuredPruner(self.net,
                                         mode='ratio',
                                         ratio=0.98,
                                         threshold=0.0)

    def test_prune(self):
        ori_density = UnstructuredPruner.total_sparse(self.net)
        ori_threshold = self.pruner.threshold
        self.pruner.step()
        self.net(
            paddle.to_tensor(np.random.uniform(0, 1, [16, 3, 32, 32]),
                             dtype='float32'))
        cur_density = UnstructuredPruner.total_sparse(self.net)
        cur_threshold = self.pruner.threshold
        print("Original threshold: {}".format(ori_threshold))
        print("Current threshold: {}".format(cur_threshold))
        print("Original density: {}".format(ori_density))
        print("Current density: {}".format(cur_density))
        self.assertLessEqual(ori_threshold, cur_threshold)
        self.assertLessEqual(cur_density, ori_density)

        self.pruner.update_params()
        self.assertEqual(cur_density,
                         UnstructuredPruner.total_sparse(self.net))

    def test_summarize_weights(self):
        max_value = -float("inf")
        threshold = self.pruner.summarize_weights(self.net, 1.0)
        for name, sub_layer in self.net.named_sublayers():
            if not self.pruner._should_prune_layer(sub_layer):
                continue
            for param in sub_layer.parameters(include_sublayers=False):
                max_value = max(
                    max_value,
                    np.max(np.abs(np.array(param.value().get_tensor()))))
        print("The returned threshold is {}.".format(threshold))
        print("The max_value is {}.".format(max_value))
        self.assertEqual(max_value, threshold)
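The equality asserted in test_summarize_weights reads naturally if summarize_weights(model, ratio) returns the weight magnitude at the given quantile over all prunable parameters, so ratio=1.0 yields the largest absolute weight. A hedged sketch of that assumed contract, with net and pruner standing in for the self.net and self.pruner fixtures built in _gen_model:

# Assumed contract, inferred from the test rather than the pruner's source:
# summarize_weights(model, r) is roughly the |w| value at quantile r over
# the prunable parameters.
t_half = pruner.summarize_weights(net, 0.5)  # about the median magnitude
t_max = pruner.summarize_weights(net, 1.0)   # the global max, as asserted
assert t_half <= t_max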
Example #5
    def _gen_model(self):
        self.net = mobilenet_v1(num_classes=10, pretrained=False)
        self.net_conv1x1 = mobilenet_v1(num_classes=10, pretrained=False)
        self.net_mxn = mobilenet_v1(num_classes=10, pretrained=False)
        self.pruner = UnstructuredPruner(self.net,
                                         mode='ratio',
                                         ratio=0.55,
                                         local_sparsity=True)
        self.pruner_conv1x1 = UnstructuredPruner(
            self.net_conv1x1,
            mode='ratio',
            ratio=0.55,
            prune_params_type='conv1x1_only',
            local_sparsity=False)
        self.pruner_mxn = UnstructuredPruner(self.net_mxn,
                                             mode='ratio',
                                             ratio=0.55,
                                             local_sparsity=True,
                                             sparse_block=[2, 1])
Example #6
def compress(args):
    shuffle = True
    if args.ce_test:
        # set seed
        seed = 111
        paddle.seed(seed)
        np.random.seed(seed)
        random.seed(seed)
        args.num_workers = 0
        shuffle = False

    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       backend='cv2',
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=shuffle,
        drop_last=True)

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=place,
                                        batch_sampler=batch_sampler,
                                        return_list=True,
                                        num_workers=args.num_workers,
                                        use_shared_memory=True)

    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))
    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    if args.checkpoint is not None and args.last_epoch > -1:
        if args.checkpoint.endswith('pdparams'):
            args.checkpoint = args.checkpoint[:-9]
        if args.checkpoint.endswith('pdopt'):
            args.checkpoint = args.checkpoint[:-6]
        model.set_state_dict(paddle.load(args.checkpoint + ".pdparams"))
        opt.set_state_dict(paddle.load(args.checkpoint + ".pdopt"))
    elif args.pretrained_model is not None:
        if args.pretrained_model.endswith('pdparams'):
            args.pretrained_model = args.pretrained_model[:-9]
        if args.pretrained_model.endswith('pdopt'):
            args.pretrained_model = args.pretrained_model[:-6]
        model.set_state_dict(paddle.load(args.pretrained_model + ".pdparams"))

    if args.pruning_strategy == 'gmp':
        # GMP pruner step 0: define configs. No need to do this if you are not using 'gmp'
        configs = {
            'stable_iterations': args.stable_epochs * step_per_epoch,
            'pruning_iterations': args.pruning_epochs * step_per_epoch,
            'tunning_iterations': args.tunning_epochs * step_per_epoch,
            'resume_iteration': (args.last_epoch + 1) * step_per_epoch,
            'pruning_steps': args.pruning_steps,
            'initial_ratio': args.initial_ratio,
        }
    else:
        configs = None

    # GMP pruner step 1: initialize a pruner object
    pruner = create_unstructured_pruner(model, args, configs=configs)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            # GMP pruner step 2: step() to update ratios and other internal states of the pruner.
            pruner.step()

            train_run_cost += time.time() - train_start
            total_samples += args.batch_size

            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) / args.log_period,
                        total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0
            reader_start = time.time()

    for i in range(args.last_epoch + 1, args.num_epochs):
        train(i)
        # GMP pruner step 3: update params before summarizing sparsity, saving the model or evaluating.
        pruner.update_params()

        if (i + 1) % args.test_period == 0:
            _logger.info(
                "The current sparsity of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)

        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "model.pdopt"))
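Reading the step-0 comment, the GMP schedule splits training into a stable warm-up, a pruning phase in which the sparsity ratio climbs from initial_ratio toward args.ratio in pruning_steps discrete updates, and a final tuning phase. That phase breakdown is an assumption about GMPUnstructuredPruner's behavior rather than something this script shows; with that caveat, a back-of-the-envelope sketch of the iteration budget:

# Hypothetical schedule arithmetic for the configs built in compress();
# the phase semantics are assumed, not taken from GMPUnstructuredPruner.
stable = args.stable_epochs * step_per_epoch    # warm-up, ratio not yet raised
pruning = args.pruning_epochs * step_per_epoch  # ramp initial_ratio -> args.ratio
tunning = args.tunning_epochs * step_per_epoch  # fine-tune at the final ratio
iters_per_ratio_update = max(1, pruning // args.pruning_steps)
total_pruner_iters = stable + pruning + tunning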
Example #7
def compress(args):
    if args.use_gpu:
        place = paddle.set_device('gpu')
    else:
        place = paddle.set_device('cpu')

    trainer_num = paddle.distributed.get_world_size()
    use_data_parallel = trainer_num != 1
    if use_data_parallel:
        dist.init_parallel_env()

    train_reader = None
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        train_dataset = reader.ImageNetDataset(mode='train')
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        train_dataset = paddle.vision.datasets.Cifar10(mode='train',
                                                       backend='cv2',
                                                       transform=transform)
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)

    train_loader = paddle.io.DataLoader(train_dataset,
                                        places=place,
                                        batch_sampler=batch_sampler,
                                        return_list=True,
                                        num_workers=args.num_workers,
                                        use_shared_memory=True)

    valid_loader = paddle.io.DataLoader(
        val_dataset,
        places=place,
        drop_last=False,
        return_list=True,
        batch_size=args.batch_size_for_validation,
        shuffle=False,
        use_shared_memory=True)
    step_per_epoch = int(
        np.ceil(len(train_dataset) / args.batch_size / ParallelEnv().nranks))
    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)
    if ParallelEnv().nranks > 1:
        model = paddle.DataParallel(model)

    if args.pretrained_model is not None:
        model.set_state_dict(paddle.load(args.pretrained_model))

    opt, learning_rate = create_optimizer(args, step_per_epoch, model)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    def train(epoch):
        model.train()
        train_reader_cost = 0.0
        train_run_cost = 0.0
        total_samples = 0
        reader_start = time.time()

        for batch_id, data in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            train_start = time.time()
            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)

            loss.backward()
            opt.step()
            learning_rate.step()
            opt.clear_grad()
            pruner.step()
            train_run_cost += time.time() - train_start
            total_samples += args.batch_size * ParallelEnv().nranks

            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {}; acc_top1: {}; acc_top5: {}; avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, avg_samples: {:.5f}, ips: {:.5f} images/sec"
                    .format(
                        epoch, batch_id, opt.get_lr(), np.mean(loss.numpy()),
                        np.mean(acc_top1.numpy()), np.mean(acc_top5.numpy()),
                        train_reader_cost / args.log_period,
                        (train_reader_cost + train_run_cost) / args.log_period,
                        total_samples / args.log_period,
                        total_samples / (train_reader_cost + train_run_cost)))
                train_reader_cost = 0.0
                train_run_cost = 0.0
                total_samples = 0

            reader_start = time.time()

    pruner = UnstructuredPruner(model,
                                mode=args.pruning_mode,
                                ratio=args.ratio,
                                threshold=args.threshold)

    for i in range(args.resume_epoch + 1, args.num_epochs):
        train(i)
        if (i + 1) % args.test_period == 0:
            pruner.update_params()
            _logger.info(
                "The current density of the pruned model is: {}%".format(
                    round(100 * UnstructuredPruner.total_sparse(model), 2)))
            test(i)
        if (i + 1) % args.model_period == 0:
            pruner.update_params()
            paddle.save(model.state_dict(),
                        os.path.join(args.model_path, "model-pruned.pdparams"))
            paddle.save(opt.state_dict(),
                        os.path.join(args.model_path, "opt-pruned.pdopt"))
Example #8
def compress(args):
    test_reader = None
    if args.data == "imagenet":
        import imagenet_reader as reader
        val_dataset = reader.ImageNetDataset(mode='val')
        class_dim = 1000
    elif args.data == "cifar10":
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5],
                                std=[0.5, 0.5, 0.5],
                                data_format='CHW')
        transform = T.Compose([T.Transpose(), normalize])
        val_dataset = paddle.vision.datasets.Cifar10(mode='test',
                                                     backend='cv2',
                                                     transform=transform)
        class_dim = 10
    else:
        raise ValueError("{} is not supported.".format(args.data))

    places = paddle.static.cuda_places(
    ) if args.use_gpu else paddle.static.cpu_places()
    valid_loader = paddle.io.DataLoader(val_dataset,
                                        places=places,
                                        drop_last=False,
                                        return_list=True,
                                        batch_size=args.batch_size,
                                        shuffle=False,
                                        use_shared_memory=True)

    # model definition
    model = mobilenet_v1(num_classes=class_dim, pretrained=True)

    def test(epoch):
        model.eval()
        acc_top1_ns = []
        acc_top5_ns = []
        for batch_id, data in enumerate(valid_loader):
            start_time = time.time()
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            if args.data == 'cifar10':
                y_data = paddle.unsqueeze(y_data, 1)

            logits = model(x_data)
            loss = F.cross_entropy(logits, y_data)
            acc_top1 = paddle.metric.accuracy(logits, y_data, k=1)
            acc_top5 = paddle.metric.accuracy(logits, y_data, k=5)
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1.numpy()),
                            np.mean(acc_top5.numpy()), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1.numpy()))
            acc_top5_ns.append(np.mean(acc_top5.numpy()))

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns, dtype="object")),
                np.mean(np.array(acc_top5_ns, dtype="object"))))

    model.set_state_dict(paddle.load(args.pruned_model))
    _logger.info("The current sparsity of the pruned model is: {}%".format(
        round(100 * UnstructuredPruner.total_sparse(model), 2)))
    test(0)
Example #9
    def runTest(self):
        with fluid.unique_name.guard():
            net = paddle.vision.models.LeNet()
            optimizer = paddle.optimizer.Adam(learning_rate=0.001,
                                              parameters=net.parameters())
            inputs = [Input([None, 1, 28, 28], 'float32', name='image')]
            labels = [Input([None, 1], 'int64', name='label')]
        pruner = UnstructuredPruner(net, mode='ratio', ratio=0.55)
        net.train()
        self._update_masks(pruner, 0.0)
        pruner.update_params()
        self._update_masks(pruner, 1.0)
        pruner.set_static_masks()
        sparsity_0 = UnstructuredPruner.total_sparse(net)
        for i, data in enumerate(self.train_loader):
            x_data = data[0]
            y_data = paddle.to_tensor(data[1])
            logits = net(x_data)
            loss = F.cross_entropy(logits, y_data)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            if i == 10:
                break
        sparsity_1 = UnstructuredPruner.total_sparse(net)
        pruner.update_params()
        sparsity_2 = UnstructuredPruner.total_sparse(net)
        print(sparsity_0, sparsity_1, sparsity_2)
        self.assertEqual(sparsity_0, 1.0)
        self.assertEqual(sparsity_2, 1.0)
        self.assertLess(sparsity_1, 1.0)
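The three assertions fit a reading in which set_static_masks() freezes the weights' current zero pattern into the masks: update_params() with all-zero masks first zeroes every weight (sparsity_0 == 1.0), optimizer steps then repopulate the raw weights (sparsity_1 < 1.0), and the final update_params() re-applies the frozen masks (sparsity_2 == 1.0). A hedged sketch of the fine-tune-with-fixed-sparsity recipe this implies:

# Assumed recipe, inferred from the call order in runTest: prune once,
# freeze the zero pattern, then fine-tune while re-applying it.
pruner = UnstructuredPruner(net, mode='ratio', ratio=0.55)
pruner.step()              # compute magnitude-based masks
pruner.update_params()     # zero out the pruned weights
pruner.set_static_masks()  # pin the current zero pattern
# ... fine-tuning loop: forward, backward, optimizer.step() ...
pruner.update_params()     # restore zeros clobbered by optimizer updates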
Example #10
    def test_block_prune_mxn(self):
        self.pruner_mxn.step()
        self.pruner_mxn.update_params()
        cur_sparsity = UnstructuredPruner.total_sparse(self.net_mxn)
        self.assertTrue(abs(cur_sparsity - 0.55) < 0.01)
Example #11
    def _gen_model(self):
        self.net = mobilenet_v1(num_classes=10, pretrained=False)
        self.pruner = UnstructuredPruner(self.net,
                                         mode='ratio',
                                         ratio=0.98,
                                         threshold=0.0)