Example #1
    def set_vars(self):
        self.qat = ImperativeQuantAware()

        self.train_batch_num = 30
        self.train_batch_size = 32
        self.test_batch_num = 100
        self.test_batch_size = 32
        self.eval_acc_top1 = 0.99
Example #2
    def _remove_preprocess(self, model):
        state_dict = model.state_dict()
        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'])

        with paddle.utils.unique_name.guard():
            model.__init__()
            self.imperative_qat.quantize(model)
            # Restore the weights saved before re-initialization.
            model.set_state_dict(state_dict)
        return model
Example #3
    def test_warning(self):
        path = "./dynamic_outscale_infer_model_with_warnings/lenet"
        imperative_out_scale = ImperativeQuantAware()
        with fluid.dygraph.guard():
            lenet = ImperativeLenet()

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            imperative_out_scale.save_quantized_model(
                layer=lenet,
                path=path,
                input_spec=[
                    paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                            dtype='float32')
                ])

        warning_message = "Warning: No Layer of the model while to be " \
                          "saved contains the out_threshold attribute, so the " \
                          "generated inference model would not contain the " \
                          "out_threshold."
        num = get_vaild_warning_num(warning_message, w)
        assert num == 1
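
The helper get_vaild_warning_num is not defined in this snippet (the spelling follows the call site). Judging from how it is used, a minimal sketch that counts the captured warnings containing the expected message could look like this:

def get_vaild_warning_num(message, w):
    # Count how many recorded warnings contain the expected message text.
    num = 0
    for warn in w:
        if message in str(warn.message):
            num += 1
    return num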
Example #4
    def func_out_scale_acc(self):
        seed = 1000
        lr = 0.001

        weight_quantize_type = 'abs_max'
        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
            lenet = fix_model_dict(lenet)
            imperative_out_scale.quantize(lenet)

            reader = paddle.batch(paddle.dataset.mnist.test(),
                                  batch_size=32,
                                  drop_last=True)
            adam = AdamOptimizer(learning_rate=lr,
                                 parameter_list=lenet.parameters())
            loss_list = train_lenet(lenet, reader, adam)
            lenet.eval()

        param_save_path = "test_save_quantized_model/lenet.pdparams"
        save_dict = lenet.state_dict()
        paddle.save(save_dict, param_save_path)

        save_path = "./dynamic_outscale_infer_model/lenet"
        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=save_path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        for i in range(len(loss_list) - 1):
            self.assertTrue(loss_list[i] > loss_list[i + 1],
                            msg='Failed to do the imperative qat.')

    def test_save_quantized_model(self):
        lr = 0.001

        load_param_path = "test_save_quantized_model/lenet.pdparams"
        save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"

        weight_quantize_type = 'abs_max'
        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            load_dict = paddle.load(load_param_path)
            imperative_out_scale.quantize(lenet)
            lenet.set_dict(load_dict)

            reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())
            loss_list = train_lenet(lenet, reader, adam)
            lenet.eval()

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=save_path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        for i in range(len(loss_list) - 1):
            self.assertTrue(
                loss_list[i] > loss_list[i + 1],
                msg='Failed to do the imperative qat.')
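
The helpers fix_model_dict and train_lenet are not defined in these snippets. Based on the inline equivalent shown in Examples #13 and #14, a minimal sketch under those assumptions could be:

def fix_model_dict(model):
    # Deterministic init: zero biases, small normal weights (cf. Example #13).
    fixed_state = {}
    for name, param in model.named_parameters():
        p_shape = param.numpy().shape
        if name.endswith("bias"):
            value = np.zeros(p_shape).astype('float32')
        else:
            value = np.random.normal(
                loc=0.0, scale=0.01,
                size=np.prod(p_shape)).reshape(p_shape).astype('float32')
        fixed_state[name] = value
    model.set_dict(fixed_state)
    return model

def train_lenet(lenet, reader, optimizer):
    # One pass over the reader; returns the recorded average losses.
    loss_list = []
    lenet.train()
    for batch_id, data in enumerate(reader()):
        x_data = np.array([x[0].reshape(1, 28, 28)
                           for x in data]).astype('float32')
        y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
        img = paddle.to_tensor(x_data)
        label = paddle.to_tensor(y_data)
        out = lenet(img)
        loss = fluid.layers.cross_entropy(out, label)
        avg_loss = fluid.layers.mean(loss)
        avg_loss.backward()
        optimizer.minimize(avg_loss)
        lenet.clear_gradients()
        loss_list.append(avg_loss.numpy()[0])
    return loss_list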
Example #6
class TestImperativeQatAmp(unittest.TestCase):
    """
    Test the combination of qat and amp.
    """
    @classmethod
    def setUpClass(cls):
        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        cls.root_path = os.path.join(os.getcwd(),
                                     "imperative_qat_amp_" + timestamp)
        cls.save_path = os.path.join(cls.root_path, "model")

        cls.download_path = 'dygraph_int8/download'
        cls.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
                                              cls.download_path)

        cls.lenet_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/lenet_pretrained.tar.gz"
        cls.lenet_md5 = "953b802fb73b52fae42896e3c24f0afb"

        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

    @classmethod
    def tearDownClass(cls):
        try:
            shutil.rmtree(cls.root_path)
        except Exception as e:
            print("Failed to delete {} due to {}".format(
                cls.root_path, str(e)))

    def cache_unzipping(self, target_folder, zip_path):
        if not os.path.exists(target_folder):
            cmd = 'mkdir {0} && tar xf {1} -C {0}'.format(
                target_folder, zip_path)
            os.system(cmd)

    def download_model(self, data_url, data_md5, folder_name):
        download(data_url, self.download_path, data_md5)
        file_name = data_url.split('/')[-1]
        zip_path = os.path.join(self.cache_folder, file_name)
        print('Data is downloaded at {0}'.format(zip_path))

        data_cache_folder = os.path.join(self.cache_folder, folder_name)
        self.cache_unzipping(data_cache_folder, zip_path)
        return data_cache_folder

    def set_vars(self):
        self.qat = ImperativeQuantAware()

        self.train_batch_num = 30
        self.train_batch_size = 32
        self.test_batch_num = 100
        self.test_batch_size = 32
        self.eval_acc_top1 = 0.99

    def model_train(self, model, batch_num=-1, batch_size=32, use_amp=False):
        model.train()

        train_reader = paddle.batch(paddle.dataset.mnist.train(),
                                    batch_size=batch_size)
        adam = paddle.optimizer.Adam(learning_rate=0.001,
                                     parameters=model.parameters())
        scaler = paddle.amp.GradScaler(init_loss_scaling=500)

        for batch_id, data in enumerate(train_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            if use_amp:
                with paddle.amp.auto_cast():
                    out = model(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                scaled_loss = scaler.scale(avg_loss)
                scaled_loss.backward()

                scaler.minimize(adam, scaled_loss)
                adam.clear_gradients()
            else:
                out = model(img)
                acc = fluid.layers.accuracy(out, label)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()

                adam.minimize(avg_loss)
                model.clear_gradients()

            if batch_id % 100 == 0:
                _logger.info("Train | step {}: loss = {:}, acc= {:}".format(
                    batch_id, avg_loss.numpy(), acc.numpy()))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break

    def model_test(self, model, batch_num=-1, batch_size=32, use_amp=False):
        model.eval()

        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=batch_size)

        acc_top1_list = []
        for batch_id, data in enumerate(test_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            with paddle.amp.auto_cast(use_amp):
                out = model(img)
                acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
                acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

            acc_top1_list.append(float(acc_top1.numpy()))
            if batch_id % 100 == 0:
                _logger.info(
                    "Test | At step {}: acc1 = {:}, acc5 = {:}".format(
                        batch_id, acc_top1.numpy(), acc_top5.numpy()))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break

        acc_top1 = sum(acc_top1_list) / len(acc_top1_list)
        return acc_top1

    def ptq(self):
        start_time = time.time()

        self.set_vars()

        params_path = self.download_model(self.lenet_url, self.lenet_md5,
                                          "lenet")
        params_path += "/lenet_pretrained/lenet.pdparams"

        with fluid.dygraph.guard():
            model = ImperativeLenet()
            model_state_dict = paddle.load(params_path)
            model.set_state_dict(model_state_dict)

            _logger.info("Test fp32 model")
            fp32_acc_top1 = self.model_test(model, self.test_batch_num,
                                            self.test_batch_size)

            self.qat.quantize(model)

            use_amp = True
            self.model_train(model, self.train_batch_num,
                             self.train_batch_size, use_amp)

            _logger.info("Test int8 model")
            int8_acc_top1 = self.model_test(model, self.test_batch_num,
                                            self.test_batch_size, use_amp)

            _logger.info('fp32_acc_top1: %f, int8_acc_top1: %f' %
                         (fp32_acc_top1, int8_acc_top1))
            self.assertTrue(int8_acc_top1 > fp32_acc_top1 - 0.01,
                            msg='fp32_acc_top1: %f, int8_acc_top1: %f' %
                            (fp32_acc_top1, int8_acc_top1))

        input_spec = [
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ]
        paddle.jit.save(layer=model,
                        path=self.save_path,
                        input_spec=input_spec)
        print('Quantized model saved in %s' % self.save_path)

        end_time = time.time()
        print("total time: %ss" % (end_time - start_time))

    def test_ptq(self):
        self.ptq()
        with _test_eager_guard():
            self.ptq()
Example #7
 def setUp(self):
     _logger.info("test weight_quantize")
     self.imperative_qat = ImperativeQuantAware(
         weight_quantize_layer=CustomQAT)
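
CustomQAT itself is not shown. Per the documented contract for weight_quantize_layer (input is the non-quantized weight, output is the dequantized weight), a minimal abs_max-style fake-quant layer might look like this sketch:

import paddle

class CustomQAT(paddle.nn.Layer):
    def __init__(self):
        super(CustomQAT, self).__init__()

    def forward(self, weight):
        # Fake quantization: map to the int8 range and back to float,
        # so training sees the quantization error.
        scale = paddle.max(paddle.abs(weight)) / 127.0
        scale = paddle.clip(scale, min=1e-8)  # guard against all-zero weights
        quant = paddle.round(weight / scale)
        return quant * scale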
Example #8
 def setUp(self):
     _logger.info("test weight_preprocess")
     self.imperative_qat = ImperativeQuantAware(
         weight_preprocess_layer=PACT)
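
PACT is PaddleSlim's learnable-clipping preprocess layer. A simplified sketch of the idea (clip the input to a trainable threshold alpha before quantization; the init value 20.0 is illustrative) might be:

import paddle

class PACT(paddle.nn.Layer):
    def __init__(self, init_value=20.0):
        super(PACT, self).__init__()
        # Trainable clipping threshold, learned jointly with the model.
        self.alpha = self.create_parameter(
            shape=[1],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Constant(init_value))

    def forward(self, x):
        # Differentiable clip of x into [-alpha, alpha].
        out_left = paddle.nn.functional.relu(x - self.alpha)
        out_right = paddle.nn.functional.relu(-self.alpha - x)
        return x - out_left + out_right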
Example #9
    def func_out_scale_acc(self):
        paddle.disable_static()
        seed = 1000
        lr = 0.1

        qat = ImperativeQuantAware()

        np.random.seed(seed)
        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=512,
                              drop_last=True)

        lenet = ImperativeLenetWithSkipQuant()
        lenet = fix_model_dict(lenet)
        qat.quantize(lenet)

        adam = AdamOptimizer(learning_rate=lr,
                             parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        loss_list = train_lenet(lenet, reader, adam)

        lenet.eval()

        path = "./save_dynamic_quant_infer_model/lenet"
        save_dir = "./save_dynamic_quant_infer_model"

        qat.save_quantized_model(layer=lenet,
                                 path=path,
                                 input_spec=[
                                     paddle.static.InputSpec(
                                         shape=[None, 1, 28, 28],
                                         dtype='float32')
                                 ])

        paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        [inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        model_ops = inference_program.global_block().ops

        conv2d_count, matmul_count = 0, 0
        conv2d_skip_count, matmul_skip_count = 0, 0
        find_conv2d = False
        find_matmul = False
        for i, op in enumerate(model_ops):
            if op.type == 'conv2d':
                find_conv2d = True
                if op.has_attr("skip_quant"):
                    conv2d_skip_count += 1
                if conv2d_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i -
                                                                    1].type)
                conv2d_count += 1

            if op.type == 'matmul':
                find_matmul = True
                if op.has_attr("skip_quant"):
                    matmul_skip_count += 1
                if matmul_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i -
                                                                    1].type)
                matmul_count += 1

        if find_conv2d:
            self.assertTrue(conv2d_skip_count == 1)
        if find_matmul:
            self.assertTrue(matmul_skip_count == 1)
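
ImperativeLenetWithSkipQuant is not shown; the checks above suggest it excludes individual layers from quantization by setting a skip_quant attribute on them. A sketch, assuming the model exposes its first conv and linear sublayers under these (hypothetical) names:

lenet = ImperativeLenetWithSkipQuant()
# Layers marked this way carry a "skip_quant" attribute on the
# corresponding conv2d/matmul ops in the saved inference program.
lenet.conv2d_0.skip_quant = True
lenet.linear_0.skip_quant = True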
Example #10
    def test_qat_save(self):

        imperative_qat = ImperativeQuantAware(
            weight_quantize_type='abs_max',
            activation_quantize_type='moving_average_abs_max',
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())
            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))

                lenet.eval()
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

            # save weights
            model_dict = lenet.state_dict()
            fluid.save_dygraph(model_dict, "save_temp")

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            test_img = fluid.dygraph.to_variable(test_data)
            lenet.eval()
            before_save = lenet(test_img)

        # save inference quantized model
        path = "./qat_infer_model/lenet"
        save_dir = "./qat_infer_model"
        paddle.jit.save(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)
        [inference_program, feed_target_names,
         fetch_targets] = fluid.io.load_inference_model(
             dirname=save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX)
        after_save, = exe.run(inference_program,
                              feed={feed_target_names[0]: test_data},
                              fetch_list=fetch_targets)

        self.assertTrue(
            np.allclose(after_save, before_save.numpy()),
            msg='Failed to save the inference quantized model.')
Example #11
class QAT(object):
    """
    Quant Aware Training (QAT): adds fake-quant logic to the given quantizable
    layers, i.e., inserts quant/dequant computation for both activation inputs
    and weight inputs.
    """

    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the
                    default config is used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weights.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized weight
                    and it returns the dequantized weight. If None, the
                    quantization op defined by 'weight_quantize_type' is used.
                    Default: None.
            act_quantize(class, optional): Defines how to quantize activations.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized
                    activation and it returns the dequantized activation. If
                    None, the quantization op defined by
                    'activation_quantize_type' is used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess
                    weights before quantization. This lets users quickly test
                    whether a custom preprocess method works. Its input is the
                    non-quantized weight and it returns the processed weight to
                    be quantized. If None, the preprocess method defined by
                    'weight_preprocess_type' is used. Default: None.
            act_preprocess(class, optional): Defines how to preprocess
                    activations before quantization. This lets users quickly
                    test whether a custom preprocess method works. Its input is
                    the non-quantized activation and it returns the processed
                    activation to be quantized. If None, the preprocess method
                    defined by 'activation_preprocess_type' is used.
                    Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess if weight_preprocess is not None else self.weight_preprocess
        self.act_preprocess = act_preprocess if act_preprocess is not None else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'],
            weight_preprocess_layer=self.weight_preprocess,
            act_preprocess_layer=self.act_preprocess,
            weight_quantize_layer=self.weight_quantize,
            act_quantize_layer=self.act_quantize)

    def quantize(self, model):
        self.imperative_qat.quantize(model)

    def save_quantized_model(self, model, path, input_spec=None):
        if self.weight_preprocess is not None or self.act_preprocess is not None:
            model = self._remove_preprocess(model)

        self.imperative_qat.save_quantized_model(
            layer=model, path=path, input_spec=input_spec)

    def _remove_preprocess(self, model):
        state_dict = model.state_dict()
        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'])

        with paddle.utils.unique_name.guard():
            if hasattr(model, "_layers"):
                model = model._layers
            model.__init__()
            self.imperative_qat.quantize(model)
            # Restore the weights saved before re-initialization.
            model.set_state_dict(state_dict)
        return model
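
The config keys this class consumes are visible in __init__. A config dict covering them might look like the following sketch (the values are illustrative, not the library's guaranteed defaults):

quant_config = {
    'weight_preprocess_type': None,        # or 'PACT'
    'activation_preprocess_type': None,    # or 'PACT'
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
    'moving_rate': 0.9,
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}
qat = QAT(config=quant_config)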
Example #12
def main():
    # create model
    model_list = [x for x in models.__dict__["__all__"]]
    assert FLAGS.arch in model_list, \
        "Expected FLAGS.arch in {}, but received {}".format(
        model_list, FLAGS.arch)
    model = models.__dict__[FLAGS.arch](pretrained=not FLAGS.resume)

    # quantize model
    if FLAGS.enable_quant:
        if not FLAGS.use_naive_api:
            print("use slim api")
            quant_config = {
                'weight_quantize_type': FLAGS.weight_quantize_type,
            }
            dygraph_qat = QAT(quant_config)
        else:
            print("use navie api")
            dygraph_qat = ImperativeQuantAware(
                weight_quantize_type=FLAGS.weight_quantize_type, )
        dygraph_qat.quantize(model)

    # prepare
    model = paddle.Model(model)
    if FLAGS.resume is not None:
        print("Resume from " + FLAGS.resume)
        model.load(FLAGS.resume)

    train_dataset = ImageNetDataset(FLAGS.data, mode='train')
    val_dataset = ImageNetDataset(FLAGS.data, mode='val')

    optim = make_optimizer(
        np.ceil(
            float(len(train_dataset)) / FLAGS.batch_size /
            ParallelEnv().nranks),
        parameter_list=model.parameters())

    model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))

    # test
    if FLAGS.eval_only:
        model.evaluate(
            val_dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.num_workers)
        return

    # train
    output_dir = os.path.join(FLAGS.output_dir, "checkpoint",
                              FLAGS.arch + "_checkpoint",
                              time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    model.fit(train_dataset,
              val_dataset,
              batch_size=FLAGS.batch_size,
              epochs=FLAGS.epoch,
              save_dir=output_dir,
              num_workers=FLAGS.num_workers)

    # save
    if FLAGS.enable_quant:
        quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "model")
        input_spec = paddle.static.InputSpec(
            shape=[None, 3, 224, 224], dtype='float32')
        dygraph_qat.save_quantized_model(model.network, quant_output_dir,
                                         [input_spec])
        print("save all checkpoints in " + output_dir)
        print("save quantized inference model in " + quant_output_dir)
Example #13
    def test_out_scale_acc(self):
        seed = 1000
        lr = 0.1

        imperative_out_scale = ImperativeQuantAware()

        np.random.seed(seed)
        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        lenet = ImperativeLenet()
        fixed_state = {}
        for name, param in lenet.named_parameters():
            p_shape = param.numpy().shape
            p_value = param.numpy()
            if name.endswith("bias"):
                value = np.zeros_like(p_value).astype('float32')
            else:
                value = np.random.normal(
                    loc=0.0, scale=0.01,
                    size=np.prod(p_shape)).reshape(p_shape).astype('float32')
            fixed_state[name] = value
        lenet.set_dict(fixed_state)
        imperative_out_scale.quantize(lenet)
        adam = AdamOptimizer(
            learning_rate=lr, parameter_list=lenet.parameters())
        dynamic_loss_rec = []
        lenet.train()
        for batch_id, data in enumerate(reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)

            img = fluid.dygraph.to_variable(x_data)
            label = fluid.dygraph.to_variable(y_data)

            out = lenet(img)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            adam.minimize(avg_loss)
            lenet.clear_gradients()
            dynamic_loss_rec.append(avg_loss.numpy()[0])
            if batch_id % 100 == 0:
                _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

        lenet.eval()

        path = "./save_dynamic_quant_infer_model/lenet"
        save_dir = "./save_dynamic_quant_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        [inference_program, feed_target_names, fetch_targets] = (
            fluid.io.load_inference_model(
                dirname=save_dir,
                executor=exe,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX))
        model_ops = inference_program.global_block().ops

        conv2d_count, mul_count = 0, 0
        for i, op in enumerate(model_ops):
            if op.type == 'conv2d':
                if conv2d_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                conv2d_count += 1

            if op.type == 'mul':
                if mul_count > 0:
                    self.assertTrue(
                        'fake_quantize_dequantize' in model_ops[i - 1].type)
                else:
                    self.assertTrue(
                        'fake_quantize_dequantize' not in model_ops[i - 1].type)
                mul_count += 1
Example #14
    def test_out_scale_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(name='image',
                                            shape=[1, 28, 28],
                                            dtype='float32')
                    label = fluid.layers.data(name='label',
                                              shape=[1],
                                              dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(input=prediction,
                                                          label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(paddle.dataset.mnist.test(),
                              batch_size=32,
                              drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.1
        dynamic_out_scale_list = []
        static_out_scale_list = []

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_out_scale = ImperativeQuantAware()

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(loc=0.0,
                                             scale=0.01,
                                             size=np.prod(p_shape)).reshape(
                                                 p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)
            imperative_out_scale.quantize(lenet)
            adam = AdamOptimizer(learning_rate=lr,
                                 parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data
                                   ]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))

            lenet.eval()

        path = "./dynamic_outscale_infer_model/lenet"
        dynamic_save_dir = "./dynamic_outscale_infer_model"

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
        outscale_pass.apply(main_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(feed_list=[static_img, static_label],
                                  place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))
        scale_inference_pass = OutScaleForInferencePass(scope=scope)
        scale_inference_pass.apply(infer_graph)

        save_program = infer_graph.to_program()
        static_save_dir = "./static_outscale_infer_model"
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                dirname=static_save_dir,
                feeded_var_names=[infer_img.name],
                target_vars=[infer_pre],
                executor=exe,
                main_program=save_program,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX)

        rtol = 1e-05
        atol = 1e-08
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(np.allclose(np.array(dynamic_loss_rec),
                                    np.array(static_loss_rec),
                                    rtol=rtol,
                                    atol=atol,
                                    equal_nan=True),
                        msg='Failed to do the imperative qat.')

        # load dynamic model
        [dynamic_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=dynamic_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        # load static model
        [static_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=static_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))

        dynamic_ops = dynamic_inference_program.global_block().ops
        static_ops = static_inference_program.global_block().ops

        for op in dynamic_ops[:]:
            if op.type == "flatten2" or 'fake' in op.type:
                dynamic_ops.remove(op)

        for op in static_ops[:]:
            if 'fake' in op.type:
                static_ops.remove(op)

        for i in range(len(dynamic_ops)):
            if dynamic_ops[i].has_attr("out_threshold"):
                self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
                self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
                                static_ops[i].attr("out_threshold"))
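
Note that the manual loop and the final np.allclose apply the same criterion: two losses are considered equal when |loss_d - loss_s| <= atol + rtol * |loss_s|. The loop merely logs the first offending pair before the assertion fires.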
Example #15
class QAT(object):
    """
    Quant Aware Training (QAT): adds fake-quant logic to the given quantizable
    layers, i.e., inserts quant/dequant computation for both activation inputs
    and weight inputs.
    """
    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the
                    default config is used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weights.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized weight
                    and it returns the dequantized weight. If None, the
                    quantization op defined by 'weight_quantize_type' is used.
                    Default: None.
            act_quantize(class, optional): Defines how to quantize activations.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized
                    activation and it returns the dequantized activation. If
                    None, the quantization op defined by
                    'activation_quantize_type' is used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess
                    weights before quantization. This lets users quickly test
                    whether a custom preprocess method works. Its input is the
                    non-quantized weight and it returns the processed weight to
                    be quantized. If None, the preprocess method defined by
                    'weight_preprocess_type' is used. Default: None.
            act_preprocess(class, optional): Defines how to preprocess
                    activations before quantization. This lets users quickly
                    test whether a custom preprocess method works. Its input is
                    the non-quantized activation and it returns the processed
                    activation to be quantized. If None, the preprocess method
                    defined by 'activation_preprocess_type' is used.
                    Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess if weight_preprocess is not None else self.weight_preprocess
        self.act_preprocess = act_preprocess if act_preprocess is not None else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'],
            weight_preprocess_layer=self.weight_preprocess,
            act_preprocess_layer=self.act_preprocess,
            weight_quantize_layer=self.weight_quantize,
            act_quantize_layer=self.act_quantize)

    def quantize(self, model, inplace=True):
        """
        Quantize the input model.

        Args:
            model(paddle.nn.Layer): The model to be quantized.
            inplace(bool): Whether to quantize the input model in place.
                           Default: True.
        Returns:
            quantized_model(paddle.nn.Layer): The quantized model.
        """
        assert isinstance(model, paddle.nn.Layer), \
            "The model must be the instance of paddle.nn.Layer."

        self._model = copy.deepcopy(model)

        if inplace:
            self.imperative_qat.quantize(model)
            quant_model = model
        else:
            quant_model = copy.deepcopy(model)
            self.imperative_qat.quantize(quant_model)

        return quant_model

    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False):
        """
        Save the quantized inference model.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix used to save the model. The format is
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or an example Tensor. If None, all input variables of
                the original Layer's forward method are the inputs of the
                saved model. Default: None.
            onnx_format (bool, optional): Whether to export the quantized
                model in ONNX format. Default: False.

        Returns:
            None
        """
        if self.weight_preprocess is not None or self.act_preprocess is not None:
            training = model.training
            model = self._remove_preprocess(model)
            if training:
                model.train()
            else:
                model.eval()

        self.imperative_qat.save_quantized_model(layer=model,
                                                 path=path,
                                                 input_spec=input_spec,
                                                 onnx_format=onnx_format)

    def _remove_preprocess(self, model):
        state_dict = model.state_dict()
        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'])

        with paddle.utils.unique_name.guard():
            if hasattr(model, "_layers"):
                model = model._layers
            model = self._model
            self.imperative_qat.quantize(model)
            model.set_state_dict(state_dict)

        return model
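
A minimal usage sketch for this version of the class, combining the non-inplace quantize with the onnx_format save option (model class and paths are placeholders):

qat = QAT()
model = ImperativeLenet()

# Keep the original model intact; train a quantized copy instead.
quant_model = qat.quantize(model, inplace=False)

# ... train quant_model ...

qat.save_quantized_model(
    quant_model,
    path="./quant_infer_model/lenet",
    input_spec=[
        paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
    ],
    onnx_format=False)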
Example #16
    def __init__(self,
                 config=None,
                 weight_preprocess=None,
                 act_preprocess=None,
                 weight_quantize=None,
                 act_quantize=None):
        """
        Args:
            config(dict, optional): Configs for quantization. If None, the
                    default config is used. Default: None.
            weight_quantize(class, optional): Defines how to quantize weights.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized weight
                    and it returns the dequantized weight. If None, the
                    quantization op defined by 'weight_quantize_type' is used.
                    Default: None.
            act_quantize(class, optional): Defines how to quantize activations.
                    This lets users quickly test whether a custom quantization
                    method works. The class must implement both quantization
                    and dequantization: its input is the non-quantized
                    activation and it returns the dequantized activation. If
                    None, the quantization op defined by
                    'activation_quantize_type' is used. Default: None.
            weight_preprocess(class, optional): Defines how to preprocess
                    weights before quantization. This lets users quickly test
                    whether a custom preprocess method works. Its input is the
                    non-quantized weight and it returns the processed weight to
                    be quantized. If None, the preprocess method defined by
                    'weight_preprocess_type' is used. Default: None.
            act_preprocess(class, optional): Defines how to preprocess
                    activations before quantization. This lets users quickly
                    test whether a custom preprocess method works. Its input is
                    the non-quantized activation and it returns the processed
                    activation to be quantized. If None, the preprocess method
                    defined by 'activation_preprocess_type' is used.
                    Default: None.
        """
        if config is None:
            config = _quant_config_default
        else:
            assert isinstance(config, dict), "config must be dict"
            config = _parse_configs(config)
        self.config = config

        self.weight_preprocess = PACT if self.config[
            'weight_preprocess_type'] == 'PACT' else None
        self.act_preprocess = PACT if self.config[
            'activation_preprocess_type'] == 'PACT' else None

        self.weight_preprocess = weight_preprocess if weight_preprocess is not None else self.weight_preprocess
        self.act_preprocess = act_preprocess if act_preprocess is not None else self.act_preprocess
        self.weight_quantize = weight_quantize
        self.act_quantize = act_quantize

        self.imperative_qat = ImperativeQuantAware(
            weight_bits=self.config['weight_bits'],
            activation_bits=self.config['activation_bits'],
            weight_quantize_type=self.config['weight_quantize_type'],
            activation_quantize_type=self.config['activation_quantize_type'],
            moving_rate=self.config['moving_rate'],
            quantizable_layer_type=self.config['quantizable_layer_type'],
            weight_preprocess_layer=self.weight_preprocess,
            act_preprocess_layer=self.act_preprocess,
            weight_quantize_layer=self.weight_quantize,
            act_quantize_layer=self.act_quantize)
Example #17
    def func_qat(self):
        self.set_vars()

        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=self.weight_quantize_type,
            activation_quantize_type=self.activation_quantize_type,
            fuse_conv_bn=self.fuse_conv_bn)

        with fluid.dygraph.guard():
            # For CI coverage
            conv1 = Conv2D(
                in_channels=3,
                out_channels=2,
                kernel_size=3,
                stride=1,
                padding=1,
                padding_mode='replicate')
            quant_conv1 = QuantizedConv2D(conv1)
            data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
            quant_conv1(fluid.dygraph.to_variable(data))

            conv_transpose = Conv2DTranspose(4, 6, (3, 3))
            quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
            x_var = paddle.uniform(
                (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)
            quant_conv_transpose(x_var)

            seed = 1
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
            lenet = fix_model_dict(lenet)
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())

            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
                paddle.dataset.mnist.test(), batch_size=32)

            epoch_num = 1
            for epoch in range(epoch_num):
                lenet.train()
                for batch_id, data in enumerate(train_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)
                    out = lenet(img)
                    acc = fluid.layers.accuracy(out, label)
                    loss = fluid.layers.cross_entropy(out, label)
                    avg_loss = fluid.layers.mean(loss)
                    avg_loss.backward()
                    adam.minimize(avg_loss)
                    lenet.clear_gradients()
                    if batch_id % 100 == 0:
                        _logger.info(
                            "Train | At epoch {} step {}: loss = {:}, acc= {:}".
                            format(epoch, batch_id,
                                   avg_loss.numpy(), acc.numpy()))
                    if batch_id == 500:  # For shortening CI time
                        break

                lenet.eval()
                eval_acc_top1_list = []
                for batch_id, data in enumerate(test_reader()):
                    x_data = np.array([x[0].reshape(1, 28, 28)
                                       for x in data]).astype('float32')
                    y_data = np.array(
                        [x[1] for x in data]).astype('int64').reshape(-1, 1)

                    img = fluid.dygraph.to_variable(x_data)
                    label = fluid.dygraph.to_variable(y_data)

                    out = lenet(img)
                    acc_top1 = fluid.layers.accuracy(
                        input=out, label=label, k=1)
                    acc_top5 = fluid.layers.accuracy(
                        input=out, label=label, k=5)

                    if batch_id % 100 == 0:
                        eval_acc_top1_list.append(float(acc_top1.numpy()))
                        _logger.info(
                            "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                            format(epoch, batch_id,
                                   acc_top1.numpy(), acc_top5.numpy()))

                # check eval acc
                eval_acc_top1 = sum(eval_acc_top1_list) / len(
                    eval_acc_top1_list)
                print('eval_acc_top1', eval_acc_top1)
                self.assertTrue(
                    eval_acc_top1 > 0.9,
                    msg="The test acc {%f} is less than 0.9." % eval_acc_top1)

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
            test_data = np.array([x[0].reshape(1, 28, 28)
                                  for x in data]).astype('float32')
            y_data = np.array(
                [x[1] for x in data]).astype('int64').reshape(-1, 1)
            test_img = fluid.dygraph.to_variable(test_data)
            label = fluid.dygraph.to_variable(y_data)
            lenet.eval()
            fp32_out = lenet(test_img)
            fp32_acc = fluid.layers.accuracy(fp32_out, label).numpy()

        with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
            # save inference quantized model
            imperative_qat.save_quantized_model(
                layer=lenet,
                path=os.path.join(tmpdir, "lenet"),
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ],
                onnx_format=self.onnx_format)
            print('Quantized model saved in %s' % tmpdir)

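            # Reload the saved quantized program and run it through the static executor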
            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()
            exe = fluid.Executor(place)
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
                 dirname=tmpdir,
                 executor=exe,
                 model_filename="lenet" + INFER_MODEL_SUFFIX,
                 params_filename="lenet" + INFER_PARAMS_SUFFIX)
            quant_out, = exe.run(inference_program,
                                 feed={feed_target_names[0]: test_data},
                                 fetch_list=fetch_targets)
            paddle.disable_static()
            quant_out = fluid.dygraph.to_variable(quant_out)
            quant_acc = fluid.layers.accuracy(quant_out, label).numpy()
            paddle.enable_static()
            delta_value = fp32_acc - quant_acc
            self.assertLess(delta_value, self.diff_threshold)
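Stripped of the test scaffolding, the dygraph QAT round trip exercised above reduces to the pattern below. This is a minimal sketch, not part of the original test: MyModel, train_one_pass, and the save path are placeholder names, and the import path is the fluid-era one assumed throughout these examples.

    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

    qat = ImperativeQuantAware(
        weight_quantize_type='abs_max',
        activation_quantize_type='moving_average_abs_max')

    with fluid.dygraph.guard():
        model = MyModel()      # placeholder: any paddle.nn.Layer
        qat.quantize(model)    # wrap supported layers with fake quant/dequant ops
        train_one_pass(model)  # placeholder: fine-tune so the quant scales settle
        model.eval()

    qat.save_quantized_model(
        layer=model,
        path='./qat_model/model',
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ])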
Example #18
    def test_qat_acc(self):
        def _build_static_lenet(main, startup, is_test=False, seed=1000):
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    main.random_seed = seed
                    startup.random_seed = seed
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    prediction = StaticLenet(img)
                    if not is_test:
                        loss = fluid.layers.cross_entropy(
                            input=prediction, label=label)
                        avg_loss = fluid.layers.mean(loss)
                    else:
                        avg_loss = prediction
            return img, label, avg_loss

        reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
        weight_quantize_type = 'abs_max'
        activation_quant_type = 'moving_average_abs_max'
        param_init_map = {}
        seed = 1000
        lr = 0.001

        # imperative train
        _logger.info(
            "--------------------------dynamic graph qat--------------------------"
        )
        imperative_qat = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quant_type,
            quantizable_layer_type=[
                'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
                'Swish'
            ])

        with fluid.dygraph.guard():
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed
            lenet = ImperativeLenet()
            fixed_state = {}
            for name, param in lenet.named_parameters():
                p_shape = param.numpy().shape
                p_value = param.numpy()
                if name.endswith("bias"):
                    value = np.zeros_like(p_value).astype('float32')
                else:
                    value = np.random.normal(
                        loc=0.0, scale=0.01, size=np.prod(p_shape)).reshape(
                            p_shape).astype('float32')
                fixed_state[name] = value
                param_init_map[param.name] = value
            lenet.set_dict(fixed_state)

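            # quantize() rewrites supported layers in place, wrapping them with fake quant/dequant ops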
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())
            dynamic_loss_rec = []
            lenet.train()
            for batch_id, data in enumerate(reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = fluid.dygraph.to_variable(x_data)
                label = fluid.dygraph.to_variable(y_data)

                out = lenet(img)
                loss = fluid.layers.cross_entropy(out, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()
                adam.minimize(avg_loss)
                lenet.clear_gradients()
                dynamic_loss_rec.append(avg_loss.numpy()[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', avg_loss.numpy()))
                if batch_id > 500:
                    break
            lenet.eval()
        paddle.jit.save(
            layer=lenet,
            path="./dynamic_mnist/model",
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])

        # static graph train
        _logger.info(
            "--------------------------static graph qat--------------------------"
        )
        static_loss_rec = []
        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        main = fluid.Program()
        infer = fluid.Program()
        startup = fluid.Program()
        static_img, static_label, static_loss = _build_static_lenet(
            main, startup, False, seed)
        infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
                                                      seed)
        with fluid.unique_name.guard():
            with fluid.program_guard(main, startup):
                opt = AdamOptimizer(learning_rate=lr)
                opt.minimize(static_loss)

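        # Seed the static parameters with the exact values used for the dygraph run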
        scope = core.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        for param in main.all_parameters():
            param_tensor = scope.var(param.name).get_tensor()
            param_tensor.set(param_init_map[param.name], place)

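        # Lower both programs to IR graphs so the quantization passes can rewrite them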
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
        add_quant_dequant_pass = AddQuantDequantPass(
            scope=scope,
            place=place,
            quantizable_op_type=[
                'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
            ])
        transform_pass.apply(main_graph)
        transform_pass.apply(infer_graph)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(infer_graph)
        build_strategy = fluid.BuildStrategy()
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=static_loss.name, build_strategy=build_strategy)

        feeder = fluid.DataFeeder(
            feed_list=[static_img, static_label], place=place)
        with fluid.scope_guard(scope):
            for batch_id, data in enumerate(reader()):
                loss_v, = exe.run(binary,
                                  feed=feeder.feed(data),
                                  fetch_list=[static_loss])
                static_loss_rec.append(loss_v[0])
                if batch_id % 100 == 0:
                    _logger.info('{}: {}'.format('loss', loss_v))

        save_program = infer_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model("./static_mnist", [infer_img.name],
                                          [infer_pre], exe, save_program)
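        # Log the first step, if any, where the dynamic and static loss traces diverge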
        rtol = 1e-08
        atol = 1e-10
        for i, (loss_d,
                loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
            diff = np.abs(loss_d - loss_s)
            if diff > (atol + rtol * np.abs(loss_s)):
                _logger.info(
                    "diff({}) at {}, dynamic loss = {}, static loss = {}".
                    format(diff, i, loss_d, loss_s))
                break

        self.assertTrue(
            np.allclose(
                np.array(dynamic_loss_rec),
                np.array(static_loss_rec),
                rtol=rtol,
                atol=atol,
                equal_nan=True),
            msg='Failed to do the imperative qat.')
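The tolerance check that closes this example factors into a small helper; a sketch with an illustrative name, mirroring the np.allclose criterion used above:

    import numpy as np

    def first_divergence(dynamic_rec, static_rec, rtol=1e-08, atol=1e-10):
        # Index of the first step whose losses differ beyond the
        # np.allclose criterion |d - s| <= atol + rtol * |s|; None if they agree.
        for i, (d, s) in enumerate(zip(dynamic_rec, static_rec)):
            if np.abs(d - s) > (atol + rtol * np.abs(s)):
                return i
        return None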
Example #19
    def test_save_quantized_model(self):
        weight_quantize_type = 'abs_max'
        activation_quantize_type = 'moving_average_abs_max'
        load_param_path = "test_save_quantized_model/lenet.pdparams"
        path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"
        dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint"
        static_model_save_dir = "./static_outscale_infer_model"

        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)

        with fluid.dygraph.guard():
            lenet = ImperativeLenet()
            load_dict = paddle.load(load_param_path)
            imperative_out_scale.quantize(lenet)
            lenet.set_dict(load_dict)

        imperative_out_scale.save_quantized_model(
            layer=lenet,
            path=path,
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28],
                                        dtype='float32')
            ])

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = fluid.Executor(place)

        # load dynamic model
        [dynamic_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=dynamic_model_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))
        # load static model
        [static_inference_program, feed_target_names,
         fetch_targets] = (fluid.io.load_inference_model(
             dirname=static_model_save_dir,
             executor=exe,
             model_filename="lenet" + INFER_MODEL_SUFFIX,
             params_filename="lenet" + INFER_PARAMS_SUFFIX))

        dynamic_ops = dynamic_inference_program.global_block().ops
        static_ops = static_inference_program.global_block().ops

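        # Drop flatten2 and fake-quant ops so the two op lists line up one-to-one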
        for op in dynamic_ops[:]:
            if op.type == "flatten2" or 'fake' in op.type:
                dynamic_ops.remove(op)

        for op in static_ops[:]:
            if 'fake' in op.type:
                static_ops.remove(op)

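        # Every remaining op carrying out_threshold must agree in type and threshold across the two models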
        op_count = 0
        for i in range(len(dynamic_ops)):
            if dynamic_ops[i].has_attr("out_threshold"):
                op_count += 1
                self.assertEqual(dynamic_ops[i].type, static_ops[i].type)
                self.assertEqual(dynamic_ops[i].attr("out_threshold"),
                                 static_ops[i].attr("out_threshold"))

        _logger.info("op_cout: {}".format(op_count))
        self.assertTrue(op_count == 14)
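The final assertion generalizes to a reusable check over any loaded inference program; a sketch (count_out_thresholds is an illustrative helper, not part of the test):

    def count_out_thresholds(program):
        # Count ops annotated with the out_threshold attribute that
        # save_quantized_model records for calibrated layers.
        return sum(1 for op in program.global_block().ops
                   if op.has_attr("out_threshold"))

With the programs loaded above, the closing assertion becomes, e.g., self.assertEqual(count_out_thresholds(dynamic_inference_program), 14).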