Пример #1
0
def convert(program, place, config=None, scope=None, save_int8=False):
    """
    Turn a quantized, already-trained ``program`` into the final quantized
    ``program`` that can be used to save an ``inference model``.

    The program is wrapped in a test-mode ``IrGraph``; the out-scale
    inference pass and then the quantization freeze pass are applied to it.
    If the file named by ``VARS_MAPPING_TABLE`` exists, the dictionary
    returned by ``load_dict()`` is attached to the graph as
    ``out_node_mapping_table`` before the freeze pass runs.

    Args:
        program(paddle.static.Program): quantized and well-trained
                ``test program``.
        place(paddle.CPUPlace or paddle.CUDAPlace): the device the executor
                runs on.
        config(dict, optional): configuration for the conversion. When
                ``None`` the module default config is used; otherwise it
                must be a ``dict`` and is normalized by ``_parse_configs``.
                It must be the same config that was used in 'quant_aware'.
                Default is None.
        scope(paddle.static.Scope, optional): Scope records the mapping
                between variable names and variables, similar to brackets
                in programming languages. Usually users can use
                `paddle.static.global_scope <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_.
                Any falsy value (including ``None``) is replaced by
                `paddle.static.global_scope() <https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/api_cn/executor_cn/global_scope_cn.html>`_
                . Default: ``None``.
        save_int8: whether to additionally return a ``program`` whose model
                parameters have dtype ``int8``. This parameter can only be
                used to get model size. Default: ``False``.

    Returns:
        paddle.static.Program or tuple: the frozen program usable for
                inference.
                When ``save_int8`` is False, only the frozen program is
                returned.
                When ``save_int8`` is True, a 2-tuple
                ``(frozen_program, frozen_program_int8)`` is returned; the
                second program is exported from the same graph after the
                int8 conversion pass has been applied.
    """
    if not scope:
        scope = paddle.static.global_scope()

    if config is not None:
        assert isinstance(config, dict), "config must be dict"
        config = _parse_configs(config)
    else:
        config = _quant_config_default
    _logger.info("convert config {}".format(config))

    graph = IrGraph(core.Graph(program.desc), for_test=True)

    OutScaleForInferencePass(scope=scope).apply(graph)

    # The freeze pass reorders the quantize operators of the trained graph
    # so that it is suitable for inference.
    freezer = QuantizationFreezePass(
        scope=scope,
        place=place,
        weight_bits=config['weight_bits'],
        activation_bits=config['activation_bits'],
        weight_quantize_type=config['weight_quantize_type'])

    # The mapping table is optional: it is only attached when the file is
    # present on disk.
    if os.path.exists(VARS_MAPPING_TABLE):
        graph.out_node_mapping_table = load_dict()

    freezer.apply(graph)
    frozen_program = graph.to_program()

    if not save_int8:
        return frozen_program

    # The int8 variant is derived from the already frozen graph.
    ConvertToInt8Pass(scope=scope, place=place).apply(graph)
    return frozen_program, graph.to_program()
Пример #2
0
    def quantization_scale(self,
                           use_cuda,
                           seed,
                           activation_quant_type,
                           weight_quant_type='abs_max',
                           for_ci=False,
                           act_preprocess_func=None,
                           weight_preprocess_func=None,
                           act_quantize_func=None,
                           weight_quantize_func=None):
        """
        Build train/test programs, run the quantization passes over them,
        train for a few iterations and freeze the test graph.

        Args:
            use_cuda: when truthy the executor runs on ``fluid.CUDAPlace(0)``,
                otherwise on ``fluid.CPUPlace()``.
            seed: value assigned to ``random_seed`` of the programs built by
                ``build_program``.
            activation_quant_type: forwarded as ``activation_quantize_type``
                to ``QuantizationTransformPass``.
            weight_quant_type: forwarded as ``weight_quantize_type`` to
                ``QuantizationTransformPass`` and ``QuantizationFreezePass``.
            for_ci: not referenced in the visible part of this method.
            act_preprocess_func: forwarded to ``QuantizationTransformPass``.
            weight_preprocess_func: forwarded to
                ``QuantizationTransformPass``.
            act_quantize_func: forwarded to ``QuantizationTransformPass``;
                also gates the final freeze pass.
            weight_quantize_func: forwarded to ``QuantizationTransformPass``;
                also gates the final freeze pass.
        """
        def build_program(main, startup, is_test):
            """
            Build the network inside ``main``/``startup`` and return
            ``([img, label], loss)``. An SGD ``minimize`` step is added only
            when ``is_test`` is falsy.
            """
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(name='image',
                                            shape=[1, 28, 28],
                                            dtype='float32')
                    img.stop_gradient = False
                    label = fluid.layers.data(name='label',
                                              shape=[1],
                                              dtype='int64')
                    # conv_net is defined outside this method; its result is
                    # used as the loss to minimize.
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.SGD(learning_rate=0.0001)
                        opt.minimize(loss)
            return [img, label], loss

        def get_optimizer():
            """Optimizer factory handed to the transform passes as ``optimizer_func``."""
            return fluid.optimizer.MomentumOptimizer(0.0001, 0.9)

        def load_dict():
            """Read the mapping table back from its JSON file (path relative to the working directory)."""
            with open('mapping_table_for_saving_inference_model', 'r') as file:
                data = file.read()
                data = json.loads(data)
                return data

        def save_dict(Dict):
            """Write ``Dict`` as JSON to the mapping-table file, overwriting it."""
            with open('mapping_table_for_saving_inference_model', 'w') as file:
                file.write(json.dumps(Dict))

        # Fix the Python and NumPy random generators.
        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        # The test program is built against the same startup program; its
        # return value is discarded.
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        # A fresh scope is used instead of the global one; the startup
        # program is run inside it before any pass is applied.
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        # Two transform passes with identical arguments: one is applied to
        # the training graph, the other to the test graph.
        train_transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            act_preprocess_func=act_preprocess_func,
            weight_preprocess_func=weight_preprocess_func,
            act_quantize_func=act_quantize_func,
            weight_quantize_func=weight_quantize_func,
            optimizer_func=get_optimizer,
            executor=exe)
        train_transform_pass.apply(main_graph)
        test_transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            act_preprocess_func=act_preprocess_func,
            weight_preprocess_func=weight_preprocess_func,
            act_quantize_func=act_quantize_func,
            weight_quantize_func=weight_quantize_func,
            optimizer_func=get_optimizer,
            executor=exe)

        test_transform_pass.apply(test_graph)
        # Persist the mapping table of the transformed test graph; it is
        # read back from disk further down, before the freeze pass.
        save_dict(test_graph.out_node_mapping_table)

        # The same AddQuantDequantPass instance is applied to both graphs.
        add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(test_graph)

        # The out-scale training pass is applied to the training graph only.
        scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place)
        scale_training_pass.apply(main_graph)

        # NOTE(review): dev_name is not used in the visible part of this
        # method - confirm whether it is still needed.
        dev_name = '_gpu' if use_cuda else '_cpu'

        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
                                    batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                # NOTE(review): train_reader() is called on every iteration,
                # so each step takes the first batch of a newly created
                # reader rather than advancing through a single one.
                data = next(train_reader())
                # NOTE(review): loss_v is fetched but not used in the visible
                # part of this method.
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])

        out_scale_infer_pass = OutScaleForInferencePass(scope=scope)
        out_scale_infer_pass.apply(test_graph)

        freeze_pass = QuantizationFreezePass(
            scope=scope,
            place=place,
            weight_bits=8,
            activation_bits=8,
            weight_quantize_type=weight_quant_type)

        # Restore the mapping table from the file written after the test
        # transform pass and attach it to the test graph again.
        mapping_table = load_dict()
        test_graph.out_node_mapping_table = mapping_table
        # The freeze pass is applied only when no user-defined quantize
        # functions were supplied.
        # NOTE(review): `is None` is the idiomatic comparison here.
        if act_quantize_func == None and weight_quantize_func == None:
            freeze_pass.apply(test_graph)