Example #1
    def test_save_inference_model_with_auc(self):
        MODEL_DIR = "./tmp/inference_model4"
        init_program = Program()
        program = Program()

        # fake program without feed/fetch
        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='int32')
            predict = fluid.layers.fc(input=x, size=2, act='softmax')
            acc = fluid.layers.accuracy(input=predict, label=y)
            auc_var, batch_auc_var, auc_states = fluid.layers.auc(
                input=predict, label=y)
            cost = fluid.layers.cross_entropy(input=predict, label=y)
            avg_cost = fluid.layers.mean(x=cost)

        place = core.CPUPlace()
        exe = executor.Executor(place)
        exe.run(init_program, feed={}, fetch_list=[])
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe,
                                 program)
            expected_warn = "please ensure that you have set the auc states to zeros before saving inference model"
            self.assertTrue(len(w) > 0)
            self.assertTrue(expected_warn == str(w[0].message))
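The warning asserted in this test points at the AUC state variables; below is a hedged sketch of how one might zero those states before exporting. The scope/tensor handling is an assumption for illustration, not taken from the test itself; it relies on `place`, `auc_states`, and the executor's global scope as set up above.

    # hypothetical: reset accumulated AUC states before calling save_inference_model
    import numpy as np

    scope = fluid.global_scope()
    for state in auc_states:                      # auc_states returned by fluid.layers.auc(...)
        tensor = scope.find_var(state.name).get_tensor()
        zeros = np.zeros(np.array(tensor).shape, dtype=np.array(tensor).dtype)
        tensor.set(zeros, place)                  # overwrite the state with zeros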
Example #2
    def test_save_inference_model(self):
        MODEL_DIR = "./tmp/inference_model3"
        init_program = Program()
        program = Program()

        # fake program without feed/fetch
        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

        place = core.CPUPlace()
        exe = executor.Executor(place)
        exe.run(init_program, feed={}, fetch_list=[])

        # will print warning message

        cp_prog = CompiledProgram(program).with_data_parallel(
            loss_name=avg_cost.name)

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, cp_prog)
        self.assertRaises(TypeError, save_inference_model,
                          [MODEL_DIR, ["x", "y"], [avg_cost], [], cp_prog])
Example #3
    def save_inference_model(self,
                             executor,
                             dirname,
                             feeded_var_names=None,
                             target_vars=None,
                             main_program=None,
                             export_for_deployment=True):
        """
        Prune the given `main_program` to build a new program especially for
        inference, and then save it and all related parameters to given
        `dirname` by the `executor`.
        """
        assert isinstance(executor, Executor), \
            "In fleet.save_inference_model() function, executor must be an" \
            " Executor instance."

        if main_program is None:
            main_program = self._origin_program
        assert isinstance(main_program, Program), \
            "In fleet.save_inference_model() function, main_program " \
            "must be a Program instance."

        io.save_inference_model(dirname, feeded_var_names, target_vars,
                                executor, main_program, None, None,
                                export_for_deployment)
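A minimal call sketch for the wrapper above, assuming a fleet role has already been initialized and trained; `fleet`, `avg_cost`, and the feed names are illustrative placeholders, not part of the original source.

    # hypothetical usage of fleet.save_inference_model (names are placeholders)
    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    fleet.save_inference_model(
        executor=exe,
        dirname="./fleet_inference_model",
        feeded_var_names=["x", "y"],
        target_vars=[avg_cost])  # main_program=None falls back to fleet's origin program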
Example #4
 def __save_offline_model(self):
     '''
     Save the quantized model to the disk.
     '''
     io.save_inference_model(self.output, self.feed_var_names,
                             self.fetch_list, self.exe,
                             self.sampling_program)
Example #5
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in six.moves.xrange(100):
            tensor_x = np.array(
                [[1, 1], [1, 2], [3, 4], [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={'x': tensor_x,
                          'y': tensor_y},
                    fetch_list=[avg_cost])

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        expected = exe.run(program,
                           feed={'x': tensor_x,
                                 'y': tensor_y},
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope
        exe = executor.Executor(place)

        [infer_prog, feed_var_names, fetch_vars] = load_inference_model(
            MODEL_DIR, exe)

        outs = exe.run(
            infer_prog,
            feed={feed_var_names[0]: tensor_x,
                  feed_var_names[1]: tensor_y},
            fetch_list=fetch_vars)
        actual = outs[0]

        self.assertEqual(feed_var_names, ["x", "y"])
        self.assertEqual(len(fetch_vars), 1)
        self.assertEqual(str(fetch_vars[0]), str(avg_cost))
        self.assertEqual(expected, actual)
Example #6
 def save_inference_model(self,
                          executor,
                          dirname,
                          feeded_var_names=None,
                          target_vars=None,
                          main_program=None,
                          export_for_deployment=True):
     io.save_inference_model(dirname, feeded_var_names, target_vars,
                             executor, main_program, None, None,
                             export_for_deployment)
Example #7
def save_model(model_path, feeded_var_names, target_vars, exe):
    """
    Save the inference model. This function is intended only for the inference module!
    :param model_path: directory in which to save the inference model
    :param feeded_var_names: list of the inference model's input variable names
    :param target_vars: list of the inference model's output variables
    :param exe: executor used to save the inference model
    :return: None
    """
    assert os.path.exists(model_path), "[%s] can't be found." % model_path
    io.save_inference_model(model_path, feeded_var_names, target_vars, exe)
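A hedged usage sketch for the helper above. The network variable `logits`, the feed name, and the directory are illustrative assumptions; note that `save_model` asserts the target directory already exists, so it is created first.

    # hypothetical usage (assumes `logits` is an inference output Variable
    # built in the default main program)
    import os
    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    model_path = "./infer_model"
    if not os.path.isdir(model_path):
        os.makedirs(model_path)       # the assert above requires the path to exist
    save_model(model_path, ["image"], [logits], exe)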
Example #8
 def save_inference_model(self,
                          executor,
                          dirname,
                          feeded_var_names,
                          target_vars,
                          main_program=None,
                          export_for_deployment=True):
     """
     Prune the given `main_program` to build a new program especially for inference,
     and then save it and all related parameters to given `dirname` by the `executor`.
     """
     io.save_inference_model(dirname, feeded_var_names, target_vars,
                             executor, main_program, None, None,
                             export_for_deployment)
Example #9
    def save_inference_model(self, dirname, feed=None, fetch=None):
        """
        Save current model as the inference model.
        """
        program_cache = self.get_program_cache()
        if feed is None:
            feeded_var_names = [i.name for i in program_cache.inputs]
        else:
            feeded_var_names = [program_cache.inputs[i].name for i in feed]

        target_vars = program_cache.outputs
        from paddle.fluid.io import save_inference_model
        save_inference_model(dirname=dirname,
                             feeded_var_names=feeded_var_names,
                             target_vars=target_vars,
                             executor=self._exe,
                             main_program=self.main_program.clone())
Example #10
    def save_inference_model(self,
                             executor,
                             dirname,
                             feeded_var_names,
                             target_vars,
                             main_program=None,
                             export_for_deployment=True):
        """
        Prune the given `main_program` to build a new program especially for inference,
        and then save it and all related parameters to given `dirname` by the `executor`.
        """
        if isinstance(executor, ParallelExecutor):
            raise TypeError(
                "in fleet.save_inference_model() function, executor must be an Executor instance; ParallelExecutor is not allowed"
            )

        if not isinstance(executor, Executor):
            raise TypeError(
                "in fleet.save_inference_model() function, executor must be an Executor instance"
            )

        if main_program is not None:
            if isinstance(main_program, CompiledProgram):
                raise TypeError(
                    "in fleet.save_inference_model() function, main_program must be a Program instance; CompiledProgram is not allowed"
                )
            io.save_inference_model(dirname, feeded_var_names, target_vars,
                                    executor, main_program, None, None,
                                    export_for_deployment)
        else:
            io.save_inference_model(dirname, feeded_var_names, target_vars,
                                    executor, self._origin_program, None, None,
                                    export_for_deployment, True)

            model_basename = "__model__"
            model_filename = os.path.join(dirname, model_basename)

            with open(model_filename, "rb") as f:
                program_desc_str = f.read()

            program = Program.parse_from_string(program_desc_str)
            program._copy_dist_param_info_from(self.main_program)
            self.save_persistables(executor, dirname, program)
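A sketch of the two call paths above. The executor, feed names, and `prediction` variable are assumed from a prior distributed training setup and are not taken from the original code.

    # explicit main_program: prune and save that program directly
    fleet.save_inference_model(exe, "./infer_local", ["x"], [prediction],
                               main_program=fluid.default_main_program())

    # main_program=None: save from fleet's origin program, then rewrite
    # __model__ with distributed parameter info and save the persistables
    fleet.save_inference_model(exe, "./infer_dist", ["x"], [prediction])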
Example #11
    def test_save_inference_model(self):
        MODEL_DIR = "./tmp/inference_model2"
        init_program = Program()
        program = Program()

        # fake program without feed/fetch
        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

        place = core.CPUPlace()
        exe = executor.Executor(place)
        exe.run(init_program, feed={}, fetch_list=[])

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
Example #12
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"
        UNI_MODEL_DIR = "./tmp/inference_model1"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in six.moves.xrange(100):
            tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                                 [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={
                        'x': tensor_x,
                        'y': tensor_y
                    },
                    fetch_list=[avg_cost])

        # Separated model and unified model
        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        save_inference_model(UNI_MODEL_DIR, ["x", "y"], [avg_cost], exe,
                             program, 'model', 'params')
        main_program = program.clone()._prune_with_input(
            feeded_var_names=["x", "y"], targets=[avg_cost])
        params_str = save_persistables(exe, None, main_program, None)

        expected = exe.run(program,
                           feed={
                               'x': tensor_x,
                               'y': tensor_y
                           },
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope

        model_0 = InferModel(load_inference_model(MODEL_DIR, exe))
        with open(os.path.join(UNI_MODEL_DIR, 'model'), "rb") as f:
            model_str = f.read()
        model_1 = InferModel(
            load_inference_model(None, exe, model_str, params_str))

        for model in [model_0, model_1]:
            outs = exe.run(model.program,
                           feed={
                               model.feed_var_names[0]: tensor_x,
                               model.feed_var_names[1]: tensor_y
                           },
                           fetch_list=model.fetch_vars)
            actual = outs[0]

            self.assertEqual(model.feed_var_names, ["x", "y"])
            self.assertEqual(len(model.fetch_vars), 1)
            print("fetch %s" % str(model.fetch_vars[0]))
            self.assertEqual(expected, actual)

        self.assertRaises(ValueError, fluid.io.load_inference_model, None, exe,
                          model_str, None)
Example #13
def save_model(server_model_folder,
               client_config_folder,
               feed_var_dict,
               fetch_var_dict,
               main_program=None):
    executor = Executor(place=CPUPlace())

    feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
    target_vars = []
    target_var_names = []
    for key in sorted(fetch_var_dict.keys()):
        target_vars.append(fetch_var_dict[key])
        target_var_names.append(key)

    save_inference_model(
        server_model_folder,
        feed_var_names,
        target_vars,
        executor,
        main_program=main_program)

    config = model_conf.GeneralModelConfig()

    #int64 = 0; float32 = 1; int32 = 2;
    for key in feed_var_dict:
        feed_var = model_conf.FeedVar()
        feed_var.alias_name = key
        feed_var.name = feed_var_dict[key].name
        feed_var.is_lod_tensor = feed_var_dict[key].lod_level >= 1
        if feed_var_dict[key].dtype == core.VarDesc.VarType.INT64:
            feed_var.feed_type = 0
        if feed_var_dict[key].dtype == core.VarDesc.VarType.FP32:
            feed_var.feed_type = 1
        if feed_var_dict[key].dtype == core.VarDesc.VarType.INT32:
            feed_var.feed_type = 2
        if feed_var.is_lod_tensor:
            feed_var.shape.extend([-1])
        else:
            tmp_shape = []
            for v in feed_var_dict[key].shape:
                if v >= 0:
                    tmp_shape.append(v)
            feed_var.shape.extend(tmp_shape)
        config.feed_var.extend([feed_var])

    for key in target_var_names:
        fetch_var = model_conf.FetchVar()
        fetch_var.alias_name = key
        fetch_var.name = fetch_var_dict[key].name
        fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64:
            fetch_var.fetch_type = 0
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
            fetch_var.fetch_type = 1
        if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT32:
            fetch_var.fetch_type = 2
        if fetch_var.is_lod_tensor:
            fetch_var.shape.extend([-1])
        else:
            tmp_shape = []
            for v in fetch_var_dict[key].shape:
                if v >= 0:
                    tmp_shape.append(v)
            fetch_var.shape.extend(tmp_shape)
        config.fetch_var.extend([fetch_var])

    cmd = "mkdir -p {}".format(client_config_folder)

    os.system(cmd)
    with open("{}/serving_client_conf.prototxt".format(client_config_folder),
              "w") as fout:
        fout.write(str(config))
    with open("{}/serving_server_conf.prototxt".format(server_model_folder),
              "w") as fout:
        fout.write(str(config))
    with open("{}/serving_client_conf.stream.prototxt".format(
            client_config_folder), "wb") as fout:
        fout.write(config.SerializeToString())
    with open("{}/serving_server_conf.stream.prototxt".format(
            server_model_folder), "wb") as fout:
        fout.write(config.SerializeToString())
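A hedged sketch of invoking the serving exporter above; `image` and `prediction` stand in for whatever feed and fetch Variables the caller's network actually defines.

    # hypothetical usage after building a network in the default main program
    import paddle.fluid as fluid

    feed_var_dict = {"image": image}              # alias name -> feed Variable
    fetch_var_dict = {"prediction": prediction}   # alias name -> fetch Variable

    save_model("serving_server_model", "serving_client_conf",
               feed_var_dict, fetch_var_dict,
               main_program=fluid.default_main_program())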
Example #14
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(layer, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        self._gather_output_scale(layer)

        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if len(self._out_scale_dict) == 0:
            warnings.warn("Warning: no Layer of the model to be saved "
                          "contains the out_threshold attribute, so the "
                          "generated inference model would not contain the "
                          "out_threshold.")
            return

        # load static model
        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CUDAPlace(0) if core.is_compiled_with_cuda() \
            else core.CPUPlace()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # TODO(jc): analyse whether the dygraph model has
        # several blocks before applying qat
        assert infer_program.num_blocks == 1, \
            "Quantization aware training (QAT) requires the program " \
            "only has a block for now. When the model has if-else or " \
            "while, the program will have several blocks."

        # set output scales to the static model
        self._save_output_scale(infer_program)

        # process skip quant
        self._set_skip_quant_attr(infer_program)

        # save the final quantized model that has output scales
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()
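A heavily hedged call sketch for the method above: `imperative_qat` is assumed to be an instance of the quantization-aware-training helper that defines it, `model` an already-quantized dygraph Layer, and the InputSpec shape is purely illustrative.

    # hypothetical usage after quantization-aware training
    import paddle

    imperative_qat.save_quantized_model(
        layer=model,
        path="./quant_model/lenet",
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ])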
Example #15
    def save_inference_model(self, dirname, feed=None, fetch=None):
        """
        Save the TracedLayer to a model for inference. The saved
        inference model can be loaded by C++ inference APIs.

        Args:
            dirname (str): the directory to save the inference model.
            feed (list[int], optional): the input variable indices of the saved
                inference model. If None, all input variables of the
                TracedLayer object would be the inputs of the saved inference
                model. Default None.
            fetch (list[int], optional): the output variable indices of the
                saved inference model. If None, all output variables of the
                TracedLayer object would be the outputs of the saved inference
                model. Default None.

        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle.fluid as fluid
                from paddle.fluid.dygraph import Linear, to_variable, TracedLayer
                import numpy as np

                class ExampleLayer(fluid.dygraph.Layer):
                    def __init__(self):
                        super(ExampleLayer, self).__init__()
                        self._fc = Linear(3, 10)

                    def forward(self, input):
                        return self._fc(input)

                save_dirname = './saved_infer_model'
                in_np = np.random.random([2, 3]).astype('float32')

                with fluid.dygraph.guard():
                    layer = ExampleLayer()
                    in_var = to_variable(in_np)
                    out_dygraph, static_layer = TracedLayer.trace(layer, inputs=[in_var])
                    static_layer.save_inference_model(save_dirname, feed=[0], fetch=[0])

                place = fluid.CPUPlace()
                exe = fluid.Executor(place)
                program, feed_vars, fetch_vars = fluid.io.load_inference_model(save_dirname,
                                                    exe)

                fetch, = exe.run(program, feed={feed_vars[0]: in_np}, fetch_list=fetch_vars)
                print(fetch.shape) # (2, 10)
        """
        from paddle.fluid.io import save_inference_model

        def get_feed_fetch(all_vars, partial_vars):
            if partial_vars is None:
                return all_vars

            return [all_vars[idx] for idx in partial_vars]

        with scope_guard(self._scope):
            feeded_var_names = get_feed_fetch(self._feed_names, feed)
            target_var_names = get_feed_fetch(self._fetch_names, fetch)
            target_vars = []
            for name in target_var_names:
                target_var = self._program.global_block().vars.get(name, None)
                assert target_var is not None, "{} cannot be found".format(name)
                target_vars.append(target_var)

            save_inference_model(
                dirname=dirname,
                feeded_var_names=feeded_var_names,
                target_vars=target_vars,
                executor=self._exe,
                main_program=self._program.clone())
Example #16
    def test_fit_line_inference_model(self):
        MODEL_DIR = "./tmp/inference_model"

        init_program = Program()
        program = Program()

        with program_guard(program, init_program):
            x = layers.data(name='x', shape=[2], dtype='float32')
            y = layers.data(name='y', shape=[1], dtype='float32')

            y_predict = layers.fc(input=x, size=1, act=None)

            cost = layers.square_error_cost(input=y_predict, label=y)
            avg_cost = layers.mean(cost)

            sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
            sgd_optimizer.minimize(avg_cost, init_program)

        place = core.CPUPlace()
        exe = executor.Executor(place)

        exe.run(init_program, feed={}, fetch_list=[])

        for i in six.moves.xrange(100):
            tensor_x = np.array([[1, 1], [1, 2], [3, 4],
                                 [5, 2]]).astype("float32")
            tensor_y = np.array([[-2], [-3], [-7], [-7]]).astype("float32")

            exe.run(program,
                    feed={
                        'x': tensor_x,
                        'y': tensor_y
                    },
                    fetch_list=[avg_cost])

        save_inference_model(MODEL_DIR, ["x", "y"], [avg_cost], exe, program)
        expected = exe.run(program,
                           feed={
                               'x': tensor_x,
                               'y': tensor_y
                           },
                           fetch_list=[avg_cost])[0]

        six.moves.reload_module(executor)  # reload to build a new scope
        exe = executor.Executor(place)

        [infer_prog, feed_var_names,
         fetch_vars] = load_inference_model(MODEL_DIR, exe)

        outs = exe.run(infer_prog,
                       feed={
                           feed_var_names[0]: tensor_x,
                           feed_var_names[1]: tensor_y
                       },
                       fetch_list=fetch_vars)
        actual = outs[0]

        self.assertEqual(feed_var_names, ["x", "y"])
        self.assertEqual(len(fetch_vars), 1)
        print("fetch %s" % str(fetch_vars[0]))
        self.assertTrue("scale" in str(fetch_vars[0]))
        self.assertEqual(expected, actual)
Example #17
    def save_quantized_model(self,
                             model,
                             path,
                             input_spec=None,
                             onnx_format=False,
                             **config):
        """
        Save the quantized model for the inference.

        Args:
            model (Layer): The model to be saved.
            path (str): The path prefix to save model. The format is 
                ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input
                of the saved model's forward method, which can be described by
                InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of
                the saved model. Default None.
            onnx_format (bool, optional): Whether to export the quantized model 
                with format of ONNX. Default is False.
            **configs (dict, optional): Other save configuration options for
                compatibility. We do not recommend using these configurations,
                they may be removed in the future. If not necessary, DO NOT use
                them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of
                the saved model. By default, all return variables of original
                Layer's forward method are kept as the output of the saved model.
                If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given
                ``output_spec`` list. 

        Returns:
            None
        """
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

        paddle.jit.save(layer=model,
                        path=path,
                        input_spec=input_spec,
                        **config)

        is_dynamic_mode = False
        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        place = core.CPUPlace()
        scope = global_scope()
        exe = Executor(place)

        dirname = os.path.dirname(path)
        basename = os.path.basename(path)
        model_filename = basename + INFER_MODEL_SUFFIX
        params_filename = basename + INFER_PARAMS_SUFFIX

        [infer_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        self._gather_scales(infer_program, scope, fetch_targets)

        # Remove `moving_average_abs_max_scale` node in sub graphs.
        graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
        for sub_graph in graph.all_sub_graphs():
            for _op in sub_graph.all_op_nodes():
                if _op.name() == "moving_average_abs_max_scale":
                    sub_graph.safe_remove_nodes(_op)
            sub_graph.resolve_hazard()
        infer_program = graph.to_program()

        self._set_skip_quant_attr(infer_program)

        clip_extra = False
        if onnx_format:
            graph = IrGraph(core.Graph(infer_program.desc), for_test=False)
            transform_pass = ReplaceFakeQuantDequantPass(scope, place)
            transform_pass.apply(graph)

            quant_weight_pass = QuantWeightPass(scope, place)
            quant_weight_pass.apply(graph)
            infer_program = graph.to_program()

            clip_extra = True

        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=infer_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename,
                             clip_extra=clip_extra)

        if is_dynamic_mode:
            paddle.disable_static()
Example #18
    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
        Save the quantized model for the inference.

        Args:
            layer (Layer): The Layer to be saved.
            path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``.
            input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward 
                method, which can be described by InputSpec or example Tensor. If None, all input variables of 
                the original Layer's forward method would be the inputs of the saved model. Default None.
            **configs (dict, optional): Other save configuration options for compatibility. We do not 
                recommend using these configurations, they may be removed in the future. If not necessary, 
                DO NOT use them. Default None.
                The following options are currently supported:
                (1) output_spec (list[Tensor]): Selects the output targets of the saved model.
                By default, all return variables of original Layer's forward method are kept as the 
                output of the saved model. If the provided ``output_spec`` list is not all output variables, 
                the saved model will be pruned according to the given ``output_spec`` list. 

        Returns:
            None
        """

        assert isinstance(
            layer,
            dygraph.Layer), "model must be the instance of dygraph.Layer"
        is_dynamic_mode = False
        with dygraph.guard():
            layer.eval()
            for handle in self._register_hook_handle_list:
                handle.remove()
            for key in self._out_scale_dict:
                self._out_scale_dict[key] = float(
                    self._out_scale_dict[key].numpy())

        paddle.jit.save(layer=layer,
                        path=path,
                        input_spec=input_spec,
                        **config)

        if paddle.in_dynamic_mode():
            is_dynamic_mode = True
            paddle.enable_static()

        if core.is_compiled_with_cuda():
            place = core.CUDAPlace(0)
        else:
            place = core.CPUPlace()
        exe = Executor(place)

        file_prefix = os.path.basename(path)
        dirname = os.path.dirname(path)
        model_filename = file_prefix + INFER_MODEL_SUFFIX
        params_filename = file_prefix + INFER_PARAMS_SUFFIX

        [inference_program, feed_target_names, fetch_targets
         ] = (load_inference_model(dirname=dirname,
                                   executor=exe,
                                   model_filename=model_filename,
                                   params_filename=params_filename))

        # Traverse all ops in the program and find out the op matching
        # the Layer in the dynamic graph.
        layer_var_dict = {}
        for block in inference_program.blocks:
            for op in block.ops:
                if op.type in _op_real_in_out_name:
                    output_var_names = quantization_pass._get_op_output_var_names(
                        op)
                    for output_var_name in output_var_names:
                        output_var_tensor = block.var(output_var_name)
                        if output_var_tensor.dtype not in [
                                core.VarDesc.VarType.FP64,
                                core.VarDesc.VarType.FP32
                        ]:
                            continue
                        # Because the Layer in dygraph may correspond to multiple ops
                        # in static program after being saved. To ensure correctness,
                        # the outscale collected for output of dygraph Layer can only
                        # be set to the last op in the corresponding ops in static program.
                        #
                        # We can judge the execution order of the ops which corresponding
                        # to dygraph Layer by the name of output. And use dict to save
                        # the corresponding relationship between the dygraph Layer and the
                        # static graph op that needs to set the outscale attribute.
                        if '.' not in output_var_name:
                            continue
                        dynamic_layer_name, var_name_suffix = output_var_name.split(
                            ".")
                        if dynamic_layer_name in layer_var_dict:
                            if layer_var_dict[dynamic_layer_name][
                                    0] < var_name_suffix:
                                layer_var_dict[dynamic_layer_name] = [
                                    var_name_suffix, op
                                ]
                        else:
                            layer_var_dict[dynamic_layer_name] = [
                                var_name_suffix, op
                            ]

        # Because the naming styles of static and dynamic graph are different,
        # in order to avoid mistakes, we unify the name here.
        for (layer_name, var_name_op_list) in layer_var_dict.items():
            if 'prelu' in layer_name:
                layer_name = layer_name.replace('prelu', 'p_re_lu')
            if 'relu' in layer_name:
                layer_name = layer_name.replace('relu', 're_lu')
            if layer_name not in self._out_scale_dict:
                continue
            var_name_op_list[1]._set_attr('out_threshold',
                                          self._out_scale_dict[layer_name])

        # Save the processed program.
        save_inference_model(dirname=dirname,
                             feeded_var_names=feed_target_names,
                             target_vars=fetch_targets,
                             executor=exe,
                             main_program=inference_program.clone(),
                             model_filename=model_filename,
                             params_filename=params_filename)

        if is_dynamic_mode:
            paddle.disable_static()
Example #19
    def save_inference_model(self, dirname, feed=None, fetch=None):
        """
        Save the TracedLayer to a model for inference. The saved
        inference model can be loaded by C++ inference APIs. 

        Args:
            dirname (str): the directory to save the inference model.  
            feed (list[int], optional): the input variable indices of the saved
                inference model. If None, all input variables of the 
                TracedLayer object would be the inputs of the saved inference
                model. Default None.
            fetch (list[int], optional): the output variable indices of the
                saved inference model. If None, all output variables of the
                TracedLayer object would be the outputs of the saved inference
                model. Default None.

        Returns:
            The fetch variables' name list
        
        Return Type: 
            list(str)

        Examples:

            .. code-block:: python

                import paddle.fluid as fluid
                from paddle.fluid.dygraph import FC, to_variable, TracedLayer
                import paddle.fluid.dygraph.jit as jit
                import numpy as np

                class ExampleLayer(fluid.dygraph.Layer):
                    def __init__(self, name_scope):
                        super(ExampleLayer, self).__init__(name_scope)
                        self._fc = FC(self.full_name(), 10) 

                    def forward(self, input):
                        return self._fc(input)

                with fluid.dygraph.guard():
                    layer = ExampleLayer("example_layer")
                    in_np = np.random.random([2, 3]).astype('float32')
                    in_var = to_variable(in_np)
                    out_dygraph, static_layer = TracedLayer.trace(layer, inputs=[in_var])
                    static_layer.save_inference_model('./saved_infer_model')
        """
        def get_feed_fetch(all_vars, partial_vars):
            if partial_vars is None:
                return all_vars

            return [all_vars[idx] for idx in partial_vars]

        with scope_guard(self._scope):
            feeded_var_names = get_feed_fetch(self._feed_names, feed)
            target_var_names = get_feed_fetch(self._fetch_names, fetch)
            target_vars = []
            for name in target_var_names:
                target_var = self._program.global_block().vars.get(name, None)
                assert target_var is not None, "{} cannot be found".format(
                    name)
                target_vars.append(target_var)

            return fluid_io.save_inference_model(
                dirname=dirname,
                feeded_var_names=feeded_var_names,
                target_vars=target_vars,
                executor=self._exe,
                main_program=self._program.clone())
Example #20
    def save_inference_model(self, dirname, feed=None, fetch=None):
        """
        Saves the current model as an inference model. It will prune the main_program
        to build a new program especially for inference, and then save it and all
        related parameters to the given `dirname`. The saved inference model can be
        loaded by :ref:`api_fluid_io_load_inference_model` or C++ inference APIs.

        Args:
            dirname (str): the directory to save the inference model.
            feed (list[int], optional): the input variable indices of the saved
                inference model. If None, all input variables of the
                ProgramTranslator would be the inputs of the saved inference
                model. Default None.
            fetch (list[int], optional): the output variable indices of the
                saved inference model. If None, all output variables of the
                ProgramTranslator would be the outputs of the saved inference
                model. Default None.
        Returns:
            None
        Examples:
            .. code-block:: python

                import numpy as np
                import paddle.fluid as fluid
                from paddle.fluid.dygraph import Linear
                from paddle.fluid.dygraph import declarative
                from paddle.fluid.dygraph import ProgramTranslator

                class SimpleNet(fluid.dygraph.Layer):
                    def __init__(self, in_size, out_size):
                        super(SimpleNet, self).__init__()
                        self._linear = Linear(in_size, out_size)

                    @declarative
                    def forward(self, x):
                        y = self._linear(x)
                        z = self._linear(y)
                        loss = fluid.layers.mean(z)
                        return z, loss

                with fluid.dygraph.guard(fluid.CPUPlace()):
                    net = SimpleNet(8, 8)
                    adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters())
                    x = fluid.dygraph.to_variable(np.random.random((4, 8)).astype('float32'))
                    for i in range(10):
                        loss, out = net(x)
                        loss.backward()
                        adam.minimize(loss)
                        net.clear_gradients()
                # Save inference model.
                # Note that fetch=[0] means we set 'z' as the inference output.
                prog_trans = ProgramTranslator()
                prog_trans.save_inference_model("./dy2stat_infer_model", fetch=[0])

                # In this example, the inference model will be pruned based on input (x) and
                # output (z). The pruned inference program is going to be saved in the folder
                # "./dy2stat_infer_model" and parameters are going to be saved in separate
                # files in the folder.
        """
        def get_feed_fetch(var_list, partial_vars, return_name=False):
            vars = [
                var for var in var_list if isinstance(var, framework.Variable)
            ]
            if partial_vars:
                vars = [vars[idx] for idx in partial_vars]
            if return_name:
                vars = [var.name for var in vars]

            return vars

        func_spec, (concrete_program,
                    partial_layer) = self._program_cache.last()
        # share paramBase data with parameter
        scope = core.Scope()
        for param_base in concrete_program.parameters:
            param_tensor = scope.var(param_base.name).get_tensor()
            src_tensor = param_base.value().get_tensor()
            param_tensor._share_data_with(src_tensor)

        feed_var_names = get_feed_fetch(concrete_program.inputs, feed, True)
        fetch_vars = get_feed_fetch(concrete_program.outputs, fetch)

        from paddle.fluid.io import save_inference_model
        with scope_guard(scope):
            save_inference_model(
                dirname=dirname,
                feeded_var_names=feed_var_names,
                target_vars=fetch_vars,
                executor=executor.Executor(
                    framework._current_expected_place()),
                main_program=concrete_program.main_program.clone())