def data_layer_not_check(name, shape, dtype='float32', lod_level=0):
    """
    This function creates a Tensor on the global block. The created Tensor
    doesn't check the dtype and the shape of feed data because dygraph input
    data can be various-length. This API is used in translating dygraph into
    static graph.

    Note:
        The default :code:`stop_gradient` attribute of the Tensor created by
        this API is true, which means the gradient won't be passed backward
        through the data Tensor. Set :code:`var.stop_gradient = False` if the
        user would like to pass the gradient backward.

    Args:
        name (str): The name/alias of the Tensor, see :ref:`api_guide_Name`
            for more details.
        shape (list|tuple): List|Tuple of integers declaring the shape. You
            can set "None" at a dimension to indicate the dimension can be of
            any size. For example, it is useful to set a changeable batch size
            as "None".
        dtype (np.dtype|VarType|str, optional): The type of the data. Supported
            dtype: bool, float16, float32, float64, int8, int16, int32, int64,
            uint8. Default: float32.
        lod_level (int, optional): The LoD level of the LoDTensor. Usually
            users don't have to set this value. For more details about when
            and how to use LoD level, see :ref:`user_guide_lod_tensor`.
            Default: 0.

    Returns:
        Tensor: The global Tensor that gives access to the data.
    """
    helper = LayerHelper('data', **locals())
    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    return helper.create_global_variable(name=name,
                                         shape=shape,
                                         dtype=dtype,
                                         type=core.VarDesc.VarType.LOD_TENSOR,
                                         stop_gradient=True,
                                         lod_level=lod_level,
                                         is_data=True,
                                         need_check_feed=False)
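
# A minimal usage sketch (illustrative, not part of the original module): it
# shows that a variable created by ``data_layer_not_check`` is fed without the
# executor validating shape/dtype, since ``need_check_feed=False``. The helper
# name ``_example_data_layer_not_check`` is hypothetical.
def _example_data_layer_not_check():
    import numpy as np
    import paddle.fluid as fluid

    # Declared as [None, 8]; because need_check_feed=False, the executor
    # does not validate the fed array against this declaration (a feed with
    # a mismatched shape would also be accepted).
    x = data_layer_not_check(name='x_not_check', shape=[None, 8])
    y = fluid.layers.reduce_sum(x)
    exe = fluid.Executor(fluid.CPUPlace())
    out = exe.run(fluid.default_main_program(),
                  feed={'x_not_check': np.ones([3, 8], dtype='float32')},
                  fetch_list=[y])
    print(out)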
def data(name, shape, dtype='float32', lod_level=0):
    """
    **Data Layer**

    This function creates a variable on the global block. The global variable
    can be accessed by all the following operators in the graph. The variable
    is a placeholder that could be fed with input, such that an Executor can
    feed input into the variable.

    Note:
        `paddle.fluid.layers.data` is deprecated. It will be removed in a
        future version. Please use this `paddle.fluid.data`.

        `paddle.fluid.layers.data` sets the shape and dtype at compile time
        but does NOT check the shape or the dtype of fed data, while this
        `paddle.fluid.data` checks the shape and the dtype of data fed by
        Executor or ParallelExecutor during run time.

        To feed variable size inputs, users can set None or -1 on the
        variable dimension when using :code:`paddle.fluid.data`, or feed
        variable size inputs directly to :code:`paddle.fluid.layers.data` and
        PaddlePaddle will fit the size accordingly.

        The default :code:`stop_gradient` attribute of the Variable created
        by this API is true, which means the gradient won't be passed
        backward through the data Variable. Set
        :code:`var.stop_gradient = False` if the user would like to pass the
        gradient backward.

    Args:
        name (str): The name/alias of the variable, see :ref:`api_guide_Name`
            for more details.
        shape (list|tuple): List|Tuple of integers declaring the shape. You
            can set "None" or -1 at a dimension to indicate the dimension can
            be of any size. For example, it is useful to set a changeable
            batch size as "None" or -1.
        dtype (np.dtype|VarType|str, optional): The type of the data. Supported
            dtype: bool, float16, float32, float64, int8, int16, int32, int64,
            uint8. Default: float32.
        lod_level (int, optional): The LoD level of the LoDTensor. Usually
            users don't have to set this value. For more details about when
            and how to use LoD level, see :ref:`user_guide_lod_tensor`.
            Default: 0.

    Returns:
        Variable: The global variable that gives access to the data.

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            import numpy as np

            # Creates a variable with fixed size [3, 2, 1]
            # User can only feed data of the same shape to x
            x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')

            # Creates a variable with changeable batch size -1.
            # Users can feed data of any batch size into y,
            # but size of each data sample has to be [2, 1]
            y = fluid.data(name='y', shape=[-1, 2, 1], dtype='float32')

            z = x + y

            # In this example, we will feed x and y with np-ndarray "1"
            # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
            feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)

            exe = fluid.Executor(fluid.CPUPlace())
            out = exe.run(fluid.default_main_program(),
                          feed={
                              'x': feed_data,
                              'y': feed_data
                          },
                          fetch_list=[z.name])

            # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
            print(out)
    """
    helper = LayerHelper('data', **locals())

    check_type(name, 'name', (six.binary_type, six.text_type), 'data')
    check_type(shape, 'shape', (list, tuple), 'data')

    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    return helper.create_global_variable(name=name,
                                         shape=shape,
                                         dtype=dtype,
                                         type=core.VarDesc.VarType.LOD_TENSOR,
                                         stop_gradient=True,
                                         lod_level=lod_level,
                                         is_data=True,
                                         need_check_feed=True)
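
# A hedged sketch (illustrative, not part of the original module): unlike
# ``data_layer_not_check`` above, a variable from ``fluid.data`` is created
# with ``need_check_feed=True``, so feeding a mismatched shape is expected to
# fail at run time. The helper name ``_example_data_feed_check`` is
# hypothetical.
def _example_data_feed_check():
    import numpy as np
    import paddle.fluid as fluid

    x = fluid.data(name='x_checked', shape=[3, 2, 1], dtype='float32')
    exe = fluid.Executor(fluid.CPUPlace())
    try:
        # Shape [4, 2, 1] does not match the declared [3, 2, 1]; the
        # executor's feed check should reject it.
        exe.run(fluid.default_main_program(),
                feed={'x_checked': np.ones([4, 2, 1], dtype='float32')},
                fetch_list=[x])
    except Exception as e:
        print("feed check failed as expected:", e)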
def data(name, shape, dtype=None, lod_level=0):
    """
    **Data Layer**

    This function creates a variable on the global block. The global variable
    can be accessed by all the following operators in the graph. The variable
    is a placeholder that could be fed with input, such that an Executor can
    feed input into the variable. When `dtype` is None, the dtype is taken
    from the global default dtype via `paddle.get_default_dtype()`.

    Args:
        name (str): The name/alias of the variable, see :ref:`api_guide_Name`
            for more details.
        shape (list|tuple): List|Tuple of integers declaring the shape. You
            can set "None" or -1 at a dimension to indicate the dimension can
            be of any size. For example, it is useful to set a changeable
            batch size as "None" or -1.
        dtype (np.dtype|str, optional): The type of the data. Supported
            dtype: bool, float16, float32, float64, int8, int16, int32, int64,
            uint8. Default: None. When `dtype` is not set, it is taken from
            the global default dtype via `paddle.get_default_dtype()`.
        lod_level (int, optional): The LoD level of the LoDTensor. Usually
            users don't have to set this value. For more details about when
            and how to use LoD level, see :ref:`user_guide_lod_tensor`.
            Default: 0.

    Returns:
        Variable: The global variable that gives access to the data.

    Examples:
        .. code-block:: python

            import numpy as np
            import paddle

            # Creates a variable with fixed size [3, 2, 1]
            # User can only feed data of the same shape to x
            # the dtype is not set, so it will be set to "float32" by
            # paddle.get_default_dtype(). You can use paddle.set_default_dtype()
            # to change the global dtype
            x = paddle.static.data(name='x', shape=[3, 2, 1])

            # Creates a variable with changeable batch size -1.
            # Users can feed data of any batch size into y,
            # but size of each data sample has to be [2, 1]
            y = paddle.static.data(name='y', shape=[-1, 2, 1], dtype='float32')

            z = x + y

            # In this example, we will feed x and y with np-ndarray "1"
            # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
            feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)

            exe = paddle.static.Executor(paddle.framework.CPUPlace())
            out = exe.run(paddle.static.default_main_program(),
                          feed={
                              'x': feed_data,
                              'y': feed_data
                          },
                          fetch_list=[z.name])

            # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
            print(out)
    """
    helper = LayerHelper('data', **locals())

    check_type(name, 'name', (six.binary_type, six.text_type), 'data')
    check_type(shape, 'shape', (list, tuple), 'data')

    shape = list(shape)
    for i in six.moves.range(len(shape)):
        if shape[i] is None:
            shape[i] = -1

    if not dtype:
        dtype = paddle.get_default_dtype()
    return helper.create_global_variable(name=name,
                                         shape=shape,
                                         dtype=dtype,
                                         type=core.VarDesc.VarType.LOD_TENSOR,
                                         stop_gradient=True,
                                         lod_level=lod_level,
                                         is_data=True,
                                         need_check_feed=True)
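
# A hedged sketch (illustrative, not part of the original module): how the
# ``dtype=None`` branch interacts with the global default dtype.
# ``paddle.set_default_dtype`` is the standard setter paired with
# ``paddle.get_default_dtype``; the helper name is hypothetical.
def _example_default_dtype():
    import paddle

    paddle.enable_static()
    # With no dtype argument, the variable picks up the global default.
    x = paddle.static.data(name='x_f32', shape=[None, 4])
    print(x.dtype)  # expected to report a float32 dtype

    paddle.set_default_dtype('float64')
    y = paddle.static.data(name='y_f64', shape=[None, 4])
    print(y.dtype)  # expected to report float64 after changing the default

    # Restore the default so later code is unaffected.
    paddle.set_default_dtype('float32')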
class QuantizeTranspiler(object):

    def __init__(self,
                 weight_bits=8,
                 activation_bits=8,
                 activation_quantize_type='abs_max',
                 weight_quantize_type='abs_max',
                 window_size=10000,
                 moving_rate=0.9):
        """
        Convert and rewrite the fluid Program according to weight and
        activation quantization type.

        Args:
            weight_bits (int): quantization bit number for weights; the bias
                is not quantized.
            activation_bits (int): quantization bit number for activations.
            activation_quantize_type (str): quantization type for activations,
                now supporting 'abs_max', 'range_abs_max' and
                'moving_average_abs_max'. If 'abs_max' is used, the
                quantization scale will be calculated dynamically at each
                step, in both the training and testing period. If
                'range_abs_max' is used, a static quantization scale will be
                calculated during training and used in inference. If
                'moving_average_abs_max' is used, the scale is a moving
                average of the abs-max values seen during training and is
                used in inference.
            weight_quantize_type (str): quantization type for weights,
                supporting 'abs_max'. 'range_abs_max' is usually not used for
                weights, since weights are fixed once the model is well
                trained.
            window_size (int): the window size for 'range_abs_max'
                quantization.
            moving_rate (float): the decay coefficient of the moving average
                for 'moving_average_abs_max' quantization. Default: 0.9.

        Examples:
        .. code-block:: python

            # The original program will be rewritten; if you don't want to
            # change it, please clone it first.
            # quantize_program = program.clone()
            t = fluid.QuantizeTranspiler()
            t.training_transpile(quantize_program)
            # (A fuller end-to-end sketch follows the class definition below.)
        """
        self.weight_bits = weight_bits
        self.activation_bits = activation_bits
        quant_type = ['abs_max', 'range_abs_max', 'moving_average_abs_max']
        if weight_quantize_type not in quant_type:
            raise ValueError(
                "Unknown weight_quantize_type: '%s'. It can only be "
                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'." %
                str(weight_quantize_type))
        if activation_quantize_type not in quant_type:
            raise ValueError(
                "Unknown activation_quantize_type: '%s'. It can only be "
                "'abs_max' or 'range_abs_max' or 'moving_average_abs_max'." %
                str(activation_quantize_type))

        self.weight_quantize_type = weight_quantize_type
        self.activation_quantize_type = activation_quantize_type

        self.window_size = window_size
        self.moving_rate = moving_rate
        self.helper = LayerHelper(self.__class__.__name__)
        self.fake_quant_op_types = [
            'fake_quantize_abs_max', 'fake_quantize_range_abs_max',
            'fake_quantize_moving_average_abs_max'
        ]
        self.fake_dequant_op_types = ['fake_dequantize_max_abs']
        self.is_test = None
        self.global_step = None

    def training_transpile(self, program=None, startup_program=None):
        """Rewrites a training input program in place for simulated
        quantization. Inserts fake quantization and de-quantization ops into
        the program to simulate the error introduced by quantization, and
        changes the gradient ops' inputs to use the fake-quantized weights
        and activations. Since the program is transformed in place, the graph
        connections will change.

        Args:
            program (Program): the input program to be transpiled.
            startup_program (Program): the startup program to be transpiled.
        """
        self.is_test = False
        program = default_main_program() if program is None else program
        startup_program = default_startup_program() if startup_program is \
            None else startup_program

        # marks the variables that have been quantized and dequantized
        dequanted_vars = [
            collections.OrderedDict() for _ in range(len(program.blocks))
        ]
        grad_op_types = ['%s_grad' % (type) for type in _QUANTIZABLE_OP_TYPES]

        params = [p.name for p in program.global_block().iter_parameters()]

        def _transpile_forward(block, op):
            idx = block.ops.index(op)
            block_id = block.idx
            # insert quant op and dequant op
            for name in op.input_arg_names:
                # if the input is shared between ops
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                else:
                    var = block.var(name)
                    quant_bits = self.weight_bits if var.name in params \
                        else self.activation_bits
                    quant_type = self.weight_quantize_type if var.name \
                        in params else self.activation_quantize_type

                    quant_var, scale_var = self._insert_quant_op(
                        block, idx, var, quant_bits, quant_type)

                    dequant_var = self._insert_dequant_op(
                        block, idx + 1, quant_var, scale_var, quant_bits)
                    dequanted_vars[block_id][name] = dequant_var
                # rename the forward op's inputs
                op._rename_input(name, dequant_var.name)

        def _transpile_backward(block, op):
            block_id = block.idx
            no_dequanted_input_vars = True
            for name in op.input_arg_names:
                if name in dequanted_vars[block_id]:
                    dequant_var = dequanted_vars[block_id][name]
                    op._rename_input(name, dequant_var.name)
                    no_dequanted_input_vars = False
            if no_dequanted_input_vars:
                raise ValueError("There are no dequantized inputs for op %s." %
                                 (op.type))

        with program_guard(program, startup_program):
            self._create_global_step()
            for block in program.blocks:
                ops = list(block.ops)
                block_id = block.idx
                for op in ops:
                    # rewrite the forward ProgramDesc
                    if op.type in _QUANTIZABLE_OP_TYPES:
                        _transpile_forward(block, op)
                    # rename the backward op's inputs
                    if op.type in grad_op_types:
                        _transpile_backward(block, op)

    def _create_global_step(self):
        if self.weight_quantize_type == 'range_abs_max' or \
                self.activation_quantize_type == 'range_abs_max':
            self.global_step = autoincreased_step_counter()

    def freeze_program(self, program, place, scope=None):
        """Freeze the input training program for inference.

        Args:
            program (Program): the input program to be transpiled.
""" self.is_test = True scope = global_scope() if scope is None else scope program = default_main_program() if program is None else program persistable_vars = [ v.name for v in filter(lambda var: var.persistable, program.list_vars()) ] op_in_rename_map = [ collections.OrderedDict() for _ in range(len(program.blocks)) ] op_out_rename_map = [ collections.OrderedDict() for _ in range(len(program.blocks)) ] var_scale_map = [ collections.OrderedDict() for _ in range(len(program.blocks)) ] def _remove_fake_quant_and_dequant_op(block, op): idx = block.ops.index(op) block_id = block.idx k = op.output('Out')[0] v = op.input('X')[0] if v not in op_in_rename_map[block_id]: op_in_rename_map[block_id][k] = v else: op_in_rename_map[block_id][k] = op_in_rename_map[block_id][v] block._remove_op(idx) def _insert_post_dequant_op(block, op): idx = block.ops.index(op) block_id = block.idx max_range = None scale_var = None for name in op.input_arg_names: #rename input name of the op to the input name of last op which has be removed if name in op_in_rename_map[block_id]: op._rename_input(name, op_in_rename_map[block_id][name]) scale_v = var_scale_map[block_id][_original_var_name(name)] if _original_var_name(name) in persistable_vars: param_range = (1 << (self.weight_bits - 1)) - 1 act_range = (1 << (self.activation_bits - 1)) - 1 assert _is_float(scale_v) max_range = param_range * act_range / scale_v else: assert isinstance(scale_v, Variable) scale_var = scale_v if len(op.output_arg_names) != 1: raise ValueError("Only support one output, but op %s has" " more than one output." % (op.type)) out_var = block.var(op.output_arg_names[0]) dequant_var = block.create_var(name=_dequantized_var_name( out_var.name), type=out_var.type, shape=out_var.shape, dtype=out_var.dtype) # insert fake_dequantize_op dequant_op = block._insert_op( idx + 1, type="fake_dequantize_max_abs", attrs={'max_range': float(max_range)}, inputs={ "X": out_var, 'Scale': scale_var }, outputs={"Out": dequant_var}) op_out_rename_map[block_id][out_var.name] = dequant_var.name return dequant_var def _load_var(name): return np.array(scope.find_var(name).get_tensor()) def _restore_var(name, arr): t = scope.find_var(name).get_tensor() t.set(arr, place) for block in program.blocks: ops = list(block.ops) block_id = block.idx for op in ops: op_type = op.type # insert dequant_op after fc/conv, need to rename # input of the followed ops(of fc/conv) to the dquant_op for name in op.input_arg_names: if name in op_out_rename_map[block_id]: op._rename_input(name, op_out_rename_map[block_id][name]) if op_type in self.fake_quant_op_types: in_arg_name = op.input('X')[0] if in_arg_name in persistable_vars: if self.weight_quantize_type == 'abs_max': param = _load_var(in_arg_name) scale_v = np.max(np.abs(param)) else: scale_v = _load_var(op.output('OutScale')[0]) var_scale_map[block_id][in_arg_name] = scale_v else: scale_v = block.var(op.output('OutScale')[0]) var_scale_map[block_id][in_arg_name] = scale_v if in_arg_name in persistable_vars: _remove_fake_quant_and_dequant_op(block, op) # quantize weight and restore param_t = _load_var(in_arg_name) param_q_t = quant(param_t, scale_v, self.weight_bits) _restore_var(in_arg_name, param_q_t) if op_type in self.fake_dequant_op_types: _remove_fake_quant_and_dequant_op(block, op) if op_type in _QUANTIZABLE_OP_TYPES: dequant_var = _insert_post_dequant_op(block, op) # remove the unused var in ProgramDesc self._remove_unused_var(program) #program = program.clone() def convert_to_int8(self, program, place, scope=None): scope = 
        scope = global_scope() if scope is None else scope
        program = default_main_program() if program is None else program

        def _load_var(name):
            return np.array(scope.find_var(name).get_tensor())

        global_block = program.global_block()

        def convert_to_int8(var):
            int8_var_name = var.name + ".int8"
            int8_var = global_block.create_parameter(
                name=int8_var_name.encode('ascii'),
                type=var.type,
                dtype=core.VarDesc.VarType.INT8,
                shape=var.shape)

            tensor = _load_var(var.name)

            scope.var(int8_var_name)
            int8_tensor = scope.find_var(int8_var_name).get_tensor()
            int8_tensor.set(tensor.astype(np.int8), place)
            return int8_var

        input_map = {}
        for block in program.blocks:
            for op in list(block.ops):
                if op.type in _QUANTIZABLE_OP_TYPES:
                    for name in op.input_arg_names:
                        var = block.var(name)
                        if var.persistable:
                            if name not in input_map:
                                int8_var = convert_to_int8(var)
                                input_map[name] = int8_var.name
                            op._rename_input(name, input_map[name])
        self._remove_unused_var(program)

    def _remove_unused_var(self, program):
        all_remove_vars = []
        for block in program.blocks:
            args = []
            for op in block.ops:
                args += op.input_arg_names
                args += op.output_arg_names
            args = list(set(args))  # input/output args of all remaining ops
            var_names = block.vars.keys()  # all variables in the block
            sub_block_remove_vars = []
            for var in var_names:
                if var not in args:
                    sub_block_remove_vars.append(var)
            all_remove_vars.append(sub_block_remove_vars)

        remove_vars = [list(set(v)) for v in all_remove_vars]
        for i, block in enumerate(program.blocks):
            for v in remove_vars[i]:
                block._remove_var(v)

    def _insert_quant_abs_max_op(self, block, idx, var, quant_bits):
        """Insert a fake_quantize_abs_max op.
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = block.create_var(name=_quantized_scale_name(var.name),
                                 type=var.type,
                                 shape=var.shape,
                                 dtype=var.dtype)
        quant_op = block._insert_op(idx,
                                    type='fake_quantize_abs_max',
                                    attrs={'bit_length': quant_bits},
                                    inputs={'X': var},
                                    outputs={
                                        'Out': quant_var,
                                        'OutScale': scale
                                    })
        return quant_var, scale

    def _insert_quant_range_abs_max_op(self, block, idx, var, quant_bits):
        """Insert a fake_quantize_range_abs_max op.
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            # A window recording the history of scale values during training.
            scales = self.helper.create_global_variable(
                name=unique_name.generate('scales'),
                persistable=True,
                dtype=var.dtype,
                shape=[self.window_size])
            self.helper.set_variable_initializer(
                scales, initializer=Constant(value=0))
            ins['Iter'] = self.global_step
            outs['OutScales'] = scales
        attrs = {
            'window_size': self.window_size,
            'bit_length': quant_bits,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(idx,
                                    type='fake_quantize_range_abs_max',
                                    attrs=attrs,
                                    inputs=ins,
                                    outputs=outs)

        return quant_var, scale

    def _insert_quant_moving_average_abs_max_op(self, block, idx, var,
                                                quant_bits):
        """Insert a fake_quantize_moving_average_abs_max op.
        """
        quant_var = block.create_var(name=_quantized_var_name(var.name),
                                     type=var.type,
                                     shape=var.shape,
                                     dtype=var.dtype)
        state = self.helper.create_global_variable(
            name=unique_name.generate('state'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(state,
                                             initializer=Constant(value=1))
        accum = self.helper.create_global_variable(
            name=unique_name.generate('accum'),
            persistable=True,
            dtype=var.dtype,
            shape=[1])
        self.helper.set_variable_initializer(accum,
                                             initializer=Constant(value=1))
        scale = self.helper.create_parameter(attr=ParamAttr(
            name=_quantized_scale_name(var.name),
            initializer=Constant(0.001),
            trainable=False),
                                             shape=[1],
                                             dtype=var.dtype)
        scale.stop_gradient = True

        ins = {'X': var, 'InScale': scale}
        outs = {'Out': quant_var, 'OutScale': scale}
        if not self.is_test:
            ins['InState'] = state
            ins['InAccum'] = accum
            outs['OutState'] = state
            outs['OutAccum'] = accum

        attrs = {
            'bit_length': quant_bits,
            'moving_rate': self.moving_rate,
            'is_test': self.is_test
        }

        quant_op = block._insert_op(
            idx,
            type='fake_quantize_moving_average_abs_max',
            attrs=attrs,
            inputs=ins,
            outputs=outs)

        return quant_var, scale

    def _insert_quant_op(self, block, idx, var, quant_bits, quant_type):
        """
        Insert a fake_quantize op of the given quant_type.
        """
        if quant_type == 'abs_max':
            return self._insert_quant_abs_max_op(block, idx, var, quant_bits)
        elif quant_type == 'range_abs_max':
            return self._insert_quant_range_abs_max_op(block, idx, var,
                                                       quant_bits)
        elif quant_type == 'moving_average_abs_max':
            return self._insert_quant_moving_average_abs_max_op(
                block, idx, var, quant_bits)

    def _insert_dequant_op(self, block, idx, var, scale, quant_bits):
        """
        Insert a fake_dequantize_max_abs op.
        """
        dequant_var = block.create_var(name=_dequantized_var_name(var.name),
                                       type=var.type,
                                       shape=var.shape,
                                       dtype=var.dtype)
        # insert fake_dequantize_op
        max_range = (1 << (quant_bits - 1)) - 1
        dequant_op = block._insert_op(idx,
                                      type="fake_dequantize_max_abs",
                                      attrs={'max_range': float(max_range)},
                                      inputs={
                                          "X": var,
                                          'Scale': scale
                                      },
                                      outputs={"Out": dequant_var})
        return dequant_var
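
# A hedged end-to-end sketch (illustrative, not from the original file): the
# typical workflow is training_transpile -> train -> freeze_program ->
# convert_to_int8. The network details below (an ``image`` placeholder and a
# single fc layer) are assumptions made for demonstration only.
def _example_quantize_transpiler_flow():
    import paddle.fluid as fluid

    main = fluid.Program()
    startup = fluid.Program()
    with fluid.program_guard(main, startup):
        image = fluid.data(name='image', shape=[None, 784], dtype='float32')
        label = fluid.data(name='label', shape=[None, 1], dtype='int64')
        fc = fluid.layers.fc(input=image, size=10, act='softmax')
        loss = fluid.layers.mean(
            fluid.layers.cross_entropy(input=fc, label=label))
        fluid.optimizer.SGD(learning_rate=0.001).minimize(loss)

    t = QuantizeTranspiler(weight_bits=8,
                           activation_bits=8,
                           activation_quantize_type='range_abs_max')
    # Insert fake quant/dequant ops for simulated-quantization training.
    t.training_transpile(main, startup)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup)
    # ... run training iterations on `main` here ...

    # Freeze a test-mode clone: fold the fake ops into real quantized
    # weights, then store the weights as int8 parameters.
    test_prog = main.clone(for_test=True)
    t.freeze_program(test_prog, place)
    t.convert_to_int8(test_prog, place)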
def ctr_metric_bundle(input, label):
    """
    ctr related metric layer

    This function helps compute the ctr related metrics: RMSE, MAE,
    predicted_ctr, q_value. To compute the final values of these metrics,
    do the following computations using the total instance number:

        MAE = local_abserr / instance number
        RMSE = sqrt(local_sqrerr / instance number)
        predicted_ctr = local_prob / instance number
        q = local_q / instance number

    A sketch of how these local sums become the final metrics follows this
    function. Note that if you are running a distributed job, you should
    all-reduce these metrics and the instance number first.

    Args:
        input(Variable): A floating-point 2D Variable, values are in the
            range [0, 1]. Typically, this Variable indicates the probability
            of each label.
        label(Variable): A 2D int Variable indicating the label of the
            training data. The height is batch size and width is always 1.

    Returns:
        local_sqrerr(Variable): Local sum of squared error
        local_abserr(Variable): Local sum of abs error
        local_prob(Variable): Local sum of predicted ctr
        local_q(Variable): Local sum of q value
        local_pos_num(Variable): Local sum of positive instances
        local_ins_num(Variable): Local instance number

    Examples:
        .. code-block:: python

            import paddle.fluid as fluid
            data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
            label = fluid.layers.data(name="label", shape=[1], dtype="int32")
            predict = fluid.layers.sigmoid(fluid.layers.fc(input=data, size=1))
            ctr_metrics = fluid.contrib.layers.ctr_metric_bundle(input=predict, label=label)
    """
    assert input.shape == label.shape
    helper = LayerHelper("ctr_metric_bundle", **locals())

    local_abserr = helper.create_global_variable(persistable=True,
                                                 dtype='float32',
                                                 shape=[1])
    local_sqrerr = helper.create_global_variable(persistable=True,
                                                 dtype='float32',
                                                 shape=[1])
    local_prob = helper.create_global_variable(persistable=True,
                                               dtype='float32',
                                               shape=[1])
    local_q = helper.create_global_variable(persistable=True,
                                            dtype='float32',
                                            shape=[1])
    local_pos_num = helper.create_global_variable(persistable=True,
                                                  dtype='float32',
                                                  shape=[1])
    local_ins_num = helper.create_global_variable(persistable=True,
                                                  dtype='float32',
                                                  shape=[1])

    tmp_res_elesub = helper.create_global_variable(persistable=False,
                                                   dtype='float32',
                                                   shape=[-1])
    tmp_res_sigmoid = helper.create_global_variable(persistable=False,
                                                    dtype='float32',
                                                    shape=[-1])
    tmp_ones = helper.create_global_variable(persistable=False,
                                             dtype='float32',
                                             shape=[-1])

    batch_prob = helper.create_global_variable(persistable=False,
                                               dtype='float32',
                                               shape=[1])
    batch_abserr = helper.create_global_variable(persistable=False,
                                                 dtype='float32',
                                                 shape=[1])
    batch_sqrerr = helper.create_global_variable(persistable=False,
                                                 dtype='float32',
                                                 shape=[1])
    batch_q = helper.create_global_variable(persistable=False,
                                            dtype='float32',
                                            shape=[1])
    batch_pos_num = helper.create_global_variable(persistable=False,
                                                  dtype='float32',
                                                  shape=[1])
    batch_ins_num = helper.create_global_variable(persistable=False,
                                                  dtype='float32',
                                                  shape=[1])
    for var in [
            local_abserr, batch_abserr, local_sqrerr, batch_sqrerr,
            local_prob, batch_prob, local_q, batch_q, batch_pos_num,
            batch_ins_num, local_pos_num, local_ins_num
    ]:
        helper.set_variable_initializer(var,
                                        Constant(value=0.0, force_cpu=True))

    helper.append_op(type="elementwise_sub",
                     inputs={
                         "X": [input],
                         "Y": [label]
                     },
                     outputs={"Out": [tmp_res_elesub]})

    helper.append_op(type="squared_l2_norm",
                     inputs={"X": [tmp_res_elesub]},
                     outputs={"Out": [batch_sqrerr]})
    helper.append_op(type="elementwise_add",
                     inputs={
                         "X": [batch_sqrerr],
                         "Y": [local_sqrerr]
                     },
                     outputs={"Out": [local_sqrerr]})

    helper.append_op(type="l1_norm",
                     inputs={"X": [tmp_res_elesub]},
outputs={"Out": [batch_abserr]}) helper.append_op(type="elementwise_add", inputs={ "X": [batch_abserr], "Y": [local_abserr] }, outputs={"Out": [local_abserr]}) helper.append_op(type="reduce_sum", inputs={"X": [input]}, outputs={"Out": [batch_prob]}) helper.append_op(type="elementwise_add", inputs={ "X": [batch_prob], "Y": [local_prob] }, outputs={"Out": [local_prob]}) helper.append_op(type="sigmoid", inputs={"X": [input]}, outputs={"Out": [tmp_res_sigmoid]}) helper.append_op(type="reduce_sum", inputs={"X": [tmp_res_sigmoid]}, outputs={"Out": [batch_q]}) helper.append_op(type="elementwise_add", inputs={ "X": [batch_q], "Y": [local_q] }, outputs={"Out": [local_q]}) helper.append_op(type="reduce_sum", inputs={"X": [label]}, outputs={"Out": [batch_pos_num]}) helper.append_op(type="elementwise_add", inputs={ "X": [batch_pos_num], "Y": [local_pos_num] }, outputs={"Out": [local_pos_num]}) helper.append_op(type='fill_constant_batch_size_like', inputs={"Input": label}, outputs={'Out': [tmp_ones]}, attrs={ 'shape': [-1, 1], 'dtype': tmp_ones.dtype, 'value': float(1.0), }) helper.append_op(type="reduce_sum", inputs={"X": [tmp_ones]}, outputs={"Out": [batch_ins_num]}) helper.append_op(type="elementwise_add", inputs={ "X": [batch_ins_num], "Y": [local_ins_num] }, outputs={"Out": [local_ins_num]}) return local_sqrerr, local_abserr, local_prob, local_q, local_pos_num, local_ins_num