def net(self, input, is_infer=False):
    """network"""
    text = input[0]
    pos_tag = input[1]
    neg_tag = input[2]

    text_emb = fluid.embedding(
        input=text, size=[self.vocab_text_size, self.emb_dim], param_attr="text_emb")
    text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
    pos_tag_emb = fluid.embedding(
        input=pos_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
    pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
    neg_tag_emb = fluid.embedding(
        input=neg_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
    neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])

    conv_1d = fluid.nets.sequence_conv_pool(
        input=text_emb,
        num_filters=self.hid_dim,
        filter_size=self.win_size,
        act="tanh",
        pool_type="max",
        param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d, size=self.emb_dim, param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg, new_dim=self.neg_size)
    # choose the max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    self._cost = avg_cost

    if is_infer:
        self._infer_results["correct"] = correct
        self._infer_results["cos_pos"] = cos_pos
    else:
        self._metrics["correct"] = correct
        self._metrics["cos_pos"] = cos_pos
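# A minimal NumPy sketch (illustration only, not part of the model) of the
# "choose the max negative cosine" step above: the cosines against neg_size
# negative tags are reshaped to [batch, neg_size] and reduced to the hardest
# negative per example.
import numpy as np

def hardest_negative_cosine(cos_neg_flat, neg_size):
    """cos_neg_flat: [batch * neg_size, 1] cosines; returns [batch, 1]."""
    cos_all = cos_neg_flat.reshape(-1, neg_size)  # mirrors sequence_reshape
    return cos_all.max(axis=1, keepdims=True)     # mirrors reduce_max(dim=1)

flat = np.array([[0.2], [0.7], [-0.1], [0.4]], dtype=np.float32)  # batch=2, neg_size=2
print(hardest_negative_cosine(flat, neg_size=2))  # [[0.7], [0.4]]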
def network(vocab_text_size, vocab_tag_size, emb_dim=10, hid_dim=1000,
            win_size=5, margin=0.1, neg_size=5):
    """network definition"""
    text = io.data(name="text", shape=[1], lod_level=1, dtype='int64')
    pos_tag = io.data(name="pos_tag", shape=[1], lod_level=1, dtype='int64')
    neg_tag = io.data(name="neg_tag", shape=[1], lod_level=1, dtype='int64')

    text_emb = nn.embedding(
        input=text, size=[vocab_text_size, emb_dim], param_attr="text_emb")
    pos_tag_emb = nn.embedding(
        input=pos_tag, size=[vocab_tag_size, emb_dim], param_attr="tag_emb")
    neg_tag_emb = nn.embedding(
        input=neg_tag, size=[vocab_tag_size, emb_dim], param_attr="tag_emb")

    conv_1d = fluid.nets.sequence_conv_pool(
        input=text_emb,
        num_filters=hid_dim,
        filter_size=win_size,
        act="tanh",
        pool_type="max",
        param_attr="cnn")
    text_hid = fluid.layers.fc(input=conv_1d, size=emb_dim, param_attr="text_hid")
    cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
    mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
    mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
    cos_neg_all = fluid.layers.sequence_reshape(input=mul_cos_neg, new_dim=neg_size)
    # choose the max negative cosine
    cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
    # calculate hinge loss
    loss_part1 = nn.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=margin, dtype='float32'),
        cos_pos)
    loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
    loss_part3 = nn.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = nn.mean(loss_part3)
    less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
    correct = nn.reduce_sum(less)
    return avg_cost, correct, cos_pos
def forward(self, pos, neg):
    loss_part1 = fluid.layers.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        pos)
    loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
    loss_part3 = fluid.layers.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    return loss_part3
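# Numerically, the three-step graph above is just the pairwise hinge loss
# max(0, margin - pos + neg); the fill_constant_batch_size_like calls only
# materialize the `margin` and `0.0` constants with the runtime batch size.
# A hedged NumPy sketch:
import numpy as np

def pairwise_hinge(pos, neg, margin=0.1):
    return np.maximum(0.0, margin - pos + neg)

pos = np.array([[0.9], [0.3]], dtype=np.float32)
neg = np.array([[0.2], [0.5]], dtype=np.float32)
print(pairwise_hinge(pos, neg))  # [[0.], [0.3]] -- only the misranked pair is penalized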
def _select_table(condition, inputs, table_enc, table_len, table_mask_by_col,
                  ptr_net, grammar, name=None):
    """select_table.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. max_len is
                           always 1 during inference.
        table_enc (TYPE): NULL
        table_len (TYPE): NULL
        table_mask_by_col (Variable): NULL
        ptr_net (TYPE): NULL
        grammar (TYPE): NULL
        name (str): NULL

    Returns: TODO

    Raises: NULL
    """
    condition = layers.cast(condition, dtype='float32')

    table_mask_by_len = layers.sequence_mask(
        table_len, maxlen=grammar.MAX_TABLE, dtype='float32')
    table_mask_by_len = layers.reshape(table_mask_by_len, [-1, grammar.MAX_TABLE])
    table_mask_by_col = layers.reshape(table_mask_by_col, [-1, grammar.MAX_TABLE])
    table_mask = layers.elementwise_mul(table_mask_by_len, table_mask_by_col)
    predicts = ptr_net.forward(inputs, table_enc, table_mask)

    zeros_l = tensor.fill_constant_batch_size_like(
        predicts, shape=[-1, grammar.grammar_size], dtype='float32', value=-INF)
    zeros_r = tensor.fill_constant_batch_size_like(
        predicts,
        shape=[-1, grammar.MAX_COLUMN + grammar.MAX_VALUE],
        dtype='float32',
        value=-INF)
    final_output = tensor.concat([zeros_l, predicts, zeros_r], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    return true_final_output
def _select_column(condition, inputs, column_enc, column_len, ptr_net, grammar,
                   column2table_mask, name=None):
    """select_column.

    Args:
        condition (TYPE): NULL
        inputs (Variable): shape = [batch_size, max_len, hidden_size]. max_len is
                           always 1 during inference.
        column_enc (TYPE): NULL
        column_len (TYPE): NULL
        ptr_net (TYPE): NULL
        grammar (TYPE): NULL
        column2table_mask (Variable): NULL
        name (str): NULL

    Returns: TODO

    Raises: NULL
    """
    condition = layers.cast(condition, dtype='float32')

    column_mask = layers.sequence_mask(
        column_len, maxlen=grammar.MAX_COLUMN, dtype='float32')
    column_mask = layers.reshape(column_mask, [-1, grammar.MAX_COLUMN])
    predicts = ptr_net.forward(inputs, column_enc, column_mask)

    pred_ids = layers.argmax(predicts, axis=-1)
    valid_table_mask = nn_utils.batch_gather(column2table_mask, pred_ids)

    ## concat zeros to vocab size
    zeros_l = tensor.fill_constant_batch_size_like(
        predicts,
        shape=[-1, grammar.grammar_size + grammar.MAX_TABLE],
        dtype='float32',
        value=-INF)
    zeros_r = tensor.fill_constant_batch_size_like(
        predicts, shape=[-1, grammar.MAX_VALUE], dtype='float32', value=-INF)
    final_output = tensor.concat([zeros_l, predicts, zeros_r], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    true_valid_table_mask = layers.elementwise_mul(valid_table_mask, condition, axis=0)
    return true_final_output, true_valid_table_mask
def entropy(self):
    r"""Shannon entropy in nats.

    The entropy is

    .. math::

        entropy(\sigma) = 0.5 \log (2 \pi e \sigma^2)

    In the above equation:

    * :math:`scale = \sigma`: is the std.

    Returns:
        Tensor: Shannon entropy of normal distribution. The data type is float32.
    """
    name = self.name + '_entropy'
    batch_shape = list((self.loc + self.scale).shape)
    zero_tmp = tensor.fill_constant_batch_size_like(
        self.loc + self.scale, batch_shape, self.dtype, 0.)
    return elementwise_add(
        0.5 + zero_tmp,
        0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)),
        name=name)
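# Sanity check (plain Python, outside the graph) that the code path above,
# 0.5 + 0.5*log(2*pi) + log(sigma), matches the closed form in the docstring,
# 0.5*log(2*pi*e*sigma^2):
import math

sigma = 2.5
lhs = 0.5 + 0.5 * math.log(2 * math.pi) + math.log(sigma)
rhs = 0.5 * math.log(2 * math.pi * math.e * sigma ** 2)
assert abs(lhs - rhs) < 1e-12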
def _check_finished(decoder, next_inputs, finished, outputs_array):
    """Check finished instances by next_inputs.action, update the finished tag,
    and write END to the outputs of newly finished instances.

    Args:
        decoder (TYPE): NULL
        next_inputs (TYPE): NULL
        finished (TYPE): NULL
        outputs_array (TYPE): NULL

    Returns: TODO

    Raises: NULL
    """
    act_stop = tensor.fill_constant_batch_size_like(
        next_inputs.action,
        shape=next_inputs.action.shape,
        value=decoder._grammar.ACTION_STOP,
        dtype='int64')
    new_finished = layers.logical_and(
        layers.equal(next_inputs.action, act_stop),
        layers.logical_not(finished))

    end_token_id = tensor.fill_constant_batch_size_like(
        outputs_array.data,
        shape=[-1],
        value=decoder._grammar.END,
        dtype=outputs_array.data.dtype)
    out_data_tmp, out_pos_tmp = data_structure.Array.push(
        outputs_array, end_token_id, in_place=False)
    new_data, new_pos = nn_utils.ifelse(
        new_finished,
        [out_data_tmp, out_pos_tmp],
        [outputs_array.data, outputs_array.pos])

    layers.assign(new_data, outputs_array.data)
    layers.assign(new_pos, outputs_array.pos)
    layers.logical_or(finished, new_finished, out=finished)
def sample(self, shape, seed=0):
    """Generate samples of the specified shape.

    Args:
        shape (list): 1D `int32`. Shape of the generated samples.
        seed (int): Python integer number.

    Returns:
        Tensor: A tensor with prepended dimensions shape. The data type is float32.
    """
    if not _non_static_mode():
        check_type(shape, 'shape', (list), 'sample')
        check_type(seed, 'seed', (int), 'sample')

    name = self.name + '_sample'
    batch_shape = list((self.low + self.high).shape)
    if self.batch_size_unknown:
        output_shape = shape + batch_shape
        zero_tmp = tensor.fill_constant_batch_size_like(
            self.low + self.high, batch_shape + shape, self.dtype, 0.)
        uniform_random_tmp = nn.uniform_random_batch_size_like(
            zero_tmp, zero_tmp.shape, dtype=self.dtype, min=0., max=1., seed=seed)
        zero_tmp_reshape = nn.reshape(zero_tmp, output_shape)
        uniform_random_tmp_reshape = nn.reshape(uniform_random_tmp, output_shape)
        output = uniform_random_tmp_reshape * (
            zero_tmp_reshape + self.high - self.low)
        output = elementwise_add(output, self.low, name=name)
        return output
    else:
        output_shape = shape + batch_shape
        output = nn.uniform_random(
            output_shape, dtype=self.dtype, min=0., max=1., seed=seed) * (
                tensor.zeros(output_shape, dtype=self.dtype) +
                (self.high - self.low))
        output = elementwise_add(output, self.low, name=name)
        if self.all_arg_is_float:
            return nn.reshape(output, shape, name=name)
        else:
            return output
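# Both branches above implement the same affine reparameterization,
# sample = low + (high - low) * U with U ~ Uniform(0, 1); a NumPy sketch
# (illustrative, ignoring the batch-shape bookkeeping):
import numpy as np

def sample_uniform(low, high, shape, seed=0):
    u = np.random.default_rng(seed).random(shape)  # U ~ Uniform(0, 1)
    return low + (high - low) * u                  # shift/scale into [low, high)

print(sample_uniform(-1.0, 1.0, (2, 3)))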
def sample(self, shape, seed=0):
    """Generate samples of the specified shape.

    Args:
        shape (list): 1D `int32`. Shape of the generated samples.
        seed (int): Python integer number.

    Returns:
        Tensor: A tensor with prepended dimensions shape. The data type is float32.
    """
    if not _non_static_mode():
        check_type(shape, 'shape', (list), 'sample')
        check_type(seed, 'seed', (int), 'sample')

    batch_shape = list((self.loc + self.scale).shape)
    name = self.name + '_sample'

    if self.batch_size_unknown:
        output_shape = shape + batch_shape
        zero_tmp = tensor.fill_constant_batch_size_like(
            self.loc + self.scale, batch_shape + shape, self.dtype, 0.)
        zero_tmp_reshape = nn.reshape(zero_tmp, output_shape)
        zero_tmp_shape = nn.shape(zero_tmp_reshape)
        normal_random_tmp = nn.gaussian_random(
            zero_tmp_shape, mean=0., std=1., seed=seed, dtype=self.dtype)
        output = normal_random_tmp * (zero_tmp_reshape + self.scale)
        output = elementwise_add(output, self.loc, name=name)
        return output
    else:
        output_shape = shape + batch_shape
        output = nn.gaussian_random(
            output_shape, mean=0., std=1., seed=seed, dtype=self.dtype) * \
            (tensor.zeros(output_shape, dtype=self.dtype) + self.scale)
        output = elementwise_add(output, self.loc, name=name)
        if self.all_arg_is_float:
            return nn.reshape(output, shape, name=name)
        else:
            return output
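# The Normal sampler uses the analogous reparameterization,
# sample = loc + scale * z with z ~ N(0, 1); sketched in NumPy:
import numpy as np

def sample_normal(loc, scale, shape, seed=0):
    z = np.random.default_rng(seed).standard_normal(shape)  # z ~ N(0, 1)
    return loc + scale * z

print(sample_normal(0.5, 2.0, (2, 3)))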
def _select_value(condition, inputs, value_enc, value_len, ptr_net, grammar,
                  name=None):
    """select_value.

    Args:
        condition (TYPE): NULL
        inputs (TYPE): NULL
        value_enc (TYPE): NULL
        value_len (TYPE): NULL
        ptr_net (TYPE): NULL
        grammar (TYPE): NULL
        name (str): NULL

    Returns: TODO

    Raises: NULL
    """
    condition = layers.cast(condition, dtype='float32')

    value_mask = layers.sequence_mask(
        value_len, maxlen=grammar.MAX_VALUE, dtype='float32')
    value_mask = layers.reshape(value_mask, [-1, grammar.MAX_VALUE])
    predicts = ptr_net.forward(inputs, value_enc, value_mask)

    ## concat zeros to vocab size
    zeros_l = tensor.fill_constant_batch_size_like(
        predicts,
        shape=[-1, grammar.grammar_size + grammar.MAX_TABLE + grammar.MAX_COLUMN],
        dtype='float32',
        value=-INF)
    final_output = tensor.concat([zeros_l, predicts], axis=-1)
    true_final_output = layers.elementwise_mul(final_output, condition, axis=0)
    return true_final_output
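# How _select_table, _select_column and _select_value fit together (hedged
# sketch with toy sizes): each pointer network scores only its own candidates,
# and the -INF padding places those scores into a fixed slice of one global
# output vocabulary laid out as [grammar | table | column | value], so an
# argmax or softmax over the concatenation can never pick a padded position.
import numpy as np

INF = 1e9
GRAMMAR_SIZE, MAX_TABLE, MAX_COLUMN, MAX_VALUE = 4, 3, 3, 2  # toy sizes

def place_value_scores(predicts):
    """predicts: [batch, MAX_VALUE] -> [batch, global_vocab], -INF elsewhere."""
    pad = np.full((predicts.shape[0], GRAMMAR_SIZE + MAX_TABLE + MAX_COLUMN),
                  -INF, dtype=np.float32)
    return np.concatenate([pad, predicts], axis=-1)

out = place_value_scores(np.array([[0.3, 0.7]], dtype=np.float32))
print(out.shape, out.argmax(axis=-1))  # (1, 12) [11] -- lands in the value slice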
def net(self, input, is_infer=False):
    factory = SimpleEncoderFactory()
    self.q_slots = self._sparse_data_var[0:1]
    self.query_encoders = [
        factory.create(self.query_encoder, self.query_encode_dim)
        for _ in self.q_slots
    ]
    q_embs = [
        fluid.embedding(input=query, size=self.emb_shape, param_attr="emb")
        for query in self.q_slots
    ]
    # encode each embedding field with encoder
    q_encodes = [
        self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
    ]
    # concat multi view for query, pos_title, neg_title
    q_concat = fluid.layers.concat(q_encodes)
    # projection of hidden layer
    q_hid = fluid.layers.fc(q_concat,
                            size=self.hidden_size,
                            param_attr='q_fc.w',
                            bias_attr='q_fc.b')

    self.pt_slots = self._sparse_data_var[1:2]
    self.title_encoders = [
        factory.create(self.title_encoder, self.title_encode_dim)
    ]
    pt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in self.pt_slots
    ]
    pt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
    ]
    pt_concat = fluid.layers.concat(pt_encodes)
    pt_hid = fluid.layers.fc(pt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')
    # cosine of hidden layers
    cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)

    if is_infer:
        self._infer_results['query_pt_sim'] = cos_pos
        return

    self.nt_slots = self._sparse_data_var[2:3]
    nt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in self.nt_slots
    ]
    nt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
    ]
    nt_concat = fluid.layers.concat(nt_encodes)
    nt_hid = fluid.layers.fc(nt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')
    cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)

    # pairwise hinge_loss
    loss_part1 = fluid.layers.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
    loss_part3 = fluid.layers.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    self._cost = fluid.layers.mean(loss_part3)

    self.acc = self.get_acc(cos_neg, cos_pos)
    self._metrics["loss"] = self._cost
    self._metrics["acc"] = self.acc
def net(self):
    q_embs = [
        fluid.embedding(input=query, size=self.emb_shape, param_attr="emb")
        for query in self.q_slots
    ]
    pt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in self.pt_slots
    ]
    nt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in self.nt_slots
    ]

    # encode each embedding field with encoder
    q_encodes = [
        self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
    ]
    pt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
    ]
    nt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
    ]

    # concat multi view for query, pos_title, neg_title
    q_concat = fluid.layers.concat(q_encodes)
    pt_concat = fluid.layers.concat(pt_encodes)
    nt_concat = fluid.layers.concat(nt_encodes)

    # projection of hidden layer
    q_hid = fluid.layers.fc(q_concat,
                            size=self.hidden_size,
                            param_attr='q_fc.w',
                            bias_attr='q_fc.b')
    pt_hid = fluid.layers.fc(pt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')
    nt_hid = fluid.layers.fc(nt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')

    # cosine of hidden layers
    cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
    cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)

    # pairwise hinge_loss
    loss_part1 = fluid.layers.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
    loss_part3 = fluid.layers.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    self.avg_cost = fluid.layers.mean(loss_part3)
    self.acc = self.get_acc(cos_neg, cos_pos)
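# Note on the network above: pt_hid and nt_hid are projected by the SAME
# parameters because both fc calls name them 't_fc.w'/'t_fc.b'; in legacy
# paddle.fluid, layers that reference an identical parameter name share one
# variable. A minimal sketch of the idiom (assumes paddle.fluid 1.x is
# available; the names here are illustrative, not from the model):
import paddle.fluid as fluid

x1 = fluid.data(name="x1", shape=[None, 8], dtype="float32")
x2 = fluid.data(name="x2", shape=[None, 8], dtype="float32")
# two fc calls, one shared weight matrix and bias
h1 = fluid.layers.fc(x1, size=4, param_attr="shared.w", bias_attr="shared.b")
h2 = fluid.layers.fc(x2, size=4, param_attr="shared.w", bias_attr="shared.b")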
def decode_with_grammar(decoder, inits, decode_vocab, max_step_num, **kwargs):
    """A modification of paddle.fluid.layers.dynamic_decode(...).

    Dynamic decoding performs :code:`decoder.step()` repeatedly until the
    returned Tensor indicating finished status contains all True values or the
    number of decoding steps reaches :attr:`max_step_num`.

    :code:`decoder.initialize()` would be called once before the decoding loop.
    If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()`
    would be called once after the decoding loop.

    Args:
        decoder(Decoder): An instance of `Decoder`.
        inits(tuple): Argument passed to `decoder.initialize`.
        decode_vocab(DecoderDynamicVocab): namedtuple(table, table_len, column,
            column_len, value, value_len)
        max_step_num(int): The maximum number of steps.
        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states)` ) including the final
            outputs and states, both are Tensor or nested structure of Tensor.
            `final_outputs` has the same structure and data types as
            :code:`decoder.output_dtype`, and each Tensor in `final_outputs` is the
            stack of all decoding steps' outputs, which might be revised by
            :code:`decoder.finalize`. `final_states` is the counterpart at the last
            time step of initial states returned by :code:`decoder.initialize`,
            thus has the same structure with it and has tensors with same shapes
            and data types.
    """
    step_cnt = tensor.fill_constant(shape=[1], dtype="int64", value=1)
    max_step_num_tensor = tensor.fill_constant(
        shape=[1], dtype="int64", value=max_step_num - 2)

    # shape = [batch_size, beam_size, ...]
    initial_inputs, initial_states, initial_finished = decoder.initialize(
        inits, decode_vocab)
    global_inputs, global_states, global_finished = (
        initial_inputs, initial_states, initial_finished)
    inputs = initial_inputs
    states = initial_states

    # buffers that hold the decoded outputs
    outputs_arr_data = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num],
        dtype=decoder.output_dtype.predicted_ids,
        value=0)
    outputs_arr_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    outputs_array = data_structure.ArrayData(
        decoder.merge_batch_beams(outputs_arr_data),
        decoder.merge_batch_beams(outputs_arr_pos))

    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64")

    # constraint data structures for grammar-guided decoding
    grammar_stack_dat = tensor.fill_constant_batch_size_like(
        inputs.input,
        shape=[-1, decoder.beam_size, max_step_num * STACK_EXPAND_TIMES],
        dtype='int64',
        value=0)
    grammar_stack_pos = tensor.fill_constant_batch_size_like(
        inputs.input, shape=[-1, decoder.beam_size, 1], dtype='int64', value=0)
    grammar_stack = data_structure.StackData(
        decoder.merge_batch_beams(grammar_stack_dat),
        decoder.merge_batch_beams(grammar_stack_pos))

    ############ decode in a loop until every instance is finished ############
    # finished check: via global_finished/next_finished && max_step_num
    cond = layers.logical_not((layers.reduce_all(initial_finished)))
    while_op = layers.While(cond)
    with while_op.block():
        # step_outputs --> OutputWrapper
        # next_states  --> StateWrapper
        # next_inputs  --> DecoderInputsWrapper
        step_outputs, next_states, next_inputs = decoder.step(
            inputs, states, **kwargs)
        predicted_ids = step_outputs.predicted_ids
        _save_predict_output(outputs_array, predicted_ids, next_states.finished)

        pred_gmr_type = decoder.grammar_type(predicted_ids)
        cond_type_leaf = layers.equal(pred_gmr_type, decoder.GMR_TYPE.LEAF)
        cond_type_midd = layers.equal(pred_gmr_type, decoder.GMR_TYPE.MID)

        _process_type_leaf(cond_type_leaf, decoder, grammar_stack, next_inputs,
                           next_states.finished)
        _process_type_midd(cond_type_midd, decoder, grammar_stack, next_inputs,
                           predicted_ids)

        ##next_sequence_lengths = layers.elementwise_add(sequence_lengths,
        ##        tensor.cast(layers.logical_not(global_finished), sequence_lengths.dtype))

        _check_finished(decoder, next_inputs, next_states.finished, outputs_array)

        layers.utils.map_structure(tensor.assign, next_inputs, global_inputs)
        layers.utils.map_structure(tensor.assign, next_states, global_states)
        tensor.assign(next_states.finished, global_finished)
        ##tensor.assign(next_sequence_lengths, sequence_lengths)

        # update the loop condition
        layers.increment(x=step_cnt, value=1.0, in_place=True)
        layers.logical_and(
            layers.logical_not(layers.reduce_all(next_states.finished)),
            layers.less_equal(step_cnt, max_step_num_tensor), cond)

    final_outputs = outputs_array.data
    final_states = global_states

    final_outputs, final_states = decoder.finalize(final_outputs, global_states,
                                                   sequence_lengths)
    return final_outputs, final_states
def train_net(self):
    # input fields for query, pos_title, neg_title
    q_slots = [
        fluid.data(
            name="q%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
        for i in range(len(self.query_encoders))
    ]
    pt_slots = [
        fluid.data(
            name="pt%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
        for i in range(len(self.title_encoders))
    ]
    nt_slots = [
        fluid.data(
            name="nt%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
        for i in range(len(self.title_encoders))
    ]

    # lookup embedding for each slot
    q_embs = [
        fluid.embedding(input=query, size=self.emb_shape, param_attr="emb")
        for query in q_slots
    ]
    pt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in pt_slots
    ]
    nt_embs = [
        fluid.embedding(input=title, size=self.emb_shape, param_attr="emb")
        for title in nt_slots
    ]

    # encode each embedding field with encoder
    q_encodes = [
        self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
    ]
    pt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
    ]
    nt_encodes = [
        self.title_encoders[i].forward(emb) for i, emb in enumerate(nt_embs)
    ]

    # concat multi view for query, pos_title, neg_title
    q_concat = fluid.layers.concat(q_encodes)
    pt_concat = fluid.layers.concat(pt_encodes)
    nt_concat = fluid.layers.concat(nt_encodes)

    # projection of hidden layer
    q_hid = fluid.layers.fc(q_concat,
                            size=self.hidden_size,
                            param_attr='q_fc.w',
                            bias_attr='q_fc.b')
    pt_hid = fluid.layers.fc(pt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')
    nt_hid = fluid.layers.fc(nt_concat,
                             size=self.hidden_size,
                             param_attr='t_fc.w',
                             bias_attr='t_fc.b')

    # cosine of hidden layers
    cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
    cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)

    # pairwise hinge_loss
    loss_part1 = fluid.layers.elementwise_sub(
        tensor.fill_constant_batch_size_like(
            input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
        cos_pos)
    loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
    loss_part3 = fluid.layers.elementwise_max(
        tensor.fill_constant_batch_size_like(
            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
        loss_part2)
    avg_cost = fluid.layers.mean(loss_part3)

    correct = self.get_correct(cos_neg, cos_pos)

    return q_slots + pt_slots + nt_slots, avg_cost, correct