def set_program_and_run(self, main_program, case_num):
    with fluid.program_guard(main_program):
        x = [
            fluid.data(name='x0', shape=self.shape, dtype="float32"),
            fluid.data(name='x1', shape=self.shape, dtype="float32"),
            fluid.data(name='x2', shape=self.shape, dtype="float32")
        ]
        for each_x in x:
            each_x.stop_gradient = False
        arr = layers.create_array(dtype="float32")
        for i in range(3):
            idx = layers.array_length(arr)
            arr = layers.array_write(x=x[i], i=idx, array=arr)

        if case_num == 1:
            self.sliced_arr = output = arr[0]
        elif case_num == 2:
            end = fluid.layers.array_length(arr) - 1  # dtype of end is int64
            self.sliced_arr = slice_arr = arr[self.start:end]
            output, _ = fluid.layers.tensor_array_to_tensor(
                slice_arr, axis=self.axis, use_stack=True)
        elif case_num == 3:
            value_int64 = fluid.layers.fill_constant([1], "int64", 2147483648)
            self.sliced_arr = slice_arr = arr[self.start:value_int64]
            output, _ = fluid.layers.tensor_array_to_tensor(
                slice_arr, axis=self.axis, use_stack=True)

        loss = fluid.layers.reduce_sum(output)
        fluid.backward.append_backward(loss)
        g_vars = list(
            map(main_program.global_block().var,
                [each_x.name + "@GRAD" for each_x in x]))
        self.out, self.g_x0, self.g_x1, self.g_x2 = \
            self.exe.run(main_program,
                         feed={'x0': self.data,
                               'x1': self.data,
                               'x2': self.data},
                         fetch_list=[output] + g_vars)
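# A minimal sketch (assuming the Paddle 1.x fluid API used above, all names
# illustrative, not from the original test) of the LoDTensorArray slicing
# semantics this helper exercises: arr[i] reads one element back, while
# arr[a:b] yields a sub-array that tensor_array_to_tensor can stack.
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.data(name='x', shape=[2, 3], dtype='float32')
    arr = layers.create_array(dtype='float32')
    for i in range(3):
        idx = layers.fill_constant(shape=[1], dtype='int64', value=i)
        arr = layers.array_write(x=x, i=idx, array=arr)
    end = layers.array_length(arr) - 1  # Variable end index, as in the test
    # slice off the first two elements and stack them along a new axis 0
    stacked, _ = layers.tensor_array_to_tensor(arr[0:end], axis=0,
                                               use_stack=True)

exe = fluid.Executor(fluid.CPUPlace())
out, = exe.run(main, feed={'x': np.ones([2, 3], dtype='float32')},
               fetch_list=[stacked])  # out.shape == (2, 2, 3)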
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)
    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=50)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores
    return translation_ids, translation_scores
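# sequence_expand(pre_state, pre_score) repeats each row of pre_state so the
# result lines up with pre_score's LoD (one row per live hypothesis). A NumPy
# sketch of that expansion under an assumed LoD, just to show the shape
# change; the real op operates on LoDTensors:
import numpy as np

pre_state = np.array([[0.1, 0.2], [0.3, 0.4]], dtype='float32')  # one row per prefix
lod = [2, 1]  # pre_score keeps 2 hypotheses for the first prefix, 1 for the second
expanded = np.repeat(pre_state, lod, axis=0)  # shape (3, 2): rows 0, 0, 1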
def decoder_decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)
    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the recursive_sequence_lengths of pre_state to be the same
        # as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)
        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
        # calculate accumulated scores after topk to reduce computation cost
        accu_scores = pd.elementwise_add(
            x=pd.log(topk_scores),
            y=pd.reshape(pre_score, shape=[-1]),
            axis=0)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids,
            pre_score,
            topk_indices,
            accu_scores,
            beam_size,
            end_id=10,
            level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        # update the break condition: up to the max length or all candidates
        # of source sentences have ended.
        length_cond = pd.less_than(x=counter, y=array_len)
        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

    # return init_ids, init_scores
    return translation_ids, translation_scores
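# The accumulated-score step above, in isolation: the running log-probability
# of each live hypothesis is broadcast over its k candidate extensions before
# beam_search prunes back to beam_size. A NumPy sketch with toy values
# (shapes are assumptions for illustration):
import numpy as np

pre_score = np.log(np.array([[0.6], [0.4]], dtype='float32'))  # [n, 1] running scores
topk_scores = np.array([[0.5, 0.3], [0.7, 0.2]], dtype='float32')  # [n, k] step probs
# elementwise_add(x=log(topk_scores), y=reshape(pre_score, [-1]), axis=0)
# adds each hypothesis's running score to all k of its candidates:
accu_scores = np.log(topk_scores) + pre_score  # [n, k]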
def true_fn():
    null_array = create_array("float32")
    return null_array
def call(self, global_img_feat, p_img_feat, embedding_fn, words=None):
    # image features
    img_feat = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2,
                         act='tanh')  # [batch, k, hid]
    img_feat_emb = layers.fc(p_img_feat, self.hid_size, num_flatten_dims=2)

    if self.mode == 'eval':
        word = layers.fill_constant_batch_size_like(
            global_img_feat, [-1], dtype='int64',
            value=config.data['start_idx'])
    else:
        words = layers.transpose(words, [1, 0])  # [seq, batch]
        words.stop_gradient = True
    # initialize the LSTM state
    hid, cell = create_zero_state(global_img_feat), create_zero_state(
        global_img_feat)
    # initialize the while-loop variables
    mx = decoder_config['sentence_length'] - 1 if self.mode == 'train' \
        else decoder_config['infer_max_length']
    if self.mode == 'eval':
        mx = decoder_config['infer_max_length']
        while_op_output = layers.create_array('int64')
    else:
        while_op_output = layers.create_array('float32')
    max_step = layers.fill_constant(shape=[1], dtype='int64', value=mx)
    step = layers.fill_constant(shape=[1], dtype='int64', value=0)
    cond = layers.less_than(step, max_step)
    while_op = layers.While(cond)
    with while_op.block():
        if self.mode == 'train':
            st = layers.cast(step, 'int32')
            word = layers.slice(words, axes=[0], starts=st, ends=st + 1)
            word = layers.squeeze(word, [0])
            word.stop_gradient = True
        word_emb = embedding_fn(word)
        # element-wise addition might work better here than concat?
        xt = layers.concat([word_emb, global_img_feat], axis=-1)  # [batch, feat]
        h, c = layers.lstm_unit(xt, hid, cell,
                                param_attr=fluid.ParamAttr('lstm_w'),
                                bias_attr=fluid.ParamAttr('lstm_b'))
        p_word_emb = layers.fc(xt, size=self.hid_size)
        p_hidden = layers.fc(hid, size=self.hid_size)
        sentinel_gate = layers.sigmoid(p_word_emb + p_hidden)  # [batch, hidden]
        sentinel = layers.elementwise_mul(sentinel_gate,
                                          layers.tanh(c))  # [batch, hidden]

        layers.assign(h, hid)
        layers.assign(c, cell)

        k = layers.shape(p_img_feat)[1]
        p_hid = layers.fc(h, self.hid_size, act='tanh')

        # attention: compute alpha over the k image regions plus the sentinel
        hid_emb = layers.fc(p_hid, self.hid_size)  # [batch, hidden]
        exp_hid_emb = layers.expand(layers.unsqueeze(hid_emb, 1),
                                    [1, k + 1, 1])  # [batch, k+1, hidden]
        sentinel_emb = layers.unsqueeze(layers.fc(sentinel, self.hid_size),
                                        axes=1)  # [batch, 1, hidden]
        feat_emb = layers.concat([img_feat_emb, sentinel_emb],
                                 axis=1)  # [batch, k+1, hidden]
        z = layers.tanh(feat_emb + exp_hid_emb)  # [batch, k+1, 1]
        alpha = layers.fc(z, size=1, num_flatten_dims=2,
                          act='softmax')  # [batch, k+1, 1]

        # context vector
        context = layers.concat(
            [img_feat, layers.unsqueeze(sentinel, axes=1)],
            axis=1)  # [batch, k+1, hidden]
        context = layers.elementwise_mul(context, alpha, axis=0)
        context = layers.reduce_mean(context, dim=1)  # [batch, hidden]

        out = layers.fc(context + p_hid, self.hid_size, act='tanh')
        word_pred = weight_tying_fc(out)  # [batch, vocab]

        if self.mode == 'eval':
            next_word = layers.argmax(word_pred, axis=-1)
            layers.assign(next_word, word)
            next_word = layers.cast(next_word, 'float32')
            layers.array_write(next_word, step, array=while_op_output)
        else:
            layers.array_write(word_pred, step, array=while_op_output)

        layers.increment(step)
        layers.less_than(step, max_step, cond=cond)

    if self.mode == 'train':
        output_time_major, _ = layers.tensor_array_to_tensor(
            while_op_output, axis=0, use_stack=True)
        output = layers.transpose(output_time_major, [1, 0, 2])
    else:
        output_time_major = layers.tensor_array_to_tensor(
            while_op_output, axis=0, use_stack=True)[0]
        output = layers.transpose(output_time_major, [1, 0])
    return output
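# create_zero_state is not defined in this snippet; a plausible helper (an
# assumption, not the original) would build a zero initial state whose batch
# size follows a reference tensor:
def create_zero_state(ref):
    # hypothetical hidden width; the real code presumably uses self.hid_size
    return layers.fill_constant_batch_size_like(
        input=ref, shape=[-1, 512], dtype='float32', value=0.0)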
def test_hybrid_parallel_inference_helper_mp1pp2(self):
    nranks = int(os.getenv("PADDLE_TRAINERS_NUM", 1))
    rank = int(os.getenv("PADDLE_TRAINER_ID", 0))
    dev_id = int(os.getenv("FLAGS_selected_gpus", 0))

    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()

    device = "gpu"

    with paddle.static.program_guard(main_program, startup_program):
        with paddle.fluid.device_guard(f'{device}:0'):
            X = paddle.static.data(
                name='X', shape=[None, 2], dtype='float32')

        with paddle.fluid.device_guard(f'{device}:all'):
            max_len = layers.fill_constant(
                shape=[1], dtype="int64", value=2, force_cpu=False, name="n")
            step_idx = layers.fill_constant(
                shape=[1], dtype="int64", value=0, force_cpu=False, name="i")

            data = layers.array_write(X, step_idx)

            cond_int = layers.fill_constant(
                shape=[1],
                dtype="int64",
                value=0,
                force_cpu=False,
                name="cond_int")
            cond = layers.less_than(x=step_idx, y=max_len)
            while_op = layers.While(cond, is_test=True)

        with while_op.block():
            with paddle.fluid.device_guard(f'{device}:all'):
                input = layers.array_read(array=data, i=step_idx)
                layers.increment(x=step_idx, value=1.0, in_place=True)
                layers.array_write(input, i=step_idx, array=data)

            with paddle.fluid.device_guard(f'{device}:0'):
                param_attr = paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Constant(1.0))
                weight1 = paddle.static.create_parameter(
                    shape=[2, 5],
                    dtype='float32',
                    attr=param_attr,
                    is_bias=False)
                hidden1 = paddle.matmul(input, weight1)

            with paddle.fluid.device_guard(f'{device}:1'):
                param_attr = paddle.ParamAttr(
                    initializer=paddle.nn.initializer.Constant(2.0))
                weight2 = paddle.static.create_parameter(
                    shape=[5, 2],
                    dtype='float32',
                    attr=param_attr,
                    is_bias=False)
                hidden2 = paddle.matmul(hidden1, weight2)
                layers.array_write(hidden2, i=step_idx, array=data)

                # update cond and assign to cond_int; we will sync cond_int
                layers.less_than(x=step_idx, y=max_len, cond=cond)
                layers.assign(layers.cast(cond, dtype="int32"), cond_int)

            with paddle.fluid.device_guard(f'{device}:all'):
                # the code below must be at the end of the while block
                # and must run on device:all
                layers.assign(layers.cast(cond_int, dtype='bool'), cond)

        with paddle.fluid.device_guard(f'{device}:all'):
            out = layers.create_array(data.dtype)
            layers.assign(data, out)

        with paddle.fluid.device_guard(f'{device}:all'):
            # use an empty lod_tensor_array to clear lod_tensor_array
            layers.assign(layers.create_array(data.dtype), data)

    helper = HybridParallelInferenceHelper(
        startup_program,
        main_program,
        micro_batch_size=2,
        num_mp=1,
        num_pp=2,
        init_comm=nranks > 1)
    helper.gen_infer_program(
        ['array_write_0.out'], ['cond_int.tmp_0'], debug=True)

    exe = paddle.static.Executor(paddle.CUDAPlace(dev_id))
    exe.run(startup_program)

    for step in range(2):
        init_data = np.random.uniform(
            low=0.0, high=1.0, size=[2, 2]).astype('float32')
        [res] = exe.run(
            main_program, feed={"X": init_data}, fetch_list=[out])
        res_np = numpy_while(init_data)
        assert len(res) == len(res_np)
        for d1, d2 in zip(res, res_np):
            np.testing.assert_allclose(d1, d2)
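# numpy_while is referenced above but not shown; a minimal reference
# implementation consistent with the graph (an assumption, not the original
# helper): weights are constants 1.0 and 2.0, the loop runs max_len = 2
# steps, and every write into the tensor array is kept.
import numpy as np

def numpy_while(x, max_len=2):
    weight1 = np.ones([2, 5], dtype='float32')
    weight2 = 2.0 * np.ones([5, 2], dtype='float32')
    res = [x]
    for _ in range(max_len):
        x = np.matmul(np.matmul(x, weight1), weight2)
        res.append(x)
    return res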
def decoder(self, init_state):
    """
    Implement the decoder in inference mode.
    """
    # define counter variables in the decoding
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=self.max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
    static_count = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # define tensor arrays to save content at each time step, and write
    # initial id, score and state
    state_h_array = pd.create_array('float32')
    pd.array_write(self.h, array=state_h_array, i=counter)
    state_c_array = pd.create_array('float32')
    pd.array_write(self.c, array=state_c_array, i=counter)

    src_indexes = fluid.layers.data(
        name='source_index', shape=[1], dtype='int64', lod_level=1)
    src_index_array = pd.create_array('int64')
    pd.array_write(src_indexes, array=src_index_array, i=counter)

    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = fluid.layers.data(
        name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = fluid.layers.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    encoder_vec_array = pd.create_array('float32')
    pd.array_write(self.encoder_vec, array=encoder_vec_array, i=static_count)
    encoder_vec_full_array = pd.create_array('float32')
    pd.array_write(self.encoder_vec_full, array=encoder_vec_full_array,
                   i=static_count)
    encoder_proj_array = pd.create_array('float32')
    pd.array_write(self.encoder_proj, array=encoder_proj_array, i=static_count)
    event_embedding_array = pd.create_array('float32')
    pd.array_write(self.event_embedding, array=event_embedding_array,
                   i=static_count)

    # define the conditional variable to stop the loop
    cond = pd.less_than(x=counter, y=array_len)
    # define while_op
    while_op = pd.While(cond=cond)
    with while_op.block():
        # obtain input at the present step of the decoder: the id chosen at
        # the previous step, with its corresponding score and state
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_h_state = pd.array_read(array=state_h_array, i=counter)
        pre_c_state = pd.array_read(array=state_c_array, i=counter)
        # note: the score is read at static_count (index 0), not at counter
        pre_score = pd.array_read(array=scores_array, i=static_count)
        _encoder_input_ids = pd.array_read(array=src_index_array,
                                           i=static_count)
        event_embedding = pd.array_read(array=event_embedding_array,
                                        i=static_count)
        encoder_vec = pd.array_read(array=encoder_vec_array, i=static_count)
        encoder_vec_full = pd.array_read(array=encoder_vec_full_array,
                                         i=static_count)
        encoder_proj = pd.array_read(array=encoder_proj_array, i=static_count)

        # computing logic of the decoder, same as in train mode: build the
        # decoder input vector, run the decoding unit, and compute the
        # normalized word probabilities
        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[self.target_dict_dim, self.embedding_dim],
            dtype='float32',
            param_attr=fluid.ParamAttr(name="trg_embedding"))

        att_context = self.simple_attention(encoder_vec, encoder_proj,
                                            pre_h_state)
        prob_c = fluid.layers.sequence_expand_as(pre_score, encoder_vec)
        current_score, current_h, current_c, this_prob_c = self.copy_decoder(
            pre_ids_emb, encoder_vec, encoder_vec_full, encoder_proj,
            _encoder_input_ids, pre_ids, prob_c, att_context, pre_h_state,
            pre_c_state, event_embedding)
        # previous non-copy decoder, kept for reference:
        # decoder_inputs = fluid.layers.concat(
        #     input=[att_context, pre_ids_emb], axis=1)
        # current_h, current_c = self.lstm_step(
        #     decoder_inputs, pre_h_state, pre_c_state, self.decoder_size)
        # current_state_with_lod = pd.lod_reset(x=current_h, y=pre_score)
        # current_score = pd.fc(input=current_state_with_lod,
        #                       size=self.target_dict_dim, act='softmax',
        #                       param_attr=fluid.ParamAttr(name="out_softmax_w"),
        #                       bias_attr=fluid.ParamAttr(name="out_softmax_b"))

        topk_scores, topk_indices = pd.topk(current_score, k=self.beam_size)
        selected_ids, selected_scores = topk_indices, topk_scores
        # a full beam search would accumulate scores and prune instead:
        # accu_scores = pd.elementwise_add(
        #     x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]),
        #     axis=0)
        # selected_ids, selected_scores = pd.beam_search(
        #     pre_ids, pre_score, topk_indices, accu_scores, self.beam_size,
        #     end_id=999999, level=0)

        pd.increment(x=counter, value=1, in_place=True)
        # write the search result and the corresponding hidden state into
        # the tensor arrays
        pd.array_write(current_h, array=state_h_array, i=counter)
        pd.array_write(current_c, array=state_c_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        # update the condition to stop the loop
        length_cond = pd.less_than(x=counter, y=array_len)
        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

    # translation_ids, translation_scores = pd.beam_search_decode(
    #     ids=ids_array, scores=scores_array,
    #     beam_size=self.beam_size, end_id=self.end_id)
    translation_ids, translation_ids_index = pd.tensor_array_to_tensor(
        ids_array, axis=1)
    translation_scores, translation_scores_index = pd.tensor_array_to_tensor(
        scores_array, axis=1)

    return translation_ids, translation_scores
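# tensor_array_to_tensor with axis=1 (default use_stack=False) concatenates
# the per-step [batch, 1] tensors along axis 1 rather than stacking a new
# axis. A NumPy sketch of the same layout change, with toy ids:
import numpy as np

steps = [np.array([[1], [2]]), np.array([[3], [4]])]  # per-step ids, [batch, 1]
ids = np.concatenate(steps, axis=1)  # [[1, 3], [2, 4]] -> one row per sequence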
def gru_attention_infer(self, decoder_boot, max_length, char_num,
                        word_vector_dim, encoded_vector, encoded_proj,
                        decoder_size):
    init_state = decoder_boot
    beam_size = 1
    array_len = layers.fill_constant(
        shape=[1], dtype='int64', value=max_length)
    counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = layers.create_array('float32')
    layers.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = layers.create_array('int64')
    scores_array = layers.create_array('float32')
    rois_shape = layers.shape(init_state)
    batch_size = layers.slice(
        rois_shape, axes=[0], starts=[0], ends=[1]) + 1
    lod_level = layers.range(
        start=0, end=batch_size, step=1, dtype=batch_size.dtype)

    init_ids = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=0, dtype='int64')
    init_ids = layers.lod_reset(init_ids, lod_level)
    init_ids = layers.lod_append(init_ids, lod_level)

    init_scores = layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], value=1, dtype='float32')
    init_scores = layers.lod_reset(init_scores, init_ids)
    layers.array_write(init_ids, array=ids_array, i=counter)
    layers.array_write(init_scores, array=scores_array, i=counter)

    full_ids = fluid.layers.fill_constant_batch_size_like(
        input=init_state, shape=[-1, 1], dtype='int64', value=1)

    cond = layers.less_than(x=counter, y=array_len)
    while_op = layers.While(cond=cond)
    with while_op.block():
        pre_ids = layers.array_read(array=ids_array, i=counter)
        pre_state = layers.array_read(array=state_array, i=counter)
        pre_score = layers.array_read(array=scores_array, i=counter)

        pre_ids_emb = layers.embedding(
            input=pre_ids,
            size=[char_num, word_vector_dim],
            dtype='float32')

        context = self.simple_attention(encoded_vector, encoded_proj,
                                        pre_state, decoder_size)

        # expand the recursive_sequence_lengths of pre_state
        # to be the same as pre_score
        pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
        context_expanded = layers.sequence_expand(context, pre_score)

        fc_1 = layers.fc(input=context_expanded,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc1")
        fc_2 = layers.fc(input=pre_ids_emb,
                         size=decoder_size * 3,
                         bias_attr=False,
                         name="rnn_fc2")
        decoder_inputs = fc_1 + fc_2
        current_state, _, _ = layers.gru_unit(
            input=decoder_inputs,
            hidden=pre_state_expanded,
            size=decoder_size * 3)
        current_state_with_lod = layers.lod_reset(
            x=current_state, y=pre_score)

        # use score to do beam search
        current_score = layers.fc(input=current_state_with_lod,
                                  size=char_num,
                                  bias_attr=True,
                                  act='softmax',
                                  name="rnn_out_fc")
        topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

        new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
        fluid.layers.assign(new_ids, full_ids)

        layers.increment(x=counter, value=1, in_place=True)

        # update the memories
        layers.array_write(current_state, array=state_array, i=counter)
        layers.array_write(topk_indices, array=ids_array, i=counter)
        layers.array_write(topk_scores, array=scores_array, i=counter)

        # update the break condition:
        # up to the max length or all candidates of
        # source sentences have ended.
        length_cond = layers.less_than(x=counter, y=array_len)
        finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
        layers.logical_and(x=length_cond, y=finish_cond, out=cond)
    return full_ids
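# With beam_size = 1, the loop above is greedy decoding: topk(k=1) keeps the
# argmax id at every step and full_ids grows by concat + assign. A NumPy
# sketch of that accumulation (toy scores, assumed 4-character vocabulary):
import numpy as np

np.random.seed(0)
full_ids = np.ones([2, 1], dtype='int64')  # matches the initial fill value 1
for scores in [np.random.rand(2, 4), np.random.rand(2, 4)]:
    topk_indices = scores.argmax(axis=1)[:, None].astype('int64')
    full_ids = np.concatenate([full_ids, topk_indices], axis=1)
# full_ids now holds [batch, 1 + steps] decoded ids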
def decode(context, is_sparse):
    init_state = context
    array_len = pd.fill_constant(shape=[1], dtype='int64', value=max_length)
    counter = pd.zeros(shape=[1], dtype='int64', force_cpu=True)

    # fill the first element with init_state
    state_array = pd.create_array('float32')
    pd.array_write(init_state, array=state_array, i=counter)

    # ids, scores as memory
    ids_array = pd.create_array('int64')
    scores_array = pd.create_array('float32')

    init_ids = pd.data(name="init_ids", shape=[1], dtype="int64", lod_level=2)
    init_scores = pd.data(
        name="init_scores", shape=[1], dtype="float32", lod_level=2)

    pd.array_write(init_ids, array=ids_array, i=counter)
    pd.array_write(init_scores, array=scores_array, i=counter)

    cond = pd.less_than(x=counter, y=array_len)
    while_op = pd.While(cond=cond)
    with while_op.block():
        pre_ids = pd.array_read(array=ids_array, i=counter)
        pre_state = pd.array_read(array=state_array, i=counter)
        pre_score = pd.array_read(array=scores_array, i=counter)

        # expand the lod of pre_state to be the same as pre_score
        pre_state_expanded = pd.sequence_expand(pre_state, pre_score)

        pre_ids_emb = pd.embedding(
            input=pre_ids,
            size=[dict_size, word_dim],
            dtype='float32',
            is_sparse=is_sparse)

        # use rnn unit to update rnn
        current_state = pd.fc(input=[pre_state_expanded, pre_ids_emb],
                              size=decoder_size,
                              act='tanh')
        current_state_with_lod = pd.lod_reset(x=current_state, y=pre_score)
        # use score to do beam search
        current_score = pd.fc(input=current_state_with_lod,
                              size=target_dict_dim,
                              act='softmax')
        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
        selected_ids, selected_scores = pd.beam_search(
            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)

        pd.increment(x=counter, value=1, in_place=True)

        # update the memories
        pd.array_write(current_state, array=state_array, i=counter)
        pd.array_write(selected_ids, array=ids_array, i=counter)
        pd.array_write(selected_scores, array=scores_array, i=counter)

        pd.less_than(x=counter, y=array_len, cond=cond)

    translation_ids, translation_scores = pd.beam_search_decode(
        ids=ids_array, scores=scores_array)

    # return init_ids, init_scores
    return translation_ids, translation_scores
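# The counter/While pattern shared by all of these decoders, reduced to its
# minimal form (a sketch against the fluid 1.x static-graph API; values are
# toy): the loop body must rewrite cond in place to drive the next iteration.
import paddle.fluid as fluid
import paddle.fluid.layers as pd

main = fluid.Program()
with fluid.program_guard(main):
    i = pd.zeros(shape=[1], dtype='int64', force_cpu=True)
    n = pd.fill_constant(shape=[1], dtype='int64', value=5)
    cond = pd.less_than(x=i, y=n)
    while_op = pd.While(cond=cond)
    with while_op.block():
        pd.increment(x=i, value=1, in_place=True)
        pd.less_than(x=i, y=n, cond=cond)  # in-place condition update

exe = fluid.Executor(fluid.CPUPlace())
res, = exe.run(main, fetch_list=[i])  # res == [5]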