def __call__(self, decoder_initial_states, encoder_output,
             encoder_padding_mask, **kwargs):
    output_layer = kwargs.pop("output_layer", None)
    if self.decoding_strategy == "train_greedy":
        # for teacher-forcing MLE pre-training
        helper = layers.TrainingHelper(**kwargs)
    elif self.decoding_strategy == "infer_sample":
        helper = layers.SampleEmbeddingHelper(**kwargs)
    elif self.decoding_strategy == "infer_greedy":
        helper = layers.GreedyEmbeddingHelper(**kwargs)

    if self.decoding_strategy == "beam_search":
        beam_size = kwargs.get("beam_size", 4)
        # tile encoder-side tensors from [batch_size, ...] to
        # [batch_size * beam_size, ...] so every beam can attend to them
        encoder_output = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output, beam_size)
        encoder_padding_mask = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_padding_mask, beam_size)
        decoder = layers.BeamSearchDecoder(
            cell=self.decoder_cell, output_fn=output_layer, **kwargs)
    else:
        decoder = layers.BasicDecoder(
            self.decoder_cell, helper, output_fn=output_layer)

    (decoder_output, decoder_final_state,
     dec_seq_lengths) = layers.dynamic_decode(
         decoder,
         inits=decoder_initial_states,
         max_step_num=self.max_decoding_length,
         encoder_output=encoder_output,
         encoder_padding_mask=encoder_padding_mask,
         # beam search runs in test mode and does not impute finished steps
         impute_finished=False
         if self.decoding_strategy == "beam_search" else True,
         is_test=True if self.decoding_strategy == "beam_search" else False,
         return_length=True)
    return decoder_output, decoder_final_state, dec_seq_lengths
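For illustration, a minimal usage sketch of this call, assuming one decoder object configured with decoding_strategy="train_greedy" and another with decoding_strategy="infer_greedy"; the names train_decoder, infer_decoder, trg_emb, trg_seq_len and start_tokens are hypothetical and not part of the original code, while trg_embeder and eos_id are taken from the surrounding example. The remaining keyword arguments fall through to the helper selected by decoding_strategy (or to layers.BeamSearchDecoder):

# Teacher-forcing training: leftover kwargs feed layers.TrainingHelper(inputs, sequence_length, ...)
train_out, train_state, train_lens = train_decoder(
    decoder_initial_states,
    encoder_output,
    encoder_padding_mask,
    output_layer=output_layer,
    inputs=trg_emb,
    sequence_length=trg_seq_len)

# Greedy inference: leftover kwargs feed layers.GreedyEmbeddingHelper(embedding_fn, start_tokens, end_token)
infer_out, infer_state, infer_lens = infer_decoder(
    decoder_initial_states,
    encoder_output,
    encoder_padding_mask,
    output_layer=output_layer,
    embedding_fn=trg_embeder,
    start_tokens=start_tokens,
    end_token=eos_id)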
def decoder(encoder_output, encoder_output_proj, encoder_state,
            encoder_padding_mask, trg=None, is_train=True):
    """Decoder: GRU with Attention"""
    decoder_cell = DecoderCell(hidden_size=decoder_size)
    decoder_initial_states = layers.fc(encoder_state,
                                       size=decoder_size,
                                       act="tanh")
    trg_embeder = lambda x: fluid.embedding(
        input=x,
        size=[target_dict_size, hidden_dim],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="trg_emb_table"))
    output_layer = lambda x: layers.fc(x,
                                       size=target_dict_size,
                                       num_flatten_dims=len(x.shape) - 1,
                                       param_attr=fluid.ParamAttr(
                                           name="output_w"))
    if is_train:
        decoder_output, _ = layers.rnn(
            cell=decoder_cell,
            inputs=trg_embeder(trg),
            initial_states=decoder_initial_states,
            time_major=False,
            encoder_output=encoder_output,
            encoder_output_proj=encoder_output_proj,
            encoder_padding_mask=encoder_padding_mask)
        decoder_output = output_layer(decoder_output)
    else:
        encoder_output = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output, beam_size)
        encoder_output_proj = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output_proj, beam_size)
        encoder_padding_mask = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_padding_mask, beam_size)
        beam_search_decoder = layers.BeamSearchDecoder(
            cell=decoder_cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=trg_embeder,
            output_fn=output_layer)
        decoder_output, _ = layers.dynamic_decode(
            decoder=beam_search_decoder,
            inits=decoder_initial_states,
            max_step_num=max_length,
            output_time_major=False,
            encoder_output=encoder_output,
            encoder_output_proj=encoder_output_proj,
            encoder_padding_mask=encoder_padding_mask)
    return decoder_output
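In training mode the returned decoder_output holds unnormalized logits of shape [batch_size, trg_len, target_dict_size]. A minimal sketch, assuming label (shape [batch_size, trg_len, 1]) and trg_sequence_length are fed from elsewhere in the program, of how a length-masked cross-entropy loss could be built on top of them; the helper name loss_func is hypothetical:

import paddle.fluid.layers as layers

def loss_func(logits, label, trg_sequence_length):
    # per-token loss: [batch_size, trg_len, 1] -> [batch_size, trg_len]
    loss = layers.softmax_with_cross_entropy(
        logits=logits, label=label, soft_label=False)
    loss = layers.squeeze(loss, axes=[2])
    # zero out padded positions so they do not contribute to the loss
    trg_mask = layers.sequence_mask(
        trg_sequence_length, maxlen=layers.shape(logits)[1], dtype="float32")
    return layers.reduce_sum(loss * trg_mask) / layers.reduce_sum(trg_mask)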
def decoder(encoder_output, encoder_output_proj, encoder_state,
            encoder_padding_mask, trg=None, is_train=True):
    # Define the components the RNN needs
    decoder_cell = DecoderCell(hidden_size=decoder_size)
    decoder_initial_states = layers.fc(encoder_state,
                                       size=decoder_size,
                                       act="tanh")
    trg_embeder = lambda x: fluid.embedding(
        input=x,
        size=[target_dict_size, hidden_dim],
        dtype="float32",
        param_attr=fluid.ParamAttr(name="trg_emb_table"))
    output_layer = lambda x: layers.fc(input=x,
                                       size=target_dict_size,
                                       num_flatten_dims=len(x.shape) - 1,
                                       param_attr=fluid.ParamAttr(
                                           name="output_w"))
    if is_train:  # training
        # At training time, `layers.rnn` builds the recurrent network defined
        # by `cell`; each step slices one input from `inputs` and runs
        # `cell.call`. Shapes: [-1, -1, 512] and [-1, 512]
        decoder_output, _ = layers.rnn(
            cell=decoder_cell,
            inputs=trg_embeder(trg),
            initial_states=decoder_initial_states,
            time_major=False,
            encoder_output=encoder_output,
            encoder_output_proj=encoder_output_proj,
            encoder_padding_mask=encoder_padding_mask)
        decoder_output = layers.fc(input=decoder_output,
                                   size=target_dict_size,
                                   num_flatten_dims=2,
                                   param_attr=fluid.ParamAttr(name="output_w"))
    else:  # inference: generation with beam search
        # Beam search requires the tensors of shape `[batch_size, ...]` used
        # here to be tiled to `[batch_size * beam_size, ...]`
        encoder_output = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output, beam_size)
        encoder_output_proj = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output_proj, beam_size)
        encoder_padding_mask = layers.BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_padding_mask, beam_size)
        # BeamSearchDecoder defines one decoding step: `cell.call` + `beam_search_step`
        beam_search_decoder = layers.BeamSearchDecoder(
            cell=decoder_cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=trg_embeder,
            output_fn=output_layer)
        # Decode dynamically with `layers.dynamic_decode`: it repeatedly calls
        # `decoder.step()` until the returned finished-state tensor is all
        # True or the number of decoding steps reaches `max_step_num`
        decoder_output, _ = layers.dynamic_decode(
            decoder=beam_search_decoder,
            inits=decoder_initial_states,
            max_step_num=max_length,
            output_time_major=False,
            encoder_output=encoder_output,
            encoder_output_proj=encoder_output_proj,
            encoder_padding_mask=encoder_padding_mask)
    return decoder_output
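At inference time, `layers.dynamic_decode` with a `BeamSearchDecoder` and `output_time_major=False` returns predicted token ids of shape [batch_size, max_len, beam_size]. A hedged numpy post-processing sketch; the helper names and the assumption that beam index 0 carries the highest-scoring candidate are ours, not part of the original code:

import numpy as np

def trim_at_eos(seq, eos_id):
    # keep tokens up to (not including) the first end token
    out = []
    for idx in seq:
        if idx == eos_id:
            break
        out.append(int(idx))
    return out

def extract_top_beam(pred_ids, eos_id):
    # pred_ids: fetched numpy value of decoder_output, [batch_size, max_len, beam_size];
    # beam 0 is assumed to be the best-scoring hypothesis
    return [trim_at_eos(pred_ids[i, :, 0], eos_id) for i in range(pred_ids.shape[0])]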