Exemplo n.º 1
0
    def forward(self, src, src_length):
        """Encode `src` and run beam-search decoding over the result."""
        # Encode the source sequence.
        enc_out, enc_final = self.encoder(src, src_length)

        # Decoder initial states: the encoder's final state plus a fresh
        # zero state for the attention LSTM cell.
        init_states = [
            enc_final,
            self.decoder.lstm_attention.cell.get_initial_states(
                batch_ref=enc_out, shape=[self.hidden_size])
        ]
        # Additive attention mask: 0 at real tokens, -1e9 at paddings so
        # attention effectively ignores padded positions.
        src_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=enc_out.dtype)
        padding_mask = layers.unsqueeze((src_mask - 1.0) * 1e9, [1])

        # Replicate the batch dimension beam_size times for beam search.
        enc_out = BeamSearchDecoder.tile_beam_merge_with_batch(
            enc_out, self.beam_size)
        padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
            padding_mask, self.beam_size)

        # Dynamic decoding with beam search.
        predictions, _ = self.beam_search_decoder(
            inits=init_states,
            encoder_output=enc_out,
            encoder_padding_mask=padding_mask)
        return predictions
Exemplo n.º 2
0
    def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
        """Build the attention decoder sub-graph.

        Args:
            enc_final_state: final encoder state used to initialize the
                decoder cell.
            mode (str): 'train' builds a teacher-forced RNN decoder;
                'beam_search' builds a beam-search inference decoder.
            beam_size (int): beam width, used only in 'beam_search' mode.

        Returns:
            Decoder logits in 'train' mode, or beam-search outputs in
            'beam_search' mode.

        Raises:
            ValueError: if `mode` is neither 'train' nor 'beam_search'.
        """
        # Output projection to target-vocabulary logits. The parameter is
        # created under the fixed name "output_w" so both branches share it.
        output_layer = lambda x: layers.fc(
            x,
            size=self.tar_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(
                name="output_w",
                initializer=fluid.initializer.UniformInitializer(
                    low=-self.init_scale, high=self.init_scale)),
            bias_attr=False)

        dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                        self.dropout, self.init_scale)
        # Initial states: encoder final state plus a zero state for the
        # attention cell's extra state slot.
        dec_initial_states = [
            enc_final_state,
            dec_cell.get_initial_states(batch_ref=self.enc_output,
                                        shape=[self.hidden_size])
        ]
        # Mask derived from source lengths: 0.0 at real tokens, -1.0 at
        # padding positions.
        max_src_seq_len = layers.shape(self.src)[1]
        src_mask = layers.sequence_mask(self.src_sequence_length,
                                        maxlen=max_src_seq_len,
                                        dtype='float32')
        enc_padding_mask = (src_mask - 1.0)

        if mode == 'train':
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=self.tar_emb,
                                initial_states=dec_initial_states,
                                sequence_length=None,
                                enc_output=self.enc_output,
                                enc_padding_mask=enc_padding_mask)
            return output_layer(dec_output)

        if mode == 'beam_search':
            # Re-create the projection without an initializer so the
            # existing "output_w" parameter is reused, not re-initialized.
            output_layer = lambda x: layers.fc(
                x,
                size=self.tar_vocab_size,
                num_flatten_dims=len(x.shape) - 1,
                param_attr=fluid.ParamAttr(name="output_w"),
                bias_attr=False)
            beam_search_decoder = BeamSearchDecoder(
                dec_cell,
                self.beam_start_token,
                self.beam_end_token,
                beam_size,
                embedding_fn=self.tar_embeder,
                output_fn=output_layer)
            # Expand the batch dimension by beam_size for beam search.
            enc_output = beam_search_decoder.tile_beam_merge_with_batch(
                self.enc_output, beam_size)
            enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
                enc_padding_mask, beam_size)
            outputs, _ = dynamic_decode(beam_search_decoder,
                                        inits=dec_initial_states,
                                        max_step_num=self.beam_max_step_num,
                                        enc_output=enc_output,
                                        enc_padding_mask=enc_padding_mask)
            return outputs

        # BUG FIX: an unknown mode used to fall through to `return dec_output`
        # with `dec_output` unbound, raising a confusing NameError.
        raise ValueError("Unsupported decoder mode: %s" % mode)
Exemplo n.º 3
0
    def forward(self, inputs, *args):
        """Encode `inputs` and run the inference decoder over the result."""
        bwd_gru, enc_vec, enc_proj = self.encoder(inputs)
        # Bootstrap the decoder state from the first backward-GRU step.
        boot_state = self.fc(bwd_gru[:, 0])

        if self.beam_size:
            # Replicate the batch dimension beam_size times for beam search.
            enc_vec = BeamSearchDecoder.tile_beam_merge_with_batch(
                enc_vec, self.beam_size)
            enc_proj = BeamSearchDecoder.tile_beam_merge_with_batch(
                enc_proj, self.beam_size)
        # Dynamic decoding over the (possibly tiled) encoder outputs.
        results, _ = self.infer_decoder(inits=boot_state,
                                        encoder_vec=enc_vec,
                                        encoder_proj=enc_proj)
        return results
Exemplo n.º 4
0
 def __init__(self,
              src_vocab_size,
              trg_vocab_size,
              embed_dim,
              hidden_size,
              num_layers,
              dropout_prob=0.,
              bos_id=0,
              eos_id=1,
              beam_size=4,
              max_out_len=256):
     """Build the inference model and attach a beam-search decoder."""
     # Capture every constructor argument, then strip off the ones that
     # only the inference wrapper needs; the rest go to the base model.
     ctor_args = dict(locals())
     ctor_args.pop("self")
     ctor_args.pop("__class__", None)  # present under py3
     self.bos_id = ctor_args.pop("bos_id")
     self.eos_id = ctor_args.pop("eos_id")
     self.beam_size = ctor_args.pop("beam_size")
     self.max_out_len = ctor_args.pop("max_out_len")
     super(AttentionInferModel, self).__init__(**ctor_args)
     # Beam-search decoder reusing the trained attention cell, embedder,
     # and output projection for dynamic inference decoding.
     bs_decoder = BeamSearchDecoder(
         self.decoder.lstm_attention.cell,
         start_token=bos_id,
         end_token=eos_id,
         beam_size=beam_size,
         embedding_fn=self.decoder.embedder,
         output_fn=self.decoder.output_layer)
     self.beam_search_decoder = DynamicDecode(
         bs_decoder, max_step_num=max_out_len, is_test=True)
Exemplo n.º 5
0
 def __init__(
     self,
     in_channle=1,
     encoder_size=200,
     decoder_size=128,
     emb_dim=128,
     num_classes=None,
     beam_size=0,
     bos_id=0,
     eos_id=1,
     max_out_len=20,
 ):
     """Inference wrapper that adds (optional) beam-search decoding."""
     super(Seq2SeqAttInferModel, self).__init__(
         in_channle, encoder_size, decoder_size, emb_dim, num_classes)
     self.beam_size = beam_size
     # Beam-search decoder built on the trained attention cell; reuses the
     # model's embedding and output projection.
     search_decoder = BeamSearchDecoder(
         self.decoder.decoder_attention.cell,
         start_token=bos_id,
         end_token=eos_id,
         beam_size=beam_size,
         embedding_fn=self.embedding,
         output_fn=self.decoder.fc)
     self.infer_decoder = DynamicDecode(
         search_decoder, max_step_num=max_out_len, is_test=True)
Exemplo n.º 6
0
    def _build_decoder(self, enc_final_state, mode='train', beam_size=10):
        """Build the (non-attention) decoder sub-graph.

        Args:
            enc_final_state: final encoder state used as the decoder's
                initial state.
            mode (str): 'train' builds a teacher-forced RNN decoder;
                'beam_search' builds a beam-search inference decoder.
            beam_size (int): beam width, used only in 'beam_search' mode.

        Returns:
            Decoder logits in 'train' mode, or beam-search outputs in
            'beam_search' mode.

        Raises:
            ValueError: if `mode` is neither 'train' nor 'beam_search'.
        """
        dec_cell = DecoderCell(self.num_layers, self.hidden_size, self.dropout,
                               self.init_scale)
        # Projection to target-vocabulary logits, shared by both modes.
        output_layer = lambda x: layers.fc(x,
                                           size=self.tar_vocab_size,
                                           num_flatten_dims=len(x.shape) - 1,
                                           param_attr=fluid.ParamAttr(
                                               name="output_w",
                                               initializer=uniform_initializer(
                                                   self.init_scale)),
                                           bias_attr=False)

        if mode == 'train':
            dec_output, dec_final_state = rnn(cell=dec_cell,
                                              inputs=self.tar_emb,
                                              initial_states=enc_final_state)

            dec_output = output_layer(dec_output)

            return dec_output
        elif mode == 'beam_search':
            beam_search_decoder = BeamSearchDecoder(
                dec_cell,
                self.beam_start_token,
                self.beam_end_token,
                beam_size,
                embedding_fn=self.tar_embeder,
                output_fn=output_layer)

            outputs, _ = dynamic_decode(beam_search_decoder,
                                        inits=enc_final_state,
                                        max_step_num=self.beam_max_step_num)
            return outputs
        # BUG FIX: an unknown mode used to fall off the end and silently
        # return None; fail loudly instead.
        raise ValueError("Unsupported decoder mode: %s" % mode)
Exemplo n.º 7
0
 def model_init(self,
                vocab_size,
                embed_dim,
                hidden_size,
                bos_id=0,
                eos_id=1,
                beam_size=4,
                max_step_num=20):
     """Assemble the beam-search inference decoder for an LSTM model."""
     word_embedder = Embedding(size=[vocab_size, embed_dim])
     vocab_proj = Linear(hidden_size, vocab_size)
     lstm_cell = BasicLSTMCell(embed_dim, hidden_size)
     # Beam-search decoder wiring the cell, embedding, and output layers.
     search_decoder = BeamSearchDecoder(lstm_cell,
                                        start_token=bos_id,
                                        end_token=eos_id,
                                        beam_size=beam_size,
                                        embedding_fn=word_embedder,
                                        output_fn=vocab_proj)
     self.beam_search_decoder = DynamicDecode(search_decoder,
                                              max_step_num=max_step_num,
                                              is_test=True)
Exemplo n.º 8
0
    def _build_net(self):
        """Build the decoder graph: teacher-forced training/eval path, or a
        beam-search inference path when in the predict phase.

        Returns a one-element list [softmax logits] in train/eval phase, or
        the beam-search inference ids in predict phase.
        """
        # Source sequence lengths, fed at runtime; squeezed to rank 1 for
        # sequence_mask.
        self.seq_len = fluid.layers.data(name="seq_len",
                                         shape=[1],
                                         dtype='int64',
                                         lod_level=0)
        self.seq_len_used = fluid.layers.squeeze(self.seq_len, axes=[1])
        src_mask = fluid.layers.sequence_mask(self.seq_len_used,
                                              maxlen=self.max_seq_len,
                                              dtype='float32')
        # 0.0 at real tokens, -1.0 at padding positions.
        enc_padding_mask = (src_mask - 1.0)

        # Define decoder and initialize it.
        dec_cell = AttentionDecoderCell(self.num_layers, self.hidden_size,
                                        self.dropout)
        # Project the (pooled) feature into the decoder's initial hidden.
        dec_init_hidden = fluid.layers.fc(
            input=self.feature,
            size=self.hidden_size,
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                name="dec_init_hidden_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="dec_init_hidden_b",
                initializer=fluid.initializer.Constant(0.)))
        # Per-layer [hidden, cell] pairs plus one extra state for the cell.
        # NOTE(review): the `* self.num_layers` repetition makes every layer
        # share the SAME inner list object — apparently fine since the
        # states are only read, but worth confirming.
        dec_initial_states = [[[
            dec_init_hidden,
            dec_cell.get_initial_states(batch_ref=self.feature,
                                        shape=[self.hidden_size])
        ]] * self.num_layers,
                              dec_cell.get_initial_states(
                                  batch_ref=self.feature,
                                  shape=[self.hidden_size])]
        tar_vocab_size = len(self._label_list)
        # Target-side embedding lookup shared by both phases.
        tar_embeder = lambda x: fluid.embedding(
            input=x,
            size=[tar_vocab_size, self.hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(name='target_embedding',
                                       initializer=fluid.initializer.
                                       UniformInitializer(low=-0.1, high=0.1)))
        start_token_id = self._label_list.index(self.start_token)
        end_token_id = self._label_list.index(self.end_token)
        if not self.is_predict_phase:
            # Teacher forcing: decoder input ids are fed directly.
            self.dec_input = fluid.layers.data(name="dec_input",
                                               shape=[self.max_seq_len],
                                               dtype='int64')
            tar_emb = tar_embeder(self.dec_input)
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=tar_emb,
                                initial_states=dec_initial_states,
                                sequence_length=None,
                                enc_output=self.token_feature,
                                enc_padding_mask=enc_padding_mask)
            # Project decoder outputs to vocabulary logits ("output_w" is a
            # fixed name so the predict phase reuses the same parameter).
            self.logits = fluid.layers.fc(
                dec_output,
                size=tar_vocab_size,
                num_flatten_dims=len(dec_output.shape) - 1,
                param_attr=fluid.ParamAttr(
                    name="output_w",
                    initializer=fluid.initializer.UniformInitializer(
                        low=-0.1, high=0.1)))
            # Greedy argmax ids, flattened to a column for downstream use.
            self.ret_infers = fluid.layers.reshape(x=fluid.layers.argmax(
                self.logits, axis=2),
                                                   shape=[-1, 1])
            logits = self.logits
            logits = fluid.layers.softmax(logits)
            return [logits]
        else:
            # Inference projection reuses "output_w" (no initializer).
            output_layer = lambda x: fluid.layers.fc(
                x,
                size=tar_vocab_size,
                num_flatten_dims=len(x.shape) - 1,
                param_attr=fluid.ParamAttr(name="output_w"))
            beam_search_decoder = BeamSearchDecoder(dec_cell,
                                                    start_token_id,
                                                    end_token_id,
                                                    self.beam_size,
                                                    embedding_fn=tar_embeder,
                                                    output_fn=output_layer)
            # Tile the batch dimension by beam_size for beam search.
            enc_output = beam_search_decoder.tile_beam_merge_with_batch(
                self.token_feature, self.beam_size)
            enc_padding_mask = beam_search_decoder.tile_beam_merge_with_batch(
                enc_padding_mask, self.beam_size)
            self.ret_infers, _ = dynamic_decode(
                beam_search_decoder,
                inits=dec_initial_states,
                max_step_num=self.beam_max_step_num,
                enc_output=enc_output,
                enc_padding_mask=enc_padding_mask)
            return self.ret_infers
Exemplo n.º 9
0
    def _build_decoder(self,
                       z_mean=None,
                       z_log_var=None,
                       enc_output=None,
                       mode='train',
                       beam_size=10):
        """Build the VAE decoder sub-graph conditioned on a latent code.

        Args:
            z_mean, z_log_var: posterior parameters, used (via sampling) in
                'train' mode; ignored otherwise (a prior sample is drawn).
            enc_output: unused here; kept for interface compatibility.
            mode (str): 'train', 'greedy', or 'sampling'.
            beam_size (int): unused; decoding branches hard-code beam_size=1.

        Returns:
            Decoder logits in 'train' mode, decode outputs in 'greedy' /
            'sampling' mode, or None for an unrecognized mode (a message is
            printed).
        """
        dec_input = layers.dropout(self.tar_emb,
                                   dropout_prob=self.dec_dropout_in,
                                   dropout_implementation="upscale_in_train")

        # `output_layer` will be used within BeamSearchDecoder
        output_layer = lambda x: layers.fc(x,
                                           size=self.tar_vocab_size,
                                           num_flatten_dims=len(x.shape) - 1,
                                           name="output_w")

        # `sample_output_layer` samples an id from the logits distribution
        # instead of argmax(logits); it will be used within BeamSearchDecoder.
        # Note: sampling_id makes the 'sampling' branch stochastic.
        sample_output_layer = lambda x: layers.unsqueeze(
            fluid.one_hot(layers.unsqueeze(
                layers.sampling_id(layers.softmax(
                    layers.squeeze(output_layer(x), [1])),
                                   dtype='int'), [1]),
                          depth=self.tar_vocab_size), [1])

        # Latent code: reparameterized posterior sample when training, a
        # standard-normal prior sample otherwise.
        if mode == 'train':
            latent_z = self._sampling(z_mean, z_log_var)
        else:
            latent_z = layers.gaussian_random_batch_size_like(
                self.tar, shape=[-1, self.latent_size])
        # Project the latent code into initial hidden/cell for every layer.
        dec_first_hidden_cell = layers.fc(latent_z,
                                          2 * self.hidden_size *
                                          self.num_layers,
                                          name='fc_hc')
        dec_first_hidden, dec_first_cell = layers.split(
            dec_first_hidden_cell, 2)
        if self.num_layers > 1:
            dec_first_hidden = layers.split(dec_first_hidden, self.num_layers)
            dec_first_cell = layers.split(dec_first_cell, self.num_layers)
        else:
            dec_first_hidden = [dec_first_hidden]
            dec_first_cell = [dec_first_cell]
        dec_initial_states = [[h, c]
                              for h, c in zip(dec_first_hidden, dec_first_cell)
                              ]
        dec_cell = DecoderCell(self.num_layers, self.hidden_size, latent_z,
                               self.param_attr_initializer,
                               self.param_attr_scale, self.dec_dropout_out)

        if mode == 'train':
            dec_output, _ = rnn(cell=dec_cell,
                                inputs=dec_input,
                                initial_states=dec_initial_states,
                                sequence_length=self.tar_sequence_length)
            dec_output = output_layer(dec_output)

            return dec_output
        elif mode == 'greedy':
            # Greedy decoding == beam search with a beam of one.
            start_token = 1
            end_token = 2
            max_length = 100
            beam_search_decoder = BeamSearchDecoder(
                dec_cell,
                start_token,
                end_token,
                beam_size=1,
                embedding_fn=self.tar_embeder,
                output_fn=output_layer)
            outputs, _ = dynamic_decode(beam_search_decoder,
                                        inits=dec_initial_states,
                                        max_step_num=max_length)
            return outputs

        elif mode == 'sampling':
            start_token = 1
            end_token = 2
            max_length = 100
            beam_search_decoder = BeamSearchDecoder(
                dec_cell,
                start_token,
                end_token,
                beam_size=1,
                embedding_fn=self.tar_embeder,
                output_fn=sample_output_layer)

            outputs, _ = dynamic_decode(beam_search_decoder,
                                        inits=dec_initial_states,
                                        max_step_num=max_length)
            return outputs
        else:
            # BUG FIX: message typo "supprt" -> "support".
            print("mode not support", mode)