Example #1
            )  # [num_sentence * 1]  these tokens are replaced by the next word at every step; initially all are [SOS]
        sentences = words  # where the sentences are stored; later the highest-scoring [EOS] is picked from here.
        end_sentences = []  # holds up to NUM_TOP_PROB finished sentences
        end_sentences_score = []  # scores of those NUM_TOP_PROB finished sentences at the moment their [END] appeared.
        len_sentence = 1
        with torch.no_grad():
            encoded_img = encoder(img)
            encoded_img = encoded_img.reshape(1, -1, 2048).expand(
                num_sentence, -1, -1)  # [num_sentence * ENCODER_OUTPUT_SIZE^2 * 2048]
            hidden, cell = decoder.init_hidden_cell_state(encoded_img)
            is_first = True
            # start the decoder: repeat (SOS excluded) only while the sentence is not too long; start from several candidate sentences and pick the final END
            while len_sentence < args.MAX_SENTENCE_LEN:
                embedded_words = decoder.embedding(words).squeeze(1)  # [num_sentence * EMBED_SIZE]
                attentioned_encoder_output, _ = decoder.attention_module(
                    encoded_img, hidden)  # from this point on, not everything is simply PAD.
                # attention gate: scale the attended features by a learned sigmoid gate
                gs = torch.sigmoid(decoder.sag(hidden))
                attentioned_encoder_output = attentioned_encoder_output * gs
                new_inputs = torch.cat(
                    [embedded_words, attentioned_encoder_output], dim=1)
                hidden, cell = decoder.LSTMCell(new_inputs, (hidden, cell))
                preds = decoder.last_fc(hidden)  # [(<num_sentence) * VOCAB_SIZE] => as in training, the batch shrinks once END appears.

                preds = preds + top_prev_prob.expand_as(preds)  # keep accumulating by adding the newly predicted scores to the previous ones.

                if is_first:  # on the first step all 5 beams are identical, so take the top 5 from the first beam only
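The fragment cuts off here, at the first-step branch. A minimal sketch of the top-k selection that typically follows in beam search (the helper name and shapes are assumptions; preds is taken to be the [num_sentence * VOCAB_SIZE] tensor of accumulated scores from above):

import torch

def select_topk(preds, num_sentence, is_first):
    # On the first step every beam holds the same [SOS] prefix, so rank
    # row 0 only; afterwards rank all beam*vocab candidates jointly.
    vocab_size = preds.size(1)
    if is_first:
        top_prob, word_idx = preds[0].topk(num_sentence)
        beam_idx = torch.zeros_like(word_idx)  # all candidates extend beam 0
    else:
        top_prob, flat_idx = preds.view(-1).topk(num_sentence)
        beam_idx = flat_idx // vocab_size  # which beam each candidate extends
        word_idx = flat_idx % vocab_size   # which word extends it
    # top_prob becomes the next top_prev_prob ([num_sentence * 1])
    return top_prob.unsqueeze(1), beam_idx, word_idx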
Example #2
class Transformer_Pointer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder_word = Encoder(config, config.src_vocab_size)
        self.encoder_char = Encoder(config, config.tgt_vocab_size)
        self.pointer = Pointer(config)
        self.attention = Luong_Attention(config)
        self.decoder = Decoder(config)
        self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
        self.softmax = nn.Softmax(dim=-1)
        self.s_len = config.s_len
        self.bos = config.bos

    # shift the target right: prepend <bos> and drop the last token
    def convert(self, x):
        """
        :param x:(batch, s_len) (word_1, word_2, ... , word_n)
        :return:(batch, s_len) (<bos>, word_1, ... , word_n-1)
        """
        start = torch.full((x.size(0), 1), self.bos,
                           dtype=torch.long, device=x.device)
        x = torch.cat((start, x), dim=1)
        return x[:, :-1]

    def forward(self, x_w, x_c, y):
        """
        :param x_w:
        :param x_c:
        :param y:
        :return: (batch, s_len, vocab_size)
        """
        y_s = self.convert(y)
        encoder_out = self.encoder_word(x_w)
        encoder_attn = self.encoder_char(x_c)
        final = []
        # teacher forcing: re-decode the growing gold prefix one step at a time
        for i in range(self.s_len):
            dec_output = self.decoder(x_w, y_s[:, :i + 1], encoder_out)
            emb = self.decoder.embedding(y_s[:, i].unsqueeze(1))
            output = self.linear_out(dec_output[:, -1, :])
            # gen (batch, vocab_size)
            gen = self.softmax(output)
            # pointer
            # ptr (batch, c_len)
            # context (batch, 1, model_size)
            ptr, context = self.attention(dec_output[:, -1, :].unsqueeze(1),
                                          encoder_attn)
            # prob (batch, )
            prob = self.pointer(emb, dec_output[:, -1, :].unsqueeze(1),
                                context).unsqueeze(1)
            # pointer-generator mixture: weight the vocabulary distribution by
            # (1 - prob) and scatter the copy distribution onto the ids in x_c
            final_out = (1 - prob) * gen
            final_out = final_out.scatter_add_(1, x_c, prob * ptr)
            final.append(final_out)
        return torch.stack(final, dim=1)  # (batch, s_len, vocab_size)

    def sample(self, x_w, x_c):
        encoder_out = self.encoder_word(x_w)
        encoder_attn = self.encoder_char(x_c)

        # greedy decoding: every sequence starts with <bos>
        out = torch.full((x_w.size(0), 1), self.bos,
                         dtype=torch.long, device=x_w.device)
        final = []
        for i in range(self.s_len):
            dec_output = self.decoder(x_w, out, encoder_out)
            emb = self.decoder.embedding(out[:, -1].unsqueeze(1))
            output = self.linear_out(dec_output[:, -1, :])
            gen = self.softmax(output)
            ptr, context = self.attention(dec_output[:, -1, :].unsqueeze(1),
                                          encoder_attn)
            # prob (batch, )
            prob = self.pointer(emb, dec_output[:, -1, :].unsqueeze(1),
                                context).unsqueeze(1)
            final_out = (1 - prob) * gen
            final_out = final_out.scatter_add_(1, x_c, prob * ptr)
            final.append(final_out)
            # greedy step: feed back the argmax of the full pointer-generator
            # mixture (not just the generator distribution)
            next_token = torch.argmax(final_out, dim=-1).unsqueeze(1)
            out = torch.cat((out, next_token), dim=1)
        return torch.stack(final, dim=1), out
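To see why the scatter_add_ mixture above still yields a proper distribution, here is a minimal standalone sketch (the shapes and random inputs are assumptions for illustration only):

import torch

batch, vocab_size, c_len = 2, 10, 4
gen = torch.softmax(torch.randn(batch, vocab_size), dim=-1)  # P_vocab
ptr = torch.softmax(torch.randn(batch, c_len), dim=-1)       # copy attention over source chars
prob = torch.sigmoid(torch.randn(batch, 1))                  # copy gate in (0, 1)
x_c = torch.randint(0, vocab_size, (batch, c_len))           # source char ids

final = (1 - prob) * gen                        # generation share: sums to 1 - prob
final = final.scatter_add_(1, x_c, prob * ptr)  # copy share: adds prob per row
assert torch.allclose(final.sum(dim=1), torch.ones(batch))  # still a distribution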