Example #1
        def train(i, batch_size, use_cuda, dropout):
            # Fetch a training batch and convert each numpy array into a
            # long tensor, moving it to the GPU when requested.
            input = batch_loader.next_batch(batch_size, 'train')
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input

            logits, _, kld = self(dropout,
                                  encoder_word_input,
                                  encoder_character_input,
                                  decoder_word_input,
                                  decoder_character_input,
                                  z=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld  # scaled reconstruction term plus annealed KL term

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)
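Every example weights the KL term with kld_coef(i), a warm-up coefficient defined elsewhere in the enclosing trainer. A minimal sketch of such a KL-annealing schedule is shown below; the tanh shape and the constants 3500 and 1000 are assumptions for illustration and may differ from the actual helper.

import math

def kld_coef(i):
    # Hypothetical KL-annealing schedule: ramps smoothly from ~0 towards 1
    # as the training step i grows, so the KL term is phased in gradually
    # instead of overwhelming the reconstruction loss early on.
    return (math.tanh((i - 3500) / 1000) + 1) / 2

With a shape like this the reconstruction term dominates at the start of training and the KL regularizer only reaches its full weight after a few thousand steps.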
Example #2
        def train(i, batch_size, use_cuda, dropout, start_index):
            input = batch_loader.next_batch(batch_size, 'train', start_index)
            input = [Variable(t.from_numpy(var)) for var in input]
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]
            # This batch comes from data/train.txt, converted into padded index arrays
            # (see the layout sketch after this example):
            # encoder_word_input / encoder_character_input are the original sentence xo,
            # reversed, with padding tokens prepended;
            # decoder_word_input / decoder_character_input are xo with a start token
            # prepended and padding appended;
            # target is xo with an end-of-sentence token appended, followed by padding tokens.
            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, decoder_character_input, target
            ] = input
            ''' =================================================== Input for Encoder-2 ========================================================
            '''

            input_2 = batch_loader_2.next_batch(batch_size, 'train',
                                                start_index)
            input_2 = [Variable(t.from_numpy(var)) for var in input_2]
            input_2 = [var.long() for var in input_2]
            input_2 = [var.cuda() if use_cuda else var for var in input_2]
            # This batch comes from data/super/train.txt, converted into padded index arrays:
            # encoder_word_input_2 / encoder_character_input_2 are the paraphrase sentence xp,
            # reversed, with padding tokens prepended;
            # decoder_word_input_2 / decoder_character_input_2 are xp with a start token
            # prepended and padding appended;
            # target is xp with an end-of-sentence token appended, followed by padding tokens
            # (note that this overwrites the target unpacked from the first batch).
            [
                encoder_word_input_2, encoder_character_input_2,
                decoder_word_input_2, decoder_character_input_2, target
            ] = input_2
            ''' ================================================================================================================================
            '''
            # exit()
            # Here the first encoder's input is the reversed original sentence xo, the second
            # encoder's input is the reversed paraphrase xp, and the decoder's input is the
            # paraphrase with the start token prepended.
            logits, _, kld, _, _ = self(dropout,
                                        encoder_word_input,
                                        encoder_character_input,
                                        encoder_word_input_2,
                                        encoder_character_input_2,
                                        decoder_word_input_2,
                                        decoder_character_input_2,
                                        z=None)

            # logits = logits.view(-1, self.params.word_vocab_size)
            logits = logits.view(-1, self.params_2.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(
                logits, target
            )  # logits are per-step scores over the whole vocabulary; target holds the word
            # index for each step (no one-hot needed, cross_entropy converts internally)

            loss = 79 * cross_entropy + kld_coef(i) * kld  # the constant 79 looks like an ad-hoc choice by the author

            optimizer.zero_grad()  # standard pattern: compute the loss, zero the gradients,
            loss.backward()  # backpropagate,
            optimizer.step()  # then apply the parameter update

            return cross_entropy, kld, kld_coef(
                i)  # cross-entropy, KL divergence, and the KL annealing coefficient
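The comments above describe the layout that the batch loader produces for both xo and xp. The helper below is a rough, self-contained sketch of that layout for a single tokenized sentence; the helper name and the '<go>', '<eos>', '<pad>' token strings are illustrative assumptions, not the repository's actual BatchLoader API.

def layout_example(tokens, max_len, go='<go>', eos='<eos>', pad='<pad>'):
    # Illustrative sketch of the batch layout described in the comments:
    #   encoder input : sentence reversed, with padding prepended
    #   decoder input : <go> + sentence, with padding appended
    #   target        : sentence + <eos>, with padding appended
    n_pad = max_len - len(tokens)
    encoder_input = [pad] * n_pad + list(reversed(tokens))
    decoder_input = [go] + tokens + [pad] * n_pad
    target = tokens + [eos] + [pad] * n_pad
    return encoder_input, decoder_input, target

print(layout_example(['the', 'cat', 'sat'], max_len=5))

In Example #2 this layout is built twice: once for the original sentence xo (fed to the first encoder) and once for the paraphrase xp (fed to the second encoder and the decoder, and used as the target).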
Example #3
        def train(i, batch_size, use_cuda, dropout):
            input = batch_loader.next_batch(batch_size, 'train')
            input = [(Variable(t.from_numpy(var)) if var is not None else None)
                     for var in input]
            input = [(var.long() if var is not None else None)
                     for var in input]
            input = [(var.cuda() if var is not None and use_cuda else var)
                     for var in input]

            [
                encoder_word_input, encoder_character_input,
                decoder_word_input, _, target
            ] = input

            logits_out, kld, _, _ = self(dropout,
                                         encoder_word_input,
                                         encoder_character_input,
                                         decoder_word_input,
                                         z=None,
                                         initial_state=None)
            if self.params.decoder_type in ('dilation', 'gru', 'lstm'):
                logits = logits_out.view(-1, self.params.word_vocab_size)
                target = target.view(-1)
                cross_entropy = F.cross_entropy(logits, target)

                # since cross entropy is averaged over seq_len, it is necessary to re-weight the kld accordingly
                loss = 79 * cross_entropy + kld_coef(i) * kld

                logits = logits.view(batch_size, -1,
                                     self.params.word_vocab_size)
                target = target.view(batch_size, -1)
                ppl = perplexity(logits, target).mean()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                return ppl, kld, None
            elif self.params.decoder_type == 'gru_emb':
                decoder_target = self.embedding(target, None)
                error = t.pow(logits_out - decoder_target, 2).mean()
                '''
                The loss is constructed from the squared error between output and
                target, averaged over the whole batch, plus the KL divergence
                between p(z) and q(z|x).
                '''
                loss = 400 * error + kld_coef(i) * kld

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                return error, kld, kld_coef(i)
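Example #3 additionally reports a per-sequence perplexity via a perplexity(logits, target) helper that is defined elsewhere. The sketch below is one plausible way to compute it from the reshaped logits and targets, not necessarily the repository's implementation.

import torch
import torch.nn.functional as F

def perplexity(logits, target):
    # logits: [batch, seq_len, vocab], target: [batch, seq_len]
    # Per-sequence perplexity = exp(mean token-level cross-entropy).
    batch, seq_len, vocab = logits.size()
    ce = F.cross_entropy(logits.view(-1, vocab),
                         target.view(-1),
                         reduction='none').view(batch, seq_len)
    return torch.exp(ce.mean(dim=1))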
Example #4
        def train(i, input, use_cuda, dropout):
            input = [
                Variable(torch.from_numpy(var.astype(np.float64)))
                for var in input
            ]  # np.float was removed from recent NumPy; np.float64 is the equivalent dtype
            input = [var.long() for var in input]
            input = [var.cuda() if use_cuda else var for var in input]

            [
                original_encoder_word_input, original_encoder_character_input,
                paraphrse_encoder_word_input,
                paraphrse_encoder_character_input, decoder_word_input,
                decoder_character_input, target
            ] = input

            logits, _, kld = self(dropout,
                                  original_encoder_word_input,
                                  original_encoder_character_input,
                                  paraphrse_encoder_word_input,
                                  paraphrse_encoder_character_input,
                                  decoder_word_input,
                                  decoder_character_input,
                                  z=None,
                                  initial_state=None)

            logits = logits.view(-1, self.params.word_vocab_size)
            target = target.view(-1)
            cross_entropy = F.cross_entropy(logits, target)

            loss = 79 * cross_entropy + kld_coef(i) * kld

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            return cross_entropy, kld, kld_coef(i)
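Example #4 starts from numpy index arrays and turns them into long tensors, optionally on the GPU. A self-contained sketch of that conversion step, using plain tensors instead of the deprecated Variable wrapper, is shown below; the helper name is hypothetical.

import numpy as np
import torch

def to_long_tensors(arrays, use_cuda=False):
    # Convert a list of numpy index arrays into int64 tensors,
    # optionally moving them to the GPU, mirroring the pattern above.
    tensors = [torch.from_numpy(np.asarray(a)).long() for a in arrays]
    return [x.cuda() if use_cuda else x for x in tensors]

word_ids = np.array([[1, 2, 3], [4, 5, 6]])
print(to_long_tensors([word_ids])[0].dtype)  # torch.int64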