Example #1
 def run(self, data_loader, batch_size, beam_size=3): # data_loader is either a list of lists or a DataLoader
     self.encoder.eval()
     self.decoder.eval()
     self.vae.eval()            
     
     pbar = ProgressBar()
     pbar.set(total_steps=len(data_loader)) 
    
     total_loss = 0.
     with torch.no_grad():
         for counter, (x, y) in enumerate(data_loader):                
             pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(data_loader), total_loss/(counter+1)))  
             
             if x.size(0) != batch_size:
                 print("\t Incomplete batch, skipping.")
                 continue
             
             if(self.train_on_gpu):
                 x, y = x.cuda(), y.cuda()
             
             x = x[0:1,:]                
             y = y[0:1,:]
             results, scores, loss = self._run_instance(x, y, beam_size)
             total_loss += loss.data.item() # accumulate (assuming _run_instance returns a loss tensor, as in _eval), otherwise the reported average stays at zero
     
     pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(data_loader)))     
     return total_loss/len(data_loader)
Example #2
 def _eval(self, valid_loader, batch_size):                
     self.encoder.eval()
     self.decoder.eval()
     self.vae.eval()            
     
     pbar = ProgressBar()
     pbar.set(total_steps=len(valid_loader)) 
    
     counter = 0 
     total_loss = 0.
     with torch.no_grad():
         for counter, (x, y) in enumerate(valid_loader):                
             #if counter > 5:
             #    break
             pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(valid_loader), total_loss/(counter+1)))   
             # skip incomplete (last) batches: check against the requested batch_size before overwriting it
             if x.size(0) != batch_size:
                 print("\t Incomplete batch, skipping.")
                 continue
             batch_size = x.size(0)
             max_seq_len_x = x.size(1)
             max_seq_len_y = y.size(1)
             loss = 0
             #print("  Epoch {}, batch: {}/{}, max_seq_len_x: {}, max_seq_len_y: {}".format(self.epoch, counter, len(valid_loader), max_seq_len_x, max_seq_len_y))
             
             if(self.train_on_gpu):
                 x, y = x.cuda(), y.cuda()
             
             encoder_hidden = self.encoder.init_hidden(batch_size)
             decoder_hidden = self.decoder.init_hidden(batch_size)
     
             encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)                 
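             # encoder_output is batch_size x max_seq_len_x x encoder_hidden_dim*2 (double because the encoder is bidirectional)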
             encoder_last_output = torch.zeros(batch_size, self.encoder_hidden_dim*2, device=self.device)
             for j in range(batch_size):
                 encoder_last_output[j] = encoder_output[j][-1]
             
             # VAE
             z, mu, logvar = self.vae(encoder_last_output)
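             # z, mu and logvar are all (batch_size, encoder_hidden_dim)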
             
             word_softmax_projection = torch.zeros(batch_size, 5, dtype = torch.float, device=self.device)
             word_softmax_projection[:,2] = 1. # beginning of sentence value is 2, set it  #XXX
             
             decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
             decoder_output = decoder_output[-1].permute(1,0,2) 
                             
             loss = 0             
             print_example = True
             example_array = [2]
             
             for i in range(max_seq_len_y): 
                 #print("\t Decoder step {}/{}".format(i, max_seq_len_y))                        
                 _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                 decoder_input = decoder_input.unsqueeze(1)          
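                 # at eval time the decoder is fed its own previous (greedy) prediction; no teacher forcing here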
                 
                 decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, z)                    
                 word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                 if print_example:                        
                     _, mi = word_softmax_projection[0].max(0)
                     example_array.append(mi.item())
                     
                 target_y = y[:,i] # select from y the ith column and shape as an array                    
                 loss += self.criterion(word_softmax_projection, target_y) 
             
             loss /= batch_size
             KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
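             # closed-form KL divergence between the approximate posterior N(mu, sigma^2) and the standard normal prior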
             loss += KLD            
             
             total_loss += loss.data.item()    
             
             #print("\t\t\t Eval Loss: {}".format(loss.data.item()))
             if print_example:
                 print_example = False 
                 print()    
                 print("\n\n----- X:")
                 print(" ".join([self.src_i2w[str(wi.data.item())] for wi in x[0]]))                                            
                 print("----- Y:")
                 print(" ".join([self.tgt_i2w[str(wi.data.item())] for wi in y[0]]))                    
                 print("----- OUR PREDICTION:")
                 print(" ".join([self.tgt_i2w[str(wi)] for wi in example_array]))
                 print()
                 print(" ".join([str(wi.data.item()) for wi in y[0]]))
                 print(" ".join([str(wi) for wi in example_array]))
                 print()
                 #self.writer.add_text('EvalText', " ".join([self.i2w[str(wi.data.item())] for wi in y[0]]) + " --vs-- "+" ".join([self.i2w[str(wi)] for wi in example_array]), self.epoch)                    
     
     pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(valid_loader))) 
 
     return total_loss/len(valid_loader)
Example #3
    def _eval(self, valid_loader):                
        self.encoder.eval()
        self.decoder.eval()
        self.attention.eval()            
         
        pbar = ProgressBar()
        pbar.set(total_steps=len(valid_loader)) 
       
        counter = 0 
        total_loss = 0.
        with torch.no_grad():
            for counter, (x, y) in enumerate(valid_loader):                
                #if counter > 5:
                #    break
                pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, eval average loss \033[93m{:.6f}\033[0m ... ".format(self.epoch, counter, len(valid_loader), total_loss/(counter+1)))   
                
                batch_size = x.size(0)
                max_seq_len_x = x.size(1)
                max_seq_len_y = y.size(1)

                loss = 0
                                
                if(self.train_on_gpu):
                    x, y = x.cuda(), y.cuda()
                
                encoder_hidden = self.encoder.init_hidden(batch_size)
                decoder_hidden = self.decoder.init_hidden(batch_size)
       
                encoder_output, encoder_hidden = self.encoder(x, encoder_hidden) 
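                # encoder_output is batch_size x max_seq_len_x x encoder_hidden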
                word_softmax_projection = torch.zeros(batch_size, 5, dtype = torch.float, device=self.device)
                word_softmax_projection[:,2] = 1. # beginning of sentence value is 2, set it  #XXX
                
                decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
                decoder_output = decoder_output[-1].permute(1,0,2) 
                                
                loss = 0             
                print_example = True
                example_array = []
                
                for i in range(max_seq_len_y): 
                    #print("\t Decoder step {}/{}".format(i, max_seq_len_y))                        
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1)                                           
                    context = self.attention(encoder_output, decoder_output)
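                    # context is batch_size x encoder_hidden_dim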
                    
                    decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, context)                    
                    word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                    if print_example:                        
                        _, mi = word_softmax_projection[0].max(0)
                        example_array.append(mi.item())
                        
                    target_y = y[:,i] # select from y the ith column and shape as an array                    
                    loss += self.criterion(word_softmax_projection, target_y) 
                
                total_loss += loss.data.item() / batch_size    
                
                #print("\t\t\t Eval Loss: {}".format(loss.data.item()))
                if print_example:
                    print_example = False 
                    print()    
                    print("\n\n----- X:")
                    print(" ".join([self.src_i2w[str(wi.data.item())] for wi in x[0]]))                                            
                    print("----- Y:")
                    print(" ".join([self.tgt_i2w[str(wi.data.item())] for wi in y[0]]))                    
                    print("----- OUR PREDICTION:")
                    print(" ".join([self.tgt_i2w[str(wi)] for wi in example_array]))
                    print()
                    print(" ".join([str(wi.data.item()) for wi in y[0]]))
                    print(" ".join([str(wi) for wi in example_array]))
                    print()
            
        self.log.var("Loss|Train loss|Validation loss", self.epoch, total_loss, y_index=1)
        self.log.draw()        
        
        pbar.update(text="Epoch {:d}, eval done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss/len(valid_loader))) 
    
        return total_loss/len(valid_loader)
Example #4
    def _train_epoch(self, train_loader):                       
        self.epoch += 1
        self.encoder.train()
        self.decoder.train()
        self.vae.train()        
        
        #encoder_hidden = self.encoder.init_hidden(batch_size)
        #decoder_hidden = self.decoder.init_hidden(batch_size)
        total_loss = 0.
        pbar = ProgressBar()
        pbar.set(total_steps=len(train_loader)) 
        
        for counter, (x, y) in enumerate(train_loader):
            batch_size = x.size(0)
            max_seq_len_x = x.size(1) # x is 64 x 399 (variable)
            max_seq_len_y = y.size(1) # y is 64 x variable
            
            pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, train average loss \033[93m{:.6f}\033[0m (bs/mx/my = {}/{}/{}) ... ".format(self.epoch, counter, len(train_loader), total_loss/(counter+1), batch_size, max_seq_len_x, max_seq_len_y))                         
                        
            #if counter > 1:               
            #    break                
            if counter % 500 == 0 and counter > 0:
                self.save_checkpoint("last")
                
            loss = 0   
            """            
            if x.size(0) != batch_size:
                print("\t Incomplete batch, skipping.")
                continue
            """
            # print(x.size()) # x is a 64 * 399 tensor (batch*max_seq_len_x)               

            if(self.train_on_gpu):
                x, y = x.cuda(), y.cuda()

            encoder_hidden = self.encoder.init_hidden(batch_size)
            decoder_hidden = self.decoder.init_hidden(batch_size)        
            #print(decoder_hidden[0].size())
            
            # zero grads in optimizer
            self.optimizer.zero_grad()                
            
            # encoder
            # x is batch_size x max_seq_len_x            
            encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)             
            # encoder_output is batch_size x max_seq_len_x x encoder_hidden (where encoder_hidden is double because it is bidirectional)
            # print(encoder_output.size())
            
            # take last state of encoder as encoder_last_output # not necessary when using attention                
            encoder_last_output = torch.zeros(batch_size, self.encoder_hidden_dim*2, device=self.device) # shape (batch_size, encoder_hidden_dim*2)
            for j in range(batch_size):
                encoder_last_output[j] = encoder_output[j][-1]
            # encoder_last_output is last state of the encoder batch_size * encoder_hidden_dim
        
            # VAE
            z, mu, logvar = self.vae(encoder_last_output) # all are (batch_size, encoder_hidden_dim)
            
            # create first decoder output for initial attention call, extract from decoder_hidden
            decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
            # it should look like batch_size x 1 x decoder_hidden_size, so transform it
            decoder_output = decoder_output[-1].permute(1,0,2) 
            #print(decoder_output.size())
                
            recon_loss = 0                 
            for i in range(max_seq_len_y): 
                #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                
                # teacher forcing (or this is the first word, which is always start-of-sentence)
                if random.random()<=self.teacher_forcing_ratio or i==0:
                    decoder_input = torch.zeros(batch_size, 1, dtype = torch.long, device=self.device) # 1 in middle is because lstm expects (batch, seq_len, input_size): 
                    for j in range(batch_size):
                        decoder_input[j]=y[j][i]                
                    #print(decoder_input.size()) # batch_size x 1                            
                else: # feed own previous prediction extracted from word_softmax_projection
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1) # from batch_size to batch_size x 1                    
                    #print(decoder_input.size()) # batch_size x 1                            

                
                # z context is batch_size * encoder_hidden_dim            
                decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, z)
                # first, reduce word_softmax_projection which is torch.Size([64, 1, 50004]) to 64 * 50004
                word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                
                # now, select target y
                # y looks like batch_size * max_seq_len_y : tensor([[    2, 10890, 48108,  ...,     0,     0,     0], ... ... ..
                target_y = y[:,i] # select from y the ith column and shape as an array 
                # target_y now looks like [ 10, 2323, 5739, 24, 9785 ... ] of size 64 (batch_size)
                #print(word_softmax_projection.size())
                #print(target_y.size())
                recon_loss += self.criterion(word_softmax_projection, target_y)
                # end decoder individual step
                
            global_minibatch_step = (self.epoch-1)*len(train_loader)+counter   
            #print("epoch {}, counter {}, global_minibatch_step {}".format(self.epoch, counter, global_minibatch_step))        
            
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, recon_loss.data.item(), y_index=1)
            
            KL_weight = self.vae.kl_anneal_function(step=global_minibatch_step, k=self.vae_kld_anneal_k, x0=self.vae_kld_anneal_x0, anneal_function=self.vae_kld_anneal_function)
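            # KL_weight anneals the KLD term over global steps (commonly done to keep the latent code from collapsing early in training)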
            self.log.var("KLD weight", global_minibatch_step, KL_weight, y_index=0)
            
            KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
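            # unweighted KL divergence term of the VAE loss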
            self.log.var("KLD", global_minibatch_step, KLD.data.item(), y_index=0)
            
            KLD *= KL_weight
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, KLD.data.item(), y_index=2)
            
            loss = recon_loss + KLD
            self.log.var("train_loss|Total loss|Recon loss|Weighted KLD loss", global_minibatch_step, loss.data.item(), y_index=0)
                        
            total_loss += loss.data.item() / batch_size 
            loss.backward() # calculate the loss and perform backprop
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.vae.parameters(), self.gradient_clip)
            self.optimizer.step()
            
            #self.writer.add_scalar('Train/Loss', loss.data.item())            
            #break
            self.log.draw()
            self.log.draw(last_quarter = True)
            # end batch
        
        #end epoch
        pbar.update(text="Epoch {:d}, train done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss))  #/len(train_loader)
        
        
        return total_loss #/len(train_loader)
Example #5
    def _train_epoch(self, train_loader):                       
        self.epoch += 1
        self.encoder.train()
        self.decoder.train()
        self.attention.train()        
        
        total_loss = 0.
        pbar = ProgressBar()
        pbar.set(total_steps=len(train_loader)) 
        
        for counter, (x, y) in enumerate(train_loader):
            batch_size = x.size(0)
            max_seq_len_x = x.size(1) # x is 64 x 399 (variable)
            max_seq_len_y = y.size(1) # y is 64 x variable

            
            pbar.update(progress=counter, text="Epoch {:d}, progress {}/{}, train average loss \033[93m{:.6f}\033[0m (mx/my = {}/{}) ... ".format(self.epoch, counter, len(train_loader), total_loss/(counter+1), max_seq_len_x, max_seq_len_y))                         
                        
            #if counter > 1:               
            #    break                
            if counter % 1000 == 0 and counter > 0:
                self.save_checkpoint("last")
            
            
            loss = 0            
            # print(x.size()) # x is a 64 * 399 tensor (batch*max_seq_len_x)               

            if(self.train_on_gpu):
                x, y = x.cuda(), y.cuda()
            
            encoder_hidden = self.encoder.init_hidden(batch_size)
            decoder_hidden = self.decoder.init_hidden(batch_size)
            #print(decoder_hidden[0].size())
            
            # zero grads in optimizer
            self.optimizer.zero_grad()                
            
            # encoder
            # x is batch_size x max_seq_len_x            
            encoder_output, encoder_hidden = self.encoder(x, encoder_hidden)             
            # encoder_output is batch_size x max_seq_len_x x encoder_hidden
            #print(encoder_output.size())
            
            # create first decoder output for initial attention call, extract from decoder_hidden
            decoder_output = decoder_hidden[0].view(self.decoder_n_layers, 1, batch_size, self.decoder_hidden_dim) #torch.Size([2, 1, 64, 512])
            # it should look like batch_size x 1 x decoder_hidden_size, so transform it
            decoder_output = decoder_output[-1].permute(1,0,2) 
            #print(decoder_output.size())
                
            loss = 0                 
            for i in range(max_seq_len_y): # decoder_hidden is (re)initialized per batch above
                #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                
                # teacher forcing (or this is the first word, which is always start-of-sentence)
                if random.random()<=self.teacher_forcing_ratio or i==0:
                    decoder_input = torch.zeros(batch_size, 1, dtype = torch.long, device=self.device) # 1 in middle is because lstm expects (batch, seq_len, input_size): 
                    for j in range(batch_size):
                        decoder_input[j]=y[j][i]                
                    #print(decoder_input.size()) # batch_size x 1                            
                else: # feed own previous prediction extracted from word_softmax_projection
                    _, decoder_input = word_softmax_projection.max(1) # no need for values, just indexes 
                    decoder_input = decoder_input.unsqueeze(1) # from batch_size to batch_size x 1                    
                    #print(decoder_input.size()) # batch_size x 1                            

                # temporary debugging hook for printing attention
                if counter == 1:
                    self.attention.should_print = False # set to True to record the attention matrix
                    #print("\t Decoder step {}/{}".format(i, max_seq_len_y))    
                else:
                    self.attention.should_print = False
                    self.attention.att_mat = []
                context = self.attention(encoder_output, decoder_output)
                
                # context is batch_size * encoder_hidden_dim            
                decoder_output, decoder_hidden, word_softmax_projection = self.decoder.forward_step(decoder_input, decoder_hidden, context)
                # first, reduce word_softmax_projection which is torch.Size([64, 1, 50004]) to 64 * 50004
                word_softmax_projection = word_softmax_projection.squeeze(1) # eliminate dim 1
                
                # now, select target y
                # y looks like batch_size * max_seq_len_y : tensor([[    2, 10890, 48108,  ...,     0,     0,     0], ... ... ..
                target_y = y[:,i] # select from y the ith column and shape as an array 
                # target_y now looks like [ 10, 2323, 5739, 24, 9785 ... ] of size 64 (batch_size)
                #print(word_softmax_projection.size())
                #print(target_y.size())
                loss += self.criterion(word_softmax_projection, target_y) # ignore_index is not set because we want index 0 (padding) to count towards the error too
            
            # remove me, attention printing
            """if counter == 1:
                fig = plt.figure(figsize=(12, 10))
                sns.heatmap(self.attention.att_mat,cmap="gist_heat")                
                plt.tight_layout()            
                fig.savefig('img/__'+str(self.epoch)+'.png')
                plt.clf()
            """    
            total_loss += loss.data.item()/batch_size
            loss.backward() # calculate the loss and perform backprop
            
            # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.gradient_clip)
            nn.utils.clip_grad_norm_(self.attention.parameters(), self.gradient_clip)
            self.optimizer.step()
            # end batch
            
        # end current epoch
        pbar.update(text="Epoch {:d}, train done, average loss \033[93m{:.6f}\033[0m".format(self.epoch, total_loss)) 
        self.log.var("Loss|Train loss|Validation loss", self.epoch, total_loss, y_index=0)
        self.log.draw()
        
        return total_loss