def evaluate(opts, device, corpus, model, criterion, epoch):
    """
    Parameters
    ----------
    opts: command line arguments
    device: device type
    corpus: Corpus
    model: Model
    criterion: loss function
    epoch: current epoch

    Return
    ------
    total_loss: float
    """
    epoch_start_time = time.time()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    val_loss = Metric('val_loss')
    # Do not track gradients during evaluation.
    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(corpus.valid,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=True)):
            hidden = model.init_hidden(batch)
            # Cut the computation graph (Initialize)
            hidden = models.repackage_hidden(hidden)
            # input: LongTensor of token_ids [seq_len, batch_size]
            input = model.batch2input(batch, device)
            # target_flat: LongTensor of token_ids [seq_len*batch_size]
            target_flat = model.batch2flat(batch, device)
            # clear previous gradients
            model.zero_grad()
            # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
            output, hidden = model(input, hidden)
            # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
            output_flat = output.view(-1, output.shape[2])
            val_loss.update(criterion(output_flat, target_flat))

    total_loss = val_loss.avg.item()

    if opts.verbose == 1:  # verbose is assumed to be a command-line option in opts
        print('-' * 89)
        try:
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           total_loss, math.exp(total_loss)))
        except OverflowError:
            print("Warning: math error")
        print('-' * 89)
    return total_loss

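
# The Metric helper used above is not defined in this file; from its usage it
# is assumed to be a simple running average that exposes .update() and a
# tensor-valued .avg (evaluate() calls .avg.item()). A minimal sketch under
# that assumption -- not necessarily the project's own implementation:
class RunningAverageMetric:
    """Running average of scalar loss tensors (hypothetical stand-in for Metric)."""

    def __init__(self, name):
        self.name = name
        self.sum = torch.tensor(0.0)
        self.count = 0

    def update(self, val):
        # Detach so the evaluation graph is not kept alive across batches.
        self.sum += val.detach().cpu()
        self.count += 1

    @property
    def avg(self):
        # Returns a tensor, so callers can use .item() as evaluate() does.
        return self.sum / max(self.count, 1)
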
def pred2batch(dictionary, model, word_ids, bsz, device):
    """Build a single-step input batch (and a fresh hidden state) from word ids."""
    # One entry per batch row: [[word_id], [char_ids of the characters in that word]]
    input_data = [[[word_ids[sid]],
                   [[dictionary.char_conv2id(char)
                     for char in dictionary.conv2word(word_ids[sid])]]]
                  for sid in range(bsz)]
    for input_batch in data.data2batch(input_data, dictionary, bsz):
        hidden = model.init_hidden(input_batch)
        # Cut the computation graph (Initialize)
        hidden = models.repackage_hidden(hidden)
        input_dict = model.word2input(input_batch, device)
        # input_data fits into a single batch, so return on the first iteration.
        return input_dict, hidden

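
# A possible way to drive pred2batch() for one prediction step, assuming the
# recurrent forward(input_dict, hidden) signature used in evaluate() above.
# This wrapper and its name are illustrative, not part of the repository:
def predict_next_logits(dictionary, model, context_word_ids, device):
    """Return the model's next-token logits, one row per context word id."""
    bsz = len(context_word_ids)
    with torch.no_grad():
        input_dict, hidden = pred2batch(dictionary, model, context_word_ids,
                                        bsz, device)
        # output: [seq_len=1, bsz, ntoken]
        output, hidden = model(input_dict, hidden)
    # Logits for the last (and only) time step of each row.
    return output[-1]
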
def evaluate(opts, corpus, input_texts, model, criterion, device):
    """
    Parameters
    ----------
    opts: command line arguments
    corpus: Corpus
    input_texts: texts to score
    model: Model
    criterion: loss function (expected to return per-token losses)
    device: device type

    Return
    ------
    results: list of per-sentence perplexities
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    results = []
    # Do not track gradients during evaluation.
    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(input_texts,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=False)):
            hidden = model.init_hidden(batch)
            # Cut the computation graph (Initialize)
            hidden = models.repackage_hidden(hidden)
            # input: LongTensor of token_ids [seq_len, batch_size]
            input = model.batch2input(batch, device)
            seq_len = input["word"].shape[0]
            batch_size = input["word"].shape[1]
            # target_flat: LongTensor of token_ids [seq_len*batch_size]
            target_flat = model.batch2flat(batch, device)
            # clear previous gradients
            model.zero_grad()
            # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
            output, hidden = model(input, hidden)
            # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
            output_flat = output.view(-1, output.shape[2])
            # batch_loss: FloatTensor of per-token losses [seq_len*batch_size]
            batch_loss = criterion(output_flat, target_flat)
            # batch_loss: FloatTensor of per-token losses [seq_len, batch_size]
            batch_loss = batch_loss.reshape(seq_len, batch_size)
            # batch_loss: FloatTensor of mean loss per sentence [batch_size]
            batch_loss = torch.mean(batch_loss, 0)
            for sent_loss in batch_loss:
                ppl = math.exp(sent_loss.item())
                results.append(ppl)
    return results

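
# The sentence-level evaluate() above only works if criterion returns one loss
# per token rather than a pre-averaged scalar. A typical setup (assumed here,
# not confirmed by the repository) would construct two criteria:
criterion_per_token = torch.nn.CrossEntropyLoss(reduction="none")  # for per-sentence ppl
criterion_mean = torch.nn.CrossEntropyLoss()                       # for train/validation loops
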
def train(opts, device, corpus, model, criterion, optimizer, lr, epoch):
    """Train for one epoch (variant without a recurrent hidden state carried across batches)."""
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    for batch_id, batch in enumerate(
            data.data2batch(corpus.train,
                            corpus.dictionary,
                            opts.batch_size,
                            flag_shuf=True)):
        input = model.batch2input(batch, device)
        target = model.batch2target(batch, device)
        # clear previous gradients
        model.zero_grad()
        output = model(input)
        loss = criterion(output, target)
        # Do back propagation
        loss.backward()
        # Update weights
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        if batch_id % opts.log_interval == 0 and batch_id > 0:
            cur_loss = total_loss / opts.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch_id,
                    len(corpus.train) // opts.batch_size, lr,
                    elapsed * 1000 / opts.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

def evaluate(opts, device, corpus, model, criterion, epoch):
    """Evaluate on the validation set and return the average loss."""
    epoch_start_time = time.time()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.0
    # Do not track gradients during evaluation.
    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(corpus.valid,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=True)):
            input = model.batch2input(batch, device)
            target = model.batch2target(batch, device)
            # clear previous gradients
            model.zero_grad()
            output = model(input)
            total_loss += criterion(output, target).item()
            total_num = batch_id + 1
    total_loss /= total_num

    print('-' * 89)
    try:
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         total_loss, math.exp(total_loss)))
    except OverflowError:
        print("Warning: math error")
    print('-' * 89)
    return total_loss

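
# A minimal driver showing how the train()/evaluate() pair above is typically
# wired together per epoch. This is a sketch: the option names opts.lr,
# opts.epochs and opts.save, the SGD optimizer, and the annealing schedule are
# assumptions, not taken from the repository's actual main script:
def run_training(opts, device, corpus, model, criterion):
    """Hypothetical epoch loop tying train() and evaluate() together."""
    lr = opts.lr  # assumed option
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    best_val_loss = float("inf")
    for epoch in range(1, opts.epochs + 1):  # assumed option
        train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
        val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), opts.save)  # assumed option
        else:
            # Anneal the learning rate when validation loss stops improving.
            lr /= 4.0
            for group in optimizer.param_groups:
                group["lr"] = lr
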
def train(opts, device, corpus, model, criterion, optimizer, lr, epoch):
    """
    Parameters
    ----------
    opts: command line arguments
    device: device type
    corpus: Corpus
    model: Model
    criterion: loss function
    optimizer: optimizer
    lr: learning rate (float)
    epoch: current epoch
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    for batch_id, batch in enumerate(
            data.data2batch(corpus.train,
                            corpus.dictionary,
                            opts.batch_size,
                            flag_shuf=True)):
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to the start of the dataset.
        # batch[0].shape[1]: nbatch, hidden: [nlayer, nbatch, nhid]
        hidden = model.init_hidden(batch)
        # Cut the computation graph (Initialize)
        hidden = models.repackage_hidden(hidden)
        # input: LongTensor of token_ids [seq_len, batch_size]
        input = model.batch2input(batch, device)
        # target_flat: LongTensor of token_ids [seq_len*batch_size]
        target_flat = model.batch2flat(batch, device)
        # clear previous gradients
        model.zero_grad()
        # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
        output, hidden = model(input, hidden)
        # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
        output_flat = output.view(-1, output.shape[2])
        # Calculate the mean of all token losses.
        # loss: scalar tensor
        loss = criterion(output_flat, target_flat)
        # Do back propagation
        loss.backward()
        # Gradient clipping:
        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clip)
        # Update weights
        # (plain SGD alternative:
        #     for p in model.parameters():
        #         p.data.add_(p.grad.data, alpha=-lr)
        # )
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()
        if batch_id % opts.log_interval == 0 and batch_id > 0:
            cur_loss = total_loss / opts.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch_id,
                    len(corpus.train) // opts.batch_size, lr,
                    elapsed * 1000 / opts.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

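
# models.repackage_hidden is called above to detach hidden states between
# batches. It is assumed to follow the standard PyTorch word_language_model
# recipe; a sketch of that recipe (not necessarily the repo's own code):
def repackage_hidden_sketch(h):
    """Wrap hidden states in new tensors detached from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    # LSTMs return a (h, c) tuple, so recurse over tuples.
    return tuple(repackage_hidden_sketch(v) for v in h)
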