import math
import sys
import time

import torch


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i, bptt)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)  # guard against exploding gradients
        optimizer.step()

        total_loss += loss.item()
        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            # No LR scheduler is used here, so log per-batch timing instead
            # of a learning rate.
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} |'.format(
                      epoch, batch, len(train_data) // bptt,
                      elapsed * 1000 / log_interval,
                      cur_loss, math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time()
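# get_batch() is called above but not defined in this snippet. A minimal
# sketch, assuming the batching scheme of the standard PyTorch
# word-language-model tutorial: `source` is a [seq_len, batch_size] tensor
# of token ids, and the targets are the inputs shifted one position forward.
def get_batch(source, i, bptt):
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]                         # [seq_len, batch]
    targets = source[i + 1:i + 1 + seq_len].reshape(-1)  # flattened for the CE loss
    return data, targets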
def evaluate(eval_model, data_source):
    eval_model.eval()  # Turn on the evaluation mode
    total_loss = 0.
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i, bptt)
            output = eval_model(data)
            output_flat = output.view(-1, ntokens)
            # Weight each chunk's mean loss by its length: the final chunk
            # can be shorter than bptt, so this yields a per-token average.
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
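# train_data and data_source are assumed to come from a tutorial-style
# batchify(): trim the corpus to a multiple of the batch size and fold it
# into batch_size parallel column streams. A sketch under that assumption;
# `device` is assumed to be defined elsewhere.
def batchify(data, bsz):
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)       # drop tokens that don't fit
    data = data.view(bsz, -1).t().contiguous()   # shape: [nbatch, bsz]
    return data.to(device)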
import numpy as np


# Variant of train() that additionally times the forward/backward/step work
# of every batch.
def train():
    model.train()  # Turn on the train mode
    total_loss = 0.
    start_time = time.time()
    ntokens = len(TEXT.vocab.stoi)  # legacy torchtext vocabulary
    times = []
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i, bptt)
        optimizer.zero_grad()
        start_time2 = time.time()
        output = model(data)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        end_time2 = time.time()
        times.append(end_time2 - start_time2)

        total_loss += loss.item()
        log_interval = 200
        if batch % log_interval == 0 and batch > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            # As above, no scheduler, so log ms/batch instead of lr. The
            # last field is the scaled value exp(loss * emsize / bptt) kept
            # from the original logging.
            print('| epoch {:3d} | {:5d}/{:5d} batches | '
                  'ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | '
                  'scaled ppl {:8.2f} |'.format(
                      epoch, batch, len(train_data) // bptt,
                      elapsed * 1000 / log_interval,
                      cur_loss, math.exp(cur_loss),
                      math.exp(cur_loss * emsize / bptt)))
            total_loss = 0
            start_time = time.time()
            # print('average time:', np.mean(times))
            times = []
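# A minimal driver loop, sketched on the assumption that `val_data` is
# batchified like `train_data`; the snippet above does not show how
# train()/evaluate() are invoked. train() reads the global `epoch` for
# logging, so it is bound by this loop.
epochs = 3  # hypothetical value, for illustration only
best_val_loss = float('inf')
for epoch in range(1, epochs + 1):
    epoch_start_time = time.time()
    train()
    val_loss = evaluate(model, val_data)
    print('-' * 89)
    print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
          'valid ppl {:8.2f}'.format(epoch, time.time() - epoch_start_time,
                                     val_loss, math.exp(val_loss)))
    print('-' * 89)
    best_val_loss = min(best_val_loss, val_loss)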