def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0:
            print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None \
            else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        # print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None \
            else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        # print(pointer_history)
        ###
        # Built-in cross entropy:
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy:
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)

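# The pointer-cache evaluate() above assumes a one_hot(idx, size) helper that
# returns a 1 x size one-hot row for a token index. A minimal sketch of such a
# helper follows; the exact signature in the original codebase may differ
# (e.g. it may take a cuda flag defaulting to args.cuda and wrap a Variable).
def one_hot(idx, size, cuda=False):
    # Single-row one-hot vector over the vocabulary.
    v = torch.zeros(1, size)
    v[0][idx] = 1
    return v.cuda() if cuda else v
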
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to the start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

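# A minimal sketch of the epoch driver these scripts typically share; the names
# n_epochs, val_data, and eval_batch_size are assumptions, and `epoch` is made
# global only because train() above reads it from the enclosing scope.
def run_epochs(n_epochs, val_data, eval_batch_size=10):
    global epoch
    for epoch in range(1, n_epochs + 1):
        train()
        val_loss = evaluate(val_data, eval_batch_size)
        print('| end of epoch {:3d} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
            epoch, val_loss, math.exp(val_loss)))
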
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

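# Nearly every function here detaches the hidden state via repackage_hidden().
# This is the canonical helper from the PyTorch word_language_model example;
# the older Variable-era repos use an equivalent Variable-based version.
def repackage_hidden(h):
    """Wrap hidden states in new Tensors to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)
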
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += len(data) * loss
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

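# Most evaluate()/train() variants share a get_batch(source, i, args, ...)
# helper. A minimal sketch in the spirit of the AWD-LSTM utils; the
# `evaluation` flag is vestigial here (in old PyTorch it set volatile=True,
# which torch.no_grad() has since replaced).
def get_batch(source, i, args, seq_len=None, evaluation=False):
    # `source` is a (tokens // bsz) x bsz tensor produced by batchify().
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target
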
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _ = model(data, hidden, reset_experience=True)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        if 'dnc' not in args.model.lower():
            hidden = repackage_hidden(hidden)
        else:
            hidden = repackage_hidden_dnc(hidden)
    return total_loss[0] / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        hidden_previous = hidden
        for tn_timestep in range(args.tn_timesteps):
            output, hidden = model(data, tn_m_hidden(hidden, hidden_previous), decoded=True)
            hidden_previous = hidden
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source_words, data_source_langs, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source_words.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source_words, i, args, evaluation=True)
        langData, langTargets = get_batch(data_source_langs, i, args, evaluation=True)
        output, _, hidden = model(data, langData, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source_words)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)  # ### culprit in memory leak
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus['words'].idx2word)
    for i in range(0, len(data_source['sentences']) - 1, batch_size):
        data, lengths, max_length, targets = get_batch(data_source, i, batch_size)
        cur_batch_size = data.size(1)
        hidden = model.init_hidden(cur_batch_size)
        output, hidden = model(data, lengths, max_length, hidden)
        loss = batch_size * criterion(output, targets.long())
        total_loss += loss
        hidden = repackage_hidden(hidden)
    # return total_loss.item() / batch_size
    return total_loss.item() / len(data_source['sentences'])

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        logits = model.decoder(output)
        # logProba = nn.functional.log_softmax(logits, dim=1)
        # pred_idxs = torch.argmax(logProba, dim=1)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN' and getattr(model, 'reset', None):
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = None
    mems = None
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            # output, hidden = model(data, hidden)
            output, hidden, mems = model(data, hidden, mems=mems, return_h=False)
            total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets.view(-1)).data
            if hidden is not None:
                hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def validate(model, val_loader, loss_fn, n_batchs, eval_batch_size=50):
    model.eval()
    batch_index = 0
    hidden = model.init_hidden(eval_batch_size)
    val_loss = 0
    counter = 0
    while batch_index < n_batchs - 1:
        X, y, seq_len = next(val_loader)
        out, hidden = model(X, hidden)
        val_loss += loss_fn(out, y)
        hidden = utils.repackage_hidden(hidden)
        batch_index += seq_len
        counter += 1
    return val_loss / counter

def evaluate(genotype, data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    logging.info('Genotype: {}'.format(genotype))
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden, genotype)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)

def evaluate_dist(x, support):
    y_vec = np.arange(x - 7.5, x + 7.5, 0.05)
    y_givenx = np.zeros(len(y_vec))
    for k, y in enumerate(y_vec):
        batch_size = 1
        # Creating test data - for x, y respectively
        data_xy = {
            'features': (torch.ones(50, batch_size, args.ndim) * x),
            'labels': (torch.ones(50, batch_size, args.ndim) * y)
        }
        test_data_f2 = batchify_f2(data_xy, batch_size, args)
        # setting uniform distribution for y~
        test_randata_f2 = batchify_f2(data_xy, batch_size, args, uniformly=True)
        # Turn on evaluation mode which disables dropout.
        model_f2.eval()
        hidden_f2 = model_f2.init_hidden(batch_size, model_f2.ncell)
        dist_vector = torch.FloatTensor()
        for i in range(0, test_data_f2.size(0) - 1, args.bptt):
            data_f2 = get_batch_dine(test_data_f2, i, args, evaluation=True)
            randata_f2 = get_batch_dine(test_randata_f2, i, args, evaluation=True)
            # forward
            out_f2, out_reused_f2, hidden_f2 = parallel_model_f2(data_f2, randata_f2, hidden_f2)
            # distribution calculation
            if i:
                dist = torch.exp(out_f2) * (1 / support)
                dist_vector = torch.cat((dist_vector, dist), 0)
            else:
                dist_vector = torch.exp(out_f2) * (1 / support)
            # hidden repackage
            hidden_f2 = repackage_hidden(hidden_f2)
        y_givenx[k] = torch.mean(dist_vector).detach().cpu().numpy()
    y_givenx = y_givenx / np.sum(y_givenx)
    return y_vec, y_givenx

def evaluate(model, test_data, test_data_loader, batch_size):
    model.eval()
    sum_losses_syll = AverageMeter()
    sum_losses_lyric = AverageMeter()

    """ Build Optimizers """
    # lr = 0.001
    # optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_criterion = nn.CrossEntropyLoss()  # Combines LogSoftmax() and NLLLoss() (negative log-likelihood loss)

    hidden = model.init_hidden(batch_size)
    for i, (syllable, lyric, melody, lengths) in enumerate(test_data_loader):
        local_bs = lyric.size(0)
        if local_bs != batch_size:
            continue

        """ Move dataloaders to GPU """
        syllable = syllable.to(device)
        lyric = lyric.to(device)
        melody = melody.to(device).float()
        lengths = lengths.to(device)

        """ Remove first melody feature """
        melody = melody[:, 1:]  # We don't really want to do this?

        """ Detach hidden layers """
        hidden = repackage_hidden(hidden)  # Function from the PyTorch NLP official example

        """ Feedforward """
        syllable_output, lyrics_output, hidden = model(lyric[:, :-1], melody, lengths, hidden)

        # Define packed padded targets
        target_syllable = pack_padded_sequence(syllable[:, 1:], lengths - 1, batch_first=True)[0]
        target_lyrics = pack_padded_sequence(lyric[:, 1:], lengths - 1, batch_first=True)[0]

        # Calculate and update cross-entropy loss
        loss_syllable = loss_criterion(syllable_output, target_syllable)
        sum_losses_syll.update(loss_syllable)
        loss_lyrics = loss_criterion(lyrics_output, target_lyrics)
        sum_losses_lyric.update(loss_lyrics)

    return sum_losses_lyric, sum_losses_syll

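# The lyric/syllable evaluate() above relies on an AverageMeter accumulator
# that it never defines. A minimal sketch of the usual pattern (the original
# class may track more state, e.g. the last value):
class AverageMeter:
    """Tracks a running sum and count so .avg is available at any point."""
    def __init__(self):
        self.sum, self.count = 0.0, 0

    def update(self, val, n=1):
        # float() also unwraps a 0-dim loss tensor.
        self.sum += float(val) * n
        self.count += n

    @property
    def avg(self):
        return self.sum / max(self.count, 1)
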
def train_dream():
    dr_model.train()  # turn on training mode for dropout
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(train_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(train_ub, dr_config.batch_size)):
        baskets, lens, _ = x
        dr_hidden = repackage_hidden(dr_hidden)  # repackage hidden state for RNN
        dr_model.zero_grad()  # optim.zero_grad()
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = bpr_loss(baskets, dynamic_user, dr_model.encode.weight, dr_config)
        loss.backward()

        # Clip to avoid gradient explosion
        torch.nn.utils.clip_grad_norm(dr_model.parameters(), dr_config.clip)

        # Parameter updating
        # manual SGD
        # for p in dr_model.parameters():  # Update parameters by -lr*grad
        #     p.data.add_(- dr_config.learning_rate, p.grad.data)
        # adam
        grad_norm = get_grad_norm(dr_model)
        previous_params = deepcopy(list(dr_model.parameters()))
        optim.step()

        total_loss += loss.data
        params = deepcopy(list(dr_model.parameters()))
        delta = get_weight_update(previous_params, params)
        weight_update_ratio = get_ratio_update(delta, params)

        # Logging
        if i % dr_config.log_interval == 0 and i > 0:
            elapsed = (time() - start_time) * 1000 / dr_config.log_interval
            cur_loss = total_loss[0] / dr_config.log_interval / dr_config.batch_size  # turn tensor into float
            total_loss = 0
            start_time = time()
            print('[Training]| Epochs {:3d} | Batch {:5d} / {:5d} | ms/batch {:02.2f} | Loss {:05.2f} |'.format(
                epoch, i, num_batchs, elapsed, cur_loss))
            writer.add_scalar('model/train_loss', cur_loss, epoch * num_batchs + i)
            writer.add_scalar('model/grad_norm', grad_norm, epoch * num_batchs + i)
            writer.add_scalar('model/weight_update_ratio', weight_update_ratio, epoch * num_batchs + i)

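# train_dream() above calls a repo-specific bpr_loss() that is not defined in
# these snippets. For orientation only, a generic Bayesian Personalized Ranking
# loss over positive/negative score pairs looks like the sketch below; this is
# an illustration of the technique, not the repo's implementation, and the
# function name and arguments are assumptions.
def bpr_pairwise_loss(pos_scores, neg_scores):
    # BPR maximizes log sigmoid(score(pos) - score(neg)) over sampled pairs,
    # so the loss to minimize is its negation.
    return -torch.nn.functional.logsigmoid(pos_scores - neg_scores).mean()
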
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    model_now = model.module
    criterion_now = criterion.module
    if args.model == 'QRNN':
        model_now.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_now.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model_now(data, hidden)
        criterion_now.replicate_weight_and_bias(model.module.decoder.weight, model.module.decoder.bias)
        total_loss += len(data) * criterion_now(hiddens=output, targets=targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source_src, data_source_trg, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_eval_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source_src.size(0) - 1, args.bptt):
        data, prev_targets, targets = get_batch(data_source_src, data_source_trg, i, args, evaluation=True)
        output, hidden = model(data, prev_targets, hidden)
        output_flat = output.view(-1, ntokens)
        total_eval_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_eval_loss.item() / len(data_source_src)

def evaluate(data_source, batch_size=10, temperature=1.0):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        # model.reset()
        total_loss = 0
        ntokens = len(corpus.dictionary)
        hidden = model.init_hidden(batch_size)
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args, evaluation=True)
            output, hidden = model(data, hidden)
            output = output / temperature
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)

def evaluate(data_source, use_dropout=False, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if not use_dropout:
        model.eval()
    else:
        model.train()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    # turn on eval mode at the end because we expect eval mode
    model.eval()
    return total_loss.item() / len(data_source)

def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args.bptt, evaluation=True)
        #> output has size seq_length x batch_size x vocab_size
        output, hidden = model(data, hidden)
        #> output_flat has size num_targets x vocab_size (batches are stacked together)
        #> ! important, otherwise softmax computation (e.g. with F.softmax()) is incorrect
        output_flat = output.view(-1, ntokens)
        # output_candidates_info(output_flat.data, targets.data)
        total_loss += len(data) * nn.CrossEntropyLoss()(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

def evaluate(test, batch_size=1):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(1)
    for i in range(0, test.size(0) - 1, 70):
        data, targets = get_batch(test, i, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return math.exp(total_loss[0] / len(test))

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        if isinstance(criterion, SplitCrossEntropyLoss):
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
        else:
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(args, model, data_iterator, criterion):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    example_count = 0
    hidden = model.init_hidden(args.batch_size)
    with torch.no_grad():
        for _, batch in tqdm(enumerate(data_iterator), total=len(data_iterator), disable=True):
            data, targets = batch.text.t(), batch.target.t().contiguous()
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, model.vocab_size)
            total_loss += len(data) * criterion(output_flat, targets.view(-1)).item()
            example_count += len(data)
            hidden = repackage_hidden(hidden)
    model.train()
    return total_loss / example_count

def init_hidden(self, batch_size: int, init_batch: Tensor) -> HiddenState:
    padded_aliases = init_batch
    batched_aliases = []
    max_len_aliases = padded_aliases.size(1)
    # Split the padded aliases into chunks of at most bptt_size tokens.
    for i in range((max_len_aliases - 1) // self.bptt_size + 1):
        batched_aliases.append(padded_aliases[:, i * self.bptt_size:(i + 1) * self.bptt_size])
    # Warm up the hidden state by running the RNN over each chunk in turn,
    # detaching between chunks so gradients don't flow across chunk boundaries.
    hidden = self.rnn.init_hidden(batch_size)
    for batched_alias in batched_aliases:
        batched_alias = batched_alias.to(self.device)
        if hidden is not None:
            hidden = repackage_hidden(hidden)
        alias_embeds = self.word_embed(batched_alias)
        _, hidden = self.rnn.forward(alias_embeds, hidden)
    return hidden

def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for source_sample, target_sample in zip(source_sampler, target_sampler):
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        with torch.no_grad():
            output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).item()
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model_lm.eval()
    # model_mlp.eval()
    if args.model == 'QRNN':
        model_lm.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model_lm.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets, _ = get_batch(data_source, i, args, evaluation=True)
        output, hidden, _, all_outputs = model_lm(data, hidden, return_h=True)
        # output = model_mlp(all_outputs[-1]) + all_outputs[-1]
        # output = output.view(output.size(0) * output.size(1), output.size(2))
        total_loss += len(data) * criterion(model_lm.decoder.weight, model_lm.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    # Jump forwards in bptt (70) increments
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # Gets the data and the target data to be produced
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(
            model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        if args.split_cross:
            total_loss += len(data) * criterion(model.decoder, output, targets).data
        else:
            total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i, args.bptt)
        sflg = corpus.dictionary.idx2sflg[data]
        # truncated BPTT
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model((data, sflg), hidden)
        # logging.info("sizes")
        # logging.info(model.emb_size)
        # logging.info(model.input_size)
        # logging.info(model.output_size)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        # Manual SGD update: p <- p - lr * grad
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-lr)

        total_loss += loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            logging.info(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch, len(train_data) // args.bptt, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

def evaluate(model, data_source, batch_size=10):
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.seq_len):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output = model(data, hidden)
        if isinstance(output, tuple):
            output, hidden = output
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        # Decode manually so the focal loss can see the raw logits.
        logits = torch.mm(model.decoder.weight, output.transpose(0, 1)).transpose(0, 1) + model.decoder.bias
        if args.loss == 'splitcrossentropy':
            total_loss += len(data) * criterion(
                model.decoder.weight, model.decoder.bias, output, targets).data
        elif args.loss == 'focal':
            total_loss += len(data) * criterion(logits, targets, test=True).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)

def evaluate_reorder_dream():
    dr_model.eval()
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)

    total_loss = 0
    start_time = time()
    num_batchs = ceil(len(test_ub) / dr_config.batch_size)
    for i, x in enumerate(batchify(test_ub, dr_config.batch_size, is_reordered=True)):
        baskets, lens, _, r_baskets, h_baskets = x
        dynamic_user, _ = dr_model(baskets, lens, dr_hidden)
        loss = reorder_bpr_loss(r_baskets, h_baskets, dynamic_user, dr_model.encode.weight, dr_config)
        dr_hidden = repackage_hidden(dr_hidden)
        total_loss += loss.data

    # Logging
    elapsed = (time() - start_time) * 1000 / num_batchs
    total_loss = total_loss[0] / num_batchs
    print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'.format(epoch, elapsed, total_loss))
    return total_loss

def train():
    assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

    # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = [model.init_hidden(args.small_batch_size) for _ in range(args.batch_size // args.small_batch_size)]
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + args.max_seq_len_delta)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        optimizer.zero_grad()

        start, end, s_id = 0, args.small_batch_size, 0
        while start < args.batch_size:
            cur_data, cur_targets = data[:, start: end], targets[:, start: end].contiguous().view(-1)

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to the start of the dataset.
            hidden[s_id] = repackage_hidden(hidden[s_id])

            log_prob, hidden[s_id], rnn_hs, dropped_rnn_hs = parallel_model(cur_data, hidden[s_id], return_h=True)
            raw_loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), cur_targets)

            loss = raw_loss
            # Activation Regularization
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
            # Temporal Activation Regularization (slowness)
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
            loss *= args.small_batch_size / args.batch_size
            total_loss += raw_loss.data * args.small_batch_size / args.batch_size
            loss.backward()

            s_id += 1
            start = end
            end = start + args.small_batch_size

            gc.collect()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        # total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            logging('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

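# The language-model train()/evaluate() variants above consume train_data and
# val_data laid out as (tokens // bsz) x bsz tensors. A minimal sketch of the
# standard batchify() that produces this layout (args.cuda is an assumption;
# note the DREAM train/evaluate functions use their own basket-level batchify,
# which this does not reproduce):
def batchify(data, bsz, args):
    # Trim off any tokens that don't cleanly divide into bsz columns.
    nbatch = data.size(0) // bsz
    data = data.narrow(0, 0, nbatch * bsz)
    # Reshape so each column is one independent, contiguous stream of text.
    data = data.view(bsz, -1).t().contiguous()
    return data.cuda() if args.cuda else data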