hidden_size=config.hidden_size, n_layers=config.n_layers, dropout_p=config.dropout, max_length=config.max_length) # Let criterion cannot count PAD as right prediction, because PAD is easy to predict. loss_weight = torch.ones(len(loader.text.vocab)) loss_weight[data_loader.PAD] = 0 criterion = nn.NLLLoss(weight=loss_weight, size_average=False) print(model) print(criterion) if config.gpu_id >= 0: model.cuda(config.gpu_id) criterion.cuda(config.gpu_id) if config.n_epochs > 0: trainer.train_epoch(model, criterion, loader.train_iter, loader.valid_iter, config) if config.gen > 0: total_gen = 0 while total_gen < config.gen: current_gen = min(config.batch_size, config.gen - total_gen) _, indice = model.search(batch_size=current_gen) total_gen += current_gen lines = to_text(indice, loader.text.vocab) print('\n'.join(lines))