if __name__ == '__main__':
    # Script entry point: read the options, build the data iterators and
    # the language model, set up the loss, then hand off to the trainer.
    config = define_argparser()

    loader = DataLoader(
        config.train,
        config.valid,
        batch_size=config.batch_size,
        device=config.gpu_id,
        max_length=config.max_length,
    )
    # The vocabulary size fixes both the model's first argument and the
    # length of the per-class loss weight vector below.
    vocab_size = len(loader.text.vocab)

    model = LM(
        vocab_size,
        word_vec_dim=config.word_vec_dim,
        hidden_size=config.hidden_size,
        n_layers=config.n_layers,
        dropout_p=config.dropout,
        max_length=config.max_length,
    )

    # PAD is trivially easy to predict, so it gets zero weight and does not
    # contribute to the loss; every other class keeps weight 1.
    loss_weight = torch.ones(vocab_size)
    loss_weight[data_loader.PAD] = 0
    # NOTE(review): `size_average=False` is the legacy way to request a summed
    # (not averaged) loss; newer PyTorch spells it `reduction='sum'` -- confirm
    # the targeted PyTorch version before switching.
    criterion = nn.NLLLoss(weight=loss_weight, size_average=False)

    print(model)
    print(criterion)

    # A negative gpu_id leaves everything where it was created; otherwise
    # both the model and the criterion are moved to the selected GPU.
    if config.gpu_id >= 0:
        for component in (model, criterion):
            component.cuda(config.gpu_id)

    trainer.train_epoch(
        model,
        criterion,
        loader.train_iter,
        loader.valid_iter,
        config,
    )