def get_model(input_size, output_size, train_config, saved_data):
    # Rebuild the Transformer with the same hyperparameters it was
    # trained with, then restore the trained weights from the
    # checkpoint dictionary passed in by the caller.
    model = Transformer(
        input_size,
        train_config.hidden_size,
        output_size,
        n_splits=train_config.n_splits,
        n_enc_blocks=train_config.n_layers,
        n_dec_blocks=train_config.n_layers,
        dropout_p=train_config.dropout,
    )
    model.load_state_dict(saved_data['model'])
    model.eval()  # inference only: disable dropout

    return model
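# A minimal usage sketch for get_model(). The checkpoint path and the
# key names 'config', 'src_vocab', and 'tgt_vocab' are assumptions
# about how the checkpoint was saved, not part of the original code.
def load_pretrained(fn='model.pth'):
    saved_data = torch.load(fn, map_location='cpu')

    train_config = saved_data['config']         # assumed key name
    input_size = len(saved_data['src_vocab'])   # assumed key name
    output_size = len(saved_data['tgt_vocab'])  # assumed key name

    return get_model(input_size, output_size, train_config, saved_data)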
        # Inside data_gen(...): force BOS (id 1) at the first position
        # and padding (id 0) at the last two, move the batch to the
        # target device, and yield it as both source and target, since
        # this is a copy task.
        data[:, 0] = 1
        data[:, -2:] = 0
        data = data.to(device)

        yield Batch(data, data)


if __name__ == '__main__':
    n_vocab = 10

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # device = torch.device('cpu')

    model = Transformer(n_vocab).to(device)
    criterion = LabelSmoothing(n_vocab, 0.)
    optimizer = scheduled_adam_optimizer(model)

    for epoch in range(10):
        print("Epoch: {}".format(epoch))
        data_iter = data_gen(n_vocab, 128, 10000, device)
        run_epoch(data_iter, model, criterion, optimizer)

    # Greedy decoding: start from BOS and repeatedly feed the tokens
    # decoded so far back into the decoder, appending the most likely
    # next token at each step.
    in_seq = torch.LongTensor([[1, 7, 5, 2, 3, 4, 5, 0]]).to(device)
    out_seq = torch.zeros([1, 20], dtype=torch.int64).to(device)
    out_seq[:, 0] = 1  # BOS

    model.eval()
    with torch.no_grad():
        for i in range(19):
            pred = model(in_seq, out_seq[:, :i + 1])
            out_seq[0, i + 1] = torch.argmax(pred, dim=2)[0][-1]

    print(out_seq)
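# A hedged variant of the greedy loop above, factored into a helper.
# The eos_idx parameter is an assumption: the toy copy task never
# defines an explicit end-of-sequence token, so by default this decodes
# for the full max_len steps, exactly like the inline loop above.
def greedy_decode(model, in_seq, max_len=20, bos_idx=1, eos_idx=None):
    out_seq = torch.zeros(
        [1, max_len], dtype=torch.int64, device=in_seq.device
    )
    out_seq[:, 0] = bos_idx

    model.eval()
    with torch.no_grad():
        for i in range(max_len - 1):
            pred = model(in_seq, out_seq[:, :i + 1])
            next_token = torch.argmax(pred, dim=2)[0, -1]
            out_seq[0, i + 1] = next_token
            # Stop early once EOS is produced, if one is defined.
            if eos_idx is not None and next_token.item() == eos_idx:
                break

    return out_seq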