import torch
import torch.nn as nn
import time
import math
import sys
import utils as d2l

# Corpus indices, the two vocabulary lookup tables and the vocabulary size
# are all produced by the project-local data loader.
corpus_indices, char_to_idx, idx_to_char, vocab_size = d2l.load_data_jay_lyrics()

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')


def one_hot(x, n_class, dtype=torch.float32):
    """Return the one-hot encoding of a 1-D tensor of class indices.

    Args:
        x: Tensor whose first dimension has length n; every entry is a class
            index. It is cast to int64 and flattened into a column.
        n_class: Width of the encoding (number of classes).
        dtype: Element type of the returned tensor.

    Returns:
        A tensor of shape (n, n_class), allocated on ``x.device``, in which
        row i holds a 1 at column ``x[i]`` and 0 everywhere else.
    """
    # scatter_ needs an int64 index with the same rank as the target: (n, 1).
    column_index = x.long().view(-1, 1)
    encoded = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    # scatter_ works in place and hands back the tensor it modified.
    return encoded.scatter_(1, column_index, 1)


# Small demonstration: encode indices 0 and 2, then show the encoding, its
# shape (2, vocab_size) and the per-row sums (one 1 per row).
x = torch.tensor([0, 2])
x_one_hot = one_hot(x, vocab_size)
print(x_one_hot, x_one_hot.shape, x_one_hot.sum(dim=1), sep='\n')


def to_onehot(X, n_class):
    """One-hot encode a 2-D index tensor column by column.

    Args:
        X: Tensor of shape (rows, cols) holding class indices.
        n_class: Width of each encoding.

    Returns:
        A list with one entry per column of ``X``; each entry is the
        (rows, n_class) tensor produced by ``one_hot`` for that column.
    """
    return [one_hot(column, n_class) for column in X.unbind(dim=1)]


X = torch.arange(10).reshape(2, 5)  # two rows, five columns: 0..9
# NOTE(review): everything down to the second train_and_predict_rnn_pytorch
# call looks like the tail of a definition whose header is outside this view
# (presumably the train() invoked by the entry point below). The indentation
# is reconstructed on that assumption -- confirm against the full file.
    # Despite its name, `gru_layer` holds a 6-layer nn.LSTM at this point.
    gru_layer = nn.LSTM(input_size=vocab_size, hidden_size=num_hiddens, num_layers=6)
    model = RNNModel(gru_layer, vocab_size).to(device)
    train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                                  corpus_indices, idx_to_char, char_to_idx,
                                  num_epochs, num_steps, lr, clipping_theta,
                                  batch_size, pred_period, pred_len, prefixes)
    # The log message reads "bidirectional recurrent neural network...".
    logging.info('双向循环神经网络...')
    # Same training run again, this time with a bidirectional GRU layer.
    gru_layer = nn.GRU(input_size=vocab_size, hidden_size=num_hiddens, bidirectional=True)
    model = RNNModel(gru_layer, vocab_size).to(device)
    train_and_predict_rnn_pytorch(model, num_hiddens, vocab_size, device,
                                  corpus_indices, idx_to_char, char_to_idx,
                                  num_epochs, num_steps, lr, clipping_theta,
                                  batch_size, pred_period, pred_len, prefixes)


# Script entry point: set up module-level state, then start training.
if __name__ == '__main__':
    # The log message reads "loading the dataset and initializing global
    # variables...".
    logging.info('加载数据集并初始化全局变量...')
    # Pick the GPU when available, otherwise the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # These names are bound at module level; train() takes no arguments, so
    # it presumably reads them as globals -- verify against its definition.
    (corpus_indices, char_to_idx, idx_to_char, vocab_size) = load_data_jay_lyrics()
    # Input and output widths both equal the vocabulary size; 256 hidden units.
    num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
    logging.info(f'will use: {device}')
    train()
import time import math import numpy as np import torch from torch import nn, optim import torch.nn.functional as F import utils device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print('载入数据') (corpus_indices, char_to_idx, idx_to_char, vocab_size) = utils.load_data_jay_lyrics() print('初始化模型参数') num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size print('will use', device) def get_params(): def _one(shape): ts = torch.tensor(np.random.normal(0, 0.01, size=shape), device=device, dtype=torch.float32) return torch.nn.Parameter(ts, requires_grad=True) def _three(): return (_one( (num_inputs, num_hiddens)), _one((num_hiddens, num_hiddens)), torch.nn.Parameter(torch.zeros(num_hiddens, device=device,
import time
import math
import numpy as np
import torch
from torch import nn, optim
import torch.nn.functional as F
import sys
sys.path.append("..")  # make the parent directory importable before loading utils
import utils as d2l

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Corpus indices, vocabulary lookup tables and vocabulary size from the
# project-local data loader.
(corpus_indices, char_to_idx, idx_to_char, vocab_size) = d2l.load_data_jay_lyrics()

# Input and output widths both equal the vocabulary size; 256 hidden units.
num_inputs, num_hiddens, num_outputs = vocab_size, 256, vocab_size
print('will use', device)

# Training hyperparameters.
num_epochs = 160
num_steps = 35
batch_size = 32
# Learning rate: take care when tuning this value. It is assigned exactly
# once so there is no stale value that never reaches training.
lr = 1e-2
clipping_theta = 1e-2

# Prediction settings: values are handed unchanged to the training helper
# together with the two prefix strings.
pred_period, pred_len, prefixes = 40, 50, ['分开', '不分开']

# Single-layer GRU over vocab_size-wide inputs, wrapped by the project's
# RNNModel and moved to the selected device.
gru_layer = nn.GRU(input_size=vocab_size, hidden_size=num_hiddens)
model = d2l.RNNModel(gru_layer, vocab_size).to(device)

d2l.train_and_predict_rnn_pytorch(
    model, num_hiddens, vocab_size, device,
    corpus_indices, idx_to_char, char_to_idx,
    num_epochs, num_steps, lr, clipping_theta,
    batch_size, pred_period, pred_len, prefixes,
)