def train(train_features, test_features, train_labels, test_labels,
          num_epochs=400):
    loss = nn.MSELoss()
    input_shape = train_features.shape[-1]
    # Switch off the bias since we already catered for it in the polynomial
    # features
    net = nn.Sequential(nn.Linear(input_shape, 1, bias=False))
    batch_size = min(10, train_labels.shape[0])
    train_iter = d2l.load_array((train_features, train_labels.reshape(-1, 1)),
                                batch_size)
    test_iter = d2l.load_array((test_features, test_labels.reshape(-1, 1)),
                               batch_size, is_train=False)
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    animator = d2l.Animator(xlabel='epoch', ylabel='loss', yscale='log',
                            xlim=[1, num_epochs], ylim=[1e-3, 1e2],
                            legend=['train', 'test'])
    for epoch in range(num_epochs):
        d2l.train_epoch_ch3(net, train_iter, loss, trainer)
        if epoch == 0 or (epoch + 1) % 20 == 0:
            animator.add(epoch + 1, (evaluate_loss(net, train_iter, loss),
                                     evaluate_loss(net, test_iter, loss)))
    final_training_loss = evaluate_loss(net, train_iter, loss)
    print(f'final training loss: {final_training_loss}')
    print('weight:', net[0].weight.data.numpy())
    return final_training_loss
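# Usage sketch (an assumption, not part of the original snippet): generate
# d2l-style polynomial features and fit the first four dimensions
# (1, x, x^2/2!, x^3/3!). `max_degree`, `poly_features` and `labels` are
# hypothetical names; torch and d2l are assumed to be imported already.
import math
import numpy as np

max_degree = 20
n_train, n_test = 100, 100
true_w = np.zeros(max_degree)
true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])

features = np.random.normal(size=(n_train + n_test, 1))
poly_features = np.power(features, np.arange(max_degree).reshape(1, -1))
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)  # gamma(n) = (n - 1)!
labels = np.dot(poly_features, true_w) + np.random.normal(
    scale=0.1, size=n_train + n_test)
poly_features, labels = [torch.tensor(a, dtype=torch.float32)
                         for a in (poly_features, labels)]

train(poly_features[:n_train, :4], poly_features[n_train:, :4],
      labels[:n_train], labels[n_train:])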
def load_data_imdb(batch_size, num_steps=500):
    data_dir = d2l.download_extract('aclImdb', 'aclImdb')
    train_data = read_imdb(data_dir, True)
    test_data = read_imdb(data_dir, False)
    train_tokens = d2l.tokenize(train_data[0], token='word')
    test_tokens = d2l.tokenize(test_data[0], token='word')
    vocab = d2l.Vocab(train_tokens, min_freq=5)
    train_features = torch.tensor([d2l.truncate_pad(
        vocab[line], num_steps, vocab['<pad>']) for line in train_tokens])
    test_features = torch.tensor([d2l.truncate_pad(
        vocab[line], num_steps, vocab['<pad>']) for line in test_tokens])
    train_iter = d2l.load_array((train_features, torch.tensor(train_data[1])),
                                batch_size)
    test_iter = d2l.load_array((test_features, torch.tensor(test_data[1])),
                               batch_size, is_train=False)
    return train_iter, test_iter, vocab
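# Usage sketch (assumed): build the IMDb iterators and inspect one minibatch.
# This requires the aclImdb dataset to be available via d2l.download_extract.
train_iter, test_iter, vocab = load_data_imdb(batch_size=64)
for X, y in train_iter:
    print('X:', X.shape, 'y:', y.shape)  # expect X: (64, 500), y: (64,)
    break
print('vocab size:', len(vocab))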
def load_data_nmt(batch_size, num_steps, num_examples=600):
    """Return the iterator and the vocabularies of the translation dataset."""
    text = preprocess_nmt(read_data_nmt())
    source, target = tokenize_nmt(text, num_examples)
    src_vocab = d2l.Vocab(source, min_freq=2,
                          reserved_tokens=['<pad>', '<bos>', '<eos>'])
    tgt_vocab = d2l.Vocab(target, min_freq=2,
                          reserved_tokens=['<pad>', '<bos>', '<eos>'])
    src_array, src_valid_len = build_array_nmt(source, src_vocab, num_steps)
    tgt_array, tgt_valid_len = build_array_nmt(target, tgt_vocab, num_steps)
    data_arrays = (src_array, src_valid_len, tgt_array, tgt_valid_len)
    data_iter = d2l.load_array(data_arrays, batch_size)
    return data_iter, src_vocab, tgt_vocab
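# Usage sketch (assumed): read out the first minibatch of the translation
# dataset. Each batch carries padded token indices plus the valid lengths
# produced by build_array_nmt.
train_iter, src_vocab, tgt_vocab = load_data_nmt(batch_size=2, num_steps=8)
for X, X_valid_len, Y, Y_valid_len in train_iter:
    print('X:', X.type(torch.int32))
    print('valid lengths for X:', X_valid_len)
    print('Y:', Y.type(torch.int32))
    print('valid lengths for Y:', Y_valid_len)
    break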
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    # The Adam optimization algorithm is used here
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            optimizer.step()
        train_ls.append(log_rmse(net, train_features, train_labels))
        if test_labels is not None:
            test_ls.append(log_rmse(net, test_features, test_labels))
    return train_ls, test_ls
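# This loop relies on a module-level `loss` and a `log_rmse` helper that are
# not shown in the snippet. A minimal sketch of both (an assumption, following
# the usual house-price setup in which labels are sale prices):
loss = nn.MSELoss()

def log_rmse(net, features, labels):
    # Clamp predictions to at least 1 so that taking the logarithm is stable
    clipped_preds = torch.clamp(net(features), 1, float('inf'))
    rmse = torch.sqrt(loss(torch.log(clipped_preds), torch.log(labels)))
    return rmse.item()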
def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    # The Adam optimization algorithm is used here
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)
    for epoch in range(num_epochs):
        for X, y in train_iter:
            optimizer.zero_grad()
            l = loss(net(X), y)
            # l = log_rmse(net(X), y)
            # l = myloss(net(X), y)
            l.backward()
            optimizer.step()
        # train_ls.append(log_rmse(net(train_features), train_labels).item())
        train_ls.append(loss(net(train_features), train_labels).item())
        if test_labels is not None:
            # test_ls.append(log_rmse(net(test_features), test_labels).item())
            test_ls.append(loss(net(test_features), test_labels).item())
    return train_ls, test_ls
    train(float(λ), animator)
# Note: torch.range is deprecated; torch.arange(2, 21, 5) yields the same values.
for λ in torch.range(2, 20, 5):
    train(float(λ), animator)
plt.show()
"""

# ----------------------------------------------------------------------------
# 1.1.2
# What do you observe? Use a validation set to find the optimal value of λ.
# Is it really the optimal value? Does this matter?
n_train, n_test, num_inputs, batch_size = 20, 100, 200, 5
n_eval = 20
true_w, true_b = torch.ones((num_inputs, 1)) * 0.01, 0.05
train_data = d2l.synthetic_data(true_w, true_b, n_train)
train_iter = d2l.load_array(train_data, batch_size)
test_data = d2l.synthetic_data(true_w, true_b, n_test)
test_iter = d2l.load_array(test_data, batch_size, is_train=False)
eval_data = d2l.synthetic_data(true_w, true_b, n_eval)
eval_iter = d2l.load_array(eval_data, batch_size, is_train=False)
eval_epochs, num_epochs = 5, 10

def init_params():
    w = torch.normal(0, 1, size=(num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    λ = torch.zeros(1, requires_grad=True)
    return [w, b, λ]

def sgd(params, lr, batch_size):
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
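# A minimal sketch answering the question above (an assumption; the original
# training code for this exercise is not included in the snippet). It grid-
# searches λ and keeps the value with the lowest validation loss; `lr` and the
# search grid are hypothetical choices.
def l2_penalty(w):
    return torch.sum(w.pow(2)) / 2

def train_scratch(lambd, lr=0.003):
    w, b, _ = init_params()
    net, loss = lambda X: d2l.linreg(X, w, b), d2l.squared_loss
    for epoch in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y) + lambd * l2_penalty(w)
            l.sum().backward()
            sgd([w, b], lr, batch_size)
    return d2l.evaluate_loss(net, eval_iter, loss)

val_losses = {lambd: train_scratch(lambd) for lambd in range(0, 21, 5)}
best_lambd = min(val_losses, key=val_losses.get)
print('validation losses:', val_losses)
print('best λ on the validation set:', best_lambd)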
import torch.nn as nn

T = 1000  # Generate a total of 1000 points
time = torch.arange(1, T + 1, dtype=torch.float32)
x = torch.sin(0.01 * time) + torch.normal(0, 0.2, (T,))
d2l.plot(time, [x], 'time', 'x', xlim=[1, 1000], figsize=(6, 3))

tau = 4
features = torch.zeros((T - tau, tau))
for i in range(tau):
    features[:, i] = x[i: T - tau + i]
labels = d2l.reshape(x[tau:], (-1, 1))

batch_size, n_train = 16, 600
# Only the first `n_train` examples are used for training
train_iter = d2l.load_array((features[:n_train], labels[:n_train]),
                            batch_size, is_train=True)

# Function for initializing the weights of the network
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform_(m.weight)

# A simple MLP
def get_net():
    net = nn.Sequential(nn.Linear(4, 10), nn.ReLU(), nn.Linear(10, 1))
    net.apply(init_weights)
    return net

# Square loss
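# The snippet stops at the "Square loss" comment. A minimal continuation
# (assumed, not part of the original): define the loss and a small training
# loop for the MLP above; `train_seq` is a hypothetical name.
loss = nn.MSELoss(reduction='none')

def train_seq(net, train_iter, loss, epochs, lr):
    trainer = torch.optim.Adam(net.parameters(), lr)
    for epoch in range(epochs):
        for X, y in train_iter:
            trainer.zero_grad()
            l = loss(net(X), y)
            l.sum().backward()
            trainer.step()
        print(f'epoch {epoch + 1}, '
              f'loss: {d2l.evaluate_loss(net, train_iter, loss):f}')

net = get_net()
train_seq(net, train_iter, loss, 5, 0.01)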
# %%
import os

train_tokens = d2l.tokenize(train_data[0], token='word')
vocab = d2l.Vocab(train_tokens, min_freq=5, reserved_tokens=['<pad>'])
d2l.set_figsize()
d2l.plt.hist([len(line) for line in train_tokens], bins=range(0, 1000, 50))

# %%
num_steps = 500
train_features = torch.tensor([d2l.truncate_pad(
    vocab[line], num_steps, vocab['<pad>']) for line in train_tokens])
print(train_features.shape)

# %%
train_iter = d2l.load_array((train_features, torch.tensor(train_data[1])), 64)
for X, y in train_iter:
    print('X:', X.shape, ', y:', y.shape)
    break
print('#batches:', len(train_iter))

# %%
def read_imdb(data_dir, is_train):
    data, labels = [], []
    for label in ('pos', 'neg'):
        folder_name = os.path.join(data_dir, 'train' if is_train else 'test',
                                   label)
        for file in os.listdir(folder_name):
            with open(os.path.join(folder_name, file), 'rb') as f:
                review = f.read().decode('utf-8').replace('\n', '')
                data.append(review)
                labels.append(1 if label == 'pos' else 0)
    return data, labels