def fit_and_plot_elegant(wd):
    """Train a 1-output linear model with weight decay `wd` on the weight only,
    then plot train/test loss curves and print the L2 norm of the weight."""
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.constant_(net.bias, val=0)
    # Two optimizers so decay applies to the weight but not the bias.
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)
    train_loss, test_loss = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            batch_loss = loss(net(X), y).mean()
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()
            batch_loss.backward()
            # Step both optimizers to update weight and bias separately.
            optimizer_w.step()
            optimizer_b.step()
        train_loss.append(
            loss(net(train_features), train_labels).mean().item())
        test_loss.append(
            loss(net(test_features), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_loss, "epoch", "loss",
                 range(1, num_epochs + 1), test_loss, ["train", "test"])
    print("L2 norm of w: ", net.weight.data.norm().item())
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    """Train on the full training set, plot the RMSE curve, and write a
    Kaggle submission CSV with predictions for `test_data`."""
    net = get_net(train_features.shape[1])
    # No validation split here: test features/labels slots are passed as None.
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    plt.show()
    print('train rmse %f' % train_ls[-1])
    # detach() drops the grad graph so the tensor can be converted to numpy.
    preds = net(test_features).detach().numpy()
    # NOTE(review): the Kaggle test.csv has no SalePrice column, so a test
    # RMSE cannot be computed here — predictions are only written out.
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./House_price_prediction/submission.csv', index=False)
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    """Fit a linear model on the given features and plot train/test loss."""
    net = torch.nn.Linear(train_features.shape[-1], 1)
    # nn.Linear already initializes its parameters; no manual init needed.
    batch_size = min(10, train_labels.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            batch_l = loss(net(X), y.view(-1, 1))
            optimizer.zero_grad()
            batch_l.backward()
            optimizer.step()
        # Reshape labels to column vectors for the epoch-end evaluation.
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        train_ls.append(loss(net(train_features), train_labels).item())
        test_ls.append(loss(net(test_features), test_labels).item())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)
def fit_and_plot(lambd):
    """Train from scratch with an explicit L2 penalty weighted by `lambd`,
    plot train/test loss per epoch, and print the L2 norm of w."""
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # loss(...) is the squared error of the model output net(X, w, b)
            # against the labels; l2_penalty(w) adds the L2 regularizer.
            batch_l = (loss(net(X, w, b), y) + lambd * l2_penalty(w)).sum()
            if w.grad is not None:
                # Gradients exist only after the first backward pass.
                w.grad.data.zero_()
                b.grad.data.zero_()
            batch_l.backward()
            d2l.sgd([w, b], lr, batch_size)
        # One entry per epoch, so each list ends up num_epochs long.
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().item())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
def fit_and_plot_pytorch(wd):
    """Weight-decay demo using PyTorch's built-in `weight_decay` option,
    applied to the weight parameter only (not the bias)."""
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # Decay only the weight; the bias gets a plain SGD optimizer.
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            batch_l = loss(net(X), y).mean()
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()
            batch_l.backward()
            # Step each optimizer so weight and bias are updated separately.
            optimizer_w.step()
            optimizer_b.step()
        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
def fit_and_plot_pytorch(wd):
    """Train with PyTorch weight decay `wd` on the weight only, report the
    final train/test losses and the weight norm, then plot loss curves."""
    net = torch.nn.Linear(num_inputs, 1)
    # Distinct optimizers so regularization hits the weight but not the bias.
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)
    train_ls, test_ls = [], []
    for epoch in range(num_epochs):
        for X, y in data_iter:
            # The optimizer already applies the decay; no manual penalty term.
            batch_l = loss(net(X), y)
            optimizer_b.zero_grad()
            optimizer_w.zero_grad()
            batch_l.backward()
            optimizer_w.step()
            optimizer_b.step()
        train_ls.append(
            loss(net(features[:n_train, :]), labels[:n_train]).item())
        test_ls.append(
            loss(net(features[n_train:, :]), labels[n_train:]).item())
    print('final epoch: train_loss ', train_ls[-1], 'test_loss ', test_ls[-1])
    print('L2 norm of w', net.weight.data.norm())
    # Plot both error curves on a semilog-y axis.
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    """Run k-fold cross-validation and return the mean final train/valid RMSE.

    For each fold i, get_k_fold_data yields (train X, train y, valid X,
    valid y); a fresh net is trained and its last-epoch RMSEs accumulated.
    The first fold's learning curves are plotted.
    """
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        # *data unpacks the four tensors into train()'s positional slots.
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        # Accumulate only the final-epoch errors of this fold.
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            # Show how error evolves with epochs on the first fold only.
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
            plt.show()
        print('fold %d, train rmse %f, valid rmse %f' %
              (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    """Train on all training data, plot the RMSE curve, and save predictions
    for `test_data` to submission.csv."""
    net = get_net(train_features.shape[1])
    # No held-out set: the validation slots are filled with None.
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # detach() removes the grad graph before the numpy conversion.
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./submission.csv', index=False)
def k_fold(k, X_train, y_train, num_epochs, lr, wd, batch_size):
    """Run k-fold cross-validation and return the mean final train/valid RMSE.

    A fresh net is trained per fold on the split produced by
    get_k_fold_data; the last-epoch RMSEs are averaged over folds and the
    first fold's learning curves are plotted.
    """
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        train_ls, valid_ls = train(net, *data, num_epochs, lr, wd, batch_size)
        # Only the final-epoch error of each fold enters the average.
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            # BUG FIX: legend was ['epochs', 'valid'], mislabeling the train
            # curve; sibling k_fold implementations use ['train', 'valid'].
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
        print("fold %d,train rmse %f, valid rmse %f" %
              (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, wd, batch_size):
    """Train the regression net on all training data and write the Kaggle
    house-price predictions to submission.csv."""
    # get_net builds the linear-regression model.
    net = get_net(train_features.shape[1])
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, wd, batch_size)
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse')
    print('train rmse %f' % train_ls[-1])
    # detach() cuts the autograd graph (requires_grad=False) for numpy export.
    pred = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(pred.reshape(1, -1)[0])
    submission = pd.concat([test_data["Id"], test_data['SalePrice']], axis=1)
    submission.to_csv('../../test/sources/data/kaggle_house/submission.csv',
                      index=False)
def k_fold(k, x_train, y_train, num_epochs, lr, weight_decay, batch_size):
    """k-fold cross-validation returning mean final train/valid RMSE.

    NOTE(review): unlike sibling implementations, the `if i == 0` guard is
    commented out here, so the learning curves are plotted for every fold.
    """
    train_l_sum, valid_l_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, x_train, y_train)
        net = regNet(x_train.shape[1], 1)
        train_ls, valid_ls = train(net, *data, num_epochs, lr, weight_decay,
                                   batch_size)
        # Accumulate the last epoch's loss from each fold.
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        # if i == 0:
        d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'rmse',
                     range(1, num_epochs + 1), valid_ls, ['train', 'valid'])
        print('fold %d, train rmse %f, valid rmse %f' %
              (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def train_and_pred(train_features, train_labels, test_features, test_data,
                   num_epochs, learning_rate, weight_decay, batch_size):
    """Train on the full training set and save predictions as submission.csv."""
    net = get_net(train_features.shape[1])
    # Train without a validation split (None placeholders).
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, learning_rate, weight_decay, batch_size)
    print('train rmse %f' % (train_ls[-1]))
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'rmse')
    # detach() returns a grad-free view sharing storage with the original,
    # which makes the numpy conversion legal.
    preds = net(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(preds.reshape(-1))
    # Keep only the Id and predicted SalePrice columns for the submission.
    submission = pd.concat((test_data['Id'], test_data['SalePrice']), axis=1)
    # index=False: do not write the row index into the CSV.
    submission.to_csv('./Datasets/kaggle_house/submission.csv', index=False)
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    """k-fold cross-validation; returns the mean final train and valid RMSE
    over all folds, plotting the first fold's learning curves."""
    train_l_sum, valid_l_sum = 0.0, 0.0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        # Sum only the final-epoch errors; averaged at the end.
        train_l_sum += train_ls[-1]
        valid_l_sum += valid_ls[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'rmse',
                         range(1, num_epochs + 1), valid_ls,
                         ['train', 'valid'])
            d2l.plt.show()
        print('fold %d,train rmse %f,valid rmse %f' %
              (i, train_ls[-1], valid_ls[-1]))
    return train_l_sum / k, valid_l_sum / k
def fit_and_plot(lambd):
    """Train with an explicit L2 penalty scaled by `lambd` and plot losses."""
    w, b = init_params()
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Objective = data loss + lambd * L2 penalty on w, summed to
            # a scalar for backward().
            batch_l = (loss(net(X, w, b), y) + lambd * l2_penalty(w)).sum()
            if w.grad is not None:
                # Clear stale grads; they only exist after the first backward.
                w.grad.data.zero_()
                b.grad.data.zero_()
            batch_l.backward()
            d2l.sgd([w, b], lr, batch_size)
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().item())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
def fit_and_plot(lamb):
    """Train from scratch with L2 penalty `lamb`, print final losses and the
    weight norm, then plot the loss curves."""
    w, b = init_params()
    train_ls, test_ls = [], []
    for epoch in range(num_epochs):
        for X, y in data_iter:
            # Data loss plus the L2 regularization term.
            batch_l = loss(net(X, w, b), y) + lamb * l2_penalty(w)
            if w.grad is not None:
                # No gradients exist before the first backward pass.
                w.grad.data.zero_()
                b.grad.data.zero_()
            batch_l.backward()
            d2l.sgd([w, b], lr)  # parameter update
        train_ls.append(
            loss(net(features[:n_train, :], w, b), labels[:n_train]).item())
        test_ls.append(
            loss(net(features[n_train:, :], w, b), labels[n_train:]).item())
    print('final epoch: train_loss ', train_ls[-1], 'test_loss ', test_ls[-1])
    print('L2 norm of w', w.norm().item())
    # Plot the error curves on a semilog-y axis.
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
def k_fold_train(k, train_features, train_labels, num_epochs, learning_rate,
                 weight_decay, batch_size):
    """k-fold cross-validation; returns mean final train/valid RMSE.

    get_k_fold_data yields (train_features, train_labels, test_features,
    test_labels) for fold i; *data forwards all four to train().
    """
    train_l_sum, val_l_sum = 0.0, 0.0
    for i in range(k):
        data = get_k_fold_data(k, i, train_features, train_labels)
        net = get_net(train_features.shape[1])
        train_ls, val_ls = train(net, *data, num_epochs, learning_rate,
                                 weight_decay, batch_size)
        # Accumulate per-fold final errors; averaged over k on return.
        train_l_sum += train_ls[-1]
        val_l_sum += val_ls[-1]
        print('fold %d, train rmse %f, valid rmse %f' %
              (i + 1, train_ls[-1], val_ls[-1]))
        if i == 0:
            # Plot one fold's epoch-vs-rmse curves as a training sanity check.
            d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epoch', 'rmse',
                         range(1, num_epochs + 1), val_ls, ['train', 'valid'])
    return train_l_sum / k, val_l_sum / k
def fit_and_plot_pytorch(lambd):
    """Weight-decay demo: decay rate `lambd` on the weight, none on the bias."""
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # Decay only the weight parameter.
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr,
                                  weight_decay=lambd)
    # The bias is updated without decay.
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for feature, label in train_iter:
            batch_l = loss(net(feature), label)
            batch_l.backward()
            optimizer_w.step()
            optimizer_b.step()
            # Clear grads after stepping, ready for the next batch.
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()
        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
def fit_and_plot(lambd):
    """Train from scratch with L2 penalty `lambd`, averaging the batch loss,
    then plot train/test curves and print the weight norm."""
    w, b = init_params()
    train_loss, test_loss = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Penalized objective reduced to a scalar by mean().
            batch_loss = (loss(net(X, w, b), y) + lambd * l2_penalty(w)).mean()
            if w.grad is not None:
                # Grads exist only after the first backward call.
                w.grad.data.zero_()
                b.grad.data.zero_()
            batch_loss.backward()
            d2l.sgd([w, b], lr)
        train_loss.append(
            loss(net(train_features, w, b), train_labels).mean().item())
        test_loss.append(
            loss(net(test_features, w, b), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_loss, "epochs", "loss",
                 range(1, num_epochs + 1), test_loss, ["train", "test"])
    print("L2 norm of w: ", w.norm().item())
def fit_and_plot(lambd):
    """Train with L2 penalty `lambd` using an optim.SGD optimizer, plot the
    train/test loss curves (semilog-y), and print the L2 norm of w."""
    w, b = init_params()
    optimizer = optim.SGD([w, b], lr=lr)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            # Data loss plus the L2 regularization penalty, summed to a scalar.
            l = loss(net(X, w, b), y) + lambd * l2_penalty(w)
            l = l.sum()
            # IDIOM FIX: since an optimizer owns [w, b], use its zero_grad()
            # instead of manually zeroing w.grad/b.grad behind a None-check
            # (zero_grad safely skips parameters whose grad is still None).
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        train_ls.append(
            loss(net(train_features, w, b), train_labels).mean().item())
        test_ls.append(
            loss(net(test_features, w, b), test_labels).mean().item())
    # semilogy plots with a logarithmic y-axis (loglog would log both axes).
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', w.norm().item())
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    """Perform K-fold cross-validation.

    Trains a fresh net per fold, accumulates each fold's final train/valid
    RMSE, plots the first fold's curves, and returns the two means.
    """
    train_loss_sum, valid_loss_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net(X_train.shape[1])
        train_loss, valid_loss = train(net, *data, num_epochs, learning_rate,
                                       weight_decay, batch_size)
        # Only the last epoch's error of each fold enters the average.
        train_loss_sum += train_loss[-1]
        valid_loss_sum += valid_loss[-1]
        if i == 0:
            d2l.semilogy(range(1, num_epochs + 1), train_loss, "epochs",
                         "rmse", range(1, num_epochs + 1), valid_loss,
                         ["train", "valid"])
        print("fold %d, train rmse %f, valid rmse %f" %
              (i, train_loss[-1], valid_loss[-1]))
    return train_loss_sum / k, valid_loss_sum / k