def main():
    """Train and evaluate a from-scratch softmax regression on Fashion-MNIST.

    Loads the data iterators through ``d2l``, builds a single linear layer
    followed by softmax, trains it with ``d2l.train_ch3`` and finally shows
    the true and predicted labels of the first nine images of one test batch.
    """
    # 3.6.1 Load the data.
    # NOTE(review): this helper is more commonly spelled
    # ``load_data_fashion_mnist`` -- confirm that the local ``d2l`` module
    # really exposes ``load_data_from_fashion_mnist``.
    batch_size = 256
    train_iter, test_iter = d2l.load_data_from_fashion_mnist(batch_size)

    # 3.6.2 Initialise the model parameters.
    num_inputs = 784  # every sample is flattened to this many features
    num_outputs = 10
    W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                     dtype=torch.float32)
    b = torch.zeros(num_outputs, dtype=torch.float32)
    W.requires_grad_(True)
    b.requires_grad_(True)

    # 3.6.3 Implement the softmax operation.
    def softmax(X):
        """Return the row-wise softmax of the 2-D tensor ``X``."""
        # Subtracting each row's maximum leaves the result mathematically
        # unchanged but keeps exp() from overflowing on large logits.
        shifted = X - X.max(dim=1, keepdim=True)[0]
        X_exp = shifted.exp()
        partition = X_exp.sum(dim=1, keepdim=True)
        return X_exp / partition  # broadcasts the per-row sum

    # 3.6.4 Define the model.
    def net(X):
        """Flatten ``X`` to rows of ``num_inputs`` and apply the layer."""
        return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)

    # 3.6.5 Define the loss function.
    def cross_entropy(y_hat, y):
        """Return the per-sample negative log-probability of the true class."""
        return -torch.log(y_hat.gather(1, y.view(-1, 1)))

    # 3.6.6 Classification accuracy. Not called in this function; kept so the
    # code mirrors the section it follows.
    def accuracy(y_hat, y):
        """Return the fraction of rows whose arg-max equals the label."""
        return (y_hat.argmax(dim=1) == y).float().mean().item()

    # 3.6.7 Train the model.
    num_epochs, lr = 5, 0.1
    d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
                  batch_size, [W, b], lr)

    # 3.6.8 Predict on one test batch.
    # The built-in next() works on every Python 3 iterator; the ``.next()``
    # method is a Python 2 spelling that iterators are not required to have.
    X, y = next(iter(test_iter))
    true_labels = d2l.get_fashion_mnist_labels(y.numpy())
    pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
    titles = [
        true + '\n' + pred for true, pred in zip(true_labels, pred_labels)
    ]
    d2l.show_fashion_mnist(X[0:9], titles[0:9])
def main():
    """Download Fashion-MNIST, preview ten samples and time one data pass.

    Prints the shape and label of the first training sample, shows the first
    ten training images with their text labels, then measures how long it
    takes to iterate once over the training and test loaders.
    """
    # 3.5.1 Get the dataset.
    mnist_train, mnist_test = (
        torchvision.datasets.FashionMNIST(root="./data/FashionMNIST",
                                          train=is_train,
                                          download=True,
                                          transform=transforms.ToTensor())
        for is_train in (True, False)
    )

    first_image, first_label = mnist_train[0]
    print(first_image.shape, first_label)

    # Collect the first ten training images and their labels for display.
    preview = [mnist_train[idx] for idx in range(10)]
    images = [sample[0] for sample in preview]
    targets = [sample[1] for sample in preview]
    d2l.show_fashion_mnist(images, d2l.get_fashion_mnist_labels(targets))

    # 3.5.2 Read mini-batches.
    batch_size = 256
    loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=0)
    train_iter = Data.DataLoader(mnist_train, **loader_options)
    test_iter = Data.DataLoader(mnist_test, **loader_options)

    # Time one full pass over both loaders; the batches are discarded.
    started = time.time()
    for loader in (train_iter, test_iter):
        for _batch in loader:
            pass
    elapsed = time.time() - started
    print("%.2f secs" % elapsed)
if optimizer is not None: optimizer.zero_grad() elif params is not None and params[0].grad is not None: for param in params: param.grad.data.zero_() l.backward() if optimizer is None: d2l.sgd(params, lr, batch_size) else: optimizer.step() # “softmax回归的简洁实现”一节将用到 train_l_sum += l.item() train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item() n += y.shape[0] test_acc = evaluate_accuracy(test_iter, net) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr) X, y = iter(test_iter).next() true_labels = d2l.get_fashion_mnist_labels(y.numpy()) pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy()) titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)] d2l.show_fashion_mnist(X[0:9], titles[0:9])
l = loss(y_hat, y).sum() if optimizer is not None: optimizer.zero_grad() elif params is not None and params[0].grad is not None: for param in params: param.grad.data.zero_() l.backward() if optimizer is None: d2l.sgd(params, lr, batch_size) else: optimizer.step() train_l_sum += l.item() train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item() n += y.shape[0] test_acc = evaluate_accuracy(test_iter, net) print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc)) train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr) '''X, y = iter(test_iter).next() true_labels = d2l.get_fashion_mnist_labels(y.numpy()) pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy()) titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)] d2l.show_fashion_mnist(X[0:9], titles[0:9])'''
num_workers=num_workers) print('train len is ', len(mnist_train), 'test len is ', len(mnist_test)) # 可以使用下标访问任意一个样本 feature, label = mnist_train[0] print(feature.shape, label) # 将数值标签转换成对应的类别文本标签 def get_fashion_mnist_labels(labels): text_labels = [ 't-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot' ] return [text_labels[int(i)] for i in labels] x, y = [], [] for i in range(10): x.append(mnist_train[i][0]) y.append(mnist_test[i][1]) d2l.show_fashion_mnist(x, get_fashion_mnist_labels(y)) start = time.time() for x, y in train_iter: continue print('load train data spend %.2f sec' % (time.time() - start))
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import time
import sys

sys.path.append("..")  # so that d2lzh_pytorch in the parent directory can be imported
import d2lzh_pytorch as d2l

# %%
# Build the training split first, then the test split, both converted to
# tensors and downloaded on demand.
mnist_train, mnist_test = (
    torchvision.datasets.FashionMNIST(root='Datasets',
                                      train=is_train,
                                      download=True,
                                      transform=transforms.ToTensor())
    for is_train in (True, False)
)
print(type(mnist_train))
print(len(mnist_train), len(mnist_test))

# %%
# Inspect a single sample: tensor shape and label.
feature, label = mnist_train[0]
print(feature.shape, label)

# %%
# Show the first ten training images together with their text labels.
X = [mnist_train[i][0] for i in range(10)]
y = [mnist_train[i][1] for i in range(10)]
d2l.show_fashion_mnist(X, d2l.get_fashion_mnist_labels(y))

# %%
# Define the cross-entropy loss function.
def cross_entropy(y_hat, y):
    """Return the mean cross-entropy loss over a batch.

    Args:
        y_hat: 2-D tensor of predicted class probabilities, one row per
            sample.
        y: tensor of true class indices, one per sample.

    Returns:
        A scalar tensor: the summed negative log-probability of the true
        classes divided by the number of rows in ``y_hat``.
    """
    # view(-1, 1): -1 lets that dimension's size be inferred from the others.
    # gather(dim, index) acts as a lookup: ``index`` (here ``y``) must have the
    # same number of dimensions as ``y_hat``, and its values are used as
    # positions along ``dim`` to pick entries out of ``y_hat``.
    # E.g. with t = [[1, 2], [3, 4]],
    # torch.gather(t, 1, torch.tensor([[0, 0], [1, 0]])) gives
    # tensor([[1, 1], [4, 3]]).
    # NOTE(review): this already averages over the batch. If d2l.train_ch3
    # sums the loss and its SGD step divides by batch_size again, gradients
    # end up scaled by 1/batch_size twice -- confirm against d2l.
    return -torch.log(y_hat.gather(1, y.view(-1, 1))).sum() / y_hat.shape[0]


# Hyper-parameters.
batch_size, num_epochs, lr = 256, 3, 0.1
# Load the data.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# Train the model.
d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs,
              batch_size, [W, b], lr)

# Predict on one test batch.
# The built-in next() works on every Python 3 iterator; the ``.next()``
# method is a Python 2 spelling that iterators are not required to have.
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())

# Plot two rows of five images, each titled "true label / predicted label".
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
for i in range(2):
    # Remember that a slice excludes the element at its right-hand index.
    d2l.show_fashion_mnist(X[(5 * i):(5 * (i + 1))],
                           titles[(5 * i):(5 * (i + 1))])
# Convert the NumPy arrays to float32 tensors viewed as (N, 1, 28, 28) and
# multiply by 1/255 (presumably mapping 0-255 pixel values into [0, 1]).
X_train_tensor = torch.from_numpy(X_train).to(torch.float32).view(
    -1, 1, 28, 28) * (1 / 255.0)
X_test_tensor = torch.from_numpy(X_test).to(torch.float32).view(
    -1, 1, 28, 28) * (1 / 255.0)
# Labels become int64 column vectors of shape (N, 1).
y_train_tensor = torch.from_numpy(y_train).to(torch.int64).view(-1, 1)
y_test_tensor = torch.from_numpy(y_test).to(torch.int64).view(-1, 1)

import torch.utils.data as Data

mnist_train = Data.TensorDataset(X_train_tensor, y_train_tensor)
mnist_test = Data.TensorDataset(X_test_tensor, y_test_tensor)
feature, label = mnist_train[0]

# Preview the first ten training images. Images and labels must come from
# the same dataset; pairing train images with test labels shows wrong titles.
X, y = [], []
for i in range(10):
    X.append(mnist_train[i][0])
    y.append(mnist_train[i][1])
d2l.show_fashion_mnist(X, d2l.get_fashion_mnist_labels(y))

#%%
batch_size = 256
if sys.platform.startswith('win'):
    num_workers = 0  # 0 means no extra processes are used to speed up data loading
else:
    num_workers = 4
# Shuffle only the training data; the test loader keeps its order.
train_iter = torch.utils.data.DataLoader(mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=num_workers)
test_iter = torch.utils.data.DataLoader(mnist_test,
                                        batch_size=batch_size,
                                        shuffle=False,
                                        num_workers=num_workers)

#%%
## Define model.
def net(X):
    """Apply the linear layer followed by softmax to a batch.

    ``X`` is flattened to rows of ``num_inputs`` features, multiplied by
    ``W``, shifted by the bias ``b`` and passed through ``softmax``.
    """
    ## -1 lets view() infer that dimension (the batch size) from the rest.
    ## The bias must take part in the forward pass: ``b`` is handed to the
    ## trainer as a parameter below, and it only gets a gradient if it is used.
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)


## Define loss function.
def cross_entropy(y_hat, y):
    """Return the per-sample negative log-probability of the true class.

    ``y`` is reshaped to a column and used to pick, from each row of
    ``y_hat``, the probability at that row's label index.
    """
    return -torch.log(y_hat.gather(1, y.view(-1, 1)))


def accuracy(y_hat, y):
    """Return, as a Python float, the fraction of rows whose arg-max equals ``y``."""
    return (y_hat.argmax(dim=1) == y).float().mean().item()


num_epoches, lr = 5, 0.1

## Train model.
d2l.train_ch3(net, train_iter, test_iter, cross_entropy, num_epoches,
              batch_size, [W, b], lr)

## Eval model.
## The built-in next() works on every Python 3 iterator; the ``.next()``
## method is a Python 2 spelling that iterators are not required to have.
X, y = next(iter(test_iter))
true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())

## One title per image: true label on the first line, prediction on the second.
title = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[:9], title[:9], 10)