# -*- coding:utf-8 -*-
import torch
from IPython import display
from d2l import torch as d2l

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# 1. Initialize the model parameters.
# The weights W are drawn from a normal distribution (mean 0, std 0.01);
# the bias b is initialized to zero.
# NOTE: `num_imputs` is a misspelling of "num_inputs"; the name is kept
# unchanged because other code in this file may reference it.
num_imputs = 784
num_outputs = 10
W = torch.normal(0, 0.01, size=(num_imputs, num_outputs), requires_grad=True)
b = torch.zeros(num_outputs, requires_grad=True)


# 2. Define the softmax operation.
# Reminder of how `sum` behaves along each axis with keepdim=True:
# X = torch.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
# X.sum(0, keepdim=True), X.sum(1, keepdim=True)
def softmax(X):
    """Return the softmax of ``X`` taken along dimension 1.

    Each row is exponentiated and divided by the sum of its exponentials,
    so every row of the result sums to 1.

    The row-wise maximum is subtracted before exponentiating. The shift
    cancels between numerator and denominator, so the result is
    mathematically unchanged, but ``torch.exp`` can no longer overflow to
    ``inf`` (which previously produced ``nan`` for large inputs).

    Args:
        X: Tensor with at least two dimensions; normalization runs over
            dimension 1.

    Returns:
        Tensor with the same shape as ``X``.
    """
    # Largest value per row, kept as a column so it broadcasts against X.
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X - row_max)
    partition = X_exp.sum(1, keepdim=True)
    # Broadcasting divides every entry by its own row's normalizer.
    return X_exp / partition


# 3. Define the model.
def net(X):
    """Apply the softmax-regression model to a batch ``X``.

    ``X`` is reshaped to ``(-1, W.shape[0])`` so that each example becomes
    one row, multiplied by the module-level weights ``W``, offset by the
    bias ``b``, and passed through ``softmax``.

    Args:
        X: Input tensor whose total element count is a multiple of
            ``W.shape[0]``.

    Returns:
        Tensor of shape ``(rows, W.shape[1])`` containing the softmax output.
    """
    return softmax(torch.matmul(X.reshape((-1, W.shape[0])), W) + b)


# 4. Define the loss function.
timer.stop() train_loss = metric[0] / metric[2] train_acc = metric[1] / metric[2] if (i + 1) % 50 == 0: print(f"epoch: {epoch} --- iter: {i} --- of {len(train_iter)}") print(f"train loss: {train_loss} --- train acc: {train_acc}") test_acc = evaluate_accuracy_gpu(net, test_iter) print(f'loss {train_loss:.3f}, train acc {train_acc:.3f}, ' f'test acc {test_acc:.3f}') print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec ' f'on {str(device)}') print("train") batch_size = 256 train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size) lr, num_epochs = 0.9, 2 train_func(net, train_iter, test_iter, num_epochs, lr) print("-------------------------------------------") print("alex net modifications") net = nn.Sequential(nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(), nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(), nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2), nn.Flatten(), nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(p=0.5),