import torch
from torch import nn
import torch.nn.functional as F
import torchvision
import d2lzh_pytorch as d2l  # the course's utility package; the import name may differ per setup

def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # convolutional part
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        # each vgg_block halves the height and width
        net.add_module("vgg_block_" + str(i + 1),
                       vgg_block(num_convs, in_channels, out_channels))
    # fully connected part
    net.add_module("fc", nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(fc_features, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, fc_hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(fc_hidden_units, 10)))
    return net
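For context, a minimal sketch of how vgg might be invoked. The conv_arch tuples below (a VGG-11-style configuration for 224x224 single-channel input) and the resulting fc_features value are illustrative assumptions, not taken from the text above:

# each tuple is (num_convs, in_channels, out_channels); five blocks halve
# the 224x224 input five times, leaving 7x7 feature maps
conv_arch = ((1, 1, 64), (1, 64, 128), (2, 128, 256),
             (2, 256, 512), (2, 512, 512))
fc_features = 512 * 7 * 7  # channels * height * width after the last block
net = vgg(conv_arch, fc_features, fc_hidden_units=4096)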
class GlobalAvgPool2d(nn.Module):
    # global average pooling: the pooling window equals the input's height and width
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=x.size()[2:])

net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(0.5),
    # there are 10 label classes
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool2d(),
    # turn the four-dimensional output into two dimensions of shape (batch size, 10)
    d2l.FlattenLayer())

X = torch.rand(1, 1, 224, 224)
for name, blk in net.named_children():
    X = blk(X)
    print(name, 'output shape: ', X.shape)

batch_size = 128
# if an "out of memory" error occurs, reduce batch_size or resize
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.002, 5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
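The nin_block helper used above is not defined in this excerpt. A plausible sketch following the standard NiN design, where a spatial convolution is followed by two 1x1 convolutions acting as per-pixel fully connected layers (treat this as an assumption, not necessarily the author's exact code):

def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    # one kxk convolution followed by two 1x1 convolutions, each with ReLU
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU(),
        nn.Conv2d(out_channels, out_channels, kernel_size=1),
        nn.ReLU())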
    def forward(self, X):
        # save the updated moving_mean and moving_var; a Module instance's
        # training attribute defaults to True and is set to False by .eval()
        Y, self.moving_mean, self.moving_var = batch_norm(
            self.training, X, self.gamma, self.beta, self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y

Application to LeNet

net = nn.Sequential(
    nn.Conv2d(1, 6, 5),   # in_channels, out_channels, kernel_size
    BatchNorm(6, num_dims=4),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2),   # kernel_size, stride
    nn.Conv2d(6, 16, 5),
    BatchNorm(16, num_dims=4),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2),
    d2l.FlattenLayer(),
    nn.Linear(16 * 4 * 4, 120),
    BatchNorm(120, num_dims=2),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    BatchNorm(84, num_dims=2),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)
print(net)

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (1): BatchNorm()
  (2): Sigmoid()
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
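The forward method above delegates to a batch_norm function that this excerpt does not show. A sketch of the standard minibatch-normalization recipe it presumably follows (training mode uses batch statistics and updates the moving averages; evaluation mode uses the moving averages):

def batch_norm(is_training, X, gamma, beta, moving_mean, moving_var, eps, momentum):
    if not is_training:
        # evaluation mode: normalize with the moving statistics
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # fully connected layer: statistics over the batch dimension
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # convolutional layer: per-channel statistics over (N, H, W)
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # exponential moving averages used at evaluation time
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var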
class GlobalAvgPool2d(nn.Module):
    def __init__(self):
        super(GlobalAvgPool2d, self).__init__()

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=x.size()[2:])

net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Dropout(0.5),
    # there are 10 label classes
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
    GlobalAvgPool2d(),
    # turn the four-dimensional output into two dimensions of shape (batch size, 10)
    d2l.FlattenLayer())

X = torch.rand(1, 1, 224, 224)
for name, blk in net.named_children():
    X = blk(X)
    print(name, 'output shape: ', X.shape)

0 output shape:  torch.Size([1, 96, 54, 54])
1 output shape:  torch.Size([1, 96, 26, 26])
2 output shape:  torch.Size([1, 256, 26, 26])
3 output shape:  torch.Size([1, 256, 12, 12])
4 output shape:  torch.Size([1, 384, 12, 12])
5 output shape:  torch.Size([1, 384, 5, 5])
6 output shape:  torch.Size([1, 384, 5, 5])
7 output shape:  torch.Size([1, 10, 5, 5])
8 output shape:  torch.Size([1, 10, 1, 1])
9 output shape:  torch.Size([1, 10])

batch_size = 128
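As a sanity check on these shapes: a convolution or pooling layer with input size n, kernel k, stride s, and padding p produces floor((n + 2p - k) / s) + 1 outputs per spatial dimension. A small illustrative helper (conv_out_size is hypothetical, not part of the course code):

def conv_out_size(n, k, stride, padding):
    # floor((n + 2*padding - k) / stride) + 1
    return (n + 2 * padding - k) // stride + 1

print(conv_out_size(224, 11, 4, 0))  # 54, the first nin_block above
print(conv_out_size(54, 3, 2, 0))    # 26, the first max pooling layer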
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)

epoch 1, loss 0.0046, train acc 0.549, test acc 0.704
epoch 2, loss 0.0023, train acc 0.785, test acc 0.737
epoch 3, loss 0.0019, train acc 0.825, test acc 0.834
epoch 4, loss 0.0017, train acc 0.842, test acc 0.763
epoch 5, loss 0.0016, train acc 0.848, test acc 0.813

Concise implementation

net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, 10)
)

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)

epoch 1, loss 0.0046, train acc 0.553, test acc 0.736
epoch 2, loss 0.0023, train acc 0.785, test acc 0.803
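For comparison with nn.Dropout above, the from-scratch version of inverted dropout that this kind of section typically builds on can be sketched as follows (an assumption; the excerpt does not show the original helper):

def dropout(X, drop_prob):
    # zero each element with probability drop_prob and scale the survivors
    # by 1/(1 - drop_prob) so the expected activation is unchanged
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X.float() / keep_prob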
def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            # stride=2 halves the output height and width here
            blk.append(Residual(in_channels, out_channels,
                                use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)

net.add_module("resnet_block1", resnet_block(64, 64, 2, first_block=True))
net.add_module("resnet_block2", resnet_block(64, 128, 2))
net.add_module("resnet_block3", resnet_block(128, 256, 2))
net.add_module("resnet_block4", resnet_block(256, 512, 2))
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())  # output: (Batch, 512, 1, 1)
net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))

# choose according to available compute
batch_size = 0
# batch_size = 256
batch_size = 16
assert batch_size > 0, 'batch_size not chosen'

# load the data; change this part when using another dataset
def load_data_fashion_mnist(batch_size, resize=None,
                            root='/home/kesci/input/FashionMNIST2065'):
    """Download the Fashion-MNIST dataset and then load it into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
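resnet_block relies on a Residual module not shown in this excerpt. A sketch of the usual two-convolution residual unit it implies, with an optional 1x1 convolution to match shapes on the skip connection (an assumption, not necessarily the author's exact code):

class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=3, padding=1)
        # 1x1 convolution to adjust channels/resolution of the identity branch
        self.conv3 = (nn.Conv2d(in_channels, out_channels,
                                kernel_size=1, stride=stride)
                      if use_1x1conv else None)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)  # add the skip connection before the final ReLU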
net.add_module("DenseBlosk_%d" % i, DB) # 上一个稠密块的输出通道数 num_channels = DB.out_channels # 在稠密块之间加入通道数减半的过渡层 if i != len(num_convs_in_dense_blocks) - 1: net.add_module("transition_block_%d" % i, transition_block(num_channels, num_channels // 2)) num_channels = num_channels // 2 net.add_module("BN", nn.BatchNorm2d(num_channels)) net.add_module("relu", nn.ReLU()) net.add_module( "global_avg_pool", d2l.GlobalAvgPool2d()) # GlobalAvgPool2d的输出: (Batch, num_channels, 1, 1) net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(num_channels, 10))) #依据计算能力选择 batch_size = 0 #batch_size = 256 batch_size = 16 assert batch_size > 0, '未选择batch_size' #读取数据,使用其他数据集时更改该部分 def load_data_fashion_mnist(batch_size, resize=None, root='/home/kesci/input/FashionMNIST2065'): """Download the fashion mnist dataset and then load into memory.""" trans = [] if resize: