def vgg(conv_arch, fc_features, fc_hidden_units=4096):
    net = nn.Sequential()
    # Convolutional part
    for i, (num_convs, in_channels, out_channels) in enumerate(conv_arch):
        # Each vgg_block halves the height and width
        net.add_module('vgg_block_' + str(i + 1),
                       vgg_block(num_convs, in_channels, out_channels))
    # Fully connected part
    net.add_module("fc", nn.Sequential(d2l.FlattenLayer(),
                                       nn.Linear(fc_features, fc_hidden_units),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(fc_hidden_units, fc_hidden_units),
                                       nn.ReLU(),
                                       nn.Dropout(0.5),
                                       nn.Linear(fc_hidden_units, 10)))
    return net
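# The vgg_block helper called above is defined elsewhere in the repo. For
# reference, a minimal sketch following the standard VGG recipe (num_convs
# stacked 3x3 convolutions with ReLU, then a 2x2 max pool that halves
# height and width); the signature is inferred from the call site:
def vgg_block(num_convs, in_channels, out_channels):
    blk = []
    for i in range(num_convs):
        if i == 0:
            blk.append(nn.Conv2d(in_channels, out_channels,
                                 kernel_size=3, padding=1))
        else:
            blk.append(nn.Conv2d(out_channels, out_channels,
                                 kernel_size=3, padding=1))
        blk.append(nn.ReLU())
    blk.append(nn.MaxPool2d(kernel_size=2, stride=2))  # halve height/width
    return nn.Sequential(*blk)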
        # (tail of BatchNorm.forward, continued from the previous section)
        # Update the running statistics and return the normalized output
        Y, self.moving_mean, self.moving_var = batch_norm(
            self.training, X, self.gamma, self.beta, self.moving_mean,
            self.moving_var, eps=1e-5, momentum=0.9)
        return Y

net = nn.Sequential(
    nn.Conv2d(1, 6, 5),  # in_channels, out_channels, kernel_size
    BatchNorm(num_features=6, num_dims=4),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(6, 16, 5),
    BatchNorm(num_features=16, num_dims=4),
    nn.Sigmoid(),
    nn.MaxPool2d(2, 2),
    d2l.FlattenLayer(),
    nn.Linear(16 * 4 * 4, 120),
    BatchNorm(num_features=120, num_dims=2),
    nn.Sigmoid(),
    nn.Linear(120, 84),
    BatchNorm(num_features=84, num_dims=2),
    nn.Sigmoid(),
    nn.Linear(84, 10)
)

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
lr, num_epochs = 0.001, 5
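# The batch_norm function called in BatchNorm.forward is not shown in this
# section. A minimal sketch of from-scratch batch normalization; the
# argument order is inferred from the call site, and the branching follows
# the usual train/eval split (per-batch statistics in training, running
# statistics in evaluation):
def batch_norm(is_training, X, gamma, beta, moving_mean, moving_var,
               eps, momentum):
    if not is_training:
        # Evaluation mode: normalize with the running statistics
        X_hat = (X - moving_mean) / torch.sqrt(moving_var + eps)
    else:
        assert len(X.shape) in (2, 4)
        if len(X.shape) == 2:
            # Fully connected layer: statistics over the batch dimension
            mean = X.mean(dim=0)
            var = ((X - mean) ** 2).mean(dim=0)
        else:
            # Convolutional layer: statistics over batch, height and width,
            # keeping one mean/var per channel
            mean = X.mean(dim=(0, 2, 3), keepdim=True)
            var = ((X - mean) ** 2).mean(dim=(0, 2, 3), keepdim=True)
        X_hat = (X - mean) / torch.sqrt(var + eps)
        # Update the running statistics with an exponential moving average
        moving_mean = momentum * moving_mean + (1.0 - momentum) * mean
        moving_var = momentum * moving_var + (1.0 - momentum) * var
    Y = gamma * X_hat + beta  # scale and shift
    return Y, moving_mean, moving_var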
b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   d2l.GlobalAvgPool2d())

net = nn.Sequential(b1, b2, b3, b4, b5,
                    d2l.FlattenLayer(), nn.Linear(1024, 10))

X = torch.rand(1, 1, 96, 96)
for blk in net.children():
    X = blk(X)
    print('output shape: ', X.shape)

# 5.9.3 Load the data and train the model
batch_size = 128
# If an "out of memory" error occurs, reduce batch_size or the resize value
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
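# The Inception block used in b3-b5 is defined elsewhere in the repo. For
# reference, a minimal sketch of the four-path GoogLeNet block (1x1 conv;
# 1x1 -> 3x3; 1x1 -> 5x5; 3x3 max pool -> 1x1), with the outputs
# concatenated along the channel dimension; the signature is inferred from
# the call sites above:
import torch.nn.functional as F

class Inception(nn.Module):
    # c1..c4 are the output channels of the four parallel paths
    def __init__(self, in_c, c1, c2, c3, c4):
        super(Inception, self).__init__()
        self.p1_1 = nn.Conv2d(in_c, c1, kernel_size=1)
        self.p2_1 = nn.Conv2d(in_c, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.p3_1 = nn.Conv2d(in_c, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_c, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # Concatenate the four paths along the channel dimension
        return torch.cat((p1, p2, p3, p4), dim=1)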
                # (tail of evaluate_accuracy, continued from the previous
                # section) count correct predictions of the from-scratch net
                acc_sum += (net(X, is_training=False).argmax(
                    dim=1) == y_f).float().sum().item()
            else:
                acc_sum += (net(X).argmax(dim=1) == y_f).float().sum().item()
        n += y_f.shape[0]
    return acc_sum / n

# 3.13.2.3 Train and test the model
num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params, lr)

# 3.13.3 Concise implementation
net = nn.Sequential(d2l.FlattenLayer(),
                    nn.Linear(num_inputs, num_hiddens1),
                    nn.ReLU(),
                    nn.Dropout(drop_prob1),
                    nn.Linear(num_hiddens1, num_hiddens2),
                    nn.ReLU(),
                    nn.Dropout(drop_prob2),
                    nn.Linear(num_hiddens2, 10))

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              None, None, optimizer)

# X = torch.arange(16).view(2, 8)
# res = dropout(X, 0.5)
# print(res)
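# The from-scratch dropout function exercised by the commented-out test
# above is not defined in this section. A minimal sketch of inverted
# dropout (kept activations are rescaled by 1/keep_prob so the expected
# value is unchanged); the signature is inferred from the test call:
def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    if keep_prob == 0:
        # Everything is dropped when drop_prob == 1
        return torch.zeros_like(X)
    # Bernoulli mask: keep each element with probability keep_prob
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob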
print(blk(X).shape)

# ResNet
# 5.11.2 The ResNet model
net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                    nn.BatchNorm2d(64),
                    nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
net.add_module("resnet_block1", Func.resnet_block(64, 64, 2,
                                                  first_block=True))
net.add_module("resnet_block2", Func.resnet_block(64, 128, 2))
net.add_module("resnet_block3", Func.resnet_block(128, 256, 2))
net.add_module("resnet_block4", Func.resnet_block(256, 512, 2))
# GlobalAvgPool2d output: (batch, 512, 1, 1)
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())
# Flatten from 4D to 2D before the fully connected layer
net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))

X = torch.rand((1, 1, 224, 224))  # (batch_size, in_channels, n_H, n_W)
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape: \t', X.shape)

# 5.11.3 Load the data and train the model
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
lr, num_epochs = 0.001, 5
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device,
              num_epochs)
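# Func.resnet_block is imported from elsewhere in the repo. For reference,
# a minimal sketch of a plausible implementation: each block stacks
# Residual units, the first of which (except in the very first block)
# halves height/width with stride 2 and matches channels with a 1x1
# convolution on the shortcut. Names follow the call sites above; the
# actual contents of Func may differ.
import torch.nn.functional as F

class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False,
                 stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3,
                               padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3,
                               padding=1)
        if use_1x1conv:
            # 1x1 convolution so the shortcut matches the main path's shape
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                   stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        return F.relu(Y + X)  # skip connection

def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    if first_block:
        assert in_channels == out_channels  # the first block keeps the shape
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels,
                                use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)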