def _createFeatures(self, opt):
    """Assemble the NiN-style feature extractor and return it as a list of layers.

    Three stages of (conv5/conv3 -> 1x1 conv -> 1x1 conv), the first two each
    followed by a 3x3/stride-2 pool whose class is looked up by name from
    ``opt.pooltype`` (e.g. ``MaxPool2d`` / ``AvgPool2d``).  The final 1x1 conv
    maps to ``opt.num_classes`` channels and a global average pool reduces the
    spatial dims to 1x1.

    Args:
        opt: options object; this method reads ``opt.pooltype`` and
            ``opt.num_classes`` and forwards ``opt`` to every block.

    Returns:
        list: the ordered layer instances (not yet wrapped in a container).
    """
    # Resolve the pooling class once; both pooling stages use the same type.
    pool_cls = getattr(nn, opt.pooltype)
    features = [
        # Stage 1: 5x5 stem followed by two 1x1 "network-in-network" convs.
        InitialBlock(opt=opt, out_channels=128, kernel_size=5, stride=1, padding=2),
        ConvBlock(opt=opt, in_channels=128, out_channels=96, kernel_size=1),
        ConvBlock(opt=opt, in_channels=96, out_channels=48, kernel_size=1),
        pool_cls(kernel_size=3, stride=2),
        # Stage 2: same shape pattern at 5x5.
        ConvBlock(opt=opt, in_channels=48, out_channels=128, kernel_size=5, stride=1, padding=2),
        ConvBlock(opt=opt, in_channels=128, out_channels=96, kernel_size=1),
        ConvBlock(opt=opt, in_channels=96, out_channels=48, kernel_size=1),
        pool_cls(kernel_size=3, stride=2),
        # Stage 3: 3x3 convs, ending in a per-class 1x1 conv.
        ConvBlock(opt=opt, in_channels=48, out_channels=128, kernel_size=3, stride=1, padding=1),
        ConvBlock(opt=opt, in_channels=128, out_channels=96, kernel_size=1),
        ConvBlock(opt=opt, in_channels=96, out_channels=opt.num_classes, kernel_size=1),
        # Global average pooling to 1x1 per class channel.
        nn.AdaptiveAvgPool2d(1),
    ]
    return features
def __init__(self, opt, block, layers, zero_init_residual=False, groups=1, width_per_group=64):
    """Build an ImageNet-style ResNet backbone.

    Args:
        opt: options object, forwarded to ``InitialBlock``/``FinalBlock`` and
            ``self._make_layer``.
        block: residual block class; ``block.expansion`` scales the output width.
        layers: sequence of four ints — the number of blocks in each stage.
        zero_init_residual: if True, zero the last BN weight of every residual
            block so each block starts as an identity mapping.
        groups: grouped-convolution count stored for ``_make_layer``.
        width_per_group: base width stored as ``self.base_width``.
    """
    super(ResNetBase, self).__init__()
    self.inplanes = 64
    self.opt = opt
    self.groups = groups
    self.base_width = width_per_group
    # 7x7/stride-2 stem followed by a 3x3/stride-2 max pool (standard ImageNet stem).
    self.conv1block = InitialBlock(opt, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(opt=opt, block=block, planes=64, blocks=layers[0])
    self.layer2 = self._make_layer(opt=opt, block=block, planes=128, blocks=layers[1], stride=2)
    self.layer3 = self._make_layer(opt=opt, block=block, planes=256, blocks=layers[2], stride=2)
    self.layer4 = self._make_layer(opt=opt, block=block, planes=512, blocks=layers[3], stride=2)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    # Fix: drop the dead chained local (`self.dim_out = in_channels = ...`);
    # reuse self.dim_out instead of recomputing the product.
    self.dim_out = 512 * block.expansion
    self.final = FinalBlock(opt=opt, in_channels=self.dim_out)
    # He initialization for convs; BN/GN scale=1, shift=0.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
    # Zero-initialize the last BN in each residual branch so every residual
    # block starts as an identity; improves accuracy by ~0.2-0.3% per
    # https://arxiv.org/abs/1706.02677.
    # NOTE(review): these check `Bottleneck`/`BasicBlock`, while the rest of
    # this file constructs `BottleneckBlock` — confirm both names resolve and
    # match the classes actually instantiated, otherwise this branch either
    # raises NameError or silently matches nothing when enabled.
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)
def __init__(self, opt):
    """Build a ResNet whose architecture is selected by ``opt.depth``.

    CIFAR-style nets (3 stage groups): depths 20/32/44/56/110/1202 (BasicBlock,
    6n+2) and 164/1001 (BottleneckBlock, 9n+2).  ImageNet-style nets (4 stage
    groups): depths 18/34 (BasicBlock) and 50/101/152 (BottleneckBlock).

    Args:
        opt: options object; reads ``opt.depth`` and ``opt.num_classes`` and
            forwards ``opt`` to every sub-block.

    Raises:
        ValueError: if ``opt.depth`` is not one of the supported depths.
    """
    super(ResNet, self).__init__()
    depth = opt.depth
    # Per-depth configuration: ImageNet variants carry an explicit per-stage
    # block count; CIFAR variants derive a uniform count `n` from the depth.
    imagenet_blocks = {
        18: [2, 2, 2, 2],
        34: [3, 4, 6, 3],
        50: [3, 4, 6, 3],
        101: [3, 4, 23, 3],
        152: [3, 8, 36, 3],
    }
    if depth in [20, 32, 44, 56, 110, 1202]:
        self.nettype = 'cifar'
        if (depth - 2) % 6 != 0:
            raise ValueError('Depth should be 6n+2, and preferably one of 20, 32, 44, 56, 110, 1202')
        n = (depth - 2) // 6
        block = BasicBlock
        in_planes, out_planes = 16, 64
    elif depth in [164, 1001]:
        self.nettype = 'cifar'
        if (depth - 2) % 9 != 0:
            raise ValueError('Depth should be 9n+2, and preferably one of 164 or 1001')
        n = (depth - 2) // 9
        block = BottleneckBlock
        in_planes, out_planes = 16, 64
    elif depth in [18, 34]:
        self.nettype = 'imagenet'
        num_blocks = imagenet_blocks[depth]
        block = BasicBlock
        in_planes, out_planes = 64, 512  #20, 160
    elif depth in [50, 101, 152]:
        self.nettype = 'imagenet'
        num_blocks = imagenet_blocks[depth]
        block = BottleneckBlock
        in_planes, out_planes = 64, 512
    else:
        # Fix: the original validated depth with `assert`, which is stripped
        # under `python -O` and then left `blocktype` unbound (NameError).
        # Raise an explicit, informative error instead.
        raise ValueError(
            'Unsupported depth {}; expected one of '
            '20, 32, 44, 56, 110, 1202, 164, 1001, 18, 34, 50, 101, 152'.format(depth))
    self.num_classes = opt.num_classes
    self.initial = InitialBlock(opt=opt, out_channels=in_planes, kernel_size=3, stride=1, padding=1)
    if self.nettype == 'cifar':
        # Three stage groups; channel widths 16 -> 32 -> 64 (times expansion).
        self.group1 = ResidualBlock(opt, block, 16, 16, n, stride=1)
        self.group2 = ResidualBlock(opt, block, 16 * block.expansion, 32, n, stride=2)
        self.group3 = ResidualBlock(opt, block, 32 * block.expansion, 64, n, stride=2)
    else:
        # Four stage groups; channel widths 64 -> 128 -> 256 -> 512 (times expansion).
        self.group1 = ResidualBlock(opt, block, 64, 64, num_blocks[0], stride=1)  #For ResNet-S, convert this to 20,20
        self.group2 = ResidualBlock(opt, block, 64 * block.expansion, 128, num_blocks[1], stride=2)  #For ResNet-S, convert this to 20,40
        self.group3 = ResidualBlock(opt, block, 128 * block.expansion, 256, num_blocks[2], stride=2)  #For ResNet-S, convert this to 40,80
        self.group4 = ResidualBlock(opt, block, 256 * block.expansion, 512, num_blocks[3], stride=2)  #For ResNet-S, convert this to 80,160
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.dim_out = out_planes * block.expansion
    self.final = FinalBlock(opt=opt, in_channels=self.dim_out)
    # He initialization for convs; BN/GN scale=1, shift=0.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)