def __init__(self, n_class=19, in_size=(448, 896), width_mult=1.,
             out_sec=256, aspp_sec=(12, 24, 36), norm_act=InPlaceABN):
    """
    MobileNetV2Plus: MobileNetV2 based semantic segmentation network.

    :param n_class:    (int) Number of classes; when `0` the ASPP / score head is not built
    :param in_size:    (tuple of int) Size of the input image; both entries must be divisible by 8
    :param width_mult: (float) Network width multiplier applied to each stage's channel count
    :param out_sec:    (int) Number of output channels of the ASPP block
    :param aspp_sec:   (tuple of int) Dilation rates handed to the ASPP block
    :param norm_act:   (callable) Factory of the normalization / activation module
    """
    super(MobileNetV2Plus, self).__init__()

    self.n_class = n_class

    # One row per stage:
    #   t = expansion ratio, c = channels, n = number of blocks,
    #   s = stride of the first block, d = dilation of the other blocks.
    self.interverted_residual_setting = [
        # t, c, n, s, d
        [1, 16, 1, 1, 1],    # 1/2
        [6, 24, 2, 2, 1],    # 1/4
        [6, 32, 3, 2, 1],    # 1/8
        [6, 64, 4, 1, 2],    # 1/8
        [6, 96, 3, 1, 4],    # 1/8
        [6, 160, 3, 1, 8],   # 1/8
        [6, 320, 1, 1, 16],  # 1/8
    ]
    stage_cfg = self.interverted_residual_setting

    # ---- first layer ----------------------------------------------------
    assert in_size[0] % 8 == 0
    assert in_size[1] % 8 == 0
    self.input_size = in_size

    input_channel = int(32 * width_mult)
    stem = conv_bn(inp=3, oup=input_channel, stride=2)
    self.mod1 = nn.Sequential(OrderedDict([("conv1", stem)]))

    # ---- inverted residual stages, registered as mod2 .. mod8 -----------
    for mod_no, (t, c, n, s, d) in enumerate(stage_cfg, 2):
        output_channel = int(c * width_mult)

        stage = OrderedDict()
        for idx in range(n):
            # Only the first block of a stride-2 stage downsamples, and it
            # does so without dilation; every other block is stride-1 and
            # uses the stage dilation.
            downsample = idx == 0 and s == 2
            stride, dilate = (s, 1) if downsample else (1, d)
            stage["block%d" % (idx + 1)] = InvertedResidual(inp=input_channel,
                                                            oup=output_channel,
                                                            stride=stride,
                                                            dilate=dilate,
                                                            expand_ratio=t)
            input_channel = output_channel

        self.add_module("mod%d" % mod_no, nn.Sequential(stage))

    # ---- last several layers --------------------------------------------
    # Sum of the nominal (unscaled) channel counts of all seven stages.
    org_last_chns = sum(cfg[1] for cfg in stage_cfg)
    # NOTE(review): each stage width above is scaled by width_mult, but this
    # total is only scaled when width_mult > 1 -- confirm against forward()
    # before using width_mult != 1.
    if width_mult > 1.0:
        self.last_channel = int(org_last_chns * width_mult)
    else:
        self.last_channel = org_last_chns

    self.out_se = nn.Sequential(SCSEBlock(channel=self.last_channel, reduction=16))

    if self.n_class != 0:
        feat_res = (int(in_size[0] / 8), int(in_size[1] / 8))
        self.aspp = nn.Sequential(ASPPInPlaceABNBlock(self.last_channel, out_sec,
                                                      feat_res=feat_res,
                                                      aspp_sec=aspp_sec,
                                                      norm_act=norm_act))

        # Nominal channel counts of the first two stages.
        in_stag2_up_chs = stage_cfg[1][1] + stage_cfg[0][1]
        score_chs = out_sec + in_stag2_up_chs

        self.score_se = nn.Sequential(SCSEBlock(channel=score_chs, reduction=16))
        # NOTE(review): the "norm.1" / "conv.1" names contain a dot, which
        # newer PyTorch releases reject in add_module -- confirm the targeted
        # PyTorch version (renaming would change state_dict keys).
        self.score = nn.Sequential(OrderedDict([
            ("norm.1", norm_act(score_chs)),
            ("conv.1", nn.Conv2d(score_chs, score_chs,
                                 kernel_size=3, stride=1, padding=2,
                                 dilation=2, bias=False)),
            ("norm.2", norm_act(score_chs)),
            ("conv.2", nn.Conv2d(score_chs, self.n_class,
                                 kernel_size=1, stride=1, padding=0,
                                 bias=True)),
            ("up1", nn.Upsample(size=in_size, mode='bilinear')),
        ]))

    self._initialize_weights()
def __init__(self, small=False, classes=19, in_size=(448, 896), num_init_features=64,
             k_r=96, groups=4, k_sec=(3, 4, 20, 3), inc_sec=(16, 32, 24, 128),
             out_sec=(512, 256, 128), dil_sec=(1, 1, 1, 2, 4), aspp_sec=(7, 14, 21),
             norm_act=ABN):
    """
    Build the SEDPNShuffleNet encoder (four dual-path stages) and its
    three-level ASPP decoder with per-level score heads.

    :param small:             (bool) `True` uses a 3x3 stem and a bandwidth factor of 1,
                              otherwise a 7x7 stem and a bandwidth factor of 4
    :param classes:           (int) Number of output channels of every score head
    :param in_size:           (tuple of int) Size of the input image
    :param num_init_features: (int) Number of output channels of the stem convolution
    :param k_r:               (int) Width factor of the 1x1a / 3x3b convolutions
    :param groups:            (int) `groups` argument passed to every dual-path block
    :param k_sec:             (tuple of int) Number of blocks in each of the four stages
    :param inc_sec:           (tuple of int) `inc` value of each of the four stages
    :param out_sec:           (tuple of int) Output channels of the three ASPP blocks
    :param dil_sec:           (tuple of int) Dilations: stages 1-2, first half of stage 3,
                              second half of stage 3, stage 4
    :param aspp_sec:          (tuple of int) Dilation rates of ASPP #1 (x2 for #2, x4 for #3)
    :param norm_act:          (callable) Factory of the normalization / activation module
    """
    super(SEDPNShuffleNet, self).__init__()
    bw_factor = 1 if small else 4

    # ------------------------------------------------------------------ #
    # 1. conv1: stem convolution + max-pool (two stride-2 steps).
    #    The two variants differ only in the stem kernel size / padding.
    # ------------------------------------------------------------------ #
    stem_kernel, stem_pad = (3, 1) if small else (7, 3)
    self.encode_in = nn.Sequential(OrderedDict([
        ("conv_in", nn.Conv2d(3, num_init_features, kernel_size=stem_kernel,
                              stride=2, padding=stem_pad, bias=False)),
        ("bn_in", norm_act(num_init_features)),
        ("pool_in", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    ]))

    # ------------------------------------------------------------------ #
    # 2. conv2 (defaults: 64 -> 336 channels)
    # ------------------------------------------------------------------ #
    conv1x1c_ch = 64 * bw_factor
    inc = inc_sec[0]
    conv1x1a_ch = (k_r * conv1x1c_ch) // (64 * bw_factor)
    conv3x3b_ch = conv1x1a_ch

    encode_blocks1 = OrderedDict()
    encode_blocks1['conv2_1'] = DualPathInPlaceABNBlock(
        num_init_features, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
        groups, dil_sec[0], 'proj', norm_act=norm_act)
    in_chs = conv1x1c_ch + 3 * inc
    for i in range(2, k_sec[0] + 1):
        encode_blocks1['conv2_%d' % i] = DualPathInPlaceABNBlock(
            in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
            groups, dil_sec[0], 'normal', norm_act=norm_act)
        in_chs += inc
    self.encode_stg1 = nn.Sequential(encode_blocks1)
    # Width of the stage-1 output, reused by the ASPP #3 skip connection.
    stg1_chs = in_chs

    # ------------------------------------------------------------------ #
    # 3. conv3 (defaults: 336 -> 704 channels)
    # ------------------------------------------------------------------ #
    conv1x1c_ch = 128 * bw_factor
    inc = inc_sec[1]
    conv1x1a_ch = (k_r * conv1x1c_ch) // (64 * bw_factor)
    conv3x3b_ch = conv1x1a_ch

    encode_blocks2 = OrderedDict()
    encode_blocks2['conv3_1'] = DualPathInPlaceABNBlock(
        in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
        groups, dil_sec[1], 'down', norm_act=norm_act)
    in_chs = conv1x1c_ch + 3 * inc
    for i in range(2, k_sec[1] + 1):
        encode_blocks2['conv3_%d' % i] = DualPathInPlaceABNBlock(
            in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
            groups, dil_sec[1], 'normal', norm_act=norm_act)
        in_chs += inc
    self.encode_stg2 = nn.Sequential(encode_blocks2)
    # Width of the stage-2 output, reused by the ASPP #2 skip connection.
    stg2_chs = in_chs

    # ------------------------------------------------------------------ #
    # 4. conv4 (defaults: 704 -> 1552 channels)
    #    First half of the blocks uses dil_sec[2], second half dil_sec[3].
    # ------------------------------------------------------------------ #
    conv1x1c_ch = 256 * bw_factor
    inc = inc_sec[2]
    conv1x1a_ch = (k_r * conv1x1c_ch) // (64 * bw_factor)
    conv3x3b_ch = conv1x1a_ch
    half = int(k_sec[2] / 2)

    encode_blocks3 = OrderedDict()
    encode_blocks3['conv4_1'] = DualPathInPlaceABNBlock(
        in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
        groups, dil_sec[2], 'down', norm_act=norm_act)
    in_chs = conv1x1c_ch + 3 * inc
    for i in range(2, half + 1):
        encode_blocks3['conv4_%d' % i] = DualPathInPlaceABNBlock(
            in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
            groups, dil_sec[2], 'normal', norm_act=norm_act)
        in_chs += inc
    for i in range(half + 1, k_sec[2] + 1):
        encode_blocks3['conv4_%d' % i] = DualPathInPlaceABNBlock(
            in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
            groups, dil_sec[3], 'normal', norm_act=norm_act)
        in_chs += inc
    self.encode_stg3 = nn.Sequential(encode_blocks3)

    # ------------------------------------------------------------------ #
    # 5. conv5 (defaults: 1552 -> 2688 channels)
    # ------------------------------------------------------------------ #
    conv1x1c_ch = 512 * bw_factor
    inc = inc_sec[3]
    conv1x1a_ch = (k_r * conv1x1c_ch) // (64 * bw_factor)
    conv3x3b_ch = conv1x1a_ch

    encode_blocks4 = OrderedDict()
    encode_blocks4['conv5_1'] = DualPathInPlaceABNBlock(
        in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
        groups, dil_sec[4], 'proj', norm_act=norm_act)
    in_chs = conv1x1c_ch + 3 * inc
    for i in range(2, k_sec[3] + 1):
        encode_blocks4['conv5_%d' % i] = DualPathInPlaceABNBlock(
            in_chs, conv1x1a_ch, conv3x3b_ch, conv1x1c_ch, inc,
            groups, dil_sec[4], 'normal', norm_act=norm_act)
        in_chs += inc
    # Honor the configured norm_act here as the other CatInPlaceABN layers
    # in this constructor do.
    encode_blocks4['conv5_bn_ac'] = CatInPlaceABN(in_chs, norm_act=norm_act)
    self.encode_stg4 = nn.Sequential(encode_blocks4)

    # ------------------------------------------------------------------ #
    # 6. ASPP #1 (defaults: 2688 -> 512 channels)
    # ------------------------------------------------------------------ #
    self.aspp1 = nn.Sequential(OrderedDict([
        ("aspp1", ASPPInPlaceABNBlock(in_chs, out_sec[0],
                                      feat_res=(int(in_size[0] / 16), int(in_size[1] / 16)),
                                      aspp_sec=aspp_sec, norm_act=norm_act)),
    ]))

    # ------------------------------------------------------------------ #
    # 7. ASPP #2 (defaults: 512 + 704 = 1216 -> 256 channels)
    #    The skip width is the stage-2 output width computed above rather
    #    than a constant, so non-default configurations stay consistent.
    # ------------------------------------------------------------------ #
    self.aspp2_in = nn.Sequential(OrderedDict([
        ("aspp2_in", CatInPlaceABN(stg2_chs, norm_act=norm_act)),
    ]))
    self.aspp2 = nn.Sequential(OrderedDict([
        ("aspp2", ASPPInPlaceABNBlock(out_sec[0] + stg2_chs, out_sec[1],
                                      feat_res=(int(in_size[0] / 8), int(in_size[1] / 8)),
                                      aspp_sec=(aspp_sec[0] * 2, aspp_sec[1] * 2, aspp_sec[2] * 2),
                                      norm_act=norm_act)),
    ]))

    # ------------------------------------------------------------------ #
    # 8. ASPP #3 (defaults: 256 + 336 = 592 -> 128 channels)
    #    Skip width taken from the stage-1 output width computed above.
    # ------------------------------------------------------------------ #
    self.aspp3_in = nn.Sequential(OrderedDict([
        ("aspp3_in", CatInPlaceABN(stg1_chs, norm_act=norm_act)),
    ]))
    self.aspp3 = nn.Sequential(OrderedDict([
        ("aspp3", ASPPInPlaceABNBlock(out_sec[1] + stg1_chs, out_sec[2],
                                      feat_res=(int(in_size[0] / 4), int(in_size[1] / 4)),
                                      up_ratio=4,
                                      aspp_sec=(aspp_sec[0] * 4, aspp_sec[1] * 4, aspp_sec[2] * 4),
                                      norm_act=norm_act)),
    ]))

    # ------------------------------------------------------------------ #
    # Score heads: one 1x1 classifier + SE block per ASPP level; the first
    # two are also upsampled to the input size.
    # ------------------------------------------------------------------ #
    self.score1 = nn.Sequential(OrderedDict([
        ("score1", nn.Conv2d(out_sec[0], classes, kernel_size=1, stride=1,
                             padding=0, bias=True)),
        ("se1_classes", SEBlock(classes, 4)),
        ("up1", nn.Upsample(size=in_size, mode='bilinear')),
    ]))

    self.score2 = nn.Sequential(OrderedDict([
        ("score2", nn.Conv2d(out_sec[1], classes, kernel_size=1, stride=1,
                             padding=0, bias=True)),
        ("se2_classes", SEBlock(classes, 4)),
        ("up2", nn.Upsample(size=in_size, mode='bilinear')),
    ]))

    self.score3 = nn.Sequential(OrderedDict([
        ("score3", nn.Conv2d(out_sec[2], classes, kernel_size=1, stride=1,
                             padding=0, bias=True)),
        ("se3_classes", SEBlock(classes, 4)),
    ]))

    self.score4 = nn.Sequential(OrderedDict([
        ("score4_norm", norm_act(classes)),
        ("score4", nn.Conv2d(classes, classes, kernel_size=1, stride=1,
                             padding=0, bias=True)),
        ("se4_classes", SEBlock(classes, 4)),
    ]))
def __init__(self, structure, norm_act=ABN, classes=0, dilation=True, use_se=True,
             in_size=(64, 64), aspp_out=512, fusion_out=64, aspp_sec=(12, 24, 36)):
    """
    Wider ResNet with pre-activation (identity mapping) and Squeeze & Excitation (SE) blocks

    :param structure:  (list of int) Number of residual blocks in each of the six modules of the network.
    :param norm_act:   (callable) Function to create normalization / activation Module.
    :param classes:    (int) Not `0` for segmentation task
    :param dilation:   (bool) `True` for segmentation task
    :param use_se:     (bool) Use Squeeze & Excitation (SE) or not
    :param in_size:    (tuple of int) Size of the input image
    :param aspp_out:   (int) Number of channels of the ASPP output
    :param fusion_out: (int) Number of output channels of the 1x1 `stg3_fusion` convolution
    :param aspp_sec:   (tuple of int) Dilation rate used in ASPP
    :raises ValueError: if `structure` does not contain exactly six values
    """
    super(SEWiderResNetV2, self).__init__()
    self.structure = structure
    self.dilation = dilation
    self.classes = classes

    self.Sig = nn.Sigmoid()

    if len(structure) != 6:
        raise ValueError("Expected a structure with six values")

    # Initial layers.
    # The stem consumes the 3-channel input image (it previously declared
    # 512 input channels, which no image input can satisfy).
    self.mod1 = nn.Sequential(OrderedDict([
        ("conv1", nn.Conv2d(3, 64, 3, stride=1, padding=1, bias=False)),
    ]))

    # Groups of residual blocks
    in_channels = 64
    channels = [(128, 128), (256, 256), (512, 512), (512, 1024),
                (512, 1024, 2048), (1024, 2048, 4096)]
    # Per-module settings that do not depend on the block index.
    dilated_rates = {3: 2, 4: 4, 5: 8}   # modules not listed use dilation 1
    dropout_p = {4: 0.2, 5: 0.3}         # modules not listed use no dropout

    for mod_id, num in enumerate(structure):
        dil = dilated_rates.get(mod_id, 1) if dilation else 1
        if mod_id in dropout_p:
            drop = partial(nn.Dropout2d, p=dropout_p[mod_id])
        else:
            drop = None

        # Create blocks for module
        blocks = []
        for block_id in range(num):
            # Only the first block of a module may downsample: modules 2-4
            # without dilation, module 2 only when dilation replaces the
            # later strides.
            if dilation:
                strided = block_id == 0 and mod_id == 2
            else:
                strided = block_id == 0 and 2 <= mod_id <= 4
            stride = 2 if strided else 1

            blocks.append((
                "block%d" % (block_id + 1),
                IdentityResidualBlock(in_channels, channels[mod_id],
                                      norm_act=norm_act, stride=stride,
                                      dilation=dil, dropout=drop,
                                      use_se=use_se),
            ))

            # Update channels
            in_channels = channels[mod_id][-1]

        # Create module; the first two modules are preceded by a max-pool.
        if mod_id < 2:
            self.add_module("pool%d" % (mod_id + 2),
                            nn.MaxPool2d(3, stride=2, padding=1))
        self.add_module("mod%d" % (mod_id + 2), nn.Sequential(OrderedDict(blocks)))

    # Pooling and predictor
    self.bn_out = norm_act(in_channels)

    if classes != 0:
        self.stg3_fusion = nn.Conv2d(channels[1][1], fusion_out, kernel_size=1,
                                     stride=1, padding=0, bias=False)

        # NOTE(review): norm_act is not forwarded to the ASPP block here, so
        # it uses that block's own default -- confirm this is intended.
        self.aspp = nn.Sequential(OrderedDict([
            ("aspp", ASPPInPlaceABNBlock(channels[5][2], aspp_out,
                                         feat_res=(int(in_size[0] / 8), int(in_size[1] / 8)),
                                         up_ratio=2, aspp_sec=aspp_sec)),
        ]))

        self.score = nn.Sequential(OrderedDict([
            ("conv", nn.Conv2d(aspp_out + fusion_out, classes, kernel_size=3,
                               stride=1, padding=1, bias=True)),
            ("up", nn.Upsample(size=in_size, mode='bilinear')),
        ]))
def __init__(self, num_clases=19, in_size=(448, 896), aspp_out=512, fusion_out=64,
             aspp_sec=(12, 24, 36), norm_act=ABN):
    """
    Build the InceptionResNetV2 backbone and, when requested, its
    segmentation head.

    :param num_clases: (int) Number of classes; when `0` no segmentation head is built
    :param in_size:    (tuple of int) Size of the input image
    :param aspp_out:   (int) Number of channels of the ASPP output
    :param fusion_out: (int) Number of output channels of the 1x1 `stg3_fusion` convolution
    :param aspp_sec:   (tuple of int) Dilation rates used in ASPP
    :param norm_act:   (callable) Normalization / activation factory
                       (NOTE(review): not used by this constructor -- confirm)
    """
    super(InceptionResNetV2, self).__init__()
    self.num_clases = num_clases

    # ---- stem -----------------------------------------------------------
    self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2, padding=1)
    self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
    self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.maxpool_3a = nn.MaxPool2d(3, stride=2, padding=1)
    self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
    self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1, padding=1)
    self.maxpool_5a = nn.MaxPool2d(3, stride=2, padding=1)

    # ---- 10 x Block35 ---------------------------------------------------
    self.mixed_5b = Mixed_5b()
    block35_stack = [Block35(scale=0.17) for _ in range(10)]
    self.repeat = nn.Sequential(*block35_stack)

    # ---- 20 x Block17 ---------------------------------------------------
    self.mixed_6a = Mixed_6a()
    block17_stack = [Block17(scale=0.10) for _ in range(20)]
    self.repeat_1 = nn.Sequential(*block17_stack)

    # ---- 9 x Block8, then one Block8 without the trailing ReLU ----------
    self.mixed_7a = Mixed_7a()
    block8_stack = [Block8(scale=0.20) for _ in range(9)]
    self.repeat_2 = nn.Sequential(*block8_stack)

    self.block8 = Block8(noReLU=True)
    self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)

    # ---- segmentation head ----------------------------------------------
    if num_clases != 0:
        self.stg3_fusion = nn.Conv2d(192, fusion_out, kernel_size=1, stride=1,
                                     padding=0, bias=False)

        feat_res = (int(in_size[0] / 8), int(in_size[1] / 8))
        aspp_block = ASPPInPlaceABNBlock(1536, aspp_out, feat_res=feat_res,
                                         up_ratio=2, aspp_sec=aspp_sec)
        self.aspp = nn.Sequential(OrderedDict([("aspp", aspp_block)]))

        # Width of the ASPP output plus the fusion convolution's output.
        head_chs = aspp_out + fusion_out
        self.score_se = nn.Sequential(ModifiedSCSEBlock(channel=head_chs, reduction=16))
        self.score = nn.Sequential(OrderedDict([
            ("conv", nn.Conv2d(head_chs, num_clases, kernel_size=3, stride=1,
                               padding=1, bias=True)),
            ("up", nn.Upsample(size=in_size, mode='bilinear')),
        ]))