def __init__(self, in_channels, num_class, scale, is_aux=False):
    """BiSeNet segmentation head: a 3x3 refinement conv followed by a 1x1 classifier.

    Args:
        in_channels: number of input feature channels.
        num_class: number of output classes.
        scale: stored scale factor (presumably used for upsampling in forward —
            confirm against the forward pass, not visible here).
        is_aux: auxiliary heads use a wider hidden width (256) than the main head (64).
    """
    super(BiSeNetHead, self).__init__()
    # Single source of truth for the hidden width instead of the original
    # two duplicated if/else ladders over the same flag.
    mid_channels = 256 if is_aux else 64
    self.conv_3x3 = ConvBnReLU(in_channels, mid_channels, kernel_size=3,
                               stride=1, padding=1,
                               has_bn=True, has_relu=True, has_bias=False)
    self.classified_conv = nn.Conv2d(mid_channels, num_class,
                                     kernel_size=1, stride=1, padding=0)
    self.scale = scale
def make_decoder(in_channels, out_channels):
    """Build a LinkNet-style decoder stage.

    Pipeline: 1x1 conv to a quarter-width bottleneck, 2x transposed-conv
    upsample at that width, then 1x1 conv out to ``out_channels``.
    """
    bottleneck = in_channels // 4
    stages = [
        ConvBnReLU(in_channels, bottleneck, kernel_size=1, stride=1, padding=0,
                   has_bn=True, has_relu=True, has_bias=False),
        DeconvBnReLU(bottleneck, bottleneck, kernel_size=2, stride=2, padding=0,
                     has_bn=True, has_relu=True, inplace=True, has_bias=False),
        ConvBnReLU(bottleneck, out_channels, kernel_size=1, stride=1, padding=0,
                   has_bn=True, has_relu=True, has_bias=False),
    ]
    return nn.Sequential(*stages)
def __init__(self, in_channels, out_channels, reduction=1):
    """Feature fusion module: 1x1 fuse conv plus SE-style channel attention."""
    super(FeatureFusion, self).__init__()
    squeezed = out_channels // reduction
    self.conv_1x1 = ConvBnReLU(in_channels, out_channels, kernel_size=1, stride=1,
                               padding=0, has_bn=True, has_relu=True, has_bias=False)
    # Channel gate: global pool -> bottleneck (ReLU) -> expand -> sigmoid weights.
    self.channel_attention = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        ConvBnReLU(out_channels, squeezed, kernel_size=1, stride=1, padding=0,
                   has_bn=False, has_relu=True, has_bias=False),
        ConvBnReLU(squeezed, out_channels, kernel_size=1, stride=1, padding=0,
                   has_bn=False, has_relu=False, has_bias=False),
        nn.Sigmoid(),
    )
def __init__(self, in_channels, out_channels):
    """BiSeNet spatial path: three stride-2 convs (overall 1/8 resolution)
    followed by a 1x1 projection to ``out_channels``."""
    super(SpatialPath, self).__init__()
    width = 64
    self.conv_7x7 = ConvBnReLU(in_channels, width, kernel_size=7, stride=2,
                               padding=3, has_bn=True, has_relu=True, has_bias=False)
    self.conv_3x3_1 = ConvBnReLU(width, width, kernel_size=3, stride=2, padding=1,
                                 has_bn=True, has_relu=True, has_bias=False)
    self.conv_3x3_2 = ConvBnReLU(width, width, kernel_size=3, stride=2, padding=1,
                                 has_bn=True, has_relu=True, has_bias=False)
    self.conv_1x1 = ConvBnReLU(width, out_channels, kernel_size=1, stride=1,
                               padding=0, has_bn=True, has_relu=True, has_bias=False)
def __init__(self, in_channels, out_channels, num_layers):
    """SegNet decoder stage: halve the channel width, refine with
    ``num_layers - 1`` same-width convs, then map to ``out_channels``."""
    super(SegNetDecoder, self).__init__()
    half = in_channels // 2
    layers = [ConvBnReLU(in_channels, half)]
    layers += [ConvBnReLU(half, half) for _ in range(num_layers - 1)]
    layers.append(ConvBnReLU(half, out_channels))
    self.decoder = nn.Sequential(*layers)
def __init__(self, in_channels_low, in_channels_high, out_channels, num_class, is_train=True):
    """ICNet cascade feature fusion: dilated 3x3 projections for the low- and
    high-resolution branches, plus an auxiliary 1x1 classifier."""
    super(CascadeFeatureFusion, self).__init__()
    self.is_train = is_train
    # Both branch projections share the same dilated-conv configuration
    # (no ReLU: activation is presumably applied after the fusion add).
    shared = dict(kernel_size=3, padding=2, dilation=2,
                  has_bn=True, has_relu=False, has_bias=False)
    self.conv_low = ConvBnReLU(in_channels_low, out_channels, **shared)
    self.conv_high = ConvBnReLU(in_channels_high, out_channels, **shared)
    self.aux_classify = nn.Conv2d(out_channels, num_class, 1, bias=False)
def __init__(self, num_class, is_train=False):
    """ICNet: a light three-conv stem, a ResNet-50 backbone, a pyramid pooling
    module, and a multi-branch head."""
    super(ICNet, self).__init__()
    # Full-resolution stem: three stride-2 convs (1/8 of the input size).
    self.conv_resolution1 = nn.Sequential(
        ConvBnReLU(3, 32, kernel_size=3, stride=2),
        ConvBnReLU(32, 32, kernel_size=3, stride=2),
        ConvBnReLU(32, 64, kernel_size=3, stride=2),
    )
    self.backbone = Backbone(50)
    self.ppm = PyramidPoolingModule(512)
    self.head = ICNetHead(num_class, is_train=is_train)
    # Direct assignment goes through nn.Module.__setattr__, identical to the
    # explicit __setattr__ call it replaces.
    # NOTE(review): 'conv_sub1' does not match any attribute defined here (the
    # stem is named 'conv_resolution1') — confirm the intended module name.
    self.exclusive = ['conv_sub1', 'head']
def __init__(self, num_class, is_train=False):
    """BiSeNet: spatial path + context path, global-context pooling, two
    attention-refinement modules, feature fusion, and three heads
    (two auxiliary, one main)."""
    super(BiSeNet, self).__init__()
    self.context_path = ContextPath()
    self.business_layer = []
    self.is_train = is_train
    self.spatial_path = SpatialPath(3, 128)
    width = 128  # common working width for refinement and fusion
    # Image-level context projected down to the working width.
    self.global_context = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        ConvBnReLU(512, width, kernel_size=1, stride=1, padding=0,
                   has_bn=False, has_relu=True, has_bias=False),
    )
    # ARMs for the two deepest context-path stages (512 and 256 channels).
    self.arms = nn.Sequential(
        AttentionRefinement(512, width),
        AttentionRefinement(256, width),
    )
    self.refines = nn.Sequential(
        ConvBnReLU(width, width, kernel_size=3, stride=1, padding=1,
                   has_bn=True, has_relu=True, has_bias=False),
        ConvBnReLU(width, width, kernel_size=3, stride=1, padding=1,
                   has_bn=True, has_relu=True, has_bias=False),
    )
    self.heads = nn.Sequential(
        BiSeNetHead(width, num_class, 16, True),      # auxiliary head
        BiSeNetHead(width, num_class, 8, True),       # auxiliary head
        BiSeNetHead(width * 2, num_class, 8, False),  # main head on fused features
    )
    self.ffm = FeatureFusion(width * 2, width * 2, 1)
def __init__(self, in_channels, out_channels):
    """Attention refinement module: 3x3 conv followed by a global channel gate."""
    super(AttentionRefinement, self).__init__()
    self.conv_3x3 = ConvBnReLU(in_channels, out_channels, kernel_size=3, stride=1,
                               padding=1, has_bn=True, has_relu=True, has_bias=False)
    # Channel gate: global average pool -> 1x1 conv -> sigmoid weights.
    self.channel_attention = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        ConvBnReLU(out_channels, out_channels, kernel_size=1, stride=1, padding=0,
                   has_bn=False, has_relu=False, has_bias=False),
        nn.Sigmoid(),
    )
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
    """Basic residual block: two 3x3 convs with an optional downsample shortcut."""
    super(ResidualBlock, self).__init__()
    self.conv1 = ConvBnReLU(in_channels, out_channels, kernel_size=3, stride=stride,
                            padding=1, has_bn=True, has_relu=True, has_bias=False)
    # No ReLU on the second conv: activation presumably follows the residual add.
    self.conv2 = ConvBnReLU(out_channels, out_channels, kernel_size=3, stride=1,
                            padding=1, has_bn=True, has_relu=False, has_bias=False)
    self.downsample = downsample
    self.stride = stride
    self.relu = nn.ReLU(inplace=True)
def __init__(self, num_class):
    """PSPNet: ResNet-50 feature extractor, pyramid pooling module, and a
    conv classifier over the concatenated context features."""
    super(PSPNet, self).__init__()
    backbone = resnet_model_load(50)
    self.conv1 = backbone.conv1
    self.layer1 = backbone.layer1
    self.layer2 = backbone.layer2
    self.layer3 = backbone.layer3
    self.layer4 = backbone.layer4
    feat_channels = 2048  # ResNet-50 layer4 output width
    self.ppm = PyramidPoolingModule(feat_channels)
    # The classifier input is twice the backbone width — presumably the PPM
    # concatenates pooled context with its input; confirm against the module.
    self.classified_conv = nn.Sequential(
        ConvBnReLU(feat_channels * 2, feat_channels // 4),
        nn.Conv2d(feat_channels // 4, num_class, 1),
    )
def make_encoder(block, in_channels, out_channels):
    """Build a two-block encoder stage.

    The first block downsamples (stride 2) with a 1x1 stride-2 projection
    shortcut; the second keeps resolution and width.

    Args:
        block: residual block class, invoked as ``block(in, out[, stride, downsample])``.
        in_channels: input channel count of the stage.
        out_channels: output channel count of both blocks.

    Returns:
        nn.Sequential containing the two blocks.

    BUG FIX: the original loop tested ``if i is 0`` — an identity comparison,
    not equality. It only works via CPython's small-int caching and raises a
    SyntaxWarning on Python 3.8+. The two-iteration loop is unrolled, which
    removes the comparison entirely while producing the identical module.
    """
    downsample = ConvBnReLU(in_channels, out_channels, kernel_size=1, stride=2,
                            padding=0, has_bn=True, has_relu=False, has_bias=False)
    layers = [
        block(in_channels, out_channels, 2, downsample),
        block(out_channels, out_channels),
    ]
    return nn.Sequential(*layers)
def __init__(self, in_channels, out_channels, atrous_rates=(6, 12, 18)):
    """ASPP module: parallel 1x1 conv, three dilated 3x3 convs, and an
    image-pooling branch, fused by a final 1x1 conv.

    Args:
        in_channels: input feature channels.
        out_channels: width of every branch and of the fused output.
        atrous_rates: three dilation rates for the parallel 3x3 convs.
            FIX: default changed from a mutable list to a tuple — mutable
            default arguments are shared across calls; a tuple is safe and
            callers passing lists are unaffected (rates are only indexed).
    """
    super(AsppPlus, self).__init__()
    self.conv0 = ConvBnReLU(in_channels, out_channels, kernel_size=1, stride=1,
                            padding=0, has_bn=True, has_relu=True, has_bias=False)
    # padding == dilation keeps the spatial size unchanged for 3x3 kernels.
    self.conv1 = ConvBnReLU(in_channels, out_channels, kernel_size=3, stride=1,
                            padding=atrous_rates[0], dilation=atrous_rates[0],
                            has_bn=True, has_relu=True, has_bias=False)
    self.conv2 = ConvBnReLU(in_channels, out_channels, kernel_size=3, stride=1,
                            padding=atrous_rates[1], dilation=atrous_rates[1],
                            has_bn=True, has_relu=True, has_bias=False)
    self.conv3 = ConvBnReLU(in_channels, out_channels, kernel_size=3, stride=1,
                            padding=atrous_rates[2], dilation=atrous_rates[2],
                            has_bn=True, has_relu=True, has_bias=False)
    # Image-level branch: global pooling then 1x1 projection (no BN on a 1x1 map).
    self.conv4 = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        ConvBnReLU(in_channels, out_channels, kernel_size=1, stride=1, padding=0,
                   has_bn=False, has_relu=True, has_bias=False),
    )
    # Fuse the five concatenated branches back to out_channels.
    self.combine_conv = ConvBnReLU(out_channels * 5, out_channels, kernel_size=1,
                                   stride=1, padding=0,
                                   has_bn=True, has_relu=True, has_bias=False)
def __init__(self, num_class):
    """DFANet: three cascaded encoder stages with FC attention; tapped features
    are projected to 32 channels, merged, and classified."""
    super(DFANet, self).__init__()
    self.conv1 = ConvBnReLU(3, 8, kernel_size=3, stride=2, padding=1,
                            has_bn=True, has_relu=True, has_bias=False)
    # Stage 1: encoder over the stem output.
    self.enc2_1 = DFANetEncoder(in_channels=8, out_channels=48, stage=4)
    self.enc3_1 = DFANetEncoder(in_channels=48, out_channels=96, stage=6)
    self.enc4_1 = DFANetEncoder(in_channels=96, out_channels=192, stage=4)
    self.fca_1 = FcAttention(192, 192)
    # Stages 2 and 3: the wider inputs (240/144/288) suggest features
    # concatenated from the previous stage — confirm against forward().
    self.enc2_2 = DFANetEncoder(in_channels=240, out_channels=48, stage=4)
    self.enc3_2 = DFANetEncoder(in_channels=144, out_channels=96, stage=6)
    self.enc4_2 = DFANetEncoder(in_channels=288, out_channels=192, stage=4)
    self.fca_2 = FcAttention(192, 192)
    self.enc2_3 = DFANetEncoder(in_channels=240, out_channels=48, stage=4)
    self.enc3_3 = DFANetEncoder(in_channels=144, out_channels=96, stage=6)
    self.enc4_3 = DFANetEncoder(in_channels=288, out_channels=192, stage=4)
    self.fca_3 = FcAttention(192, 192)

    # Decoder taps: every feature map is reduced to 32 channels with the same
    # 1x1 ConvBnReLU configuration; a local factory avoids six repetitions.
    def project(channels):
        return ConvBnReLU(channels, 32, kernel_size=1, stride=1, padding=0,
                          has_bn=True, has_relu=True, has_bias=False)

    self.enc2_1_to_decoder = project(48)
    self.enc2_2_to_decoder = project(48)
    self.enc2_3_to_decoder = project(48)
    self.fca_1_to_decoder = project(192)
    self.fca_2_to_decoder = project(192)
    self.fca_3_to_decoder = project(192)
    self.merge_conv = project(32)
    self.classified_conv = nn.Conv2d(32, num_class, 1, 1, bias=False)
def __init__(self, in_channels, num_class):
    """DeepLab head: a 1x1 refinement conv and a 3x3 conv producing class scores.

    NOTE(review): conv3x3 expects ``in_channels * 2`` inputs, which suggests
    the forward pass concatenates two feature maps — confirm against forward().
    """
    super(DeepLabHead, self).__init__()
    self.conv1x1 = ConvBnReLU(in_channels, in_channels, kernel_size=1, stride=1,
                              padding=0, has_bn=True, has_relu=True, has_bias=False)
    self.conv3x3 = ConvBnReLU(in_channels * 2, num_class, kernel_size=3, stride=1,
                              padding=1, has_bn=True, has_relu=True, has_bias=False)
def __init__(self, num_class):
    """LinkNet: residual encoder with mirrored deconv decoder stages and a
    transposed-conv classifier."""
    super(LinkNet, self).__init__()
    self.feature_scale = 1
    widths = [c // self.feature_scale for c in [64, 128, 256, 512]]
    self.inchannels = widths[0]
    final_width = 32 // self.feature_scale

    # Encoder: 7x7 stem + max-pool, then four two-block residual stages.
    self.encoder_before = nn.Sequential(
        ConvBnReLU(3, widths[0], kernel_size=7, stride=2, padding=3,
                   has_bn=True, has_relu=True, has_bias=False),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    self.encoder1 = make_encoder(block=ResidualBlock, in_channels=widths[0], out_channels=widths[0])
    self.encoder2 = make_encoder(block=ResidualBlock, in_channels=widths[0], out_channels=widths[1])
    self.encoder3 = make_encoder(block=ResidualBlock, in_channels=widths[1], out_channels=widths[2])
    self.encoder4 = make_encoder(block=ResidualBlock, in_channels=widths[2], out_channels=widths[3])
    self.avgpool = nn.AvgPool2d(7)

    # Decoder: mirrors the encoder stages in reverse.
    self.decoder4 = make_decoder(in_channels=widths[3], out_channels=widths[2])
    self.decoder3 = make_decoder(in_channels=widths[2], out_channels=widths[1])
    self.decoder2 = make_decoder(in_channels=widths[1], out_channels=widths[0])
    self.decoder1 = make_decoder(in_channels=widths[0], out_channels=widths[0])

    # Final classification: 2x upsample, 3x3 refine, transposed-conv to classes.
    self.final_deconv1 = DeconvBnReLU(widths[0], final_width, kernel_size=2,
                                      stride=2, padding=0, has_bn=True,
                                      has_relu=True, inplace=True, has_bias=False)
    self.final_conv2 = ConvBnReLU(final_width, final_width, kernel_size=3, stride=1,
                                  padding=1, has_bn=True, has_relu=True,
                                  has_bias=False)
    self.classified_conv = nn.ConvTranspose2d(final_width, num_class, 2, 2)