def __init__(self, num_classes, trunk='wrn38', criterion=None, use_dpc=False,
             init_all=False):
    """Build a DeepLabV3+ model: trunk + ASPP + skip-refined decoder.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- use the DPC variant of the ASPP module
    init_all -- re-initialize the whole decoder, not just the final head
    """
    super(DeepV3Plus, self).__init__()
    self.criterion = criterion
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch, bottleneck_ch=256,
                                      output_stride=8, dpc=use_dpc)
    # 1x1 projections: low-level (stride-2) skip -> 48ch, ASPP -> 256ch.
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)
    # Final prediction head over concatenated (ASPP + skip) features.
    head = [
        nn.Conv2d(256 + 48, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, num_classes, kernel_size=1, bias=False),
    ]
    self.final = nn.Sequential(*head)
    if init_all:
        initialize_weights(self.aspp)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.bot_fine)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk='wrn38', criterion=None):
    """Build a two-scale attention DeepLabV3+ model (MscaleV3Plus).

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    """
    super(MscaleV3Plus, self).__init__()
    self.criterion = criterion
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch,
                                      bottleneck_ch=256,
                                      output_stride=8)
    # Channel reducers for the skip connection and the ASPP output.
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)

    # Semantic segmentation prediction head
    self.final = nn.Sequential(
        nn.Conv2d(256 + 48, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, num_classes, kernel_size=1, bias=False))

    # Scale-attention prediction head: takes the decoder features of two
    # scales concatenated along the channel axis, emits a sigmoid mask.
    scale_in_ch = 2 * (256 + 48)
    self.scale_attn = nn.Sequential(
        nn.Conv2d(scale_in_ch, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 1, kernel_size=1, bias=False),
        nn.Sigmoid())

    if cfg.OPTIONS.INIT_DECODER:
        initialize_weights(self.bot_fine)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.scale_attn)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build the simplest model: trunk + plain segmentation head."""
    super(Basic, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk_name=trunk,
                                                   output_stride=8)
    self.seg_head = make_seg_head(in_ch=high_level_ch,
                                  out_ch=num_classes)
    initialize_weights(self.seg_head)
def __init__(self, num_classes, trunk='wrn38', criterion=None,
             fuse_aspp=False, attn_2b=False):
    """Build a multi-scale 'deeper' decoder: ASPP plus s4/s2 skip stages.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    fuse_aspp -- stored flag; presumably fuses ASPP features across scales
                 in the forward pass -- TODO confirm against forward()
    attn_2b -- use a 2-channel attention head instead of 1-channel
    """
    super(MscaleDeeper, self).__init__()
    self.criterion = criterion
    self.fuse_aspp = fuse_aspp
    self.attn_2b = attn_2b
    self.backbone, s2_ch, s4_ch, high_level_ch = get_trunk(
        trunk_name=trunk, output_stride=8)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch, bottleneck_ch=256,
                                      output_stride=8)
    # Skip-connection reducers and upsampling path convolutions.
    self.convs2 = nn.Conv2d(s2_ch, 32, kernel_size=1, bias=False)
    self.convs4 = nn.Conv2d(s4_ch, 64, kernel_size=1, bias=False)
    self.conv_up1 = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)
    self.conv_up2 = ConvBnRelu(256 + 64, 256, kernel_size=5, padding=2)
    self.conv_up3 = ConvBnRelu(256 + 32, 256, kernel_size=5, padding=2)
    self.conv_up5 = nn.Conv2d(256, num_classes, kernel_size=1, bias=False)

    # Scale-attention prediction head
    attn_ch = 2 if self.attn_2b else 1
    self.scale_attn = make_attn_head(in_ch=256, out_ch=attn_ch)

    if cfg.OPTIONS.INIT_DECODER:
        initialize_weights(self.convs2, self.convs4, self.conv_up1,
                           self.conv_up2, self.conv_up3, self.conv_up5,
                           self.scale_attn)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build a multi-scale OCR model: OCR block + 1-channel attention head."""
    super(MscaleOCR, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk)
    self.ocr = OCR_block(high_level_ch)
    # Attention head operates on the OCR mid-level features.
    self.scale_attn = make_attn_head(in_ch=cfg.MODEL.OCR.MID_CHANNELS,
                                     out_ch=1)
def __init__(self, num_classes, trunk='resnet-50', criterion=None,
             use_dpc=False, init_all=False, output_stride=8):
    """Build a trunk + AFNB attention + segmentation-head model.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- unused; kept for signature compatibility with the other
               DeepV3 variants (it configured the ASPP module that AFNB
               replaces here)
    init_all -- unused; kept for signature compatibility
    output_stride -- trunk output stride passed to get_trunk
    """
    super(DeepV3ATTN, self).__init__()
    self.criterion = criterion
    self.backbone, _s2_ch, _s4_ch, high_level_ch = \
        get_trunk(trunk, output_stride=output_stride)
    # Asymmetric fusion non-local block over the trunk's high-level
    # features.  NOTE(review): the 2048/4096 input channel counts are
    # hard-coded rather than derived from high_level_ch -- confirm they
    # match the configured trunk before reuse with another backbone.
    self.attn = AFNB(low_in_channels=2048, high_in_channels=4096,
                     out_channels=2048, key_channels=1024,
                     value_channels=2048, dropout=0.5, sizes=([1]),
                     norm_type='batchnorm', psp_size=(1, 3, 6, 8))
    self.final = make_seg_head(in_ch=high_level_ch, out_ch=num_classes)
    initialize_weights(self.attn)
    initialize_weights(self.final)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build an OCR model whose OCR block consumes ASPP output features."""
    super(OCRNetASPP, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch,
                                      bottleneck_ch=256,
                                      output_stride=8)
    self.ocr = OCR_block(aspp_out_ch)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build a multi-scale basic model: seg head + 1-channel attn head."""
    super(MscaleBasic, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk_name=trunk,
                                                   output_stride=8)
    # Both heads read the same high-level trunk features.
    self.cls_head = make_seg_head(in_ch=high_level_ch,
                                  out_ch=num_classes)
    self.scale_attn = make_attn_head(in_ch=high_level_ch, out_ch=1)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build a trunk + ASPP + segmentation-head model."""
    super(ASPP, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk)
    # ASPP bottleneck width comes from the global config.
    self.aspp, aspp_out_ch = get_aspp(
        high_level_ch,
        bottleneck_ch=cfg.MODEL.ASPP_BOT_CH,
        output_stride=8)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)
    self.final = make_seg_head(in_ch=256, out_ch=num_classes)
    initialize_weights(self.final, self.bot_aspp, self.aspp)
def __init__(self, num_classes, trunk='resnet-50', criterion=None,
             use_dpc=False, init_all=False, output_stride=8):
    """Build a DeepLabV3 model (no decoder skips): trunk + ASPP + head.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- use the DPC variant of the ASPP module
    init_all -- unused here; ASPP and head are always re-initialized
    output_stride -- trunk/ASPP output stride
    """
    super(DeepV3, self).__init__()
    self.criterion = criterion
    self.backbone, _s2_ch, _s4_ch, high_level_ch = \
        get_trunk(trunk, output_stride=output_stride)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch,
                                      bottleneck_ch=256,
                                      output_stride=output_stride,
                                      dpc=use_dpc)
    self.final = make_seg_head(in_ch=aspp_out_ch, out_ch=num_classes)
    initialize_weights(self.aspp)
    initialize_weights(self.final)
def __init__(self, num_classes, trunk='wrn38', criterion=None):
    """Build the stride-8 'deeper' decoder: ASPP plus s4/s2 skip stages."""
    super(DeeperS8, self).__init__()
    self.criterion = criterion
    self.trunk, s2_ch, s4_ch, high_level_ch = get_trunk(
        trunk_name=trunk, output_stride=8)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch,
                                      bottleneck_ch=256,
                                      output_stride=8)
    # Skip-connection reducers and the upsampling path.
    self.convs2 = nn.Conv2d(s2_ch, 32, kernel_size=1, bias=False)
    self.convs4 = nn.Conv2d(s4_ch, 64, kernel_size=1, bias=False)
    self.conv_up1 = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)
    self.conv_up2 = ConvBnRelu(256 + 64, 256, kernel_size=5, padding=2)
    self.conv_up3 = ConvBnRelu(256 + 32, 256, kernel_size=5, padding=2)
    self.conv_up5 = nn.Conv2d(256, num_classes, kernel_size=1, bias=False)
def __init__(self, num_classes, trunk='wrn38', criterion=None, use_dpc=False,
             fuse_aspp=False, attn_2b=False, attn_cls=False):
    """Build a multi-scale attention DeepLabV3+ model.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- use the DPC variant of the ASPP module
    fuse_aspp -- stored flag; presumably fuses ASPP features across
                 scales in forward() -- TODO confirm against forward()
    attn_2b -- use a 2-channel attention head
    attn_cls -- use a per-class attention head (ignored when attn_2b)
    """
    super(MscaleV3Plus, self).__init__()
    self.criterion = criterion
    self.fuse_aspp = fuse_aspp
    self.attn_2b = attn_2b
    self.attn_cls = attn_cls
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch, bottleneck_ch=256,
                                      output_stride=8, dpc=use_dpc)
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)

    # Semantic segmentation prediction head; width from the config.
    bot_ch = cfg.MODEL.SEGATTN_BOT_CH
    self.final = nn.Sequential(
        nn.Conv2d(256 + 48, bot_ch, kernel_size=3, padding=1, bias=False),
        Norm2d(bot_ch),
        nn.ReLU(inplace=True),
        nn.Conv2d(bot_ch, bot_ch, kernel_size=3, padding=1, bias=False),
        Norm2d(bot_ch),
        nn.ReLU(inplace=True),
        nn.Conv2d(bot_ch, num_classes, kernel_size=1, bias=False))

    # Scale-attention prediction head; attn_2b takes precedence over
    # attn_cls, matching the original nested conditional.
    if self.attn_2b:
        attn_ch = 2
    elif self.attn_cls:
        attn_ch = num_classes
    else:
        attn_ch = 1
    scale_in_ch = 256 + 48
    self.scale_attn = make_attn_head(in_ch=scale_in_ch, out_ch=attn_ch)

    if cfg.OPTIONS.INIT_DECODER:
        initialize_weights(self.bot_fine)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.scale_attn)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk='wrn38', criterion=None, use_dpc=False,
             fuse_aspp=False, attn_2b=False):
    """Build a multi-scale DeepLabV3+ model with an ADNB attention encoder.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- use the DPC variant of the ASPP module
    fuse_aspp -- stored flag; presumably fuses ASPP features across
                 scales in forward() -- TODO confirm against forward()
    attn_2b -- use a 2-channel attention head instead of 1-channel
    """
    super(MscaleV3Plus, self).__init__()
    self.criterion = criterion
    self.fuse_aspp = fuse_aspp
    self.attn_2b = attn_2b
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch, bottleneck_ch=256,
                                      output_stride=8, dpc=use_dpc,
                                      img_norm=False)
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)
    # Deformable-attention encoder over the 256-channel decoder features.
    self.adnb = ADNB(d_model=256, nhead=8, num_encoder_layers=2,
                     dim_feedforward=256, dropout=0.5, activation="relu",
                     num_feature_levels=1, enc_n_points=4)

    # Semantic segmentation prediction head; width from the config.
    bot_ch = cfg.MODEL.SEGATTN_BOT_CH
    self.final = nn.Sequential(
        nn.Conv2d(256 + 48, bot_ch, kernel_size=3, padding=1, bias=False),
        Norm2d(bot_ch),
        nn.ReLU(inplace=True),
        nn.Conv2d(bot_ch, bot_ch, kernel_size=3, padding=1, bias=False),
        Norm2d(bot_ch),
        nn.ReLU(inplace=True),
        nn.Conv2d(bot_ch, num_classes, kernel_size=1, bias=False))

    # Scale-attention prediction head
    attn_ch = 2 if self.attn_2b else 1
    scale_in_ch = 256 + 48
    self.scale_attn = make_attn_head(in_ch=scale_in_ch, out_ch=attn_ch)

    if cfg.OPTIONS.INIT_DECODER:
        initialize_weights(self.bot_fine)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.scale_attn)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk='wrn38', criterion=None, use_dpc=False,
             init_all=False):
    """Build a DeepLabV3+ decoder with an AFNB block in place of ASPP.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- unused; kept for signature compatibility with DeepV3Plus
               (it configured the ASPP module that AFNB replaces here)
    init_all -- re-initialize the whole decoder, not just the final head
    """
    super(DeepV3PlusATTN, self).__init__()
    self.criterion = criterion
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    # Asymmetric fusion non-local block producing 256-channel context
    # features.  NOTE(review): the 2048/4096 input channel counts are
    # hard-coded rather than derived from high_level_ch -- confirm they
    # match the configured trunk before reuse with another backbone.
    self.attn = AFNB(low_in_channels=2048, high_in_channels=4096,
                     out_channels=256, key_channels=64, value_channels=256,
                     dropout=0.8, sizes=([1]), norm_type='batchnorm',
                     psp_size=(1, 3, 6, 8))
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(256, 256, kernel_size=1, bias=False)
    # Final prediction head over concatenated (context + skip) features.
    self.final = nn.Sequential(
        nn.Conv2d(256 + 48, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, num_classes, kernel_size=1, bias=False))
    if init_all:
        initialize_weights(self.attn)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.bot_fine)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk='hrnetv2', criterion=None):
    """Build a plain OCR model: trunk + OCR block."""
    super(OCRNet, self).__init__()
    self.criterion = criterion
    self.backbone, _, _, high_level_ch = get_trunk(trunk)
    self.ocr = OCR_block(high_level_ch)
def __init__(self, num_classes, trunk='wrn38', criterion=None, use_dpc=False,
             fuse_aspp=False, attn_2b=False, bn_head=False):
    """Build a DeepLabV3+ model with an explicit per-scale attention head.

    Keyword arguments:
    num_classes -- number of output segmentation classes
    trunk -- backbone name passed to get_trunk
    criterion -- optional training loss module
    use_dpc -- use the DPC variant of the ASPP module
    fuse_aspp -- stored flag; presumably fuses ASPP features across
                 scales in forward() -- TODO confirm against forward()
    attn_2b -- stored flag for a 2-channel attention variant
    bn_head -- force the BN-normalized attention head (also enabled via
               cfg.MODEL.ATTNSCALE_BN_HEAD)
    """
    super(ASDV3P, self).__init__()
    self.criterion = criterion
    self.fuse_aspp = fuse_aspp
    self.attn_2b = attn_2b
    self.backbone, s2_ch, _s4_ch, high_level_ch = get_trunk(trunk)
    self.aspp, aspp_out_ch = get_aspp(high_level_ch, bottleneck_ch=256,
                                      output_stride=8, dpc=use_dpc)
    self.bot_fine = nn.Conv2d(s2_ch, 48, kernel_size=1, bias=False)
    self.bot_aspp = nn.Conv2d(aspp_out_ch, 256, kernel_size=1, bias=False)

    # Semantic segmentation prediction head
    self.final = nn.Sequential(
        nn.Conv2d(256 + 48, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
        Norm2d(256),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, num_classes, kernel_size=1, bias=False))

    # Scale-attention prediction head: one map per configured scale,
    # computed from the per-scale decoder features concatenated on the
    # channel axis.
    assert cfg.MODEL.N_SCALES is not None
    self.scales = sorted(cfg.MODEL.N_SCALES)
    num_scales = len(self.scales)
    if cfg.MODEL.ATTNSCALE_BN_HEAD or bn_head:
        self.scale_attn = nn.Sequential(
            nn.Conv2d(num_scales * (256 + 48), 256, kernel_size=3,
                      padding=1, bias=False),
            Norm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1, bias=False),
            Norm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, num_scales, kernel_size=1, bias=False))
    else:
        # BUGFIX: the final 1x1 conv previously passed padding=1, which
        # pads the input by one pixel per side and makes the attention
        # map 2 pixels larger than the segmentation logits on each axis.
        # A 1x1 conv needs no padding (every other 1x1 conv in this file
        # uses the default padding=0).
        self.scale_attn = nn.Sequential(
            nn.Conv2d(num_scales * (256 + 48), 512, kernel_size=3,
                      padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, num_scales, kernel_size=1, bias=False))

    if cfg.OPTIONS.INIT_DECODER:
        initialize_weights(self.bot_fine)
        initialize_weights(self.bot_aspp)
        initialize_weights(self.scale_attn)
        initialize_weights(self.final)
    else:
        initialize_weights(self.final)
def __init__(self, num_classes, trunk, criterion=None, use_aspp=False,
             num_filters=128):
    """Initialize a new LR-ASPP segmentation model.

    Keyword arguments:
    num_classes -- number of output classes (e.g., 19 for Cityscapes)
    trunk -- the name of the trunk to use ('mobilenetv3_large',
             'mobilenetv3_small')
    criterion -- optional training loss module
    use_aspp -- whether to use DeepLabV3+ style ASPP (True) or
                Lite R-ASPP (False) (setting this to True may yield
                better results, at the cost of latency)
    num_filters -- the number of filters in the segmentation head
    """
    super(LRASPP, self).__init__()
    self.criterion = criterion
    self.trunk, s2_ch, s4_ch, high_level_ch = get_trunk(trunk_name=trunk)
    self.use_aspp = use_aspp

    # Reduced atrous spatial pyramid pooling
    if self.use_aspp:
        # DeepLabV3+-style ASPP: three parallel branches (1x1, dilated
        # 12, dilated 36) plus a global-pooling branch; downstream code
        # sees num_filters * 4 channels.
        self.aspp_conv1 = nn.Sequential(
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(inplace=True),
        )
        self.aspp_conv2 = nn.Sequential(
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.Conv2d(num_filters, num_filters, 3, dilation=12,
                      padding=12),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(inplace=True),
        )
        self.aspp_conv3 = nn.Sequential(
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.Conv2d(num_filters, num_filters, 3, dilation=36,
                      padding=36),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(inplace=True),
        )
        self.aspp_pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(inplace=True),
        )
        aspp_out_ch = num_filters * 4
    else:
        # Lite R-ASPP: a conv branch plus a large-window pooled sigmoid
        # gating branch; downstream code sees num_filters channels.
        self.aspp_conv1 = nn.Sequential(
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(inplace=True),
        )
        self.aspp_conv2 = nn.Sequential(
            nn.AvgPool2d(kernel_size=(49, 49), stride=(16, 20)),
            nn.Conv2d(high_level_ch, num_filters, 1, bias=False),
            nn.Sigmoid(),
        )
        aspp_out_ch = num_filters

    # Skip-connection reducers and the upsampling/prediction path.
    self.convs2 = nn.Conv2d(s2_ch, 32, kernel_size=1, bias=False)
    self.convs4 = nn.Conv2d(s4_ch, 64, kernel_size=1, bias=False)
    self.conv_up1 = nn.Conv2d(aspp_out_ch, num_filters, kernel_size=1)
    self.conv_up2 = ConvBnRelu(num_filters + 64, num_filters,
                               kernel_size=1)
    self.conv_up3 = ConvBnRelu(num_filters + 32, num_filters,
                               kernel_size=1)
    self.last = nn.Conv2d(num_filters, num_classes, kernel_size=1)