示例#1
0
    def __init__(self, cfg, in_channels):
        """
        Build the regression branch of the FCOS head: a conv tower plus
        the box-regression and centerness prediction layers.

        Arguments:
            cfg: project config node; reads MODEL.FCOS.* options.
            in_channels (int): number of channels of the input feature
        """
        super(FCOSHead, self).__init__()
        # Removed unused locals from the original: `num_classes` and
        # `cls_tower` were computed/created but never referenced here.
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

        bbox_tower = []
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())

        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

        # Per-location 4-vector box regression.
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        # Per-location centerness score.
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # initialization: conv weights ~ N(0, 0.01), biases zeroed
        for modules in [self.bbox_tower, self.bbox_pred, self.centerness]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
示例#2
0
    def __init__(self, cfg, in_channels):
        """
        Build the regression branch of the ATSS head: a conv tower plus
        anchor-wise box-regression and centerness prediction layers.

        Arguments:
            cfg: project config node; reads MODEL.ATSS.* options.
            in_channels (int): number of channels of the input feature
        """
        super(ATSSHead, self).__init__()
        self.cfg = cfg
        # Removed unused locals from the original: `num_classes` and
        # `cls_tower` were computed/created but never referenced here.
        # Anchors per location: aspect ratios x scales per octave.
        num_anchors = len(
            cfg.MODEL.ATSS.ASPECT_RATIOS) * cfg.MODEL.ATSS.SCALES_PER_OCTAVE

        bbox_tower = []
        for i in range(cfg.MODEL.ATSS.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.cfg.MODEL.ATSS.USE_DCN_IN_TOWER and \
                    i == cfg.MODEL.ATSS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())

        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        # Per-anchor 4-vector box regression.
        self.bbox_pred = nn.Conv2d(in_channels,
                                   num_anchors * 4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        # Per-anchor centerness score.
        self.centerness = nn.Conv2d(in_channels,
                                    num_anchors * 1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # initialization: conv weights ~ N(0, 0.01), biases zeroed
        for modules in [self.bbox_tower, self.bbox_pred, self.centerness]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # When regressing distances from a point, start the prediction
        # with a positive bias so initial distances are non-degenerate.
        if self.cfg.MODEL.ATSS.REGRESSION_TYPE == 'POINT':
            assert num_anchors == 1, "regressing from a point only support num_anchors == 1"
            torch.nn.init.constant_(self.bbox_pred.bias, 4)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
示例#3
0
    def __init__(self, cfg, in_channels):
        """
        Build the SipMask head: cls/bbox conv towers, FCOS-style
        prediction convs, and the instance-mask branch (feature
        alignment, mask coefficients, mask lateral convs).

        Arguments:
            cfg: project config node; reads MODEL.SIPMASK.* options.
            in_channels (int): number of channels of the input feature
        """
        super(SipMaskHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.SIPMASK.NUM_CLASSES - 1  # background excluded
        self.fpn_strides = cfg.MODEL.SIPMASK.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.SIPMASK.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.SIPMASK.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.SIPMASK.USE_DCN_IN_TOWER

        cls_tower = []
        bbox_tower = []
        # The cls tower has one conv stage fewer than the bbox tower
        # (range(NUM_CONVS - 1)); presumably intentional — confirm against
        # the SipMask reference implementation.
        for i in range(cfg.MODEL.SIPMASK.NUM_CONVS - 1):
            # BUGFIX: the original tested `i == NUM_CONVS - 1`, an index
            # this shorter loop never reaches, so DFConv2d could never be
            # selected for the cls tower even with USE_DCN_IN_TOWER on.
            # Test against this loop's actual last index instead.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.SIPMASK.NUM_CONVS - 2:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            cls_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())

        for i in range(cfg.MODEL.SIPMASK.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.SIPMASK.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())
        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        # Per-location class logits, 4-vector regression, centerness.
        self.cls_logits = nn.Conv2d(in_channels,
                                    num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # Number of mask-coefficient basis channels.
        self.nc = 32
        ###########instance##############
        self.feat_align = FeatureAlign(in_channels, in_channels, 3)
        # Four coefficient groups of self.nc channels each.
        self.sip_cof = nn.Conv2d(in_channels, self.nc * 4, 3, padding=1)

        # NOTE(review): the 512/768 input channel counts are hard-coded —
        # confirm against the feature concatenation in the forward pass.
        self.sip_mask_lat = nn.Conv2d(512, self.nc, 3, padding=1)
        self.sip_mask_lat0 = nn.Conv2d(768, 512, 1, padding=0)
        self.relu = nn.ReLU(inplace=True)

        # initialization: conv weights ~ N(0, 0.01), biases zeroed
        for modules in [
                self.cls_tower, self.bbox_tower, self.bbox_pred,
                self.cls_logits, self.centerness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss so the initial foreground
        # probability is roughly PRIOR_PROB
        prior_prob = cfg.MODEL.SIPMASK.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
        self.feat_align.init_weights()
示例#4
0
    def __init__(self, cfg, in_channels):
        """
        Build the DNA detection head: classification and box-regression
        conv towers, prediction convs, and a DNA "identity" conv.

        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(DNAHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.DNA.NUM_CLASSES - 1  # background class excluded
        self.fpn_strides = cfg.MODEL.DNA.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.DNA.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.DNA.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.DNA.USE_DCN_IN_TOWER
        self.hash_code = cfg.MODEL.DNA.HASH_CODE  # output channels of self.identity

        cls_tower = []
        bbox_tower = []
        dna_tower = []  # NOTE: currently unused — DNA tower construction is commented out below

        for i in range(cfg.MODEL.DNA.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.DNA.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Cls tower
            cls_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())

            # BBox tower
            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())
            """
            # DNA tower 
            dna_tower.append(
                conv_func(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=True
                )
            )
            dna_tower.append(nn.GroupNorm(32, in_channels))
            dna_tower.append(nn.ReLU())
            """

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        #self.add_module('dna_tower', nn.Sequential(*dna_tower))

        # Per-location class logits.
        self.cls_logits = nn.Conv2d(in_channels,
                                    num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # Per-location 4-vector box regression.
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)

        # Per-location centerness score.
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)

        # For DNA. way 2.
        # NOTE(review): the 6 input channels are hard-coded — presumably a
        # fixed-size feature stacked in the forward pass; confirm there.
        self.identity = nn.Conv2d(6,
                                  self.hash_code,
                                  kernel_size=3,
                                  stride=1,
                                  padding=1)

        # initialization: conv weights ~ N(0, 0.01), biases zeroed
        for modules in [
                self.cls_tower, self.bbox_tower, self.cls_logits,
                self.bbox_pred, self.identity, self.centerness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss so the initial foreground
        # probability is roughly PRIOR_PROB
        prior_prob = cfg.MODEL.DNA.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])

        # For DNA. way 1.
        """
示例#5
0
    def __init__(self, cfg, in_channels):
        """
        Mask-enabled FCOS head: parallel classification, box-regression
        and mask conv towers over a shared FPN feature, followed by the
        per-task prediction convs.

        Arguments:
            cfg: project config node; reads MODEL.FCOS.* options.
            in_channels (int): number of channels of the input feature
        """
        super(FCOSMaskHead, self).__init__()
        # Background is excluded, hence the -1 on the class count.
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1

        towers = {'cls_tower': [], 'bbox_tower': [], 'mask_tower': []}
        for _ in range(cfg.MODEL.FCOS.NUM_CONVS):
            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            # Iterate towers in cls/bbox/mask order to keep module
            # construction order identical to the original.
            for layers in (towers['cls_tower'], towers['bbox_tower'],
                           towers['mask_tower']):
                layers.append(
                    nn.Conv2d(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1))
                layers.append(nn.GroupNorm(32, in_channels))
                layers.append(nn.ReLU())

        for tower_name, layers in towers.items():
            self.add_module(tower_name, nn.Sequential(*layers))

        def _pred_conv(out_channels):
            # 3x3 prediction conv; spatial resolution preserved.
            return nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=3,
                             stride=1,
                             padding=1)

        # Class logits, 4-vector box regression, centerness, box mask.
        self.cls_logits = _pred_conv(num_classes)
        self.bbox_pred = _pred_conv(4)
        self.centerness = _pred_conv(1)
        self.bbox_mask = _pred_conv(1)

        # All conv weights ~ N(0, 0.01), biases zeroed.
        for module in (self.cls_tower, self.bbox_tower, self.mask_tower,
                       self.cls_logits, self.bbox_pred, self.centerness,
                       self.bbox_mask):
            for layer in module.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Focal-loss prior: bias the class logits so the initial
        # foreground probability is roughly PRIOR_PROB.
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
示例#6
0
    def __init__(self, cfg, in_channels):
        """
        FCOS head: parallel classification and regression conv towers on
        top of each FPN level, followed by the class-logit, box-regression
        and centerness prediction convs.

        Arguments:
            cfg: project config node; reads MODEL.FCOS.* options.
            in_channels (int): channel count of each FPN output level
                (identical across levels).
        """
        super(FCOSHead, self).__init__()
        # Background is excluded from the logits, hence the -1.
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1

        cls_layers = []
        reg_layers = []
        for _ in range(cfg.MODEL.FCOS.NUM_CONVS):
            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            cls_layers.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            cls_layers.append(nn.GroupNorm(32, in_channels))
            cls_layers.append(nn.ReLU())

            reg_layers.append(
                nn.Conv2d(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1))
            reg_layers.append(nn.GroupNorm(32, in_channels))
            reg_layers.append(nn.ReLU())

        self.add_module('cls_tower', nn.Sequential(*cls_layers))
        self.add_module('bbox_tower', nn.Sequential(*reg_layers))

        def _pred_conv(out_channels):
            # 3x3 prediction conv; spatial resolution preserved.
            return nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=3,
                             stride=1,
                             padding=1)

        # Per-location class logits [H*W*C], (l, t, r, b) box offsets
        # [H*W*4], and centerness score [H*W*1] (shares the cls tower).
        self.cls_logits = _pred_conv(num_classes)
        self.bbox_pred = _pred_conv(4)
        self.centerness = _pred_conv(1)

        # All conv weights ~ N(0, 0.01), biases zeroed.
        for module in (self.cls_tower, self.bbox_tower, self.cls_logits,
                       self.bbox_pred, self.centerness):
            for layer in module.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)

        # Focal-loss trick: bias the class logits so the initial
        # foreground probability is roughly PRIOR_PROB (e.g. 0.01).
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7) applied to the
        # regression outputs.
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
示例#7
0
    def __init__(self, cfg, in_channels):
        """
        Build the EmbedMask head: cls/bbox/mask conv towers, FCOS-style
        detection convs, and the proposal/pixel embedding and margin
        branches used for mask prediction.

        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(EmbedMaskHead, self).__init__()
        # TODO: Implement the sigmoid version first.
        self.fpn_strides = cfg.MODEL.EMBED_MASK.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.EMBED_MASK.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.EMBED_MASK.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.EMBED_MASK.USE_DCN_IN_TOWER

        num_classes = cfg.MODEL.EMBED_MASK.NUM_CLASSES - 1  # background excluded
        embed_dim = cfg.MODEL.EMBED_MASK.EMBED_DIM
        prior_margin = cfg.MODEL.EMBED_MASK.PRIOR_MARGIN
        # Initial bias for the margin (sigma) head, chosen so the prior
        # margin initially maps to probability 0.5.
        self.init_sigma_bias = math.log(-math.log(0.5) / (prior_margin**2))

        cls_tower = []
        bbox_tower = []
        mask_tower = []
        # NOTE(review): tower depth is read from MODEL.FCOS.NUM_CONVS, not
        # MODEL.EMBED_MASK — confirm this cross-reference is intended.
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            cls_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())
            bbox_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())
            mask_tower.append(
                conv_func(in_channels,
                          in_channels,
                          kernel_size=3,
                          stride=1,
                          padding=1,
                          bias=True))
            mask_tower.append(nn.GroupNorm(32, in_channels))
            mask_tower.append(nn.ReLU())

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        # Per-location class logits, 4-vector regression, centerness.
        self.cls_logits = nn.Conv2d(in_channels,
                                    num_classes,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        self.bbox_pred = nn.Conv2d(in_channels,
                                   4,
                                   kernel_size=3,
                                   stride=1,
                                   padding=1)
        self.centerness = nn.Conv2d(in_channels,
                                    1,
                                    kernel_size=3,
                                    stride=1,
                                    padding=1)
        # initialization: conv weights ~ N(0, 0.01), biases zeroed
        for modules in [
                self.cls_tower, self.bbox_tower, self.cls_logits,
                self.bbox_pred, self.centerness
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss so the initial foreground
        # probability is roughly PRIOR_PROB
        prior_prob = cfg.MODEL.EMBED_MASK.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])

        ########### Mask Predictions ############
        # proposal embedding: 2 spatial dims + (embed_dim - 2) free dims
        self.proposal_spatial_embed_pred = nn.Conv2d(in_channels,
                                                     2,
                                                     kernel_size=3,
                                                     stride=1,
                                                     padding=1,
                                                     bias=True)
        self.proposal_other_embed_pred = nn.Conv2d(in_channels,
                                                   embed_dim - 2,
                                                   kernel_size=3,
                                                   stride=1,
                                                   padding=1,
                                                   bias=True)
        for modules in [
                self.proposal_spatial_embed_pred,
                self.proposal_other_embed_pred
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)
        # proposal margin (sigma) head; its bias starts at the
        # prior-derived value computed above
        self.proposal_margin_pred = nn.Conv2d(in_channels,
                                              1,
                                              kernel_size=3,
                                              stride=1,
                                              padding=1,
                                              bias=True)
        torch.nn.init.normal_(self.proposal_margin_pred.weight, std=0.01)
        torch.nn.init.constant_(self.proposal_margin_pred.bias,
                                self.init_sigma_bias)

        # pixel embedding: same spatial/other split as the proposal branch
        self.add_module('mask_tower', nn.Sequential(*mask_tower))
        self.pixel_spatial_embed_pred = nn.Conv2d(in_channels,
                                                  2,
                                                  kernel_size=3,
                                                  stride=1,
                                                  padding=1,
                                                  bias=True)
        self.pixel_other_embed_pred = nn.Conv2d(in_channels,
                                                embed_dim - 2,
                                                kernel_size=3,
                                                stride=1,
                                                padding=1,
                                                bias=True)
        for modules in [
                self.mask_tower, self.pixel_spatial_embed_pred,
                self.pixel_other_embed_pred
        ]:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # Single learnable scale for the spatial-position embedding.
        self.position_scale = Scale(init_value=1.0)
示例#8
0
文件: yooo.py 项目: jszgz/FCOS-YOOO
    def __init__(self, cfg, in_channels):
        """
        YOOO event head: cls/bbox conv towers that end in global average
        pooling (one 1x1 descriptor per level), followed by 1x1 prediction
        convs for the event class, a 2-d regression and centerness.

        Arguments:
            cfg: project config node; reads MODEL.YOOO.* (and some
                MODEL.FCOS.*) options.
            in_channels (int): number of channels of the input feature
        """
        super(YOOOHead, self).__init__()
        # NO-EVENT is kept as its own class/channel (no "- 1"): positive
        # event samples are sparse, and an all-background frame would
        # otherwise make the loss hard to compute.
        num_classes = cfg.MODEL.YOOO.NUM_CLASSES
        # NOTE(review): strides come from the FCOS config node while the
        # rest reads YOOO — looks intentional, but verify.
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.YOOO.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.YOOO.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.YOOO.USE_DCN_IN_TOWER

        # FPN levels used by this head: indices 0..4 -> p3..p7.
        self.used_level = [0, 1, 2, 3, 4]
        cls_layers = []
        reg_layers = []
        last_idx = cfg.MODEL.YOOO.NUM_CONVS - 1
        for i in range(cfg.MODEL.YOOO.NUM_CONVS):
            # Use a deformable conv for the final tower layer when enabled.
            if self.use_dcn_in_tower and i == last_idx:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU; cls
            # first, then bbox, matching the original construction order.
            for layers in (cls_layers, reg_layers):
                layers.append(
                    conv_func(in_channels,
                              in_channels,
                              kernel_size=3,
                              stride=1,
                              padding=1,
                              bias=True))
                layers.append(nn.GroupNorm(32, in_channels))
                layers.append(nn.ReLU())

        # Collapse each level's feature map into a single 1x1 descriptor.
        cls_layers.append(nn.AdaptiveAvgPool2d(output_size=(1, 1)))
        reg_layers.append(nn.AdaptiveAvgPool2d(output_size=(1, 1)))

        self.add_module('cls_tower_event', nn.Sequential(*cls_layers))
        self.add_module('bbox_tower_event', nn.Sequential(*reg_layers))

        def _pred_1x1(out_channels):
            # 1x1 conv operating on the pooled descriptor.
            return nn.Conv2d(in_channels,
                             out_channels,
                             kernel_size=1,
                             stride=1,
                             padding=0)

        self.cls_logits = _pred_1x1(num_classes)
        self.bbox_pred = _pred_1x1(2)
        self.centerness = _pred_1x1(1)

        # All conv weights ~ N(0, 0.01), biases zeroed.
        for module in (self.cls_tower_event, self.bbox_tower_event,
                       self.cls_logits, self.bbox_pred, self.centerness):
            for layer in module.modules():
                if isinstance(layer, nn.Conv2d):
                    torch.nn.init.normal_(layer.weight, std=0.01)
                    torch.nn.init.constant_(layer.bias, 0)
        # Focal-loss prior for the classification bias.
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)

        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale (applied before exp()) per used FPN level.
        self.scales = nn.ModuleList(
            [Scale(init_value=1.0) for _ in range(len(self.used_level))])
示例#9
0
    def __init__(self, cfg, in_channels):
        """
        Build the FAD (searchable-architecture) FCOS head. The cls/bbox
        towers are either plain conv stacks or NAS modules
        (SearchRCNN/AugmentRCNN), selected by cfg.MODEL.FAD.*.

        Arguments:
            in_channels (int): number of channels of the input feature
        """
        super(FADFCOSHead, self).__init__()

        self.cfg = cfg
        self.fpn_lvl = 5  # number of FPN levels (P3-P7)

        # TODO: Implement the sigmoid version first.
        num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1  # background excluded
        self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
        self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
        self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

        cls_tower = []
        bbox_tower = []
        for i in range(cfg.MODEL.FCOS.NUM_CONVS):
            # Use a deformable conv for the last tower layer when enabled.
            if self.use_dcn_in_tower and \
                    i == cfg.MODEL.FCOS.NUM_CONVS - 1:
                conv_func = DFConv2d
            else:
                conv_func = nn.Conv2d

            # Each tower stage: 3x3 conv -> GroupNorm(32) -> ReLU.
            cls_tower.append(
                conv_func(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=True
                )
            )
            cls_tower.append(nn.GroupNorm(32, in_channels))
            cls_tower.append(nn.ReLU())
            bbox_tower.append(
                conv_func(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias=True
                )
            )
            bbox_tower.append(nn.GroupNorm(32, in_channels))
            bbox_tower.append(nn.ReLU())

        # import fad modules (deferred so the FAD package is only needed
        # when this head is actually instantiated)
        from fad_core.modeling.modules.search_rcnn import SearchRCNN
        from fad_core.modeling.modules.augment_rcnn import AugmentRCNN

        # --------- cls tower: when enabled, replace the plain conv stack
        # with a NAS module (search phase or augment/retrain phase)
        if cfg.MODEL.FAD.CLSTOWER:
            if cfg.MODEL.FAD.SEARCH:
                cls_tower = SearchRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS, cfg.MODEL.FAD.NUM_CHANNELS_CLS, cfg.MODEL.FAD.NUM_CELLS_CLS, n_nodes=cfg.MODEL.FAD.NUM_NODES_CLS) 
            else:
                # augment 
                cls_tower = AugmentRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS, cfg.MODEL.FAD.NUM_CHANNELS_CLS, cfg.MODEL.FAD.NUM_CELLS_CLS, cfg.MODEL.FAD.GENO_CLS[0]) 

        if cfg.MODEL.FAD.BOXTOWER:
            if cfg.MODEL.FAD.SEARCH:
                bbox_tower = SearchRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS, cfg.MODEL.FAD.NUM_CHANNELS_BOX, cfg.MODEL.FAD.NUM_CELLS_BOX, n_nodes=cfg.MODEL.FAD.NUM_NODES_BOX) 
            else:        
                # augment 
                bbox_tower = AugmentRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS, cfg.MODEL.FAD.NUM_CHANNELS_BOX, cfg.MODEL.FAD.NUM_CELLS_BOX, cfg.MODEL.FAD.GENO_BOX[0]) 

        if cfg.MODEL.FAD.CLSTOWER:
            self.add_module('cls_tower', cls_tower)
        else:
            self.add_module('cls_tower', nn.Sequential(*cls_tower))

        if cfg.MODEL.FAD.BOXTOWER:
            self.add_module('bbox_tower', bbox_tower)
        else:
            self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

        # ---- if 1x1 conv to reduce dim first: the NAS cls tower outputs
        # NUM_NODES_CLS * in_channels channels, reduced back to in_channels
        if cfg.MODEL.FAD.CLSTOWER:
            self.cls_reduce = nn.Conv2d(
                in_channels*cfg.MODEL.FAD.NUM_NODES_CLS, in_channels, kernel_size=1, stride=1,
                padding=0
            ) 

        self.cls_logits = nn.Conv2d(
            in_channels, num_classes, kernel_size=3, stride=1,
            padding=1
        )

        if cfg.MODEL.FAD.BOXTOWER:
            # NOTE(review): min(1, NUM_NODES_BOX) is 1 for any positive
            # node count, so this factor is a no-op — was max() intended?
            self.bbox_pred = nn.Conv2d(
                in_channels*min(1,cfg.MODEL.FAD.NUM_NODES_BOX), 4, kernel_size=3, stride=1,
                padding=1
            )
            self.box_reduce = nn.Conv2d(
                in_channels*cfg.MODEL.FAD.NUM_NODES_BOX, in_channels, kernel_size=1, stride=1,
                padding=0
            ) 
        else:
            self.bbox_pred = nn.Conv2d(
                in_channels, 4, kernel_size=3, stride=1,
                padding=1
            )

        self.centerness = nn.Conv2d(
            in_channels, 1, kernel_size=3, stride=1,
            padding=1
        )

        # initialization: plain conv towers and prediction convs get
        # N(0, 0.01) weights and zero bias; NAS towers keep their own init
        list_init = [self.cls_logits, self.bbox_pred, self.centerness]
        if self.cfg.MODEL.FAD.CLSTOWER:
             list_init.append(self.cls_reduce)
        else:
             list_init.append(self.cls_tower)
        if not self.cfg.MODEL.FAD.BOXTOWER:
             list_init.append(self.bbox_tower)

        for modules in list_init:
            for l in modules.modules():
                if isinstance(l, nn.Conv2d):
                    torch.nn.init.normal_(l.weight, std=0.01)
                    torch.nn.init.constant_(l.bias, 0)

        # initialize the bias for focal loss so the initial foreground
        # probability is roughly PRIOR_PROB
        prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        # One learnable scale per FPN level (P3-P7).
        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])