def __init__(self, cfg, in_channels):
    """Build the FCOS regression head: bbox tower plus box/centerness predictors.

    Arguments:
        cfg: config node; reads MODEL.FCOS.* options.
        in_channels (int): number of channels of the input feature.
    """
    super(FCOSHead, self).__init__()
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

    # (Removed unused locals: an empty cls_tower list and num_classes that
    # were never referenced in this block.)
    bbox_tower = []
    for i in range(cfg.MODEL.FCOS.NUM_CONVS):
        # Optionally replace only the last tower conv with a deformable conv.
        if self.use_dcn_in_tower and i == cfg.MODEL.FCOS.NUM_CONVS - 1:
            conv_func = DFConv2d
        else:
            conv_func = nn.Conv2d
        bbox_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        bbox_tower.append(nn.GroupNorm(32, in_channels))
        bbox_tower.append(nn.ReLU())

    self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
    self.bbox_pred = nn.Conv2d(in_channels, 4, kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1, kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights and zero bias for every conv.
    for modules in [self.bbox_tower, self.bbox_pred, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv2d):
                torch.nn.init.normal_(l.weight, std=0.01)
                torch.nn.init.constant_(l.bias, 0)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
def __init__(self, cfg, in_channels):
    """Build the ATSS regression head: bbox tower plus box/centerness predictors.

    Arguments:
        cfg: config node; reads MODEL.ATSS.* options.
        in_channels (int): number of channels of the input feature.
    """
    super(ATSSHead, self).__init__()
    self.cfg = cfg
    # Anchors per location = aspect ratios x scales per octave.
    num_anchors = len(
        cfg.MODEL.ATSS.ASPECT_RATIOS) * cfg.MODEL.ATSS.SCALES_PER_OCTAVE

    # (Removed unused locals: an empty cls_tower list and num_classes that
    # were never referenced in this block.)
    bbox_tower = []
    for i in range(cfg.MODEL.ATSS.NUM_CONVS):
        # Optionally replace only the last tower conv with a deformable conv.
        if self.cfg.MODEL.ATSS.USE_DCN_IN_TOWER and \
                i == cfg.MODEL.ATSS.NUM_CONVS - 1:
            conv_func = DFConv2d
        else:
            conv_func = nn.Conv2d
        bbox_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        bbox_tower.append(nn.GroupNorm(32, in_channels))
        bbox_tower.append(nn.ReLU())

    self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
    self.bbox_pred = nn.Conv2d(in_channels, num_anchors * 4,
                               kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, num_anchors * 1,
                                kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights and zero bias for every conv.
    for modules in [self.bbox_tower, self.bbox_pred, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv2d):
                torch.nn.init.normal_(l.weight, std=0.01)
                torch.nn.init.constant_(l.bias, 0)

    # Point-based regression predicts distances directly, so bias the box
    # predictor to a positive initial extent (was mislabeled "focal loss" bias).
    if self.cfg.MODEL.ATSS.REGRESSION_TYPE == 'POINT':
        assert num_anchors == 1, "regressing from a point only support num_anchors == 1"
        torch.nn.init.constant_(self.bbox_pred.bias, 4)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
def __init__(self, cfg, in_channels):
    """Build the SipMask head: cls/bbox towers, detection predictors, and the
    instance (mask-coefficient) branch.

    Arguments:
        cfg: config node; reads MODEL.SIPMASK.* options.
        in_channels (int): number of channels of the input feature.
    """
    super(SipMaskHead, self).__init__()
    num_classes = cfg.MODEL.SIPMASK.NUM_CLASSES - 1
    self.fpn_strides = cfg.MODEL.SIPMASK.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.SIPMASK.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.SIPMASK.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.SIPMASK.USE_DCN_IN_TOWER

    num_convs = cfg.MODEL.SIPMASK.NUM_CONVS

    def build_tower(depth):
        # NOTE(review): for the cls tower depth == num_convs - 1, so the DCN
        # condition below can never fire there — presumably intentional (the
        # cls branch is one conv shorter), but confirm against upstream SipMask.
        layers = []
        for idx in range(depth):
            use_dcn = self.use_dcn_in_tower and idx == num_convs - 1
            conv = DFConv2d if use_dcn else nn.Conv2d
            layers.append(conv(in_channels, in_channels, kernel_size=3,
                               stride=1, padding=1, bias=True))
            layers.append(nn.GroupNorm(32, in_channels))
            layers.append(nn.ReLU())
        return layers

    # The cls tower uses one conv fewer than the bbox tower.
    self.add_module('cls_tower', nn.Sequential(*build_tower(num_convs - 1)))
    self.add_module('bbox_tower', nn.Sequential(*build_tower(num_convs)))

    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv2d(in_channels, 4,
                               kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)

    # ---------- instance (mask) branch ----------
    self.nc = 32  # number of mask-coefficient channels
    self.feat_align = FeatureAlign(in_channels, in_channels, 3)
    self.sip_cof = nn.Conv2d(in_channels, self.nc * 4, 3, padding=1)
    self.sip_mask_lat = nn.Conv2d(512, self.nc, 3, padding=1)
    self.sip_mask_lat0 = nn.Conv2d(768, 512, 1, padding=0)
    self.relu = nn.ReLU(inplace=True)

    # Initialization: normal(std=0.01) weights, zero bias for every conv.
    detection_parts = [self.cls_tower, self.bbox_tower, self.bbox_pred,
                       self.cls_logits, self.centerness]
    for part in detection_parts:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.SIPMASK.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
    self.feat_align.init_weights()
def __init__(self, cfg, in_channels):
    """Build the DNA head: cls/bbox towers, detection predictors, and an
    identity (hash-code) predictor.

    Arguments:
        cfg: config node; reads MODEL.DNA.* options.
        in_channels (int): number of channels of the input feature.
    """
    super(DNAHead, self).__init__()
    num_classes = cfg.MODEL.DNA.NUM_CLASSES - 1
    self.fpn_strides = cfg.MODEL.DNA.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.DNA.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.DNA.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.DNA.USE_DCN_IN_TOWER
    self.hash_code = cfg.MODEL.DNA.HASH_CODE

    # (Removed a fully commented-out "dna_tower" branch and its unused local
    # list — dead code.)
    cls_tower = []
    bbox_tower = []
    for i in range(cfg.MODEL.DNA.NUM_CONVS):
        # Optionally replace only the last tower conv with a deformable conv.
        if self.use_dcn_in_tower and i == cfg.MODEL.DNA.NUM_CONVS - 1:
            conv_func = DFConv2d
        else:
            conv_func = nn.Conv2d
        cls_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        cls_tower.append(nn.GroupNorm(32, in_channels))
        cls_tower.append(nn.ReLU())
        bbox_tower.append(
            conv_func(in_channels, in_channels,
                      kernel_size=3, stride=1, padding=1, bias=True))
        bbox_tower.append(nn.GroupNorm(32, in_channels))
        bbox_tower.append(nn.ReLU())

    self.add_module('cls_tower', nn.Sequential(*cls_tower))
    self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv2d(in_channels, 4,
                               kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)

    # Identity (DNA hash) predictor ("way 2" in the original). The 6 input
    # channels are hard-coded — presumably the channel count of the feature
    # fed to it in forward(); TODO confirm against the caller.
    self.identity = nn.Conv2d(6, self.hash_code,
                              kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights and zero bias for every conv.
    for modules in [self.cls_tower, self.bbox_tower, self.cls_logits,
                    self.bbox_pred, self.identity, self.centerness]:
        for l in modules.modules():
            if isinstance(l, nn.Conv2d):
                torch.nn.init.normal_(l.weight, std=0.01)
                torch.nn.init.constant_(l.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.DNA.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
def __init__(self, cfg, in_channels):
    """Build the FCOS mask head: cls/bbox/mask towers and their predictors.

    Arguments:
        cfg: config node; reads MODEL.FCOS.* options.
        in_channels (int): number of channels of the input feature.
    """
    super(FCOSMaskHead, self).__init__()
    num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1

    # Build the three towers in lockstep (one conv/GN/ReLU triple per branch
    # per iteration, preserving the original module-creation order).
    cls_layers, box_layers, mask_layers = [], [], []
    for _ in range(cfg.MODEL.FCOS.NUM_CONVS):
        for branch in (cls_layers, box_layers, mask_layers):
            branch.append(nn.Conv2d(in_channels, in_channels,
                                    kernel_size=3, stride=1, padding=1))
            branch.append(nn.GroupNorm(32, in_channels))
            branch.append(nn.ReLU())
    self.add_module('cls_tower', nn.Sequential(*cls_layers))
    self.add_module('bbox_tower', nn.Sequential(*box_layers))
    self.add_module('mask_tower', nn.Sequential(*mask_layers))

    # Per-branch prediction convs.
    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv2d(in_channels, 4,
                               kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)
    self.bbox_mask = nn.Conv2d(in_channels, 1,
                               kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights, zero bias for every conv.
    for part in [self.cls_tower, self.bbox_tower, self.mask_tower,
                 self.cls_logits, self.bbox_pred, self.centerness,
                 self.bbox_mask]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
def __init__(self, cfg, in_channels):
    """Build the FCOS head: cls/bbox towers plus logits, box, and centerness
    predictors.

    Arguments:
        cfg: config node; reads MODEL.FCOS.* options.
        in_channels (int): channel count of each FPN output level (identical
            across levels).
    """
    super(FCOSHead, self).__init__()
    # For COCO: 81 - 1 = 80 foreground classes (sigmoid classifier, so no
    # explicit background channel).
    num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1

    cls_layers = []   # classification branch
    box_layers = []   # regression branch
    # Towers: NUM_CONVS 3x3 convs at in_channels, each followed by GN + ReLU
    # (built in lockstep to preserve the original module-creation order).
    for _ in range(cfg.MODEL.FCOS.NUM_CONVS):
        for branch in (cls_layers, box_layers):
            branch.append(nn.Conv2d(in_channels, in_channels,
                                    kernel_size=3, stride=1, padding=1))
            branch.append(nn.GroupNorm(32, in_channels))
            branch.append(nn.ReLU())
    self.add_module('cls_tower', nn.Sequential(*cls_layers))
    self.add_module('bbox_tower', nn.Sequential(*box_layers))

    # Final classifier: one output channel per class.
    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    # Box predictor: 4 channels for the l/t/r/b distances.
    self.bbox_pred = nn.Conv2d(in_channels, 4,
                               kernel_size=3, stride=1, padding=1)
    # Centerness predictor: a single score channel.
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights and zero bias for every conv.
    for part in [self.cls_tower, self.bbox_tower, self.cls_logits,
                 self.bbox_pred, self.centerness]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier so its initial sigmoid output is
    # about PRIOR_PROB (e.g. 0.01), the usual focal-loss initialization trick.
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level (P3-P7) applied to the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])
def __init__(self, cfg, in_channels):
    """Build the EmbedMask head: FCOS-style towers and detectors plus the
    proposal/pixel embedding and margin predictors.

    Arguments:
        cfg: config node; reads MODEL.EMBED_MASK.* (and MODEL.FCOS.NUM_CONVS).
        in_channels (int): number of channels of the input feature.
    """
    super(EmbedMaskHead, self).__init__()
    self.fpn_strides = cfg.MODEL.EMBED_MASK.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.EMBED_MASK.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.EMBED_MASK.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.EMBED_MASK.USE_DCN_IN_TOWER
    num_classes = cfg.MODEL.EMBED_MASK.NUM_CLASSES - 1
    embed_dim = cfg.MODEL.EMBED_MASK.EMBED_DIM
    prior_margin = cfg.MODEL.EMBED_MASK.PRIOR_MARGIN
    # Bias so the margin predictor starts at the configured prior margin.
    self.init_sigma_bias = math.log(-math.log(0.5) / (prior_margin ** 2))

    # NOTE(review): tower depth is read from the FCOS config node, not
    # EMBED_MASK — matches upstream EmbedMask, but confirm before changing.
    n_convs = cfg.MODEL.FCOS.NUM_CONVS
    cls_layers, box_layers, mask_layers = [], [], []
    for idx in range(n_convs):
        # Optionally make only the last tower conv deformable.
        if self.use_dcn_in_tower and idx == n_convs - 1:
            conv_op = DFConv2d
        else:
            conv_op = nn.Conv2d
        for branch in (cls_layers, box_layers, mask_layers):
            branch.append(conv_op(in_channels, in_channels, kernel_size=3,
                                  stride=1, padding=1, bias=True))
            branch.append(nn.GroupNorm(32, in_channels))
            branch.append(nn.ReLU())

    self.add_module('cls_tower', nn.Sequential(*cls_layers))
    self.add_module('bbox_tower', nn.Sequential(*box_layers))

    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv2d(in_channels, 4,
                               kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)

    # Initialization: normal(std=0.01) weights, zero bias for every conv.
    for part in [self.cls_tower, self.bbox_tower, self.cls_logits,
                 self.bbox_pred, self.centerness]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.EMBED_MASK.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])

    # ---------- mask predictions ----------
    # Proposal embedding: 2 spatial dims + (embed_dim - 2) free dims.
    self.proposal_spatial_embed_pred = nn.Conv2d(
        in_channels, 2, kernel_size=3, stride=1, padding=1, bias=True)
    self.proposal_other_embed_pred = nn.Conv2d(
        in_channels, embed_dim - 2, kernel_size=3, stride=1, padding=1, bias=True)
    for part in [self.proposal_spatial_embed_pred,
                 self.proposal_other_embed_pred]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Proposal margin (sigma), bias pinned to the prior margin at init.
    self.proposal_margin_pred = nn.Conv2d(
        in_channels, 1, kernel_size=3, stride=1, padding=1, bias=True)
    torch.nn.init.normal_(self.proposal_margin_pred.weight, std=0.01)
    torch.nn.init.constant_(self.proposal_margin_pred.bias,
                            self.init_sigma_bias)

    # Pixel embedding.
    self.add_module('mask_tower', nn.Sequential(*mask_layers))
    self.pixel_spatial_embed_pred = nn.Conv2d(
        in_channels, 2, kernel_size=3, stride=1, padding=1, bias=True)
    self.pixel_other_embed_pred = nn.Conv2d(
        in_channels, embed_dim - 2, kernel_size=3, stride=1, padding=1, bias=True)
    for part in [self.mask_tower, self.pixel_spatial_embed_pred,
                 self.pixel_other_embed_pred]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    self.position_scale = Scale(init_value=1.0)
def __init__(self, cfg, in_channels):
    """Build the YOOO event head: per-level conv towers pooled to 1x1 and
    followed by 1x1 prediction convs.

    Arguments:
        cfg: config node; reads MODEL.YOOO.* (and some MODEL.FCOS.*) options.
        in_channels (int): number of channels of the input feature.
    """
    super(YOOOHead, self).__init__()
    # No "- 1" here: NOEVENT gets its own output channel, because frames with
    # no event would otherwise make the loss hard to define (event positives
    # are sparse, unlike object detection).
    num_classes = cfg.MODEL.YOOO.NUM_CLASSES
    # NOTE(review): strides and prior-prob are read from the FCOS config node
    # rather than YOOO — confirm this is intended.
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES  # [8, 16, 32, 64, 128]
    self.norm_reg_targets = cfg.MODEL.YOOO.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.YOOO.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.YOOO.USE_DCN_IN_TOWER
    self.used_level = [0, 1, 2, 3, 4]  # FPN level indices: p3..p7

    cls_layers = []
    box_layers = []
    for idx in range(cfg.MODEL.YOOO.NUM_CONVS):
        # Optionally make only the last tower conv deformable.
        if self.use_dcn_in_tower and idx == cfg.MODEL.YOOO.NUM_CONVS - 1:
            conv_op = DFConv2d
        else:
            conv_op = nn.Conv2d
        for branch in (cls_layers, box_layers):
            branch.append(conv_op(in_channels, in_channels, kernel_size=3,
                                  stride=1, padding=1, bias=True))
            branch.append(nn.GroupNorm(32, in_channels))
            branch.append(nn.ReLU())

    # Global average pooling collapses each level's map to a 1x1 descriptor.
    cls_layers.append(nn.AdaptiveAvgPool2d(output_size=(1, 1)))
    box_layers.append(nn.AdaptiveAvgPool2d(output_size=(1, 1)))
    self.add_module('cls_tower_event', nn.Sequential(*cls_layers))
    self.add_module('bbox_tower_event', nn.Sequential(*box_layers))

    # 1x1 prediction convs on the pooled descriptor.
    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=1, stride=1, padding=0)
    self.bbox_pred = nn.Conv2d(in_channels, 2,
                               kernel_size=1, stride=1, padding=0)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=1, stride=1, padding=0)

    # Initialization: normal(std=0.01) weights, zero bias for every conv.
    for part in [self.cls_tower_event, self.bbox_tower_event,
                 self.cls_logits, self.bbox_pred, self.centerness]:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per used FPN level.
    self.scales = nn.ModuleList(
        [Scale(init_value=1.0) for _ in range(len(self.used_level))])
def __init__(self, cfg, in_channels):
    """Build the FAD-FCOS head: towers are either plain conv/GN/ReLU stacks or
    searched/augmented FAD cells, followed by the usual FCOS predictors.

    Arguments:
        cfg: config node; reads MODEL.FCOS.*, MODEL.FAD.*, and
            MODEL.RESNETS.BACKBONE_OUT_CHANNELS.
        in_channels (int): number of channels of the input feature.
    """
    super(FADFCOSHead, self).__init__()
    self.cfg = cfg
    self.fpn_lvl = 5
    num_classes = cfg.MODEL.FCOS.NUM_CLASSES - 1
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES
    self.norm_reg_targets = cfg.MODEL.FCOS.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.MODEL.FCOS.CENTERNESS_ON_REG
    self.use_dcn_in_tower = cfg.MODEL.FCOS.USE_DCN_IN_TOWER

    # Default towers: plain conv stacks, used when the FAD towers are off.
    cls_tower = []
    bbox_tower = []
    for idx in range(cfg.MODEL.FCOS.NUM_CONVS):
        if self.use_dcn_in_tower and idx == cfg.MODEL.FCOS.NUM_CONVS - 1:
            conv_op = DFConv2d
        else:
            conv_op = nn.Conv2d
        for branch in (cls_tower, bbox_tower):
            branch.append(conv_op(in_channels, in_channels, kernel_size=3,
                                  stride=1, padding=1, bias=True))
            branch.append(nn.GroupNorm(32, in_channels))
            branch.append(nn.ReLU())

    # FAD modules (project-local import kept function-scoped, as upstream).
    from fad_core.modeling.modules.search_rcnn import SearchRCNN
    from fad_core.modeling.modules.augment_rcnn import AugmentRCNN

    # Optionally replace either tower with a searched / augmented FAD cell.
    if cfg.MODEL.FAD.CLSTOWER:
        if cfg.MODEL.FAD.SEARCH:
            cls_tower = SearchRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                                   cfg.MODEL.FAD.NUM_CHANNELS_CLS,
                                   cfg.MODEL.FAD.NUM_CELLS_CLS,
                                   n_nodes=cfg.MODEL.FAD.NUM_NODES_CLS)
        else:  # augment
            cls_tower = AugmentRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                                    cfg.MODEL.FAD.NUM_CHANNELS_CLS,
                                    cfg.MODEL.FAD.NUM_CELLS_CLS,
                                    cfg.MODEL.FAD.GENO_CLS[0])
    if cfg.MODEL.FAD.BOXTOWER:
        if cfg.MODEL.FAD.SEARCH:
            bbox_tower = SearchRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                                    cfg.MODEL.FAD.NUM_CHANNELS_BOX,
                                    cfg.MODEL.FAD.NUM_CELLS_BOX,
                                    n_nodes=cfg.MODEL.FAD.NUM_NODES_BOX)
        else:  # augment
            bbox_tower = AugmentRCNN(cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS,
                                     cfg.MODEL.FAD.NUM_CHANNELS_BOX,
                                     cfg.MODEL.FAD.NUM_CELLS_BOX,
                                     cfg.MODEL.FAD.GENO_BOX[0])

    if cfg.MODEL.FAD.CLSTOWER:
        self.add_module('cls_tower', cls_tower)
    else:
        self.add_module('cls_tower', nn.Sequential(*cls_tower))
    if cfg.MODEL.FAD.BOXTOWER:
        self.add_module('bbox_tower', bbox_tower)
    else:
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))

    # 1x1 convs to reduce the concatenated FAD-cell output back to in_channels.
    if cfg.MODEL.FAD.CLSTOWER:
        self.cls_reduce = nn.Conv2d(
            in_channels * cfg.MODEL.FAD.NUM_NODES_CLS, in_channels,
            kernel_size=1, stride=1, padding=0)
    self.cls_logits = nn.Conv2d(in_channels, num_classes,
                                kernel_size=3, stride=1, padding=1)
    if cfg.MODEL.FAD.BOXTOWER:
        # NOTE(review): min(1, NUM_NODES_BOX) is 1 for any positive node count,
        # so this input width is just in_channels — presumably box_reduce is
        # applied first in forward(); confirm, and consider simplifying.
        self.bbox_pred = nn.Conv2d(
            in_channels * min(1, cfg.MODEL.FAD.NUM_NODES_BOX), 4,
            kernel_size=3, stride=1, padding=1)
        self.box_reduce = nn.Conv2d(
            in_channels * cfg.MODEL.FAD.NUM_NODES_BOX, in_channels,
            kernel_size=1, stride=1, padding=0)
    else:
        self.bbox_pred = nn.Conv2d(in_channels, 4,
                                   kernel_size=3, stride=1, padding=1)
    self.centerness = nn.Conv2d(in_channels, 1,
                                kernel_size=3, stride=1, padding=1)

    # Initialization: plain conv parts get normal(std=0.01) weights and zero
    # bias; FAD towers are excluded (they handle their own initialization).
    list_init = [self.cls_logits, self.bbox_pred, self.centerness]
    if self.cfg.MODEL.FAD.CLSTOWER:
        list_init.append(self.cls_reduce)
    else:
        list_init.append(self.cls_tower)
    if not self.cfg.MODEL.FAD.BOXTOWER:
        list_init.append(self.bbox_tower)
    for part in list_init:
        for layer in part.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Focal-loss prior: bias the classifier toward PRIOR_PROB at init.
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)

    # One learnable scale per FPN level for the regression output.
    self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(5)])