def __init__(self, cfg):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    if cfg.MODEL.ROI_HEADS.USE_FPN:
        num_inputs = dim_reduced
    else:
        stage_index = 4
        stage2_relative_factor = 2 ** (stage_index - 1)
        res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
        num_inputs = res2_out_channels * stage2_relative_factor

    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    if cfg.MODEL.ROI_MASK_HEAD.OVERLAP:
        self.conv5_overlap = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.overlap_fcn_logits = Conv2d(dim_reduced, 1, 1, 1, 0)

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
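A minimal shape check for this head (a standalone sketch using plain torch.nn in place of the maskrcnn_benchmark layers wrappers; the channel counts are assumed C4/COCO values, not taken from a config):

import torch
from torch import nn

num_inputs, dim_reduced, num_classes = 2048, 256, 81  # assumed C4/COCO values
conv5_mask = nn.ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
mask_fcn_logits = nn.Conv2d(dim_reduced, num_classes, 1, 1, 0)

x = torch.randn(2, num_inputs, 14, 14)  # pooled RoI features
x = torch.relu(conv5_mask(x))           # kernel 2, stride 2 doubles H and W -> (2, 256, 28, 28)
print(mask_fcn_logits(x).shape)         # torch.Size([2, 81, 28, 28]): per-class mask logits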
def __init__(self, cfg):
    super(RoiAlignMaskFeatureExtractor, self).__init__()
    input_channels = 257
    self.mask_fcn1 = Conv2d(input_channels, 256, 3, 1, 1)
    self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
    self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)
    if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1:
        self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(256, 1, 3, 1, 1)
        layers = [self.mask_fcn1, self.mask_fcn2, self.mask_fcn3,
                  self.conv5_mask, self.mask_fcn_logits]
    else:
        self.mask_fcn_logits = None
        self.conv5_mask = Conv2d(256, 16, 3, 1, 1)
        layers = [self.mask_fcn1, self.mask_fcn2, self.mask_fcn3,
                  self.conv5_mask]
    for l in layers:
        nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(l.bias, 0)
def __init__(self, cfg, in_channels):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    num_inputs = in_channels
    self.dual_modal = cfg.MODEL.ROI_MASK_HEAD.DUAL_MODAL
    self.use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN

    if self.dual_modal:
        # sub-pixel upsampling: 3x3 conv to 4x channels, then PixelShuffle(2)
        self.conv5_mask = Conv2d(num_inputs, dim_reduced * 4, 3, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(2)
    else:
        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    self.mask_fcn_final = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    self.cfg = cfg

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
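The dual_modal branch replaces the stride-2 deconv with sub-pixel upsampling. A hedged sketch (plain torch.nn, channel counts assumed) showing that a conv to 4x channels followed by nn.PixelShuffle(2) lands on the same (C, 2H, 2W) shape as the deconv path:

import torch
from torch import nn

num_inputs = dim_reduced = 256  # assumed values
conv5_mask = nn.Conv2d(num_inputs, dim_reduced * 4, 3, padding=1)
pixel_shuffle = nn.PixelShuffle(2)  # (N, 4C, H, W) -> (N, C, 2H, 2W)

y = pixel_shuffle(conv5_mask(torch.randn(1, num_inputs, 14, 14)))
print(y.shape)  # torch.Size([1, 256, 28, 28]), same as ConvTranspose2d(C, C, 2, 2, 0)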
def __init__(self, cfg):
    super(SeqRCNNC4Predictor, self).__init__()
    num_classes = 1
    # char_num_classes = cfg.MODEL.ROI_MASK_HEAD.CHAR_NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    if cfg.MODEL.ROI_HEADS.USE_FPN:
        if cfg.MODEL.ROI_MASK_HEAD.MIX_OPTION == 'CAT':
            num_inputs = dim_reduced + 1
        elif cfg.MODEL.ROI_MASK_HEAD.MIX_OPTION in ('MIX', 'ATTENTION_CHANNEL'):
            num_inputs = dim_reduced * 2
        else:
            num_inputs = dim_reduced
    else:
        stage_index = 4
        stage2_relative_factor = 2 ** (stage_index - 1)
        res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
        num_inputs = res2_out_channels * stage2_relative_factor

    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    if cfg.SEQUENCE.SEQ_ON:
        # self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
        self.seq = make_roi_seq_predictor(cfg, dim_reduced)
    # else:
    #     self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, num_inputs=256, dim_reduced=256, num_conv=0,
             no_transform1=False, first_kernel=3, no_relu=False,
             use_leaky_relu=False):
    super(DeConvUpSampler, self).__init__()
    self.first_kernel = first_kernel
    self.no_relu = no_relu
    self.use_leaky_relu = use_leaky_relu
    if no_transform1:
        self.transform1 = EmptyBlock()
    else:
        self.transform1 = self.build_transform(num_inputs, dim_reduced,
                                               dim_reduced, num_conv)
    self.deconv1 = ConvTranspose2d(dim_reduced, dim_reduced, 2, 2, 0)
    self.transform2 = self.build_transform(dim_reduced, dim_reduced,
                                           dim_reduced, num_conv)
    self.deconv2 = ConvTranspose2d(dim_reduced, num_inputs, 2, 2, 0)
    for modules in [self.transform1.modules(), self.transform2.modules(),
                    [self.deconv1, self.deconv2]]:
        for l in modules:
            if isinstance(l, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(l.weight, mode="fan_out",
                                        nonlinearity="relu")
                nn.init.constant_(l.bias, 0)
def __init__(self, cfg):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    if cfg.MODEL.ROI_HEADS.USE_FPN:
        num_inputs = dim_reduced
    else:
        stage_index = 4
        stage2_relative_factor = 2 ** (stage_index - 1)
        res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
        num_inputs = res2_out_channels * stage2_relative_factor

    if cfg.MODEL.ROI_MASK_HEAD.USE_DECONV:
        block = cfg.MODEL.DECONV.BLOCK
        if cfg.MODEL.DECONV.LAYERWISE_NORM:
            norm_type = cfg.MODEL.DECONV.MASK_NORM_TYPE
        else:
            norm_type = 'none'
            if cfg.MODEL.DECONV.MASK_NORM_TYPE == 'layernorm':
                self.mask_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)
        self.conv5_mask = DeconvTransposed(
            num_inputs, dim_reduced, 2, 2, 0, block=block,
            sampling_stride=cfg.MODEL.DECONV.STRIDE,
            sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
        self.mask_fcn_logits = Deconv(
            dim_reduced, num_classes, 1, 1, 0, block=block,
            sampling_stride=cfg.MODEL.DECONV.STRIDE,
            sync=cfg.MODEL.DECONV.SYNC, norm_type=norm_type)
    else:
        self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
        self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, predictor):
    super(MaskRelationRefineNet, self).__init__()
    self.cfg = cfg.clone()
    if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL == 1:
        hide_dim = (784,)
        self.output_channel = 784
        self.relation_hw = 28
    else:
        hide_dim = (int(14 * 14 * 16),)
        self.output_channel = int(14 * 14 * 16)
        self.relation_hw = 14
    self.appearance_feature_extractor = make_relation_mask_feature_extractor(cfg)
    self.prepare_sort_by_cluster = False
    self.num_center_per_class = 1
    # NOTE: self.geo_feature_dim is read below before being assigned anywhere
    # in this snippet; it presumably has to be set (e.g. from cfg) before this
    # point.
    if self.cfg.MODEL.RELATION_MASK.IOU_COOR and self.geo_feature_dim == 4:
        self.geo_feature_dim = 5
    if cfg.MODEL.RELATION_MASK.IOU_COOR and self.geo_feature_dim > 5:
        self.geo_feature_dim = int(self.geo_feature_dim / 4 * 5)
    self.boxcoder = BoxCoder(weights=(10., 10., 5., 5.))
    # self.class_agnostic = cfg.MODEL.RELATION_NMS.CLASS_AGNOSTIC
    self.fg_class = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES - 1
    # in_channel = int(16 * 14 * 14) if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL != 1 else ()
    self.classifier = nn.Conv2d(cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL, 3, 1)
    # self.classifier = nn.Linear(128, int(cfg.MODEL.ROI_MASK_HEAD.RESOLUTION * cfg.MODEL.ROI_MASK_HEAD.RESOLUTION), bias=True)
    if cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL != 1:
        self.deconv_1 = ConvTranspose2d(16, 16, 2, 2, 0)
    # self.detections_per_img = cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG
    self.iter = 0
    if self.cfg.MODEL.RELATION_MASK.TYPE == 'CAM':
        self.relation_module = CAM_Module(128)
    elif self.cfg.MODEL.RELATION_MASK.TYPE == 'CIAM':
        self.relation_module = CIAM_Module(cfg)
    if self.cfg.MODEL.RELATION_MASK.SAME_PREDICTOR:
        self.predictor = predictor
    else:
        self.deconv_1 = ConvTranspose2d(
            cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL,
            cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL, 2, 2, 0)
        self.classifier = nn.Conv2d(
            cfg.MODEL.RELATION_MASK.EXTRACTOR_CHANNEL, 3, 1, 1, 0)
def __init__(self, cfg):
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES,
        sampling_ratio=cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO,
    )
    self.pooler = pooler
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    next_feature = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.blocks = []
    self.use_attn = cfg.MODEL.ROI_MASK_HEAD.ATTN != ""
    # Upsampling inside the tower is only needed when the target mask
    # resolution is 4x the pooler resolution; the predictor's final deconv
    # supplies the remaining 2x.
    use_upsample = (cfg.MODEL.ROI_MASK_HEAD.RESOLUTION / resolution) == 4.0
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        if layer_idx == 3 and use_upsample:
            module = ConvTranspose2d(next_feature, layer_features, 2, 2, 0)
        else:
            module = Conv2d(next_feature, layer_features, 3, 1, 1)
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(module.weight, mode="fan_out",
                                nonlinearity="relu")
        nn.init.constant_(module.bias, 0)
        if self.use_attn and layer_idx in [2]:
            attn_name = "mask_attn{}".format(layer_idx)
            size = (layer_features, resolution, resolution)
            self.add_module(attn_name, RoIAttnModule(cfg, size))
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
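A worked example of the use_upsample test (the config values below are assumed, matching common maskrcnn-benchmark settings, not read from a real config):

# POOLER_RESOLUTION = 14 with ROI_MASK_HEAD.RESOLUTION = 56 gives ratio 4.0,
# so mask_fcn3 becomes a stride-2 deconv; with RESOLUTION = 28 the ratio is
# 2.0 and the tower stays all 3x3 convs.
pooler_resolution, mask_resolution = 14, 56  # assumed config values
use_upsample = (mask_resolution / pooler_resolution) == 4.0
print(use_upsample)  # True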
def __init__(self, cfg, in_channels):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    num_inputs = in_channels
    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg):
    super().__init__()  # the snippet omitted the super() call nn.Module requires
    input_channels = 256
    self.mask_fcn1 = Conv2d(input_channels, 256, 3, 1, 1)
    self.mask_fcn2 = Conv2d(256, 256, 3, 1, 1)
    self.mask_fcn3 = Conv2d(256, 256, 3, 1, 1)
    self.conv5_mask = ConvTranspose2d(256, 256, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(256, 1, 1, 1, 0)
    # NOTE: the original init list also referenced self.mask_fcn4, which is
    # never defined above; it is dropped here so the loop matches the layers
    # actually built.
    for l in [self.mask_fcn1, self.mask_fcn2, self.mask_fcn3,
              self.conv5_mask, self.mask_fcn_logits]:
        nn.init.kaiming_normal_(l.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(l.bias, 0)
def __init__(self, cfg, in_channels):
    super(MaskXRCNNC4Predictor, self).__init__()
    # num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    num_inputs = in_channels
    self.USE_MLPMASK = cfg.MODEL.ROI_MASK_HEAD.USE_MLPMASK
    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    # self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    if self.USE_MLPMASK:
        self.MLP_mask = nn.Linear(256 * 28 * 28, 28 * 28)
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
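How the MLP mask branch lines up shape-wise (a sketch with assumed sizes; the hard-coded 256 * 28 * 28 implies dim_reduced = 256 and a 28x28 deconv output):

import torch
from torch import nn

conv5_mask = nn.ConvTranspose2d(256, 256, 2, 2, 0)
MLP_mask = nn.Linear(256 * 28 * 28, 28 * 28)

x = conv5_mask(torch.randn(3, 256, 14, 14))     # -> (3, 256, 28, 28)
m = MLP_mask(x.flatten(1)).view(-1, 1, 28, 28)  # one class-agnostic 28x28 mask per RoI
print(m.shape)                                  # torch.Size([3, 1, 28, 28])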
def __init__(self, cfg):
    super(BOUNDARYRCNNC4Predictor, self).__init__()
    dim_reduced = cfg.MODEL.ROI_BOUNDARY_HEAD.CONV_LAYERS[-1]
    self.resol = cfg.MODEL.ROI_BOUNDARY_HEAD.RESOLUTION  # 56
    if cfg.MODEL.ROI_HEADS.USE_FPN:
        num_inputs = dim_reduced
    else:
        stage_index = 4
        stage2_relative_factor = 2 ** (stage_index - 1)
        res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS  # 256
        num_inputs = res2_out_channels * stage2_relative_factor

    self.bo_input_xy = Conv2d(num_inputs, num_inputs, 1, 1, 0)
    nn.init.kaiming_normal_(self.bo_input_xy.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.bo_input_xy.bias, 0)

    self.conv5_bo_xy = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    nn.init.kaiming_normal_(self.conv5_bo_xy.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_bo_xy.bias, 0)

    self.bo_input_1_1 = Conv2d(dim_reduced, dim_reduced, 1, 1, 0)
    nn.init.kaiming_normal_(self.bo_input_1_1.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.bo_input_1_1.bias, 0)

    self.bo_input_2_1 = Conv2d(dim_reduced, dim_reduced, 1, 1, 0)
    nn.init.kaiming_normal_(self.bo_input_2_1.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.bo_input_2_1.bias, 0)

    self.conv5_bo_x = Conv2d(dim_reduced, 1, (3, 1), 1, (1, 0))  # kernel as (H, W)
    nn.init.kaiming_normal_(self.conv5_bo_x.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_bo_x.bias, 0)

    self.conv5_bo_y = Conv2d(dim_reduced, 1, (1, 3), 1, (0, 1))  # kernel as (H, W)
    nn.init.kaiming_normal_(self.conv5_bo_y.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_bo_y.bias, 0)

    self.up_scale = 2
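Shape sketch for the two directional convolutions above (sizes assumed, resol = 56): the (3, 1) kernel with (1, 0) padding mixes only along the height axis, the (1, 3) kernel with (0, 1) padding only along width, and both keep the spatial size while emitting a one-channel boundary map:

import torch
from torch import nn

dim_reduced, resol = 256, 56  # assumed values
conv5_bo_x = nn.Conv2d(dim_reduced, 1, (3, 1), 1, (1, 0))
conv5_bo_y = nn.Conv2d(dim_reduced, 1, (1, 3), 1, (0, 1))

f = torch.randn(1, dim_reduced, resol, resol)
print(conv5_bo_x(f).shape, conv5_bo_y(f).shape)  # both torch.Size([1, 1, 56, 56])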
def __init__(self, cfg, in_channels):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    num_inputs = in_channels
    # if cfg.MODEL.COARSE_ON and not cfg.MODEL.BBOXFeature_ON:
    #     num_inputs = 2048  # in_channels
    # if cfg.MODEL.COARSE_ON and cfg.MODEL.BBOXFeature_ON:
    #     num_inputs = 3072  # in_channels
    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, in_channels):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    num_inputs = in_channels
    self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
    self.n_class = num_classes
    self.maxpool_col = nn.AdaptiveMaxPool2d((28, 1))
    self.maxpool_row = nn.AdaptiveMaxPool2d((1, 28))
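What the two adaptive pools compute (a sketch with an assumed 28x28 logit map): per-class maxima projected onto a single column and a single row:

import torch
from torch import nn

logits = torch.randn(2, 81, 28, 28)  # assumed (N, num_classes, 28, 28) mask logits
col = nn.AdaptiveMaxPool2d((28, 1))(logits)  # max over each row    -> (2, 81, 28, 1)
row = nn.AdaptiveMaxPool2d((1, 28))(logits)  # max over each column -> (2, 81, 1, 28)
print(col.shape, row.shape)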
def __init__(self, cfg, in_channels):
    super(MaskRCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES  # 81
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]  # 256
    num_inputs = in_channels  # 256
    # transposed convolution: 2x upsampling, 14 --> 28
    self.conv5_mask = ConvTranspose2d(in_channels=num_inputs,
                                      out_channels=dim_reduced,
                                      kernel_size=2, stride=2, padding=0)
    # 1x1 convolution
    self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0)
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, in_channels):
    super(KeypointRCNNPredictor, self).__init__()
    self.in_channels = in_channels
    self.num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
    self.num_convs = 4
    self.point_feat_channels = 32
    self.conv_out_channels = self.point_feat_channels * self.num_keypoints
    conv_kernel_size = 3
    conv_kernel_size1 = 5  # kept for the commented-out 5x5 variants below
    deconv_kernel_size = 4

    # Original low-resolution score branch (commented out):
    # deconv_kernel = 4
    # self.kps_score_lowres = layers.ConvTranspose2d(
    #     input_features, num_keypoints, deconv_kernel,
    #     stride=2, padding=deconv_kernel // 2 - 1)
    # nn.init.kaiming_normal_(self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu")
    # nn.init.constant_(self.kps_score_lowres.bias, 0)
    # self.up_scale = 2
    # self.out_channels = num_keypoints

    self.convs = []
    for i in range(self.num_convs):
        _in_channels = self.in_channels if i == 0 else self.conv_out_channels
        strides = 1
        padding = (conv_kernel_size - 1) // 2
        self.convs.append(nn.Sequential(
            Conv2d(_in_channels, self.conv_out_channels, conv_kernel_size,
                   strides, padding),
            GroupNorm(32, self.conv_out_channels),
            nn.ReLU(inplace=True)))
    self.convs = nn.Sequential(*self.convs)

    # Commented-out experiments, summarized: two further conv stacks
    # (self.convs1, self.convs2) built exactly like self.convs but with 5x5
    # kernels (conv_kernel_size1); a split two-branch deconv head
    # (self.updeconv1_1 / self.updeconv1_2 to conv_out_channels // 2 with
    # groups=num_keypoints // 2, GroupNorms self.norm1 / self.norm2, then
    # self.updeconv2_1 / self.updeconv2_2 to num_keypoints // 2 each).

    self.updeconv1_ = ConvTranspose2d(
        self.conv_out_channels, self.conv_out_channels,
        kernel_size=deconv_kernel_size, stride=2,
        padding=(deconv_kernel_size - 2) // 2, groups=self.num_keypoints)
    self.norm1 = GroupNorm(self.num_keypoints, self.conv_out_channels)
    self.updeconv2_ = ConvTranspose2d(
        self.conv_out_channels, self.num_keypoints,
        kernel_size=deconv_kernel_size, stride=2,
        padding=(deconv_kernel_size - 2) // 2, groups=self.num_keypoints)

    # More commented-out experiments, summarized: a guiding 3x3 conv
    # (self.conv_guide); a guided deformable conv
    # (self.dcn = DFConv2d_guide(self.conv_out_channels, self.num_keypoints,
    # groups=self.num_keypoints)); a grouped 1x1 final conv (self.final_conv)
    # and 1x1 offset conv (self.conv_offset); a duplicate conv stack
    # (self.convs_1, GroupNorm(36, ...)) with its own deconv pair
    # (self.updeconv1_1, self.norm1_1, self.updeconv2_1); a TODO (20201015)
    # 3x3 grid of neighbor points with first/second-order feature-transition
    # modules (self.neighbor_points, self.forder_trans, self.sorder_trans;
    # each transition is a 5x5 depth-wise conv followed by a 1x1 conv); and a
    # linear keypoint scorer (self.keypoints_weight on 14 * 14 * 288 inputs,
    # normal init with std=0.01).

    for m in self.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            nn.init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
    for m in self.modules():
        if isinstance(m, nn.ConvTranspose2d):
            nn.init.normal_(m.weight.data, std=0.001)
            if m.bias is not None:
                m.bias.data.zero_()
    # nn.init.constant_(self.final_conv.bias, -np.log(0.99 / 0.01))
    # nn.init.constant_(self.dcn.bias, -np.log(0.99 / 0.01))
    nn.init.constant_(self.updeconv2_.bias, -np.log(0.99 / 0.01))
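The final bias initialization is the focal-loss prior trick: choosing b = -log(0.99 / 0.01) makes sigmoid(b) come out near 0.01, so every keypoint heatmap starts out predicting "background" almost everywhere and the loss is not swamped by easy negatives early in training. A quick check:

import math

b = -math.log(0.99 / 0.01)
print(b)                           # -4.595..., the bias value set above
print(1.0 / (1.0 + math.exp(-b)))  # ~0.01, the initial foreground probability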
def __init__(self, cfg):
    super(KERCNNC4Predictor, self).__init__()
    num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    dim_reduced = cfg.MODEL.ROI_KE_HEAD.CONV_LAYERS[-1]
    self.resol = cfg.MODEL.ROI_KE_HEAD.RESOLUTION
    if cfg.MODEL.ROI_HEADS.USE_FPN:
        num_inputs = dim_reduced
    else:
        stage_index = 4
        stage2_relative_factor = 2 ** (stage_index - 1)
        res2_out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
        num_inputs = res2_out_channels * stage2_relative_factor

    assert cfg.MODEL.ROI_KE_HEAD.NUM_KES % 2 == 0, \
        'NUM_KES must be even, got {}'.format(cfg.MODEL.ROI_KE_HEAD.NUM_KES)
    NumPred = int(cfg.MODEL.ROI_KE_HEAD.NUM_KES / 2 + 2)

    self.ke_input_xy = Conv2d(num_inputs, num_inputs, 1, 1, 0)
    nn.init.kaiming_normal_(self.ke_input_xy.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.ke_input_xy.bias, 0)

    self.conv5_ke_xy = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0)
    nn.init.kaiming_normal_(self.conv5_ke_xy.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_ke_xy.bias, 0)

    # shrink convs: each collapses one spatial axis (H for x, W for y)
    self.conv5_ke_x_shrink = Conv2d(dim_reduced, NumPred, (self.resol, 1), 1, 0)
    nn.init.kaiming_normal_(self.conv5_ke_x_shrink.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_ke_x_shrink.bias, 0)

    self.conv5_ke_y_shrink = Conv2d(dim_reduced, NumPred, (1, self.resol), 1, 0)
    nn.init.kaiming_normal_(self.conv5_ke_y_shrink.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.conv5_ke_y_shrink.bias, 0)

    # match-type (mt) branch
    self.cat_trans = Conv2d(dim_reduced, cfg.MODEL.ROI_KE_HEAD.NUM_KES, 1, 1, 0)
    nn.init.kaiming_normal_(self.cat_trans.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.cat_trans.bias, 0)

    self.mty = Conv2d(cfg.MODEL.ROI_KE_HEAD.NUM_KES,
                      cfg.MODEL.ROI_KE_HEAD.NUM_MATCHTYPE,
                      (int(self.resol / 2), int(self.resol / 2)), 1, 0)
    nn.init.kaiming_normal_(self.mty.weight, mode='fan_out', nonlinearity='relu')
    nn.init.constant_(self.mty.bias, 0)

    self.up_scale = 2
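Shape sketch for the shrink convolutions (values assumed: resol = 56 and NUM_KES = 10, so NumPred = 7): the (resol, 1) kernel collapses the height axis, leaving one score per column for the x coordinates, and (1, resol) does the same along width for y:

import torch
from torch import nn

resol, dim_reduced, num_pred = 56, 256, 7  # assumed values
x = torch.randn(1, dim_reduced, resol, resol)
shrink_x = nn.Conv2d(dim_reduced, num_pred, (resol, 1), 1, 0)
shrink_y = nn.Conv2d(dim_reduced, num_pred, (1, resol), 1, 0)
print(shrink_x(x).shape)  # torch.Size([1, 7, 1, 56]): per-column x scores
print(shrink_y(x).shape)  # torch.Size([1, 7, 56, 1]): per-row y scores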