def __init__(self, cfg, norm_func):
    """
    Build the stem: one 7x7 stride-2 convolution plus a normalisation layer.

    Arguments:
        cfg: config node; ``cfg.MODEL.RESNETS.STEM_OUT_CHANNELS`` gives the
            number of output channels of the stem.
        norm_func: callable that receives a channel count and returns the
            module stored as ``self.bn1``.
    """
    super(BaseStem, self).__init__()

    stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

    # The input is hard-wired to 3 channels; the conv carries no bias.
    self.conv1 = Conv2d(
        3,
        stem_width,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
    )
    self.bn1 = norm_func(stem_width)

    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
def __init__(self, cfg):
    """
    Build the ROI pooler and the stack of 3x3 convolutions of the mask head.

    Arguments:
        cfg: config node. Reads ``MODEL.CHAR_MASK_ON``, the pooler settings
            and ``CONV_LAYERS`` under ``MODEL.ROI_MASK_HEAD``, and
            ``MODEL.BACKBONE.OUT_CHANNELS``.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()

    head_cfg = cfg.MODEL.ROI_MASK_HEAD

    # With character masks enabled the pooled window may be non-square.
    if cfg.MODEL.CHAR_MASK_ON:
        pooled_size = (
            head_cfg.POOLER_RESOLUTION_H,
            head_cfg.POOLER_RESOLUTION_W,
        )
    else:
        pooled_size = (
            head_cfg.POOLER_RESOLUTION,
            head_cfg.POOLER_RESOLUTION,
        )

    self.pooler = Pooler(
        output_size=pooled_size,
        scales=head_cfg.POOLER_SCALES,
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )

    # ``self.blocks`` keeps the registered layer names in order.
    self.blocks = []
    in_ch = cfg.MODEL.BACKBONE.OUT_CHANNELS
    for idx, out_ch in enumerate(head_cfg.CONV_LAYERS, 1):
        name = "mask_fcn{}".format(idx)
        conv = Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        in_ch = out_ch
def __init__(self, cfg, in_channels):
    """
    Build the 1x1 convolution that produces per-class mask logits.

    Arguments:
        cfg: config node; stored on ``self.cfg``.
            ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` sets the output channels.
        in_channels: number of channels of the incoming feature map.
    """
    super(MaskRCNN_panet_Predictor, self).__init__()

    self.cfg = cfg
    class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    self.mask_fcn_logits = Conv2d(in_channels, class_count, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg):
    """
    Build the stem: a 7x7 stride-2 convolution followed by ``SyncBatchNorm2d``.

    Arguments:
        cfg: config node; ``cfg.MODEL.RESNETS.STEM_OUT_CHANNELS`` gives the
            number of output channels of the stem.
    """
    super(StemWithSyncBN, self).__init__()

    stem_width = cfg.MODEL.RESNETS.STEM_OUT_CHANNELS

    # No ``bias`` argument is passed, so the layer's default applies.
    self.conv1 = Conv2d(3, stem_width, kernel_size=7, stride=2, padding=3)
    self.bn1 = SyncBatchNorm2d(stem_width)

    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
def __init__(self, cfg, in_channels):
    """
    Build a stride-2 transposed convolution followed by a 1x1 logits conv.

    Arguments:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the
            last entry of ``MODEL.ROI_DEPTHMASK_HEAD.CONV_LAYERS``.
        in_channels: number of channels of the incoming feature map.
    """
    super(DepthMaskRCNNC4Predictor, self).__init__()

    class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    hidden_ch = cfg.MODEL.ROI_DEPTHMASK_HEAD.CONV_LAYERS[-1]

    # kernel 2 / stride 2 / padding 0
    self.conv5_mask = ConvTranspose2d(in_channels, hidden_ch, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(hidden_ch, class_count, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def make_conv(in_channels, out_channels, kernel_size, stride=1, dilation=1):
    """
    Create one convolution (or ``Deconv``) layer, optionally followed by
    normalisation and ReLU.

    ``use_deconv``, ``use_gn``, ``use_gw``, ``use_relu``, ``block``,
    ``sampling_stride``, ``sync`` and ``norm_type`` are not parameters; they
    are free names resolved from the surrounding scope.

    Arguments:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: integer kernel size (used in the padding arithmetic).
        stride: convolution stride.
        dilation: convolution dilation.

    Returns:
        The bare layer when nothing is appended to it, otherwise an
        ``nn.Sequential`` of the layer and the appended modules.
    """
    pad = dilation * (kernel_size - 1) // 2

    if not use_deconv:
        # Group norm follows the conv, so the conv bias is dropped then.
        conv = Conv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            dilation=dilation,
            bias=not use_gn,
        )
    else:
        conv = Deconv(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=pad,
            dilation=dilation,
            bias=True,
            block=block,
            sampling_stride=sampling_stride,
            sync=sync,
            norm_type=norm_type,
        )

    # Caffe2 implementation uses XavierFill, which in fact
    # corresponds to kaiming_uniform_ in PyTorch
    nn.init.kaiming_uniform_(conv.weight, a=1)
    if use_deconv or not (use_gn or use_gw):
        nn.init.constant_(conv.bias, 0)

    layers = [conv]
    if not use_deconv:
        # Normalisation is only appended on the plain-conv path.
        if use_gn:
            layers.append(group_norm(out_channels))
        if use_gw:
            layers.append(Whitening_IGWItN(out_channels))
    if use_relu:
        layers.append(nn.ReLU(inplace=True))

    return conv if len(layers) == 1 else nn.Sequential(*layers)
def __init__(self, cfg, in_channels):
    """
    Build a bilinear-upsample + 3x3 conv block followed by a 1x1 logits conv.

    Arguments:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the
            last entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
        in_channels: number of channels of the incoming feature map.
    """
    super(MaskRCNNC4Predictor_Upsample, self).__init__()

    class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    hidden_ch = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    # 2x bilinear upsampling followed by a 3x3 conv (stride 1, padding 1).
    upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
    refine = nn.Conv2d(in_channels, hidden_ch, 3, 1, 1)
    self.conv5_mask = nn.Sequential(upsample, refine)
    self.mask_fcn_logits = Conv2d(hidden_ch, class_count, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, norm_func):
    """
    Build the stem with a configurable stride for its 7x7 convolution.

    Arguments:
        cfg: config node. Reads ``STEM_OUT_CHANNELS``, ``STEM_STRIDE`` and
            ``REMOVE_STEM_POOL`` under ``MODEL.RESNETS``.
        norm_func: callable that receives a channel count and returns the
            module stored as ``self.bn1``.
    """
    super(BaseStem, self).__init__()

    resnet_cfg = cfg.MODEL.RESNETS
    stem_width = resnet_cfg.STEM_OUT_CHANNELS

    self.conv1 = Conv2d(
        3,
        stem_width,
        kernel_size=7,
        stride=resnet_cfg.STEM_STRIDE,
        padding=3,
        bias=False,
    )
    self.bn1 = norm_func(stem_width)

    nn.init.kaiming_uniform_(self.conv1.weight, a=1)

    # Flag stored for use elsewhere in the class (added by hui).
    self.remove_max_pooling = resnet_cfg.REMOVE_STEM_POOL
def __init__(self, cfg, norm_func):
    """
    Build the stem with a configurable number of input channels.

    Arguments:
        cfg: config node. Reads ``STEM_IN_CHANNELS`` and
            ``STEM_OUT_CHANNELS`` under ``MODEL.RESNETS``.
        norm_func: callable that receives a channel count and returns the
            module stored as ``self.bn1``.
    """
    super(BaseStem, self).__init__()

    resnet_cfg = cfg.MODEL.RESNETS
    stem_width = resnet_cfg.STEM_OUT_CHANNELS
    input_ch = resnet_cfg.STEM_IN_CHANNELS

    self.conv1 = Conv2d(
        input_ch,
        stem_width,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
    )
    self.bn1 = norm_func(stem_width)

    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1,
                     batch_norm=FrozenBatchNorm2d):
    """
    Stack ``convs`` repetitions of (3x3 conv, norm, ReLU).

    Arguments:
        inplanes: input channels of the first convolution.
        planes: output channels of every convolution.
        convs: number of conv/norm/ReLU triples to create.
        stride: stride of the first convolution; later ones use stride 1.
        dilation: dilation of every convolution, also used as its padding.
        batch_norm: callable that receives ``planes`` and returns the
            normalisation module.

    Returns:
        ``nn.Sequential`` holding all created modules in order.
    """
    layers = []
    in_ch = inplanes
    for idx in range(convs):
        conv_stride = stride if idx == 0 else 1
        layers.append(
            Conv2d(
                in_ch,
                planes,
                kernel_size=3,
                stride=conv_stride,
                padding=dilation,
                bias=False,
                dilation=dilation,
            )
        )
        layers.append(batch_norm(planes))
        layers.append(nn.ReLU(inplace=True))
        # Every conv after the first maps planes -> planes.
        in_ch = planes
    return nn.Sequential(*layers)
def __init__(self, cfg, in_channels):
    """
    Build a stride-2 transposed convolution followed by a 1x1 logits conv.

    Arguments:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and the
            last entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``.
        in_channels: number of channels of the incoming feature map.
    """
    super(MaskRCNNC4Predictor, self).__init__()

    # The original author's notes give 81 / 256 / 256 as example values for
    # the class count, the reduced width and the input width.
    class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    hidden_ch = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    # Transposed convolution: upsamples by a factor of two
    # (14 --> 28 according to the original note).
    self.conv5_mask = ConvTranspose2d(
        in_channels=in_channels,
        out_channels=hidden_ch,
        kernel_size=2,
        stride=2,
        padding=0,
    )
    # 1x1 convolution producing one channel per class.
    self.mask_fcn_logits = Conv2d(hidden_ch, class_count, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg):
    """
    Build a stride-2 transposed convolution followed by a 1x1 logits conv,
    deriving the input width from the config.

    Arguments:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD.NUM_CLASSES``, the last
            entry of ``MODEL.ROI_MASK_HEAD.CONV_LAYERS``,
            ``MODEL.ROI_HEADS.USE_FPN`` and, without FPN,
            ``MODEL.RESNETS.RES2_OUT_CHANNELS``.
    """
    super(MaskRCNNC4Predictor, self).__init__()

    class_count = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES
    hidden_ch = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]

    if not cfg.MODEL.ROI_HEADS.USE_FPN:
        # Input width is the res2 width scaled by 2 ** (stage_index - 1)
        # for stage index 4, i.e. by 8.
        stage_index = 4
        scale = 2 ** (stage_index - 1)
        input_ch = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * scale
    else:
        input_ch = hidden_ch

    self.conv5_mask = ConvTranspose2d(input_ch, hidden_ch, 2, 2, 0)
    self.mask_fcn_logits = Conv2d(hidden_ch, class_count, 1, 1, 0)

    for param_name, tensor in self.named_parameters():
        if "bias" in param_name:
            nn.init.constant_(tensor, 0)
            continue
        if "weight" in param_name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(tensor, mode="fan_out", nonlinearity="relu")
def make_conv3x3(
    in_channels,
    out_channels,
    dilation=1,
    stride=1,
    use_gn=False,
    use_relu=False,
    use_bias=True,
    kaiming_init=True,
    adaptive_group_norm=False,
):
    """
    Create a 3x3 convolution, optionally followed by group norm and ReLU.

    Arguments:
        in_channels: input channel count.
        out_channels: output channel count.
        dilation: convolution dilation, also used as the padding.
        stride: convolution stride.
        use_gn: append ``group_norm(out_channels, ...)``; the conv then has
            no bias.
        use_relu: append an in-place ReLU.
        use_bias: give the conv a zero-initialised bias (ignored with
            ``use_gn``).
        kaiming_init: use Kaiming-normal weights; otherwise normal weights
            with ``std=0.01``.
        adaptive_group_norm: forwarded to ``group_norm`` as ``adaptive``.

    Returns:
        The bare conv when nothing is appended, otherwise an
        ``nn.Sequential`` of the conv and the appended modules.
    """
    has_bias = bool(use_bias and not use_gn)

    conv = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=has_bias,
    )

    if not kaiming_init:
        torch.nn.init.normal_(conv.weight, std=0.01)
    else:
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
    if has_bias:
        nn.init.constant_(conv.bias, 0)

    layers = [conv]
    if use_gn:
        layers.append(group_norm(out_channels, adaptive=adaptive_group_norm))
    if use_relu:
        layers.append(nn.ReLU(inplace=True))

    return conv if len(layers) == 1 else nn.Sequential(*layers)
def __init__(self, cfg, in_channels):
    """
    Build the ROI pooler and the stack of 3x3 convolutions of the keypoint
    head.

    Arguments:
        cfg: config node. Reads ``POOLER_RESOLUTION``, ``POOLER_SCALES``,
            ``POOLER_SAMPLING_RATIO`` and ``CONV_LAYERS`` under
            ``MODEL.ROI_KEYPOINT_HEAD``.
        in_channels: number of input channels fed to the first convolution.
    """
    super(KeypointRCNNFeatureExtractor, self).__init__()

    head_cfg = cfg.MODEL.ROI_KEYPOINT_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=head_cfg.POOLER_SCALES,
        sampling_ratio=head_cfg.POOLER_SAMPLING_RATIO,
    )

    # ``self.blocks`` keeps the registered layer names in order.
    self.blocks = []
    next_feature = in_channels
    for layer_idx, layer_features in enumerate(head_cfg.CONV_LAYERS, 1):
        layer_name = "conv_fcn{}".format(layer_idx)
        module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(module.bias, 0)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)

    # ``next_feature`` equals the width of the last conv. Unlike the loop
    # variable, it is also defined when CONV_LAYERS is empty, in which case
    # the features pass through with ``in_channels`` channels.
    self.out_channels = next_feature
def __init__(self, levels, channels, num_classes=1000, in_chans=3,
             cardinality=1, base_width=64, block=DlaBottle2neck,
             residual_root=False, linear_root=False,
             batch_norm=FrozenBatchNorm2d, drop_rate=0.0, global_pool='avg',
             feature_only=True, dcn_config=(False, )):
    """
    Build the base layer and the six levels of the DLA network.

    Arguments:
        levels: indexable with six entries; entries 0-1 are conv counts for
            the plain conv levels, entries 2-5 are tree depths.
        channels: indexable with six entries, the width of each level.
        num_classes: stored, and used as the classifier width when
            ``feature_only`` is false.
        in_chans: input channels of the base layer.
        cardinality, base_width: stored and forwarded to every ``DlaTree``.
        block: block class forwarded to every ``DlaTree``.
        residual_root: forwarded to every ``DlaTree`` as ``root_residual``.
        linear_root: accepted for interface compatibility; not used here.
        batch_norm: callable that receives a channel count and returns a
            normalisation module.
        drop_rate: stored on the instance.
        global_pool: pool type for the classifier head.
        feature_only: when false, a global pool and a 1x1 conv classifier
            are created as well.
        dcn_config: per-level deformable-conv flags. Either six entries
            (only entries 2-5 are read) or a single entry that is applied to
            every level.

    Raises:
        ValueError: if ``dcn_config`` has neither one nor six entries.
    """
    super(DLA, self).__init__()
    self.channels = channels
    self.num_classes = num_classes
    self.cardinality = cardinality
    self.base_width = base_width
    self.drop_rate = drop_rate

    # The signature default has a single entry, which the six-entry check
    # below would always reject; broadcast such a config to all levels.
    if len(dcn_config) == 1:
        dcn_config = tuple(dcn_config) * 6

    # check whether deformable conv config is right
    if len(dcn_config) != 6:
        raise ValueError("Deformable configuration is not correct, "
                         "every level should specifcy a configuration.")

    self.base_layer = nn.Sequential(
        Conv2d(in_chans, channels[0], kernel_size=7, stride=1, padding=3,
               bias=False),
        batch_norm(channels[0]),
        nn.ReLU(inplace=True))

    # Levels 0 and 1 are plain conv stacks; level 1 downsamples by 2.
    self.level0 = self._make_conv_level(channels[0], channels[0], levels[0],
                                        batch_norm=batch_norm)
    self.level1 = self._make_conv_level(channels[0], channels[1], levels[1],
                                        stride=2, batch_norm=batch_norm)

    # Levels 2-5 are stride-2 trees; only level 2 is not a level root.
    cargs = dict(cardinality=cardinality, base_width=base_width,
                 root_residual=residual_root, batch_norm=batch_norm)
    self.level2 = DlaTree(levels[2], block, channels[1], channels[2], 2,
                          level_root=False, with_dcn=dcn_config[2], **cargs)
    self.level3 = DlaTree(levels[3], block, channels[2], channels[3], 2,
                          level_root=True, with_dcn=dcn_config[3], **cargs)
    self.level4 = DlaTree(levels[4], block, channels[3], channels[4], 2,
                          level_root=True, with_dcn=dcn_config[4], **cargs)
    self.level5 = DlaTree(levels[5], block, channels[4], channels[5], 2,
                          level_root=True, with_dcn=dcn_config[5], **cargs)

    if not feature_only:
        self.num_features = channels[-1]
        self.global_pool = SelectAdaptivePool2d(pool_type=global_pool)
        self.fc = nn.Conv2d(
            self.num_features * self.global_pool.feat_mult(),
            num_classes, 1, bias=True)
def __init__(self, levels, block, in_channels, out_channels, stride=1,
             dilation=1, cardinality=1, base_width=64, level_root=False,
             root_dim=0, root_kernel_size=1, root_residual=False,
             batch_norm=FrozenBatchNorm2d, with_dcn=False):
    """
    Build one (possibly recursive) DLA tree node.

    Arguments:
        levels: tree depth; 1 creates two ``block`` leaves and a ``DlaRoot``,
            larger values create two ``DlaTree`` children of depth
            ``levels - 1``.
        block: leaf block class.
        in_channels, out_channels: channel counts of the node.
        stride: stride of the first child; values above 1 also create a
            max-pool stored as ``self.downsample``.
        dilation, cardinality, base_width, with_dcn: forwarded to the
            children.
        level_root: when true, ``in_channels`` is added to the root width.
        root_dim: root input width; 0 means ``2 * out_channels``.
        root_kernel_size, root_residual: root settings, forwarded to child
            trees and to ``DlaRoot``.
        batch_norm: callable that receives a channel count and returns a
            normalisation module.
    """
    super(DlaTree, self).__init__()

    if root_dim == 0:
        root_dim = 2 * out_channels
    if level_root:
        root_dim += in_channels

    child_kwargs = dict(dilation=dilation, cardinality=cardinality,
                        base_width=base_width, batch_norm=batch_norm,
                        with_dcn=with_dcn)

    if levels != 1:
        # Inner node: recurse; the second child receives the accumulated
        # root width. No ``self.root`` is created on this path.
        child_kwargs.update(
            dict(root_kernel_size=root_kernel_size,
                 root_residual=root_residual))
        self.tree1 = DlaTree(levels - 1, block, in_channels, out_channels,
                             stride, root_dim=0, **child_kwargs)
        self.tree2 = DlaTree(levels - 1, block, out_channels, out_channels,
                             root_dim=root_dim + out_channels,
                             **child_kwargs)
    else:
        # Leaf node: two blocks plus the root that consumes them.
        self.tree1 = block(in_channels, out_channels, stride, **child_kwargs)
        self.tree2 = block(out_channels, out_channels, 1, **child_kwargs)
        self.root = DlaRoot(root_dim, out_channels, root_kernel_size,
                            root_residual, batch_norm=batch_norm)

    self.level_root = level_root
    self.root_dim = root_dim

    if stride > 1:
        self.downsample = nn.MaxPool2d(stride, stride=stride)
    else:
        self.downsample = None

    # A 1x1 projection is only created when the channel counts differ.
    if in_channels != out_channels:
        self.project = nn.Sequential(
            Conv2d(in_channels, out_channels, kernel_size=1, stride=1,
                   bias=False),
            batch_norm(out_channels))
    else:
        self.project = None

    self.levels = levels
def __init__(self, in_channels, bottleneck_channels, out_channels, num_groups,
             stride_in_1x1, stride, dilation, norm_func, dcn_config,
             dw_config):
    """
    Build a bottleneck block with an optional deformable 3x3 conv and an
    optional dynamic-weights block.

    Arguments:
        in_channels, bottleneck_channels, out_channels: channel counts of
            the 1x1 -> 3x3 -> 1x1 stack.
        num_groups: groups of the 3x3 convolution.
        stride_in_1x1: put the stride on the first 1x1 conv instead of the
            3x3 conv.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that receives a channel count and returns a
            normalisation module.
        dcn_config: mapping read with ``.get`` for ``stage_with_dcn``,
            ``deformable_groups`` and ``with_modulated_dcn``.
        dw_config: mapping read with ``.get`` for ``stage_with_dw``,
            ``insert_pos``, ``group``, ``kernel``, ``dilation``, ``shuffle``
            and ``deform``.

    Raises:
        AssertionError: if ``insert_pos`` is not one of the known positions.
        NotImplementedError: if ``insert_pos`` is ``'after1x1'``. That value
            passes the assertion but no block is defined for it here;
            previously this crashed with ``UnboundLocalError``.
    """
    super(Bottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        down_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(in_channels, out_channels, kernel_size=1,
                   stride=down_stride, bias=False),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    with_dcn = dcn_config.get("stage_with_dcn", False)
    if with_dcn:
        deformable_groups = dcn_config.get("deformable_groups", 1)
        with_modulated_dcn = dcn_config.get("with_modulated_dcn", False)
        self.conv2 = DFConv2d(bottleneck_channels, bottleneck_channels,
                              with_modulated_dcn=with_modulated_dcn,
                              kernel_size=3, stride=stride_3x3,
                              groups=num_groups, dilation=dilation,
                              deformable_groups=deformable_groups,
                              bias=False)
    else:
        self.conv2 = Conv2d(bottleneck_channels, bottleneck_channels,
                            kernel_size=3, stride=stride_3x3,
                            padding=dilation, bias=False, groups=num_groups,
                            dilation=dilation)
        # Only the plain conv is initialised here.
        nn.init.kaiming_uniform_(self.conv2.weight, a=1)
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(bottleneck_channels, out_channels, kernel_size=1,
                        bias=False)
    self.bn3 = norm_func(out_channels)

    self.with_dw = dw_config.get("stage_with_dw", False)
    if self.with_dw:
        self.insert_pos = dw_config.get('insert_pos', 'after1x1')
        assert self.insert_pos in ['after1x1', 'after3x3', 'afterAdd']
        if self.insert_pos == 'afterAdd':
            dw_block = DynamicWeightsCat11
            dw_channels = out_channels
        elif self.insert_pos == 'after3x3':
            dw_block = ReDynamicWeightsCat33
            dw_channels = bottleneck_channels
        else:
            # Fail with a clear message instead of an UnboundLocalError on
            # ``dw_block`` below.
            raise NotImplementedError(
                "insert_pos='after1x1' has no dynamic-weights block defined; "
                "use 'after3x3' or 'afterAdd'")
        dw_group = dw_config.get('group', 1)
        dw_kernel = dw_config.get('kernel', 3)
        dw_dilation = dw_config.get('dilation', (1, 4, 8, 12))
        dw_shuffle = dw_config.get('shuffle', False)
        dw_deform = dw_config.get('deform', 'none')
        self.dw_block = dw_block(channels=dw_channels, group=dw_group,
                                 kernel=dw_kernel, dilation=dw_dilation,
                                 shuffle=dw_shuffle, deform=dw_deform)
    else:
        self.dw_block = None

    for layer in (self.conv1, self.conv3):
        nn.init.kaiming_uniform_(layer.weight, a=1)
def conv_1x1_bn(inp, oup):
    """
    Return a 1x1 conv (stride 1, no padding, no bias) + batch norm + ReLU6.

    Arguments:
        inp: input channel count.
        oup: output channel count.
    """
    pointwise = Conv2d(inp, oup, 1, 1, 0, bias=False)
    norm = BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(pointwise, norm, activation)
def conv_bn(inp, oup, stride):
    """
    Return a 3x3 conv (padding 1, no bias) + batch norm + ReLU6.

    Arguments:
        inp: input channel count.
        oup: output channel count.
        stride: stride of the convolution.
    """
    conv = Conv2d(inp, oup, 3, stride, 1, bias=False)
    norm = BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)
def __init__(self, cfg):
    """
    Build the four per-level decoders and the final segmentation conv.

    Each decoder is a chain of (3x3 conv, GroupNorm(32), ReLU) stages.
    Level 1 has one stage without upsampling; levels 2, 3 and 4 have one,
    two and three stages, each followed by a 2x bilinear upsample.

    Arguments:
        cfg: config node; a clone is stored on ``self.cfg``. Reads
            ``MODEL.BACKBONE.CONV_BODY``,
            ``MODEL.RESNETS.BACKBONE_OUT_CHANNELS`` and ``DECODER_CHANNEL``,
            ``MERGE_OP``, ``CLS_NUM`` under ``MODEL.SEG_BRANCH``.
    """
    super(Panoptic_FPN_Segmentation_Branch, self).__init__()
    self.cfg = cfg.clone()
    assert 'FPN' in cfg.MODEL.BACKBONE.CONV_BODY, 'Segmentation Branch should build on FPN backbone'

    backbone_ch = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    decoder_ch = cfg.MODEL.SEG_BRANCH.DECODER_CHANNEL

    def _stage(in_ch, upsample=True):
        # One conv/GN/ReLU stage, optionally followed by 2x upsampling.
        layers = [
            Conv2d(in_ch, decoder_ch, 3, 1, 1),
            nn.GroupNorm(num_groups=32, num_channels=decoder_ch),
            nn.ReLU(True),
        ]
        if upsample:
            layers.append(
                nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True))
        return layers

    # Resnet backbone has 4 stages
    self.upsample_level1 = nn.Sequential(*_stage(backbone_ch, upsample=False))
    self.upsample_level2 = nn.Sequential(*_stage(backbone_ch))
    self.upsample_level3 = nn.Sequential(
        *(_stage(backbone_ch) + _stage(decoder_ch)))
    self.upsample_level4 = nn.Sequential(
        *(_stage(backbone_ch) + _stage(decoder_ch) + _stage(decoder_ch)))

    # Any merge op other than "add" gets four times the decoder width as
    # input to the segmentation conv.
    if cfg.MODEL.SEG_BRANCH.MERGE_OP == "add":
        merged_ch = decoder_ch
    else:
        merged_ch = decoder_ch * 4
    self.to_segment_conv = Conv2d(merged_ch, cfg.MODEL.SEG_BRANCH.CLS_NUM, 1, 1)

    self.to_segment_upsample = nn.Upsample(scale_factor=4, mode='bilinear', align_corners=True)
def __init__(
    self,
    in_channels,
    bottleneck_channels,
    out_channels,
    num_groups,
    stride_in_1x1,
    stride,
    dilation,
    norm_func
):
    """
    Build a bottleneck block with optional SNL / GC attention layers.

    ``cfg`` is not a parameter; it is a free name resolved from the
    surrounding scope and read for ``SNL_ATTENTION``, ``GC_ATTENTION`` and
    ``ADD_C2``.

    Arguments:
        in_channels, bottleneck_channels, out_channels: channel counts of
            the 1x1 -> 3x3 -> 1x1 stack.
        num_groups: groups of the 3x3 convolution.
        stride_in_1x1: put the stride on the first 1x1 conv instead of the
            3x3 conv.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that receives a channel count and returns a
            normalisation module.
    """
    super(Bottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        down_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels, out_channels,
                kernel_size=1, stride=down_stride, bias=False
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation
    )
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)

    for layer in (self.conv1, self.conv2, self.conv3):
        nn.init.kaiming_uniform_(layer.weight, a=1)

    self.layers_ = out_channels

    # Attention layers are built for every block when ADD_C2 is set,
    # otherwise only for blocks whose output width is not 256.
    if cfg.SNL_ATTENTION and (cfg.ADD_C2 or out_channels != 256):
        self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
        self.Wv_layer = nn.Conv2d(out_channels, out_channels, kernel_size=1, stride=1)
        for layer in (self.Wk_layer, self.Wv_layer):
            nn.init.kaiming_uniform_(layer.weight, a=1)

    # When both attention flags are set, this branch replaces the
    # ``Wk_layer`` created above.
    if cfg.GC_ATTENTION and (cfg.ADD_C2 or out_channels != 256):
        self.ratios = 4
        self.planes = out_channels // self.ratios
        self.Wk_layer = nn.Conv2d(out_channels, 1, kernel_size=1, stride=1)
        self.softmax = nn.Softmax(dim=2)
        self.channel_add_conv = nn.Sequential(
            nn.Conv2d(out_channels, self.planes, kernel_size=1),
            nn.LayerNorm([self.planes, 1, 1]),
            nn.ReLU(inplace=True),  # yapf: disable
            nn.Conv2d(self.planes, out_channels, kernel_size=1)
        )
        nn.init.kaiming_uniform_(self.Wk_layer.weight, a=1)
        # Both convs of the channel-add branch start at exactly zero.
        for layer in (self.channel_add_conv[0], self.channel_add_conv[-1]):
            nn.init.constant_(layer.weight, 0)
            nn.init.constant_(layer.bias, 0)
def __init__(self, in_channels, bottleneck_channels, out_channels, num_groups,
             stride_in_1x1, stride, dilation, norm_func, reduction=16):
    """
    Build a bottleneck block followed by an ``SELayer``.

    Arguments:
        in_channels, bottleneck_channels, out_channels: channel counts of
            the 1x1 -> 3x3 -> 1x1 stack.
        num_groups: groups of the 3x3 convolution.
        stride_in_1x1: put the stride on the first 1x1 conv instead of the
            3x3 conv.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that receives a channel count and returns a
            normalisation module.
        reduction: second argument passed to ``SELayer``.
    """
    super(SEBottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        down_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(in_channels, out_channels, kernel_size=1,
                   stride=down_stride, bias=False),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
    )
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(bottleneck_channels, out_channels, kernel_size=1,
                        bias=False)
    self.bn3 = norm_func(out_channels)

    for layer in (self.conv1, self.conv2, self.conv3):
        nn.init.kaiming_uniform_(layer.weight, a=1)

    self.relu = nn.ReLU(inplace=True)
    self.se = SELayer(out_channels, reduction)
def __init__(
    self,
    in_channels,
    bottleneck_channels,
    out_channels,
    num_groups,
    stride_in_1x1,
    stride,
    dilation,
    norm_func,
    use_dcn
):
    """
    Build a bottleneck block whose 3x3 conv is optionally deformable.

    Arguments:
        in_channels, bottleneck_channels, out_channels: channel counts of
            the 1x1 -> 3x3 -> 1x1 stack.
        num_groups: groups of the plain 3x3 conv and of the offset conv.
        stride_in_1x1: put the stride on the first 1x1 conv instead of the
            3x3 conv.
        stride: block stride; forced to 1 when ``dilation > 1``.
        dilation: dilation (and padding) of the 3x3 convolution.
        norm_func: callable that receives a channel count and returns a
            normalisation module.
        use_dcn: when truthy, ``conv2`` is a ``DeformConv`` and an extra
            ``conv2_offset`` layer is created.
    """
    super(Bottleneck, self).__init__()

    self.downsample = None
    if in_channels != out_channels:
        down_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels, out_channels,
                kernel_size=1, stride=down_stride, bias=False
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.use_dcn = use_dcn

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    if self.use_dcn:
        # Deformable path: a regular conv predicts the offsets.
        # NOTE(review): 18 offset channels presumably correspond to 2 values
        # per position of the 3x3 kernel; confirm. ``groups=num_groups`` is
        # also applied to the offset conv; confirm this is intended.
        deformable_groups = 1
        offset_channels = 18
        self.conv2_offset = nn.Conv2d(
            bottleneck_channels,
            deformable_groups * offset_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation)
        self.conv2 = DeformConv(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            dilation=dilation,
            deformable_groups=deformable_groups,
            bias=False)
    else:
        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation
        )

    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(
        bottleneck_channels, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)

    # ``conv2_offset`` is not part of this initialisation pass.
    for layer in (self.conv1, self.conv2, self.conv3):
        nn.init.kaiming_uniform_(layer.weight, a=1)
def __init__(self, cfg):
    """
    Build every sub-module of the vline head.

    Arguments:
        cfg: config node. Reads ``MODEL.VLINE_HEAD`` (detections per image,
            feature dims, bins, boundary count, the ``USE_*`` switches,
            ``CONV_LAYERS``, ``POOLER_RESOLUTION``),
            ``MODEL.RESNETS.BACKBONE_OUT_CHANNELS``,
            ``MODEL.ROI_BOX_HEAD.NUM_CLASSES`` and
            ``MODEL.CLS_AGNOSTIC_VLINE_REG``. It is also handed to the
            feature-extractor, loss-evaluator and post-processor factories.
    """
    super(VLineHead, self).__init__()
    head_cfg = cfg.MODEL.VLINE_HEAD

    self.feature_extractor = make_vline_feature_extractor(cfg)
    self.detections_per_img = head_cfg.DETECTIONS_PER_IMG

    backbone_dim = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    extract_dim = head_cfg.NUM_EXTRACT_FEATS_DIM
    first_linear_dim = head_cfg.NUM_FIRST_LINEAR
    self.use_first_linear = head_cfg.USE_FIRST_LINEAR

    global_dim = 256
    hidden_dim = 512
    if head_cfg.USE_FBP:
        num_vlines = 20
    else:
        num_vlines = head_cfg.BINS

    self.num_boundary = head_cfg.NUM_BOUDARY
    self.use_stack = head_cfg.USE_STACK
    self.use_eye = head_cfg.USE_EYE
    self.use_global = head_cfg.USE_GLOBAL
    self.train_nonrf = head_cfg.TRAIN_NONRF

    if self.use_stack:
        for _ in range(3):
            print("using self.use_stack!!!!!!!!!!!!")
        stack_out = 256
        if self.use_first_linear:
            stack_in = first_linear_dim * 2
        else:
            stack_in = extract_dim * 2
        for attr in ("stack_mean", "stack_mean_vert",
                     "stack_max", "stack_max_vert"):
            setattr(self, attr, nn.Linear(stack_in, stack_out))

    # The mean pooling is kept as an object on one path and as a bound
    # ``.apply`` on the other; the max pooling is always an ``.apply``.
    self.use_indsgroupmap = head_cfg.USE_INDSGROUPMAP
    if self.use_indsgroupmap:
        self.vline_pooling_mean = VLinePooling().apply
        self.vline_pooling_max = VLinePooling4().apply
    else:
        self.vline_pooling_mean = VLinePooling2()
        self.vline_pooling_max = VLinePooling3().apply

    if cfg.MODEL.CLS_AGNOSTIC_VLINE_REG:
        num_vline_classes = 1
    else:
        num_vline_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES

    # Classifier input widths: four feature groups for the main heads,
    # two for the mean/max heads.
    if self.use_first_linear:
        feats_dim = first_linear_dim * 4
        feats_dim2 = first_linear_dim * 2
        for attr in ("linear", "linear_max",
                     "linear_vert", "linear_vert_max"):
            setattr(self, attr, nn.Linear(extract_dim, first_linear_dim))
    else:
        feats_dim = extract_dim * 4
        feats_dim2 = extract_dim * 2

    if self.use_eye:
        # Plain tensor attribute holding an identity matrix with one row
        # per vline.
        self.eye = torch.tensor(np.eye(num_vlines)).float()
        feats_dim += num_vlines
        feats_dim2 += num_vlines

    if self.use_global:
        feats_dim += global_dim
        feats_dim2 += global_dim

    out_dim = self.num_boundary * num_vline_classes
    self.classifier = nn.Linear(feats_dim, hidden_dim)
    self.classifier_vert = nn.Linear(feats_dim, hidden_dim)
    self.classifier_another = nn.Linear(hidden_dim, out_dim)
    self.classifier_vert_another = nn.Linear(hidden_dim, out_dim)
    for attr in ("classifier_mean", "classifier_mean_vert",
                 "classifier_max", "classifier_max_vert"):
        setattr(self, attr, nn.Linear(feats_dim2, out_dim))

    self.loss_evaluator = make_vline_loss_evaluator(cfg)
    self.post_processor = make_vline_post_processor(cfg)

    # TODO(H): If it's possible to remove these convs when extracting feats?
    # Or maybe let's use the one trained on mask?
    self.blocks = []
    in_ch = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    for idx, out_ch in enumerate(head_cfg.CONV_LAYERS, 1):
        name = "vp_mask_fcn{}".format(idx)
        conv = Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        self.add_module(name, conv)
        self.blocks.append(name)
        in_ch = out_ch

    self.logger = logging.getLogger("maskrcnn_benchmark.trainer")
    self.logger.info("Get logger in model")

    # Global branch: 8x8 average pooling with stride 8, then a linear
    # layer over the flattened pooled map.
    pool_kernel = 8
    pool_stride = 8
    pooler_res = head_cfg.POOLER_RESOLUTION
    self.avgpool = nn.AvgPool2d(kernel_size=pool_kernel, stride=pool_stride)
    pooled_side = int(np.floor((pooler_res - pool_kernel) / pool_stride)) + 1
    self.linear_global = nn.Linear(
        pooled_side * pooled_side * backbone_dim, global_dim)

    self.softmax = nn.Softmax(dim=1)
def __init__(self, cfg):
    """
    Build the PANet mask branch: four parallel first convs, two shared
    convs, an FCN branch and a fully-connected branch.

    Arguments:
        cfg: config node. Reads ``MODEL.BACKBONE.OUT_CHANNELS`` and
            ``POOLER_RESOLUTION`` / ``RESOLUTION`` under
            ``MODEL.ROI_MASK_HEAD``. It is also handed to the pooler and
            mask-predictor factories.
    """
    super(PANetMaskBranch, self).__init__()

    def _conv3x3(in_ch, out_ch):
        # 3x3 conv with Kaiming-normal weights and zero bias.
        # Caffe2 implementation uses MSRAFill, which in fact
        # corresponds to kaiming_normal_ in PyTorch
        conv = Conv2d(in_ch, out_ch, 3, stride=1, padding=1)
        nn.init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
        nn.init.constant_(conv.bias, 0)
        return conv

    self.pooler = make_mask_pooler(cfg)
    input_ch = cfg.MODEL.BACKBONE.OUT_CHANNELS

    parallel_widths = (256, 256, 256, 256)
    common_widths = (256, 256)

    # Parallel Block: 4 parallel fcn1
    self.parallel_block = []
    for idx, width in enumerate(parallel_widths, 1):
        name = "mask_fcn_parallel{}".format(idx)
        self.add_module(name, _conv3x3(input_ch, width))
        self.parallel_block.append(name)

    # Common Block: fcn2, fcn3 (numbering starts at 2)
    self.common_blocks = []
    for idx, width in enumerate(common_widths, 2):
        name = "mask_fcn{}".format(idx)
        self.add_module(name, _conv3x3(width, width))
        self.common_blocks.append(name)

    # FCN branch: fcn4 + original_mask_predictor
    self.mask_fcn_4 = _conv3x3(256, 256)
    self.mask_predictor = make_roi_mask_predictor(cfg)

    # FC Branch: conv4_fc, conv5_fc, fc
    self.mask_fc_fcn_4 = _conv3x3(256, 256)
    self.mask_fc_fcn_5 = _conv3x3(256, 128)

    fc_ch = 128
    self.pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    self.mask_resolution = cfg.MODEL.ROI_MASK_HEAD.RESOLUTION
    # Maps the flattened 128-channel pooled map to a flattened mask.
    self.fc = nn.Linear(
        fc_ch * self.pooler_resolution * self.pooler_resolution,
        self.mask_resolution * self.mask_resolution,
        bias=True,
    )
    nn.init.kaiming_uniform_(self.fc.weight, a=1)
    nn.init.constant_(self.fc.bias, 0)
def __init__(self, cfg):
    """Create the dilated support convs, offset predictor and deformable conv.

    Arguments:
        cfg: project config node; ``MODEL.SAMPLE_STREAM.OUT_CHANNELS`` sets
            the channel width ``out_ch`` used by every layer below.
    """
    super(SampleBlock, self).__init__()
    width = cfg.MODEL.SAMPLE_STREAM.OUT_CHANNELS

    # Settings shared by every layer in this block.
    shared = dict(kernel_size=(3, 3), stride=1, groups=1)

    # Three 3x3 convs with dilation 1, 2 and 3. Padding equals the dilation,
    # and the output widths are out_ch/2, out_ch/4 and out_ch/4.
    self.sup_conv_d1 = Conv2d(width, width // 2, dilation=1, padding=1, **shared)
    self.sup_conv_d2 = Conv2d(width, width // 4, dilation=2, padding=2, **shared)
    self.sup_conv_d3 = Conv2d(width, width // 4, dilation=3, padding=3, **shared)

    # Offset path: 2*out_ch -> out_ch -> 73 channels.
    # NOTE(review): 73 is hard-coded; presumably it relates to the offsets
    # consumed by sample_conv - confirm against forward().
    self.offset_lateral = Conv2d(2 * width, width, dilation=1, padding=1, **shared)
    self.offset_pred = Conv2d(width, 73, dilation=1, padding=1, **shared)

    self.sample_conv = DeformConv(
        width,
        width,
        dilation=1,
        padding=1,
        deformable_groups=4,
        bias=False,
        **shared
    )

    # Initialization. sample_conv is deliberately not in this list, so it is
    # not re-initialized here.
    to_init = (
        self.sup_conv_d1,
        self.sup_conv_d2,
        self.sup_conv_d3,
        self.offset_lateral,
        self.offset_pred,
    )
    for root in to_init:
        for layer in root.modules():
            if not isinstance(layer, nn.Conv2d):
                continue
            torch.nn.init.kaiming_uniform_(layer.weight, a=1)
            torch.nn.init.constant_(layer.bias, 0)
def __init__(self, cfg, in_channels):
    """Build the conv stack and the two grouped deconvs of the keypoint head.

    Arguments:
        cfg: project config node; ``MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES`` is
            used as the number of keypoints.
        in_channels (int): channel count of the input to the first conv.
    """
    super(KeypointRCNNPredictor, self).__init__()
    self.in_channels = in_channels
    self.num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES
    self.num_convs = 4
    self.point_feat_channels = 32
    # point_feat_channels feature channels per keypoint.
    self.conv_out_channels = self.point_feat_channels * self.num_keypoints

    conv_k = 3
    deconv_k = 4
    conv_pad = (conv_k - 1) // 2

    # num_convs stages of conv3x3 -> GroupNorm(32 groups) -> ReLU. Only the
    # first stage takes in_channels; later stages stay at conv_out_channels.
    stages = []
    for depth in range(self.num_convs):
        if depth == 0:
            stage_in = self.in_channels
        else:
            stage_in = self.conv_out_channels
        stages.append(
            nn.Sequential(
                Conv2d(stage_in, self.conv_out_channels, conv_k, 1, conv_pad),
                GroupNorm(32, self.conv_out_channels),
                nn.ReLU(inplace=True),
            )
        )
    self.convs = nn.Sequential(*stages)

    # Two stride-2 transposed convs, grouped by keypoint. The first keeps
    # the channel width; the second reduces to one channel per keypoint.
    deconv_opts = dict(
        kernel_size=deconv_k,
        stride=2,
        padding=(deconv_k - 2) // 2,
        groups=self.num_keypoints,
    )
    self.updeconv1_ = ConvTranspose2d(
        self.conv_out_channels, self.conv_out_channels, **deconv_opts
    )
    self.norm1 = GroupNorm(self.num_keypoints, self.conv_out_channels)
    self.updeconv2_ = ConvTranspose2d(
        self.conv_out_channels, self.num_keypoints, **deconv_opts
    )

    # Pass 1: Kaiming-normal weights and zero bias for conv / linear layers.
    for layer in self.modules():
        if not isinstance(layer, (nn.Conv2d, nn.Linear)):
            continue
        nn.init.kaiming_normal_(layer.weight.data)
        if layer.bias is not None:
            nn.init.constant_(layer.bias, 0)

    # Pass 2: small-std normal weights and zero bias for transposed convs.
    for layer in self.modules():
        if not isinstance(layer, nn.ConvTranspose2d):
            continue
        nn.init.normal_(layer.weight.data, std=0.001)
        if layer.bias is not None:
            layer.bias.data.zero_()

    # Override the final deconv bias with -log(0.99 / 0.01).
    # NOTE(review): presumably a low-prior start for a sigmoid-style
    # heatmap loss - confirm against the loss implementation.
    nn.init.constant_(self.updeconv2_.bias, -np.log(0.99 / 0.01))
def _init_adaptor(self, s_channel, t_channel):
    """Return a freshly initialized 1x1 conv mapping s_channel to t_channel.

    Arguments:
        s_channel (int): number of input channels of the adaptor.
        t_channel (int): number of output channels of the adaptor.

    Returns:
        The new ``Conv2d`` (kernel 1, stride 1, padding 0) with
        Kaiming-uniform weights (``a=1``) and zero bias. The layer is only
        returned; it is not attached to ``self`` here.
    """
    projection = Conv2d(s_channel, t_channel, kernel_size=1, stride=1, padding=0)
    nn.init.kaiming_uniform_(projection.weight, a=1)
    nn.init.constant_(projection.bias, 0)
    return projection
def __init__(self, in_channels, bottleneck_channels, out_channels, num_groups,
             stride_in_1x1, stride, dilation, norm_func, dcn_config):
    """Build a 1x1 -> 3x3 -> 1x1 bottleneck with an optional projection shortcut.

    Arguments:
        in_channels (int): channels entering the block.
        bottleneck_channels (int): width of the inner 1x1 and 3x3 convs.
        out_channels (int): channels leaving the block.
        num_groups (int): ``groups`` of the 3x3 conv.
        stride_in_1x1 (bool): if true the stride goes on the first 1x1
            conv, otherwise on the 3x3 conv.
        stride (int): block stride; forced to 1 when ``dilation > 1``.
        dilation (int): dilation of the 3x3 conv (also its padding in the
            non-deformable case).
        norm_func: callable taking a channel count and returning the
            normalization module.
        dcn_config (dict): read with ``.get`` for ``"stage_with_dcn"``,
            ``"deformable_groups"`` and ``"with_modulated_dcn"``.
    """
    super(Bottleneck, self).__init__()

    # Projection shortcut is needed only when the channel count changes.
    self.downsample = None
    if in_channels != out_channels:
        shortcut_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=shortcut_stride,
                bias=False,
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)

    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels)
    # TODO: specify init for the above

    if dcn_config.get("stage_with_dcn", False):
        # Deformable 3x3 conv; it is not re-initialized in this constructor.
        self.conv2 = DFConv2d(
            bottleneck_channels,
            bottleneck_channels,
            with_modulated_dcn=dcn_config.get("with_modulated_dcn", False),
            kernel_size=3,
            stride=stride_3x3,
            groups=num_groups,
            dilation=dilation,
            deformable_groups=dcn_config.get("deformable_groups", 1),
            bias=False,
        )
    else:
        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=stride_3x3,
            padding=dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
        )
        nn.init.kaiming_uniform_(self.conv2.weight, a=1)
    self.bn2 = norm_func(bottleneck_channels)

    self.conv3 = Conv2d(bottleneck_channels, out_channels, kernel_size=1, bias=False)
    self.bn3 = norm_func(out_channels)

    # Same init for both 1x1 convs, applied in conv1-then-conv3 order.
    nn.init.kaiming_uniform_(self.conv1.weight, a=1)
    nn.init.kaiming_uniform_(self.conv3.weight, a=1)
def __init__(
    self,
    in_channels,
    bottleneck_channels,
    out_channels,
    num_groups,
    stride_in_1x1,
    stride,
    dilation,
    norm_func,
    scale=4,
):
    """Build a multi-branch bottleneck: 1x1 conv, per-branch 3x3 convs, 1x1 conv.

    Arguments:
        in_channels (int): channels entering the block.
        bottleneck_channels (int): width of each 3x3 branch; the first 1x1
            conv outputs ``bottleneck_channels * scale`` channels.
        out_channels (int): channels leaving the block.
        num_groups (int): ``groups`` of every 3x3 branch conv.
        stride_in_1x1 (bool): if true the stride goes on the first 1x1
            conv, otherwise on the 3x3 convs (and the pool).
        stride (int): block stride; forced to 1 when ``dilation > 1``.
        dilation (int): dilation (and padding) of the 3x3 branch convs.
        norm_func: callable taking a channel count and returning the
            normalization module.
        scale (int): number of channel splits; ``scale - 1`` branch convs
            are created (one when ``scale == 1``).
    """
    super(Bottle2neck, self).__init__()

    # Dilated blocks must not stride. The reset has to happen BEFORE the
    # per-conv strides are derived; otherwise it has no effect and the
    # main path would keep striding while the shortcut below uses stride 1.
    if dilation > 1:
        stride = 1  # reset to be 1

    # The original MSRA ResNet models have stride in the first 1x1 conv
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.downsample = None
    if in_channels != out_channels:
        down_stride = stride if dilation == 1 else 1
        self.downsample = nn.Sequential(
            Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=down_stride,
                bias=False,
            ),
            norm_func(out_channels),
        )
        for layer in self.downsample.modules():
            if isinstance(layer, Conv2d):
                nn.init.kaiming_uniform_(layer.weight, a=1)
        # Blocks that change the channel count are tagged 'stage' and get
        # an average pool that strides like the 3x3 convs.
        self.stype = 'stage'
        # NOTE(review): padding=dilation exceeds half of the 3x3 kernel
        # when dilation > 1, which PyTorch pooling is expected to reject
        # at call time - confirm whether dilated 'stage' blocks are used.
        self.pool = nn.AvgPool2d(kernel_size=3, stride=stride_3x3, padding=dilation)
    else:
        self.stype = 'normal'

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels * scale,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
    )
    self.bn1 = norm_func(bottleneck_channels * scale)

    # One 3x3 conv + norm per processed split.
    if scale == 1:
        self.nums = 1
    else:
        self.nums = scale - 1
    convs = []
    bns = []
    for _ in range(self.nums):
        convs.append(
            nn.Conv2d(
                bottleneck_channels,
                bottleneck_channels,
                kernel_size=3,
                stride=stride_3x3,
                padding=dilation,
                groups=num_groups,
                dilation=dilation,
                bias=False,
            )
        )
        bns.append(norm_func(bottleneck_channels))
    self.convs = nn.ModuleList(convs)
    self.bns = nn.ModuleList(bns)

    self.conv3 = Conv2d(
        bottleneck_channels * scale, out_channels, kernel_size=1, bias=False
    )
    self.bn3 = norm_func(out_channels)
    self.scale = scale
    self.width = bottleneck_channels

    # Kaiming-uniform init: the two 1x1 convs first, then the branch convs.
    for layer in [self.conv1, self.conv3]:
        nn.init.kaiming_uniform_(layer.weight, a=1)
    for layer in self.convs:
        nn.init.kaiming_uniform_(layer.weight, a=1)