def panoptic_upsampler_block(in_channels, out_channels, expansion):
    """Build a conv (+ optional 2x upsampling) stack for a panoptic head.

    With ``expansion == 0`` a single 3x3 conv maps in_channels to
    out_channels and no upsampling happens; otherwise ``expansion``
    conv+upsample stages are stacked, each doubling spatial resolution.
    """
    stages = []
    if expansion == 0:
        # No upsampling: a lone 3x3 conv adapts the channel count.
        stages.append(
            make_conv3x3(in_channels,
                         out_channels,
                         dilation=1,
                         stride=1,
                         use_gn=True,
                         use_relu=True,
                         kaiming_init=True))
    for stage_idx in range(expansion):
        source_channels = in_channels if stage_idx == 0 else out_channels
        stages.append(
            make_conv3x3(source_channels,
                         out_channels,
                         dilation=1,
                         stride=1,
                         use_gn=True,
                         use_relu=True,
                         kaiming_init=True))
        stages.append(
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False))
    return nn.Sequential(*stages)
def __init__(self, cfg, in_channels, out_channels, mode="bilinear"):
    """Two 3x3 convs used around an interpolation-based upsampling step.

    Arguments:
        cfg: config node; MODEL.SEMANTIC.USE_GN toggles GroupNorm.
        in_channels (int): channels of the incoming feature map.
        out_channels (int): channels produced by both convs.
        mode (str): interpolation mode stored for the upsampling step.
    """
    super(TwoConvUpsampleStage, self).__init__()
    self.mode = mode
    use_gn = cfg.MODEL.SEMANTIC.USE_GN
    self.conv1 = make_conv3x3(in_channels, out_channels,
                              use_gn=use_gn, use_relu=True)
    # BUGFIX: conv2 consumes conv1's output, so its input channel count
    # must be out_channels. The original passed in_channels, which only
    # worked when in_channels == out_channels.
    self.conv2 = make_conv3x3(out_channels, out_channels,
                              use_gn=use_gn, use_relu=True)
def __init__(self, cfg):
    """MaskIoU feature extractor: four 3x3 convs (the last stride-2)
    followed by two 1024-wide fully-connected layers."""
    super(MaskIoUFeatureExtractor, self).__init__()
    input_channels = 260  # default 257
    use_gn = cfg.MODEL.MASKIOU_USE_GN
    conv_width = 256
    self.maskiou_fcn1 = make_conv3x3(input_channels, conv_width, use_gn=use_gn)
    self.maskiou_fcn2 = make_conv3x3(conv_width, conv_width, use_gn=use_gn)
    self.maskiou_fcn3 = make_conv3x3(conv_width, conv_width, use_gn=use_gn)
    # Final conv halves the spatial resolution before the FC layers.
    self.maskiou_fcn4 = make_conv3x3(conv_width, conv_width, stride=2, use_gn=use_gn)
    self.maskiou_fc1 = make_fc(conv_width * 7 * 7, 1024, use_gn=use_gn)
    self.maskiou_fc2 = make_fc(1024, 1024, use_gn=use_gn)
def __init__(self, cfg, in_channels):
    """Mask head feature extractor with an auxiliary fully-connected branch.

    Arguments:
        cfg: config node; MODEL.ROI_MASK_HEAD.* controls pooling and convs.
        in_channels (int): channel count of the incoming FPN features.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    # ROI pooling configuration for the mask head.
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = in_channels
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    # Stack of 3x3 convs; each is registered by name so checkpoints keep
    # the conventional "mask_fcn{i}" parameter names.
    next_feature = input_size
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        module = make_conv3x3(next_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
    # Fully-connected branch: two convs (the second halving the channel
    # count) followed by an FC producing a RESOLUTION**2 output.
    dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1]
    self.conv4_fc = make_conv3x3(self.out_channels, dim_reduced, use_gn=use_gn)
    self.conv5_fc = make_conv3x3(dim_reduced, int(dim_reduced / 2), use_gn=use_gn)
    self.fc_final = make_fc(
        int(dim_reduced / 2) *
        (cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION)**2,
        cfg.MODEL.ROI_MASK_HEAD.RESOLUTION**2)
def __init__(
    self,
    in_channels,
    refine_level=2,
    refine_type='none',
    use_gn=False,
    freeze=False,
):
    """Balanced Feature Pyramid style refinement module.

    Arguments:
        in_channels (int): channel count of every pyramid level.
        refine_level (int): index of the level refinement runs at (>= 0).
        refine_type (str): one of 'none', 'conv', 'non_local'.
        use_gn (bool): use GroupNorm inside the refinement op.
        freeze (bool): freeze all parameters of this module.
    """
    super(BFP, self).__init__()
    assert refine_type in ['none', 'conv', 'non_local']
    self.in_channels = in_channels
    self.refine_level = refine_level
    self.refine_type = refine_type
    assert 0 <= self.refine_level
    # Pick the refinement operator according to refine_type.
    if self.refine_type == 'non_local':
        self.refine = NonLocal2D(
            self.in_channels,
            reduction=1,
            use_scale=False,
            use_gn=use_gn,
        )
    elif self.refine_type == 'conv':
        self.refine = make_conv3x3(self.in_channels,
                                   self.in_channels,
                                   use_gn=use_gn,
                                   use_relu=True,
                                   kaiming_init=True)
    else:
        self.refine = None
    self.freeze = freeze
    if self.freeze:
        dfs_freeze(self, requires_grad=False)
def __init__(self, cfg, in_channels):
    """MaskIoU head feature extractor: conv stack (last conv stride-2)
    followed by two 1024-wide fully-connected layers.

    Arguments:
        cfg: config; MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS lists conv widths.
        in_channels (int): mask-head feature channels (+1 below for the
            concatenated single-channel mask).
    """
    super(MaskIoUFeatureExtractor, self).__init__()
    layers = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS
    # Spatial size after the final stride-2 conv halves the resolution.
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION // 2
    input_features = in_channels + 1  # features concatenated with the mask
    # NOTE(review): uses layers[0] rather than layers[-1]; correct only
    # when all entries of CONV_LAYERS are equal — confirm against config.
    fc_input_size = layers[0] * resolution * resolution
    self.blocks = []
    stride = 1
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "maskiou_fcn{}".format(layer_idx)
        if layer_idx == len(layers):
            stride = 2  # downsample only on the final conv
        module = make_conv3x3(input_features, layer_features, stride=stride)
        self.add_module(layer_name, module)
        input_features = layer_features
        self.blocks.append(layer_name)
    self.maskiou_fc1 = nn.Linear(fc_input_size, 1024)
    self.maskiou_fc2 = nn.Linear(1024, 1024)
    # Caffe2-style init (kaiming uniform, a=1) for the FC layers.
    for l in [self.maskiou_fc1, self.maskiou_fc2]:
        nn.init.kaiming_uniform_(l.weight, a=1)
        nn.init.constant_(l.bias, 0)
def __init__(self, cfg, in_channels):
    """Mask head feature extractor: ROI pooling followed by a conv stack.

    Arguments:
        cfg: config; MODEL.ROI_MASK_HEAD.* controls pooling and conv layout.
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    self.pooler = Pooler(cfg.MODEL.ROI_MASK_HEAD)
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    self.blocks = []
    next_feature = in_channels
    # Register each conv under the conventional "mask_fcn{i}" name.
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        self.add_module(
            layer_name,
            make_conv3x3(next_feature,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn))
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
def __init__(self, in_channels, reduction=2, use_scale=True, use_gn=True,
             mode='embedded_gaussian'):
    """Non-local 2D block.

    Arguments:
        in_channels (int): channels of the input feature map.
        reduction (int): channel reduction factor for the embeddings.
        use_scale (bool): scale the pairwise term (stored for forward).
        use_gn (bool): GroupNorm in the output conv.
        mode (str): 'embedded_gaussian' or 'dot_product'.
    """
    super(NonLocal2D, self).__init__()
    self.in_channels = in_channels
    self.reduction = reduction
    self.use_scale = use_scale
    self.inter_channels = in_channels // reduction
    self.mode = mode
    assert mode in ['embedded_gaussian', 'dot_product']
    # Embedding projections g/theta/phi (1x1 convs, default init).
    for proj_name in ('g', 'theta', 'phi'):
        self.add_module(
            proj_name,
            make_conv1x1(self.in_channels,
                         self.inter_channels,
                         kaiming_init=False))
    # Output projection back to in_channels.
    self.conv_out = make_conv3x3(self.inter_channels,
                                 self.in_channels,
                                 use_gn=use_gn,
                                 kaiming_init=True)
def __init__(self, cfg, in_channels):
    """Keypoint head feature extractor: ROI pooler plus a stack of 3x3
    conv+ReLU layers named kp_fcn{i}, with optional regional attention.

    Arguments:
        cfg: config node (MODEL.ROI_KEYPOINT_HEAD.*).
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(KeypointRCNNFeatureExtractor, self).__init__()
    use_gn = cfg.MODEL.ROI_KEYPOINT_HEAD.USE_GN
    # The contextual-pooler alternative was gated on a hard-coded False
    # and therefore unreachable; the dead branch has been removed.
    self.pooler = make_pooler(cfg, 'ROI_KEYPOINT_HEAD')
    layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS
    resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
    next_feature = in_channels
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "kp_fcn{}".format(layer_idx)
        module = make_conv3x3(
            next_feature,
            layer_features,
            dilation=1,
            stride=1,
            use_gn=use_gn,
            use_relu=True,
            kaiming_init=True
        )
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
    if cfg.MODEL.ROI_KEYPOINT_HEAD.ATTENTION_ON:
        self.regional_attention = RegionalAttention(cfg, in_channels,
                                                    self.pooler, resolution)
    else:
        self.regional_attention = None
def __init__(self, cfg):
    """EMM prediction heads: shared conv towers plus cls (2ch),
    centerness (1ch) and box-regression (4ch) output convs."""
    super(EMMPredictor, self).__init__()
    body = cfg.MODEL.BACKBONE.CONV_BODY
    if body.startswith("DLA"):
        in_channels = cfg.MODEL.DLA.BACKBONE_OUT_CHANNELS
    elif body.startswith("R-"):
        in_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
    else:
        in_channels = 128  # fallback for other backbones

    def _tower():
        # Shared tower layout: 3x3 conv + GN + ReLU, default init.
        return make_conv3x3(in_channels=in_channels,
                            out_channels=in_channels,
                            use_gn=True,
                            use_relu=True,
                            kaiming_init=False)

    def _head(num_outputs):
        # Plain 3x3 output conv, default init.
        return make_conv3x3(in_channels=in_channels,
                            out_channels=num_outputs,
                            kaiming_init=False)

    self.cls_tower = _tower()
    self.reg_tower = _tower()
    self.cls = _head(2)
    self.center = _head(1)
    self.reg = _head(4)
def __init__(self, cfg):
    """Mask head feature extractor with optional "deconv" convs and
    configurable (layer-wise or shared) normalization.

    Arguments:
        cfg: config; reads MODEL.ROI_MASK_HEAD.* and MODEL.DECONV.*.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    use_gw = cfg.MODEL.ROI_MASK_HEAD.USE_GW
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    use_deconv = cfg.MODEL.ROI_MASK_HEAD.USE_DECONV
    block = cfg.MODEL.DECONV.BLOCK
    if use_deconv:
        # Deconv convs carry their own normalization; disable GN/GW.
        use_gn = False
        use_gw = False
    next_feature = input_size
    self.blocks = []
    if cfg.MODEL.DECONV.LAYERWISE_NORM:
        norm_type = cfg.MODEL.DECONV.MASK_NORM_TYPE
    else:
        # Shared normalization instead of per-layer norms.
        norm_type = 'none'
        # NOTE(review): indentation reconstructed from a collapsed source —
        # this LayerNorm is assumed to belong to the non-layerwise branch;
        # confirm against the original file.
        if cfg.MODEL.DECONV.MASK_NORM_TYPE == 'layernorm':
            self.mask_norm = LayerNorm(eps=cfg.MODEL.DECONV.EPS)
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        module = make_conv3x3(next_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn,
                              use_gw=use_gw,
                              use_deconv=use_deconv,
                              block=block,
                              sampling_stride=cfg.MODEL.DECONV.STRIDE,
                              sync=cfg.MODEL.DECONV.SYNC,
                              norm_type=norm_type)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
def __init__(self, cfg, in_channels):
    """Multi-scale mask decoder: one pooler per resolution, a 1x1 lateral
    conv per scale, and 3x3 convs merging adjacent scales.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*).
        in_channels (int): channels of each incoming feature level.
    """
    super(MaskDecoder, self).__init__()
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    resolutions = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  #[32, 16, 8, 4, 4]
    poolers = []
    for idx, resolution in enumerate(resolutions):
        poolers.append(
            Pooler(
                output_size=(resolution, resolution),
                scales=[scales[idx]],
                sampling_ratio=sampling_ratio,
            ))
    self.poolers = poolers
    # 1x1 lateral convs reducing every level to 128 channels.
    inner_blocks = []
    for idx in range(len(scales)):
        inner_block = make_conv1x1(in_channels,
                                   128,
                                   dilation=1,
                                   use_gn=1,
                                   use_relu=1)
        block_name = 'inner_maskdecoder_{}'.format(idx + 1)
        self.add_module(block_name, inner_block)
        inner_blocks.append(block_name)
    # NOTE: the original guarded `if idx < len(scales) - 1` inside a loop
    # over range(len(scales) - 1); that condition is always true, so the
    # two-conv Sequential in the else branch was unreachable dead code.
    # It has been removed — the registered modules are unchanged.
    conv_blocks = []
    for idx in range(len(scales) - 1):
        conv_block = make_conv3x3(128, 128, use_gn=1, use_relu=1)
        block_name = 'conv_maskdecoder_{}'.format(idx + 1)
        self.add_module(block_name, conv_block)
        conv_blocks.append(block_name)
    self.inner_blocks = inner_blocks
    self.conv_blocks = conv_blocks
    self.out_channels = 128
def __init__(self, cfg, in_channels):
    """Segmentation branch: a 1x1 projection, three 3x3 GN+ReLU convs,
    and a single-channel 1x1 prediction conv."""
    super(SegmentationBranch, self).__init__()
    self.in_channels = in_channels
    width = in_channels
    self.seg_fcn1 = make_conv1x1(width, width, use_relu=0, kaiming_init=False)
    self.seg_fcn2 = make_conv3x3(width, width, use_gn=1, use_relu=1)
    self.seg_fcn3 = make_conv3x3(width, width, use_gn=1, use_relu=1)
    self.seg_fcn4 = make_conv3x3(width, width, use_gn=1, use_relu=1)
    # Single-channel output map.
    self.predict = make_conv1x1(width, 1, kaiming_init=False)
def __init__(self, cfg, in_channels):
    """MaskIoU head feature extractor: optional 2x2 max-pool to match the
    mask resolution to the pooled feature resolution, a conv stack with a
    stride-2 final conv, and a two-layer MLP.

    Arguments:
        cfg: config node (MODEL.ROI_MASKIOU_HEAD.*, MODEL.ROI_MASK_HEAD.*).
        in_channels (int): mask-head feature channels (+1 below for the
            concatenated single-channel mask).

    Raises:
        NotImplementedError: when RESOLUTION is neither equal to nor twice
            POOLER_RESOLUTION.
    """
    super(MaskIoUFeatureExtractor, self).__init__()
    input_channels = in_channels + 1  # cat features and mask single channel
    use_gn = cfg.MODEL.ROI_MASKIOU_HEAD.USE_GN
    representation_size = cfg.MODEL.ROI_MASKIOU_HEAD.MLP_HEAD_DIM
    resolution_key = "RESOLUTION"
    pooler_resolution_key = "POOLER_RESOLUTION"
    resolution = cfg.MODEL.ROI_MASK_HEAD[resolution_key]
    input_pooler_resolution = cfg.MODEL.ROI_MASK_HEAD[
        pooler_resolution_key]
    # Identity by default; replaced by a 2x2 max-pool when the mask
    # resolution is twice the pooled feature resolution.
    self.max_pool2d = lambda x: x
    if resolution == input_pooler_resolution * 2:
        self.max_pool2d = torch.nn.MaxPool2d(kernel_size=2, stride=2)
        resolution = resolution // 2  # after max pooling 2x2
    elif resolution != input_pooler_resolution:
        raise NotImplementedError(
            "Only supports %s == %s or %s == 2x%s. Received %d vs %d instead"
            % (resolution_key, pooler_resolution_key, resolution_key,
               pooler_resolution_key, resolution, input_pooler_resolution))
    layers = cfg.MODEL.ROI_MASKIOU_HEAD.CONV_LAYERS
    # stride=1 for each layer, and stride=2 for last layer
    strides = [1 for l in layers]
    strides[-1] = 2
    next_feature = input_channels
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers):
        layer_name = "maskiou_fcn{}".format(layer_idx + 1)
        stride = strides[layer_idx]
        module = make_conv3x3(next_feature,
                              layer_features,
                              stride=stride,
                              dilation=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        self.blocks.append(layer_name)
        next_feature = layer_features
        if stride == 2:
            # Track the spatial size so the FC input size stays correct.
            resolution = resolution // 2
    self.maskiou_fc1 = make_fc(next_feature * resolution**2,
                               representation_size,
                               use_gn=False)
    self.maskiou_fc2 = make_fc(representation_size,
                               representation_size,
                               use_gn=False)
    self.out_channels = representation_size
def __init__(self, cfg, in_channels, extract_type="avg"):
    """Mask head feature extractor with an optional correlation front-end.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*).
        in_channels (int): channels of the FPN features fed to the pooler.
        extract_type (str): "avg" (default) or "corr"; "corr" adds an
            L2-norm + feature-correlation front-end whose output has
            resolution**2 channels feeding the first conv.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = in_channels
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    self.extract_type = extract_type
    next_feature = input_size
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        if self.extract_type == "corr" and layer_idx == 1:
            # Correlation output: one channel per spatial position.
            input_feature = resolution**2
        else:
            input_feature = next_feature
        module = make_conv3x3(input_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        # BUGFIX: the original assigned `input_feature = layer_features`
        # here, which is overwritten at the top of the next iteration, so
        # next_feature stayed stuck at the input size. Advance next_feature
        # so each conv consumes the previous layer's output channels.
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
    if extract_type == "corr":
        self.feature_l2_norm = FeatureL2Norm()
        self.feature_correlation = FeatureCorrelation()
def __init__(self, cfg, in_channels):
    """Mask head feature extractor whose first conv can absorb one extra
    input channel (a pooled segmentation map) when SEG_ON_ADD_CHANEL is on.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*, MODEL.SEG_ON_ADD_CHANEL).
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = head_cfg.POOLER_RESOLUTION[0]
    scales = head_cfg.POOLER_SCALES
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    self.pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    # Pooler at scale 1.0 used for the auxiliary mask channel.
    self.pooler_mask = Pooler(
        output_size=(resolution, resolution),
        scales=(1., ),
        sampling_ratio=sampling_ratio,
    )
    use_gn = head_cfg.USE_GN
    dilation = head_cfg.DILATION
    self.blocks = []
    next_feature = in_channels
    for layer_idx, layer_features in enumerate(head_cfg.CONV_LAYERS, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        # flag is truthy only for the first conv when the extra channel is
        # configured; it is added directly to the input channel count.
        flag = cfg.MODEL.SEG_ON_ADD_CHANEL and (layer_idx == 1)
        self.add_module(
            layer_name,
            make_conv3x3(next_feature + flag,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn))
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
def __init__(self, cfg, in_channels):
    """Mask feature extractor with a spatial-attention module on the
    pooled features.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*, MODEL.MASKIOU_ON).
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(MaskRCNNFPNSpatialAttentionFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    scales = head_cfg.POOLER_SCALES
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    lvl_map_func = head_cfg.LEVEL_MAP_FUNCTION
    self.maskiou = cfg.MODEL.MASKIOU_ON
    self.pooler = Pooler(output_size=(resolution, resolution),
                         scales=scales,
                         sampling_ratio=sampling_ratio,
                         lvl_map_func=lvl_map_func)
    use_gn = head_cfg.USE_GN
    dilation = head_cfg.DILATION
    # Spatial attention module applied to pooled ROI features.
    self.spatialAtt = SpatialAttention()
    self.num_pooler = len(scales)
    self.blocks = []
    next_feature = in_channels
    for layer_idx, layer_features in enumerate(head_cfg.CONV_LAYERS, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        self.add_module(
            layer_name,
            make_conv3x3(next_feature,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn))
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features
def __init__(self, cfg):
    """Mask feature extractor for rotated boxes: pyramid rotated ROIAlign
    pooler followed by the usual mask_fcn conv stack.

    Arguments:
        cfg: config node; also reads REC/RRPN margins to derive `rescale`.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    head_cfg = cfg.MODEL.ROI_MASK_HEAD
    resolution = head_cfg.POOLER_RESOLUTION
    scales = head_cfg.POOLER_SCALES
    # Read for parity with the standard extractor; PyramidRROIAlign does
    # not take a sampling ratio.
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    self.pooler = PyramidRROIAlign(
        output_size=(resolution, resolution),
        scales=scales,
    )
    use_gn = head_cfg.USE_GN
    dilation = head_cfg.DILATION
    # Ratio between the recognition-head box margin and the detector's
    # ground-truth box margin, used to rescale boxes.
    self.word_margin = cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN
    self.det_margin = cfg.MODEL.RRPN.GT_BOX_MARGIN
    self.rescale = self.word_margin / self.det_margin
    self.blocks = []
    next_feature = cfg.MODEL.BACKBONE.OUT_CHANNELS
    for layer_idx, layer_features in enumerate(head_cfg.CONV_LAYERS, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        self.add_module(
            layer_name,
            make_conv3x3(next_feature,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn))
        next_feature = layer_features
        self.blocks.append(layer_name)
def __init__(self, cfg):
    """Mask head feature extractor: ROI pooler + mask_fcn conv stack.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*, MODEL.BACKBONE.OUT_CHANNELS).
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = cfg.MODEL.BACKBONE.OUT_CHANNELS
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    next_feature = input_size
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        # make_conv3x3 performs the Caffe2-style (MSRAFill / kaiming)
        # initialization that a stale commented-out hand-rolled Conv2d
        # construction used to do; the dead comment block was removed.
        module = make_conv3x3(next_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
def __init__(self, cfg, in_channels):
    """Mask head feature extractor: ROI pooler + stack of 3x3 convs.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*).
        in_channels (int): backbone/FPN channel count fed to the pooler.
    """
    super(MaskRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  # ex. 14
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES  # ex. (0.25, 0.125, 0.0625, 0.03125)
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO  # ex. 2
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = in_channels  # backbone input channels
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN  # false
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS  # (256, 256, 256, 256)
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION  # 1
    next_feature = input_size
    self.blocks = []
    # Indices start at 1 (enumerate(..., start=1)) so the modules are
    # named mask_fcn1, mask_fcn2, ... (a previous comment claiming
    # start=0 was wrong).
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "mask_fcn{}".format(layer_idx)
        module = make_conv3x3(next_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    self.out_channels = layer_features  # 256
def __init__(self, cfg, in_channels):
    """Boundary head feature extractor: (optionally deformable) ROI pooler
    followed by a stack of 3x3 convs named boundary_fcn{i}.

    Arguments:
        cfg: config node (MODEL.ROI_BOUNDARY_HEAD.*, MODEL.ROI_MASK_HEAD.*).
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(BOUNDARYRCNNFPNFeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_BOUNDARY_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
        deformable=cfg.MODEL.ROI_BOUNDARY_HEAD.DEFORMABLE_POOLING  # deformable = True
    )
    input_size = in_channels
    self.pooler = pooler
    layers = cfg.MODEL.ROI_BOUNDARY_HEAD.CONV_LAYERS
    # NOTE(review): GN and dilation are read from ROI_MASK_HEAD, not
    # ROI_BOUNDARY_HEAD — possibly intentional (shared settings); confirm.
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    next_feature = input_size
    self.blocks = []
    for layer_idx, layer_features in enumerate(layers, 1):
        layer_name = "boundary_fcn{}".format(layer_idx)
        module = make_conv3x3(next_feature,
                              layer_features,
                              dilation=dilation,
                              stride=1,
                              use_gn=use_gn)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
def __init__(self,
             output_size,
             scales,
             sampling_ratio,
             in_channels=512,
             cat_all_levels=False):
    """
    Arguments:
        output_size (list[tuple[int]] or list[int]): output size for the pooled region
        scales (list[float]): scales for each Pooler
        sampling_ratio (int): sampling ratio for ROIAlign
        in_channels (int): per-level channel count, used only when
            cat_all_levels concatenates every level
        cat_all_levels (bool): pool every level for each ROI and
            concatenate along the channel axis
    """
    super(Pooler, self).__init__()
    self.poolers = nn.ModuleList([
        ROIAlign(output_size,
                 spatial_scale=scale,
                 sampling_ratio=sampling_ratio) for scale in scales
    ])
    self.output_size = output_size
    self.cat_all_levels = cat_all_levels
    # Levels follow from the scales: the network downsamples by a factor
    # of 2 at each level, so level = -log2(scale).
    lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item()
    lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item()
    self.map_levels = LevelMapper(lvl_min, lvl_max)
    if self.cat_all_levels:
        # Project the concatenated levels back down to in_channels.
        self.reduce_channel = make_conv3x3(in_channels * len(self.poolers),
                                           in_channels,
                                           dilation=1,
                                           stride=1,
                                           use_relu=True)
def __init__(self, cfg, in_channels):
    """Box head: single-level ROI pooler, one 3x3 conv to 256 channels,
    then the usual fc6/fc7 MLP.

    Arguments:
        cfg: config node (MODEL.ROI_BOX_HEAD.*).
        in_channels (int): channels of the single input feature map.
    """
    super(C52MLPFeatureExtractor, self).__init__()
    box_cfg = cfg.MODEL.ROI_BOX_HEAD
    resolution = box_cfg.POOLER_RESOLUTION
    scales = box_cfg.POOLER_SCALES
    use_gn = box_cfg.USE_GN
    sampling_ratio = box_cfg.POOLER_SAMPLING_RATIO
    # This extractor operates on exactly one feature level.
    assert len(scales) == 1
    pooler = Pooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    self.conv = make_conv3x3(in_channels, 256, use_gn=use_gn, use_relu=True)
    in_channels = 256
    representation_size = box_cfg.MLP_HEAD_DIM
    self.pooler = pooler
    flattened_size = in_channels * resolution**2
    self.fc6 = make_fc(flattened_size, representation_size, use_gn)
    self.fc7 = make_fc(representation_size, representation_size, use_gn)
    self.out_channels = representation_size
def __init__(self, cfg, in_channels_list, in_channels_scale, out_channels,
             one_by_one_in_channels, mode="bilinear"):
    """FPN-based semantic segmentation head: each FPN level is upsampled
    to a common target scale, then projected to per-class logits.

    Arguments:
        cfg: config node (MODEL.SEMANTIC.*, MODEL.RPN.*, MODEL.ROI_*).
        in_channels_list (list[int]): channels of each incoming level.
        in_channels_scale (list[float]): spatial scale of each level.
        out_channels (int): common channel width after upsampling.
        one_by_one_in_channels (int): input channels of the optional 1x1 conv.
        mode (str): interpolation mode for the upsampling stages.
    """
    super(FPNBasedSemanticSegmentationHead, self).__init__()
    self.mode = mode
    self.upsampling_blocks = []
    self.number_upsamples_per = []
    # NOTE(review): `priming` is computed but never used in this method.
    priming = cfg.MODEL.RPN.USE_SEMANTIC_FEATURES or cfg.MODEL.ROI_HEADS.USE_SEMANTIC_FEATURES
    # skip the possible "top" features?
    target_scale = cfg.MODEL.SEMANTIC.COMBINE_AT_SCALE
    for idx, in_channels in enumerate(in_channels_list):
        upsampler_name = "upsample_scale{0}".format(idx)
        in_channels = in_channels_list[idx]
        scale = in_channels_scale[idx]
        # Number of 2x upsamplings needed to reach the target scale.
        number_upsamples = int(np.log2(target_scale / scale))
        self.number_upsamples_per.append(number_upsamples)
        if number_upsamples == 0:
            # paper is not quite clear what happens here. my guess is the
            # usual but no upsampling.
            upsampler = make_conv3x3(in_channels,
                                     out_channels,
                                     use_gn=cfg.MODEL.SEMANTIC.USE_GN,
                                     use_relu=True)
        else:
            upsampler = SetOfUpsamplingStages(cfg, in_channels, out_channels,
                                              count=number_upsamples,
                                              mode=self.mode)
        self.add_module(upsampler_name, upsampler)
        self.upsampling_blocks.append(upsampler)
    if not cfg.MODEL.RPN.USE_SEMANTIC_FEATURES:
        # unsure if there should be a ReLU here.
        # NOTE(review): statement grouping reconstructed from a collapsed
        # source — this 1x1 conv is assumed to belong to this branch;
        # confirm against the original file.
        make_1x1_conv = conv_with_kaiming_uniform(use_gn=False, use_relu=True)
        self.conv = make_1x1_conv(one_by_one_in_channels, out_channels,
                                  kernel_size=1, stride=1)
    make_project = conv_with_kaiming_uniform(use_gn=False, use_relu=False)
    # add VOID + THING vs VOID + THINGS + STUFF
    number_classes = (1 + cfg.MODEL.SEMANTIC.NUM_CLASSES +
                      1 if cfg.MODEL.SEMANTIC.COLLAPSE_THING_ONTOLOGY else
                      cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES +
                      cfg.MODEL.SEMANTIC.NUM_CLASSES)
    self.project = make_project(out_channels, number_classes,
                                kernel_size=1, stride=1)
def __init__(self, cfg, in_channels):
    """PANet-style mask feature extractor: adaptive feature pooling, four
    parallel first convs (one per pooled level), a shared conv stack, and
    a fully-connected branch.

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*).
        in_channels (int): channels of each pooled feature level.
    """
    super(MaskRCNNPANETFeatureExtractor, self).__init__()
    self.cfg = cfg
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION  # 14
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES  # (0.25, 0.125, 0.0625, 0.03125)
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO  # 2
    pooler = AdaptivePooler(
        output_size=(resolution, resolution),
        scales=scales,
        sampling_ratio=sampling_ratio,
    )
    input_size = in_channels
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS  # (256, 256, 256, 256)
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    next_feature = input_size
    self.blocks = []
    # Four parallel first convs, one per pooled FPN level.
    # NOTE(review): these are not appended to self.blocks — presumably
    # applied per-level in forward(); confirm against the forward pass.
    self.add_module("mask_fcn1_1", make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
    self.add_module("mask_fcn1_2", make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
    self.add_module("mask_fcn1_3", make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
    self.add_module("mask_fcn1_4", make_conv3x3(next_feature, layers[0], dilation=dilation, stride=1, use_gn=use_gn))
    next_feature = layers[0]
    # Shared conv stack from the second layer onwards.
    for layer_idx, layer_features in enumerate(layers[1:], 2):
        layer_name = "mask_fcn{}".format(layer_idx)
        module = make_conv3x3(
            next_feature, layer_features,
            dilation=dilation, stride=1, use_gn=use_gn
        )  # dilated convolution is used here (translated from Chinese)
        self.add_module(layer_name, module)
        next_feature = layer_features
        self.blocks.append(layer_name)
    # TODO (translated): modules for separating foreground/background;
    # their weights must be initialized!
    conv4 = nn.Conv2d(layers[2], layers[2], 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
    nn.init.kaiming_normal_(
        conv4.weight, mode="fan_out", nonlinearity="relu"
    )
    self.mask_conv4_fc = nn.Sequential(
        conv4,
        group_norm(layers[2]),
        nn.ReLU(inplace=True))
    # Second FC-branch conv halves the channel count.
    conv5 = nn.Conv2d(layers[2], int(layers[2] / 2), 3, 1, padding=1 * dilation, dilation=dilation, bias=False)
    nn.init.kaiming_normal_(
        conv5.weight, mode="fan_out", nonlinearity="relu"
    )
    self.mask_conv5_fc = nn.Sequential(
        conv5,
        group_norm(int(layers[2] / 2)),
        nn.ReLU(inplace=True))
    # FC producing a (2 * POOLER_RESOLUTION)**2 output map.
    fc = nn.Linear(int(layers[2] / 2) * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION ** 2,
                   (2 * cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION) ** 2,
                   bias=True)
    nn.init.kaiming_normal_(
        fc.weight, mode="fan_out", nonlinearity="relu"
    )
    self.mask_fc = nn.Sequential(
        fc,
        nn.ReLU(inplace=True))
    self.out_channels = layer_features
def __init__(self, cfg, in_channels):
    """PANet-style mask extractor with adaptive feature pooling and a
    fused fully-connected branch (adp_ff).

    Arguments:
        cfg: config node (MODEL.ROI_MASK_HEAD.*).
        in_channels (int): channels of the FPN features fed to the pooler.
    """
    super(MaskRCNNFPN_adp_ff_FeatureExtractor, self).__init__()
    resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
    sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
    pooler = Pooler(output_size=(resolution, resolution),
                    scales=scales,
                    sampling_ratio=sampling_ratio,
                    panet=True)
    input_size = in_channels
    self.pooler = pooler
    use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
    layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
    dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
    next_feature = input_size
    layer_features = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[0]
    # first 2 conv layers are shared and remains the same, but the paper
    # says 3 is best
    module_list = []
    for i in range(2):
        module_list.extend([
            make_conv3x3(next_feature,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn,
                         use_relu=True)
        ])
        next_feature = layer_features
    self.conv_fcn = nn.Sequential(*module_list)
    # this is for adaptive feature pooling: one conv per FPN level.
    self.mask_conv1 = nn.ModuleList()
    # num_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
    num_levels = 4
    for i in range(num_levels):
        self.mask_conv1.append(
            make_conv3x3(next_feature,
                         layer_features,
                         dilation=dilation,
                         stride=1,
                         use_gn=use_gn,
                         use_relu=True),
        )
    self.mask_conv4 = make_conv3x3(next_feature,
                                   layer_features,
                                   dilation=dilation,
                                   stride=1,
                                   use_gn=use_gn,
                                   use_relu=True)
    # Fully-connected branch: conv, channel-halving conv, then an FC
    # producing a RESOLUTION**2 output map.
    self.mask_conv4_fc = make_conv3x3(next_feature,
                                      layer_features,
                                      dilation=dilation,
                                      stride=1,
                                      use_gn=use_gn,
                                      use_relu=True)
    self.mask_conv5_fc = make_conv3x3(next_feature,
                                      int(layer_features / 2),
                                      dilation=dilation,
                                      stride=1,
                                      use_gn=use_gn,
                                      use_relu=True)
    self.mask_fc = nn.Sequential(
        nn.Linear(int(layer_features / 2) * (resolution)**2,
                  cfg.MODEL.ROI_MASK_HEAD.RESOLUTION**2,
                  bias=True),
        nn.ReLU(inplace=True))
    # upsample layer
    self.upconv = nn.ConvTranspose2d(layer_features, layer_features, 2, 2, 0)
    self.out_channels = layer_features
    #init_weights
    # make_conv3x3 has already done the init, default kaiming = MSRAFFill in panet.
    self.apply(self._init_weights)