def __init__(self, cfg, input_shape: ShapeSpec):
    """
    Build the mask head: ``num_conv`` 3x3 conv+norm+relu layers, one 2x
    transposed-conv upsampling layer, and a 1x1 per-class mask predictor.

    The following attributes are parsed from config:
        num_conv: the number of conv layers
        conv_dim: the dimension of the conv layers
        norm: normalization for the conv layers
    """
    super(MaskRCNNConvUpsampleHead, self).__init__()

    # fmt: off
    num_classes       = cfg.MODEL.ROI_HEADS.NUM_CLASSES
    conv_dims         = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
    self.norm         = cfg.MODEL.ROI_MASK_HEAD.NORM
    num_conv          = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
    input_channels    = input_shape.channels
    cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
    # fmt: on

    # Stack of conv+norm+relu blocks; `cur_channels` carries the running
    # input width (the first conv sees the backbone features, the rest
    # see `conv_dims`).
    self.conv_norm_relus = []
    cur_channels = input_channels
    for i in range(num_conv):
        conv = Conv2d(
            cur_channels,
            conv_dims,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=not self.norm,  # norm layers supply their own affine bias
            norm=get_norm(self.norm, conv_dims),
            activation=F.relu,
        )
        self.add_module("mask_fcn{}".format(i + 1), conv)
        self.conv_norm_relus.append(conv)
        cur_channels = conv_dims

    # 2x spatial upsampling; input width is `conv_dims` when convs exist,
    # otherwise the raw backbone channel count.
    self.deconv = ConvTranspose2d(
        cur_channels,
        conv_dims,
        kernel_size=2,
        stride=2,
        padding=0,
    )

    # One output channel per class, or a single shared channel when masks
    # are class-agnostic.
    num_mask_classes = 1 if cls_agnostic_mask else num_classes
    self.predictor = Conv2d(conv_dims, num_mask_classes, kernel_size=1, stride=1, padding=0)

    for module in self.conv_norm_relus + [self.deconv]:
        weight_init.c2_msra_fill(module)
    # use normal distribution initialization for mask prediction layer
    nn.init.normal_(self.predictor.weight, std=0.001)
    if self.predictor.bias is not None:
        nn.init.constant_(self.predictor.bias, 0)
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    Build the keypoint head: a stack of 3x3 convs followed by one
    transposed conv producing low-resolution keypoint score maps.

    The following attributes are parsed from config:
        conv_dims: an iterable of output channel counts for each conv in the head
                   e.g. (512, 512, 512) for three convs outputting 512 channels.
        num_keypoints: number of keypoint heatmaps to predicts, determines the number
                       of channels in the final output.
    """
    super(KRCNNConvDeconvUpsampleHead, self).__init__()

    # fmt: off
    # default up_scale to 2 (this can eventually be moved to config)
    up_scale      = 2
    conv_dims     = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS
    num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
    in_channels   = input_shape.channels
    # fmt: on

    # Register each conv as "conv_fcn{i}" (1-based) and chain the channel
    # counts: each layer's output feeds the next layer's input.
    self.blocks = []
    for i, out_channels in enumerate(conv_dims, 1):
        conv = Conv2d(in_channels, out_channels, 3, stride=1, padding=1)
        self.add_module("conv_fcn{}".format(i), conv)
        self.blocks.append(conv)
        in_channels = out_channels

    # Transposed conv doubles the spatial resolution (stride 2, kernel 4,
    # padding 1) and maps to one heatmap channel per keypoint.
    deconv_kernel = 4
    self.score_lowres = ConvTranspose2d(
        in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
    )
    self.up_scale = up_scale

    # Caffe2 implementation uses MSRAFill, which in fact corresponds to
    # kaiming_normal_ in PyTorch; biases start at zero.
    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    Build a semantic segmentation head: one 1x1 scoring conv per input
    feature plus one transposed conv per feature for progressive
    upsampling.
    """
    super().__init__()

    self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES
    feature_strides = {k: v.stride for k, v in input_shape.items()}
    feature_channels = {k: v.channels for k, v in input_shape.items()}
    self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
    num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES
    self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT

    # First entry is the absolute stride of the first feature (upsamples
    # all the way to input resolution); the rest are the ratios between
    # consecutive strides, taken from coarsest to finest.
    strides_coarse_to_fine = list(feature_strides.values())[::-1]
    upsampling_strides = [strides_coarse_to_fine[-1]]
    for coarse, fine in zip(strides_coarse_to_fine, strides_coarse_to_fine[1:]):
        upsampling_strides.append(coarse // fine)
    assert len(upsampling_strides) == len(self.in_features)

    score_convs = []
    upsampling_convs = []
    for in_feature, stride in zip(self.in_features, upsampling_strides):
        channels = feature_channels[in_feature]
        score_convs.append(Conv2d(channels, num_classes, kernel_size=1))
        # NOTE(review): padding=1 gives exact x`stride` upsampling only for
        # stride 2 (kernel 2*s with padding s//2 would be exact in general)
        # — confirm output sizes are handled downstream.
        upsampling_convs.append(
            ConvTranspose2d(
                num_classes,
                num_classes,
                kernel_size=stride * 2,
                stride=stride,
                padding=1,
                bias=False,
            )
        )
    self.score_convs = nn.ModuleList(score_convs)
    self.upsampling_convs = nn.ModuleList(upsampling_convs)
    self._initialize_weights()