def __init__(self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm="", activation="ReLU"):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature.
        conv_dims (list[int]): the output dimensions of the conv layers
        fc_dims (list[int]): the output dimensions of the fc layers
        conv_norm (str or callable): normalization for the conv layers.
            See :func:`detectron2.layers.get_norm` for supported types.
        activation (str): activation inserted after each conv and fc layer.
    """
    super().__init__()
    assert len(conv_dims) + len(fc_dims) > 0
    # Track the running output shape as (C, H, W); collapses to an int once
    # the fully-connected layers start.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for idx, out_channels in enumerate(conv_dims, start=1):
        conv = Conv2d(
            self._output_size[0],
            out_channels,
            kernel_size=3,
            padding=1,
            bias=not conv_norm,  # no bias when a norm layer follows
            norm=get_norm(conv_norm, out_channels),
            activation=get_activation(activation),
        )
        self.add_module("conv{}".format(idx), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (out_channels, self._output_size[1], self._output_size[2])

    self.fcs = []
    for idx, fc_dim in enumerate(fc_dims, start=1):
        if idx == 1:
            # Flatten spatial dims before the first fully-connected layer.
            self.add_module("flatten", nn.Flatten())
        fc = nn.Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(idx), fc)
        self.add_module("fc_activation{}".format(idx), get_activation(activation))
        self.fcs.append(fc)
        self._output_size = fc_dim

    # Caffe2-style initialization: MSRA fill for convs, Xavier fill for fcs.
    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(self, in_channels, out_channels, *, stride=1, norm="BN", activation="ReLU", attention=""):
    """
    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        stride (int): Stride for the first conv.
        norm (str or callable): normalization for all conv layers.
            See :func:`layers.get_norm` for supported format.
        activation (str): activation to use.
        attention (str): "CBAM" attaches a CBAM attention module; any other
            value disables attention.
    """
    super().__init__(in_channels, out_channels, stride)
    self.activation = get_activation(activation)
    self.attention = CBAM(out_channels) if attention == "CBAM" else None

    # A 1x1 projection shortcut is needed only when channels change.
    if in_channels == out_channels:
        self.shortcut = None
    else:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
        activation=self.activation,
    )
    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for conv in (self.conv1, self.conv2, self.shortcut):
        if conv is not None:  # shortcut may be absent
            weight_init.c2_msra_fill(conv)
def __init__(self, in_channels, out_channels, in_feature="res5", activation="ReLU"):
    """
    Adds two extra output levels (P6, P7) on top of ``in_feature``.

    Args:
        in_channels (int): channels of the input feature map.
        out_channels (int): channels produced by the P6 and P7 convs.
        in_feature (str): name of the input feature this module consumes.
        activation (str): activation module stored for use in forward.
    """
    super().__init__()
    self.num_levels = 2
    self.in_feature = in_feature
    # 3x3 stride-2 convs: each level halves the spatial resolution.
    self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1)
    self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1)
    for conv in (self.p6, self.p7):
        weight_init.c2_xavier_fill(conv)
    self.activation = get_activation(activation)
def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", activation="ReLU", **kwargs):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature
        num_classes (int): the number of foreground classes (i.e. background is not
            included). 1 if using class agnostic prediction.
        conv_dims (list[int]): a list of N>0 integers representing the output dimensions
            of N-1 conv layers and the last upsample layer.
        conv_norm (str or callable): normalization for the conv layers.
            See :func:`detectron2.layers.get_norm` for supported types.
        activation (str): activation for the conv layers and after the deconv.
    """
    super().__init__(**kwargs)
    assert len(conv_dims) >= 1, "conv_dims have to be non-empty!"

    self.conv_norm_relus = []
    # BUGFIX: `activation` was read here without ever being declared as a
    # parameter, raising NameError on construction. It is now an explicit
    # keyword argument with the same default used by the sibling heads.
    activation = get_activation(activation)
    cur_channels = input_shape.channels
    for k, conv_dim in enumerate(conv_dims[:-1]):
        conv = Conv2d(
            cur_channels,
            conv_dim,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=not conv_norm,  # no bias when a norm layer follows
            norm=get_norm(conv_norm, conv_dim),
            activation=activation,
        )
        self.add_module("mask_fcn{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        cur_channels = conv_dim

    # The last entry of conv_dims is the channel count of the 2x upsample.
    self.deconv = ConvTranspose2d(
        cur_channels, conv_dims[-1], kernel_size=2, stride=2, padding=0
    )
    self.add_module("deconv_activation", activation)
    cur_channels = conv_dims[-1]

    self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1, stride=1, padding=0)

    for layer in self.conv_norm_relus + [self.deconv]:
        weight_init.c2_msra_fill(layer)
    # use normal distribution initialization for mask prediction layer
    nn.init.normal_(self.predictor.weight, std=0.001)
    if self.predictor.bias is not None:
        nn.init.constant_(self.predictor.bias, 0)
def __init__(self, input_shape, *, num_keypoints, conv_dims, activation="ReLU", **kwargs):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature
        conv_dims: an iterable of output channel counts for each conv in the head
            e.g. (512, 512, 512) for three convs outputting 512 channels.
        activation (str): activation inserted after every conv layer.
    """
    super().__init__(num_keypoints=num_keypoints, **kwargs)

    # default up_scale to 2.0 (this can be made an option)
    up_scale = 2.0
    in_channels = input_shape.channels

    # BUGFIX: `activation` was read here without ever being declared as a
    # parameter, raising NameError on construction. It is now an explicit
    # keyword argument with the same default used by the sibling heads.
    activation = get_activation(activation)
    for idx, layer_channels in enumerate(conv_dims, 1):
        module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
        self.add_module("conv_fcn{}".format(idx), module)
        self.add_module("conv_fcn_activation{}".format(idx), activation)
        in_channels = layer_channels

    deconv_kernel = 4
    self.score_lowres = ConvTranspose2d(
        in_channels, num_keypoints, deconv_kernel, stride=2, padding=deconv_kernel // 2 - 1
    )
    self.up_scale = up_scale

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            # Caffe2 implementation uses MSRAFill, which in fact
            # corresponds to kaiming_normal_ in PyTorch
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
def __init__(self, in_channels=3, out_channels=64, norm="BN", activation="ReLU"):
    """
    Args:
        norm (str or callable): norm after the first conv layer.
            See :func:`layers.get_norm` for supported format.
        activation (str): activation applied after the conv.
    """
    # The trailing 4 is passed to the base class alongside the channel
    # counts — presumably the stem's total stride; confirm against the
    # base-class signature.
    super().__init__(in_channels, out_channels, 4)
    self.in_channels = in_channels
    self.activation = get_activation(activation)
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,  # no bias: a norm layer follows
        norm=get_norm(norm, out_channels),
        activation=self.activation,
    )
    weight_init.c2_msra_fill(self.conv1)
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation="ReLU",
    attention="",
    stride_in_1x1=False,
    dilation=1,
):
    """
    Args:
        bottleneck_channels (int): number of output channels for the 3x3
            "bottleneck" conv layers.
        num_groups (int): number of groups for the 3x3 conv layer.
        norm (str or callable): normalization for all conv layers.
            See :func:`layers.get_norm` for supported format.
        activation (str): activation to use.
        attention (str): "CBAM" attaches a CBAM attention module; any other
            value disables attention.
        stride_in_1x1 (bool): when stride>1, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
        dilation (int): the dilation rate of the 3x3 conv layer.
    """
    super().__init__(in_channels, out_channels, stride)
    self.activation = get_activation(activation)
    self.attention = CBAM(out_channels) if attention == "CBAM" else None

    # A 1x1 projection shortcut is needed only when channels change.
    if in_channels == out_channels:
        self.shortcut = None
    else:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

    # The original MSRA ResNet models put the stride in the first 1x1 conv;
    # the subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
    # put it in the 3x3 conv instead.
    if stride_in_1x1:
        stride_1x1, stride_3x3 = stride, 1
    else:
        stride_1x1, stride_3x3 = 1, stride

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
        activation=self.activation,
    )
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
        activation=self.activation,
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for conv in (self.conv1, self.conv2, self.conv3, self.shortcut):
        if conv is not None:  # shortcut may be absent
            weight_init.c2_msra_fill(conv)
def __init__(
    self,
    *,
    input_shape: List[ShapeSpec],
    num_classes,
    num_anchors,
    conv_dims: List[int],
    norm="",
    activation="ReLU",
    prior_prob=0.01,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (List[ShapeSpec]): input shape
        num_classes (int): number of classes. Used to label background proposals.
        num_anchors (int): number of generated anchors
        conv_dims (List[int]): dimensions for each convolution layer
        norm (str or callable): Normalization for conv layers except for the two
            output layers. See :func:`detectron2.layers.get_norm` for supported types.
        activation (str): activation between the conv layers.
        prior_prob (float): Prior weight for computing bias
    """
    super().__init__()
    activation = get_activation(activation)

    if norm in ("BN", "SyncBN"):
        logger.warning(
            "Shared norm does not work well for BN, SyncBN, expect poor results"
        )

    # Build the classification and box-regression towers in lockstep; each
    # stage is conv (+ optional norm) + activation.
    cls_layers = []
    bbox_layers = []
    channel_pairs = zip([input_shape[0].channels] + conv_dims, conv_dims)
    for c_in, c_out in channel_pairs:
        for tower in (cls_layers, bbox_layers):
            tower.append(
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1))
            if norm:
                tower.append(get_norm(norm, c_out))
            tower.append(activation)
    self.cls_subnet = nn.Sequential(*cls_layers)
    self.bbox_subnet = nn.Sequential(*bbox_layers)

    self.cls_score = nn.Conv2d(
        conv_dims[-1], num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
    self.bbox_pred = nn.Conv2d(
        conv_dims[-1], num_anchors * 4, kernel_size=3, stride=1, padding=1)

    # Initialization: zero-mean gaussian weights and zero biases for every conv.
    for component in (self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred):
        for layer in component.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Use prior in model initialization to improve stability
    bias_value = -(math.log((1 - prior_prob) / prior_prob))
    torch.nn.init.constant_(self.cls_score.bias, bias_value)
def __init__(self, cfg, input_shape: List[ShapeSpec]):
    """
    FCOS head: per-level conv towers followed by classification,
    box-regression and centerness predictors.

    Arguments:
        cfg: config providing the MODEL.FCOS options read below.
        input_shape (List[ShapeSpec]): one shape per FPN level; every level
            must have the same channel count.
    """
    super().__init__()
    # TODO: Implement the sigmoid version first.
    self.num_classes = cfg.MODEL.FCOS.NUM_CLASSES
    self.fpn_strides = cfg.MODEL.FCOS.FPN_STRIDES

    # (num convs, use deformable) per tower; dict insertion order fixes the
    # module registration order: cls, bbox, share.
    head_configs = {
        "cls": (cfg.MODEL.FCOS.NUM_CLS_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
        "bbox": (cfg.MODEL.FCOS.NUM_BOX_CONVS, cfg.MODEL.FCOS.USE_DEFORMABLE),
        "share": (cfg.MODEL.FCOS.NUM_SHARE_CONVS, False),
    }
    norm = None if cfg.MODEL.FCOS.NORM == "none" else cfg.MODEL.FCOS.NORM
    activation = get_activation(cfg.MODEL.FCOS.ACTIVATION)
    self.num_levels = len(input_shape)

    channel_set = {s.channels for s in input_shape}
    assert len(channel_set) == 1, "Each level must have the same channel!"
    in_channels = channel_set.pop()
    self.in_channels_to_top_module = in_channels

    for head_name, (num_convs, use_deformable) in head_configs.items():
        tower = []
        for _ in range(num_convs):
            # TODO: deformable convs (DFConv2d) are currently disabled;
            # `use_deformable` is kept so the config interface is unchanged.
            tower.append(nn.Conv2d(
                in_channels, in_channels,
                kernel_size=3, stride=1,
                padding=1, bias=True
            ))
            if norm == "GN":
                tower.append(nn.GroupNorm(32, in_channels))
            # TODO: NaiveGN is not supported here.
            elif norm == "BN":
                # One BatchNorm per FPN level, switched by ModuleListDial.
                tower.append(ModuleListDial([
                    nn.BatchNorm2d(in_channels) for _ in range(self.num_levels)
                ]))
            elif norm == "SyncBN":
                tower.append(ModuleListDial([
                    NaiveSyncBatchNorm(in_channels) for _ in range(self.num_levels)
                ]))
            tower.append(activation)
        self.add_module('{}_tower'.format(head_name), nn.Sequential(*tower))

    self.cls_logits = nn.Conv2d(
        in_channels, self.num_classes,
        kernel_size=3, stride=1,
        padding=1
    )
    self.bbox_pred = nn.Conv2d(
        in_channels, 4, kernel_size=3,
        stride=1, padding=1
    )
    self.ctrness = nn.Conv2d(
        in_channels, 1, kernel_size=3,
        stride=1, padding=1
    )

    if cfg.MODEL.FCOS.USE_SCALE:
        # One learnable scale per pyramid level.
        self.scales = nn.ModuleList(
            [Scale(init_value=1.0) for _ in range(self.num_levels)]
        )
    else:
        self.scales = None

    for component in [
        self.cls_tower, self.bbox_tower,
        self.share_tower, self.cls_logits,
        self.bbox_pred, self.ctrness
    ]:
        for layer in component.modules():
            if isinstance(layer, nn.Conv2d):
                torch.nn.init.normal_(layer.weight, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # initialize the bias for focal loss
    prior_prob = cfg.MODEL.FCOS.PRIOR_PROB
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_logits.bias, bias_value)