def __init__(self, in_channels=3, out_channels=64, noise_var=0.0, norm="BN"):
    """
    Args:
        norm (str or callable): norm after the first conv layer.
            See :func:`layers.get_norm` for supported format.
    """
    super().__init__(in_channels, out_channels, 4)
    self.in_channels = in_channels
    self.noise_var = noise_var
    self.positional_noise = DefemLayer()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
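# Shape sketch (illustrative only, not part of the module): the 7x7 conv with
# stride 2 and padding 3 halves the spatial resolution. The stride of 4 passed
# to super().__init__ suggests a further 2x downsampling (e.g. a max-pool) in
# forward; that pool is an assumption here, since forward is not shown.
#
#   x = torch.randn(1, 3, 224, 224)
#   y = stem.conv1(x)                 # (1, 64, 112, 112)
#   # after the assumed stride-2 pool: (1, 64, 56, 56), overall stride 4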
def __init__(
    self,
    _RConv,
    in_channels,
    out_channels,
    is_first,
    rot_1x1_in,
    rot_1x1_out,
    noise_var=0,
    stride=1,
    padding=1,
    dilation=1,
    norm=None,
    activation=None,
):
    super(PRConvBlock, self).__init__()
    self.conv = _RConv(
        in_channels=in_channels,
        out_channels=out_channels,
        is_first=is_first,
        rot_1x1_in=rot_1x1_in,
        stride=stride,
        padding=padding,
        dilation=dilation,
        norm=None,  # norm is deferred to self.norm, set at the end of __init__
        activation=F.relu,
    )
    self.is_first = is_first
    self.rot_1x1_out = rot_1x1_out
    self.noise_var = noise_var
    self.kernel_rot = self.conv.kernel_rot
    self.activation = activation
    nn.init.kaiming_normal_(self.conv.weight, mode="fan_out", nonlinearity="relu")

    if self.noise_var > 0:
        self.positional_noise = DefemLayer()

    if self.rot_1x1_out:
        self.conv_rot_1x1 = GConv1x1(
            rot_1x1=True,
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=1,
            kernel_rot=self.kernel_rot,
            stride=1,
            padding=0,
            dilation=1,
            # norm=get_norm(norm, out_channels)
        )
        weight_init.c2_msra_fill(self.conv_rot_1x1)

    self.norm = None
    if norm is not None:
        self.norm = get_norm(norm, out_channels)
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_type,
    spatial,
    spatial_1x1_in,
    spatial_1x1_out,
    noise_var=0,
    stride=1,
    padding=1,
    dilation=1,
    norm=None,
    activation=None,
):
    super(PSConvBlock, self).__init__()
    self.conv = _SConv(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_type=kernel_type,
        spatial=spatial,
        spatial_1x1_in=spatial_1x1_in,
        stride=stride,
        padding=padding,
        dilation=dilation,
        norm=norm,
        activation=activation,
    )
    self.spatial = spatial
    self.spatial_1x1_out = spatial_1x1_out
    self.noise_var = noise_var
    self.num_kernel = self.conv.num_kernel
    self.activation = activation
    nn.init.kaiming_normal_(self.conv.weight, mode="fan_out", nonlinearity="relu")

    if self.noise_var > 0:
        self.positional_noise = DefemLayer()

    if self.spatial_1x1_out:
        # Fuse the num_kernel spatial responses back down to out_channels.
        self.conv_spatial_1x1 = Conv2d(
            in_channels=out_channels * self.num_kernel,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            dilation=1,
        )
        self.conv_spatial_norm = get_norm(norm, out_channels)
        weight_init.c2_msra_fill(self.conv_spatial_1x1)
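# Channel-bookkeeping sketch (an assumption about _SConv's output layout,
# since its definition is not shown here): _SConv appears to emit num_kernel
# responses per output channel, so with out_channels=64 and num_kernel=4 the
# optional fusion path is
#
#   (N, 64 * 4, H, W) --conv_spatial_1x1--> (N, 64, H, W) --norm--> (N, 64, H, W)
#
# which is why conv_spatial_1x1 takes out_channels * self.num_kernel inputs.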
def __init__(self, in_channels=3, out_channels=64, norm="BN", c7x7=True,
             convf_name=None, rot_1x1_out=True, noise_var=0.0, stride_psr=1):
    """
    Args:
        norm (str or callable): a callable that takes the number of
            channels and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__()
    self.conv_7x7 = None
    self.conv_psr = None
    self.noise_var = noise_var
    self.out_channels = out_channels
    self.defem_layer = DefemLayer()

    if c7x7:
        self.conv_7x7 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv_7x7)
        in_channels = out_channels

    # Skip the PSR conv when convf_name is None or "".
    if convf_name:
        GConvF = CONV_DICT[convf_name]
        self.conv_psr = GConvF(
            in_channels,
            out_channels,
            rot_1x1_out=rot_1x1_out,
            noise_var=noise_var,
            stride=stride_psr,
            padding=1,
            dilation=1,
            norm=norm,
        )
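# Registry sketch (hypothetical keys; CONV_DICT's real contents are defined
# elsewhere in the repo): CONV_DICT maps a config string to a conv-block
# class, so the stem can switch between variants from a config file.
#
#   CONV_DICT = {
#       "prconv": PRConvBlock,   # rotation block (assumed key)
#       "psconv": PSConvBlock,   # spatial block (assumed key)
#   }
#   GConvF = CONV_DICT[convf_name]   # e.g. convf_name = "prconv"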
def __init__(self, bottom_up, in_features, out_channels, noise_var=0.0,
             norm="", top_block=None, fuse_type="sum", num_classes=None):
    """
    Args:
        bottom_up (Backbone): module representing the bottom up subnetwork.
            Must be a subclass of :class:`Backbone`. The multi-scale feature
            maps generated by the bottom up network, and listed in
            `in_features`, are used to generate FPN levels.
        in_features (list[str]): names of the input feature maps coming
            from the backbone to which FPN is attached. For example, if the
            backbone produces ["res2", "res3", "res4"], any *contiguous*
            sublist of these may be used; order must be from high to low
            resolution.
        out_channels (int): number of channels in the output feature maps.
        norm (str): the normalization to use.
        top_block (nn.Module or None): if provided, an extra operation will
            be performed on the output of the last (smallest resolution) FPN
            output, and the result will extend the result list. The
            top_block further downsamples the feature map. It must have an
            attribute "num_levels", meaning the number of extra FPN levels
            added by this block, and "in_feature", which is a string
            representing its input feature (e.g., "p5").
        fuse_type (str): types for fusing the top down features and the
            lateral ones. It can be "sum" (default), which sums up
            element-wise; or "avg", which takes the element-wise mean of
            the two.
    """
    super(FPN, self).__init__()
    assert isinstance(bottom_up, Backbone)

    # Feature map strides and channels from the bottom up network (e.g. ResNet)
    input_shapes = bottom_up.output_shape()
    in_strides = [input_shapes[f].stride for f in in_features]
    in_channels_per_feature = [input_shapes[f].channels for f in in_features]
    _assert_strides_are_log2_contiguous(in_strides)

    lateral_convs = []
    output_convfs = []
    output_convs = []

    use_bias = norm == ""
    for idx, in_channels in enumerate(in_channels_per_feature):
        lateral_norm = get_norm(norm, out_channels)
        output_norm_f = get_norm(norm, out_channels)
        output_norm = get_norm(norm, out_channels)

        lateral_conv = Conv2d(
            in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
        )
        output_convf = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm_f,
        )
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_convf)
        weight_init.c2_xavier_fill(output_conv)

        stage = int(math.log2(in_strides[idx]))
        self.add_module("fpn_lateral{}".format(stage), lateral_conv)
        self.add_module("fpn_outputf{}".format(stage), output_convf)
        self.add_module("fpn_output{}".format(stage), output_conv)

        lateral_convs.append(lateral_conv)
        output_convfs.append(output_convf)
        output_convs.append(output_conv)

    # Place convs into top-down order (from low to high resolution)
    # to make the top-down computation in forward clearer.
    self.positional_noise = DefemLayer()
    self.lateral_convs = lateral_convs[::-1]
    self.output_convfs = output_convfs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up

    # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
    self._out_feature_strides = {
        "p{}".format(int(math.log2(s))): s for s in in_strides
    }
    # top block output feature maps.
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]
    assert fuse_type in {"avg", "sum"}
    self._fuse_type = fuse_type

    self.num_classes = num_classes
    if num_classes is not None:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(out_channels, num_classes)
        # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
        # "The 1000-way fully-connected layer is initialized by
        # drawing weights from a zero-mean Gaussian with standard deviation of 0.01."
        nn.init.normal_(self.linear.weight, std=0.01)
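# FPN naming sketch (derived from the code above, with example numbers): for a
# backbone whose in_features have strides [4, 8, 16, 32], log2 gives stages
# 2..5, so the outputs are named p2..p5. A top_block with num_levels=1 (e.g. a
# stride-2 max-pool on p5) then extends this with p6 at stride 64.
#
#   in_strides = [4, 8, 16, 32]
#   # -> _out_feature_strides = {"p2": 4, "p3": 8, "p4": 16, "p5": 32, "p6": 64}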
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
    dilation=1,
):
    """
    Args:
        bottleneck_channels (int): number of output channels for the 3x3
            "bottleneck" conv layers.
        num_groups (int): number of groups for the 3x3 conv layer.
        norm (str or callable): normalization for all conv layers.
            See :func:`layers.get_norm` for supported format.
        stride_in_1x1 (bool): when stride>1, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
        dilation (int): the dilation rate of the 3x3 conv layer.
    """
    super().__init__(in_channels, out_channels, stride)
    self.defem_layer = DefemLayer()

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv.
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv.
    # Note: stride_1x1/stride_3x3 are computed but not applied below; all
    # convs in this variant use stride 1, so any spatial striding must be
    # handled outside these layers.
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=1,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)

    # Zero-initialize the last normalization in each residual branch,
    # so that at the beginning, the residual branch starts with zeros,
    # and each residual block behaves like an identity.
    # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
    # "For BN layers, the learnable scaling coefficient γ is initialized
    # to be 1, except for each residual block's last BN
    # where γ is initialized to be 0."
    # nn.init.constant_(self.conv3.norm.weight, 0)
    # TODO this somehow hurts performance when training GN models from scratch.
    # Add it as an option when we need to use this code to train a backbone.
    self.noise_var = 0.0
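# Channel-flow sketch for the bottleneck (standard ResNet arithmetic, shown
# with example numbers rather than this repo's defaults): with in_channels=256,
# bottleneck_channels=64, out_channels=256,
#
#   (N, 256, H, W) --conv1 1x1--> (N, 64, H, W) --conv2 3x3--> (N, 64, H, W)
#                  --conv3 1x1--> (N, 256, H, W) --+ shortcut--> (N, 256, H, W)
#
# so the expensive 3x3 conv runs at a quarter of the block's external width.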
def __init__(self, in_channels, out_channels, *, bottleneck_channels,
             conv_name, conv_1x1_rot, rot_1x1_out, noise_var=0.0, stride=1,
             norm="BN"):
    """
    Args:
        norm (str or callable): a callable that takes the number of
            channels and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__(in_channels, out_channels, stride)

    GConv = CONV_DICT[conv_name]
    self.conv2 = GConv(
        bottleneck_channels,
        bottleneck_channels,
        rot_1x1_in=True,
        rot_1x1_out=rot_1x1_out,
        noise_var=noise_var,
        stride=1,
        padding=1,
        norm=norm,
    )
    self.kernel_rot = self.conv2.kernel_rot
    self.noise_var = noise_var
    self.defem_layer = DefemLayer()

    if in_channels != out_channels:
        self.shortcut = GConv1x1(
            rot_1x1=conv_1x1_rot,
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            kernel_rot=self.kernel_rot,
            stride=1,
            padding=0,
            dilation=1,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.shortcut)
    else:
        self.shortcut = None

    self.conv1 = GConv1x1(
        rot_1x1=conv_1x1_rot,
        in_channels=in_channels,
        out_channels=bottleneck_channels,
        kernel_size=1,
        kernel_rot=self.kernel_rot,
        stride=1,
        padding=0,
        dilation=1,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = GConv1x1(
        rot_1x1=conv_1x1_rot,
        in_channels=bottleneck_channels,
        out_channels=out_channels,
        kernel_size=1,
        kernel_rot=self.kernel_rot,
        stride=1,
        padding=0,
        dilation=1,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
    weight_init.c2_msra_fill(self.conv3)