Example #1
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 noise_var=0.0,
                 norm="BN"):
        """
        Args:
            norm (str or callable): norm after the first conv layer.
                See :func:`layers.get_norm` for supported format.
        """
        super().__init__(in_channels, out_channels, 4)
        self.in_channels = in_channels
        self.noise_var = noise_var

        # DefemLayer is a project-specific layer; the noise_var plumbing in
        # these blocks suggests it injects positional noise in forward().
        self.positional_noise = DefemLayer()

        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv1)
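A minimal usage sketch for this stem. The class name is not visible in the snippet, so `Stem` below is a hypothetical placeholder; the `super().__init__(in_channels, out_channels, 4)` call matches detectron2's block-base convention of registering an overall stride, here 4.

import torch

# Hypothetical usage; "Stem" stands in for the unnamed class above.
stem = Stem(in_channels=3, out_channels=64, noise_var=0.1, norm="BN")
x = torch.randn(1, 3, 224, 224)
# conv1 alone is stride 2; the registered stride of 4 implies one more
# stride-2 step (e.g. max pooling) inside the forward() not shown here.
out = stem(x)  # expected (1, 64, 56, 56) if the stride-4 contract holds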
Example #2
    def __init__(
        self,
        _RConv,
        in_channels,
        out_channels,
        is_first,
        rot_1x1_in,
        rot_1x1_out,
        noise_var=0,
        stride=1,
        padding=1,
        dilation=1,
        norm=None,
        activation=None,
    ):
        super(PRConvBlock, self).__init__()

        self.conv = _RConv(
            in_channels=in_channels,
            out_channels=out_channels,
            is_first=is_first,
            rot_1x1_in=rot_1x1_in,
            stride=stride,
            padding=padding,
            dilation=dilation,
            norm=None,  # norm is handled by self.norm below, not inside _RConv
            activation=F.relu)

        self.is_first = is_first
        self.rot_1x1_out = rot_1x1_out
        self.noise_var = noise_var
        self.kernel_rot = self.conv.kernel_rot
        self.activation = activation

        nn.init.kaiming_normal_(self.conv.weight,
                                mode="fan_out",
                                nonlinearity="relu")

        if self.noise_var > 0:
            self.positional_noise = DefemLayer()

        if self.rot_1x1_out:
            self.conv_rot_1x1 = GConv1x1(
                rot_1x1=True,
                in_channels=out_channels,
                out_channels=out_channels,
                kernel_size=1,
                kernel_rot=self.kernel_rot,
                stride=1,
                padding=0,
                dilation=1,
                # norm=get_norm(norm, out_channels)
            )
            weight_init.c2_msra_fill(self.conv_rot_1x1)

        self.norm = None
        if norm is not None:
            self.norm = get_norm(norm, out_channels)
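A hedged construction sketch for PRConvBlock. The `_RConv` argument is whatever rotation-equivariant conv class the project passes in; `MyRConv` below is a hypothetical stand-in, and `F` is `torch.nn.functional` as in the snippet.

import torch.nn.functional as F

block = PRConvBlock(
    _RConv=MyRConv,      # hypothetical rotation-equivariant conv class
    in_channels=64,
    out_channels=64,
    is_first=True,       # presumably marks the lifting layer onto the rotation group
    rot_1x1_in=False,
    rot_1x1_out=True,    # enables the GConv1x1 mixing step defined above
    noise_var=0.1,       # > 0 attaches the DefemLayer positional noise
    norm="BN",
    activation=F.relu,
)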
Example #3
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_type,
                 spatial,
                 spatial_1x1_in,
                 spatial_1x1_out,
                 noise_var=0,
                 stride=1,
                 padding=1,
                 dilation=1,
                 norm=None,
                 activation=None):
        super(PSConvBlock, self).__init__()

        self.conv = _SConv(in_channels=in_channels,
                           out_channels=out_channels,
                           kernel_type=kernel_type,
                           spatial=spatial,
                           spatial_1x1_in=spatial_1x1_in,
                           stride=stride,
                           padding=padding,
                           dilation=dilation,
                           norm=norm,
                           activation=activation)

        self.spatial = spatial
        self.spatial_1x1_out = spatial_1x1_out
        self.noise_var = noise_var
        self.num_kernel = self.conv.num_kernel
        self.activation = activation

        nn.init.kaiming_normal_(self.conv.weight,
                                mode="fan_out",
                                nonlinearity="relu")

        if self.noise_var > 0:
            self.positional_noise = DefemLayer()

        if self.spatial_1x1_out:
            self.conv_spatial_1x1 = Conv2d(
                in_channels=out_channels * self.num_kernel,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                dilation=1,
            )
            self.conv_spatial_norm = get_norm(norm, out_channels)
            weight_init.c2_msra_fill(self.conv_spatial_1x1)
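The one non-obvious step above is the channel bookkeeping of the `spatial_1x1_out` branch: `_SConv` emits `num_kernel` stacked responses along the channel axis, and the 1x1 conv fuses them back down to `out_channels`. A self-contained sketch of just that fusion, using plain `torch.nn.Conv2d` in place of the project's `Conv2d` wrapper and hypothetical sizes:

import torch
from torch import nn

out_channels, num_kernel = 64, 4
fuse = nn.Conv2d(out_channels * num_kernel, out_channels, kernel_size=1)
x = torch.randn(2, out_channels * num_kernel, 32, 32)  # stacked kernel responses
print(fuse(x).shape)  # torch.Size([2, 64, 32, 32])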
Example #4
    def __init__(self,
                 in_channels=3,
                 out_channels=64,
                 norm="BN",
                 c7x7=True,
                 convf_name=None,
                 rot_1x1_out=True,
                 noise_var=0.0,
                 stride_psr=1):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and returns an `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
        """
        super().__init__()

        self.conv_7x7 = None
        self.conv_psr = None
        self.noise_var = noise_var

        self.out_channels = out_channels
        self.defem_layer = DefemLayer()

        if c7x7:
            self.conv_7x7 = Conv2d(
                in_channels,
                out_channels,
                kernel_size=7,
                stride=2,
                padding=3,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
            weight_init.c2_msra_fill(self.conv_7x7)
            in_channels = out_channels

        if convf_name:  # guards both None (the default) and ""
            GConvF = CONV_DICT[convf_name]
            self.conv_psr = GConvF(in_channels,
                                   out_channels,
                                   rot_1x1_out=rot_1x1_out,
                                   noise_var=noise_var,
                                   stride=stride_psr,
                                   padding=1,
                                   dilation=1,
                                   norm=norm)
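CONV_DICT is a plain name-to-class registry: the stem looks up the configured block class by `convf_name` and instantiates it with the arguments shown above. A sketch of the pattern; the key and class below are hypothetical, since the real mapping is not shown in these snippets:

# Hypothetical registry; the project defines the real keys and classes.
CONV_DICT = {"psr": SomeGroupConvBlock}

GConvF = CONV_DICT["psr"]   # the same lookup the stem performs
# ...then GConvF(in_channels, out_channels, rot_1x1_out=..., ...) as above.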
Example #5
    def __init__(self,
                 bottom_up,
                 in_features,
                 out_channels,
                 noise_var=0.0,
                 norm="",
                 top_block=None,
                 fuse_type="sum",
                 num_classes=None):
        """
        Args:
            bottom_up (Backbone): module representing the bottom up subnetwork.
                Must be a subclass of :class:`Backbone`. The multi-scale feature
                maps generated by the bottom up network, and listed in `in_features`,
                are used to generate FPN levels.
            in_features (list[str]): names of the input feature maps coming
                from the backbone to which FPN is attached. For example, if the
                backbone produces ["res2", "res3", "res4"], any *contiguous* sublist
                of these may be used; order must be from high to low resolution.
            out_channels (int): number of channels in the output feature maps.
            norm (str): the normalization to use.
            top_block (nn.Module or None): if provided, an extra operation will
                be performed on the output of the last (smallest resolution)
                FPN output, and the result will extend the result list. The top_block
                further downsamples the feature map. It must have an attribute
                "num_levels", meaning the number of extra FPN levels added by
                this block, and "in_feature", which is a string representing
                its input feature (e.g., p5).
            fuse_type (str): how to fuse the top-down features with the lateral
                ones. It can be "sum" (default), which adds them element-wise, or
                "avg", which takes their element-wise mean.
        """
        super(FPN, self).__init__()
        assert isinstance(bottom_up, Backbone)

        # Feature map strides and channels from the bottom up network (e.g. ResNet)
        input_shapes = bottom_up.output_shape()
        in_strides = [input_shapes[f].stride for f in in_features]
        in_channels = [input_shapes[f].channels for f in in_features]

        _assert_strides_are_log2_contiguous(in_strides)
        lateral_convs = []
        output_convfs = []
        output_convs = []

        use_bias = norm == ""
        for idx, lateral_in_channels in enumerate(in_channels):
            lateral_norm = get_norm(norm, out_channels)
            output_norm_f = get_norm(norm, out_channels)
            output_norm = get_norm(norm, out_channels)

            lateral_conv = Conv2d(lateral_in_channels,
                                  out_channels,
                                  kernel_size=1,
                                  bias=use_bias,
                                  norm=lateral_norm)
            output_convf = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm_f,
            )
            output_conv = Conv2d(
                out_channels,
                out_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                bias=use_bias,
                norm=output_norm,
            )
            weight_init.c2_xavier_fill(lateral_conv)
            weight_init.c2_xavier_fill(output_convf)
            weight_init.c2_xavier_fill(output_conv)
            stage = int(math.log2(in_strides[idx]))
            self.add_module("fpn_lateral{}".format(stage), lateral_conv)
            self.add_module("fpn_outputf{}".format(stage), output_convf)
            self.add_module("fpn_output{}".format(stage), output_conv)

            lateral_convs.append(lateral_conv)
            output_convfs.append(output_convf)
            output_convs.append(output_conv)
        # Place convs into top-down order (from low to high resolution)
        # to make the top-down computation in forward clearer.
        self.positional_noise = DefemLayer()
        self.lateral_convs = lateral_convs[::-1]
        self.output_convfs = output_convfs[::-1]
        self.output_convs = output_convs[::-1]
        self.top_block = top_block
        self.in_features = in_features
        self.bottom_up = bottom_up
        # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
        self._out_feature_strides = {
            "p{}".format(int(math.log2(s))): s
            for s in in_strides
        }
        # top block output feature maps.
        if self.top_block is not None:
            for s in range(stage, stage + self.top_block.num_levels):
                self._out_feature_strides["p{}".format(s + 1)] = 2**(s + 1)

        self._out_features = list(self._out_feature_strides.keys())
        self._out_feature_channels = {
            k: out_channels
            for k in self._out_features
        }
        self._size_divisibility = in_strides[-1]
        assert fuse_type in {"avg", "sum"}
        self._fuse_type = fuse_type

        self.num_classes = num_classes
        if num_classes is not None:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.linear = nn.Linear(out_channels, num_classes)

            # Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
            # "The 1000-way fully-connected layer is initialized by
            # drawing weights from a zero-mean Gaussian with standard deviation of 0.01."
            nn.init.normal_(self.linear.weight, std=0.01)
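A hedged sketch of wiring this FPN variant to a ResNet bottom-up, mirroring detectron2's `build_resnet_fpn_backbone`. `build_resnet_backbone` and `LastLevelMaxPool` are standard detectron2 components; the `cfg` and `input_shape` objects are assumed to come from a detectron2 config, and `noise_var` is the addition specific to this variant.

from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone.resnet import build_resnet_backbone

bottom_up = build_resnet_backbone(cfg, input_shape)  # any Backbone subclass
fpn = FPN(
    bottom_up=bottom_up,
    in_features=["res2", "res3", "res4", "res5"],  # high to low resolution
    out_channels=256,
    noise_var=0.1,
    norm="",                       # "" enables conv biases (use_bias above)
    top_block=LastLevelMaxPool(),  # adds the extra "p6" level
    fuse_type="sum",
)
# fpn(x) would return {"p2": ..., ..., "p6": ...}, assuming a forward()
# matching detectron2's FPN.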
Example #6
    def __init__(
        self,
        in_channels,
        out_channels,
        *,
        bottleneck_channels,
        stride=1,
        num_groups=1,
        norm="BN",
        stride_in_1x1=False,
        dilation=1,
    ):
        """
        Args:
            bottleneck_channels (int): number of output channels for the 3x3
                "bottleneck" conv layers.
            num_groups (int): number of groups for the 3x3 conv layer.
            norm (str or callable): normalization for all conv layers.
                See :func:`layers.get_norm` for supported format.
            stride_in_1x1 (bool): when stride>1, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
            dilation (int): the dilation rate of the 3x3 conv layer.
        """
        super().__init__(in_channels, out_channels, stride)

        self.defem_layer = DefemLayer()
        if in_channels != out_channels:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=1,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = None

        # The original MSRA ResNet models have stride in the first 1x1 conv;
        # the subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
        # have stride in the 3x3 conv.
        # NOTE: unused below; in this variant every conv is built with stride=1.
        stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

        self.conv1 = Conv2d(
            in_channels,
            bottleneck_channels,
            kernel_size=1,
            stride=1,
            bias=False,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv2 = Conv2d(
            bottleneck_channels,
            bottleneck_channels,
            kernel_size=3,
            stride=1,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, bottleneck_channels),
        )

        self.conv3 = Conv2d(
            bottleneck_channels,
            out_channels,
            kernel_size=1,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)

        # Zero-initialize the last normalization in each residual branch,
        # so that at the beginning, the residual branch starts with zeros,
        # and each residual block behaves like an identity.
        # See Sec 5.1 in "Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour":
        # "For BN layers, the learnable scaling coefficient γ is initialized
        # to be 1, except for each residual block's last BN
        # where γ is initialized to be 0."

        # nn.init.constant_(self.conv3.norm.weight, 0)
        # TODO this somehow hurts performance when training GN models from scratch.
        # Add it as an option when we need to use this code to train a backbone.
        self.noise_var = 0.0
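One difference from detectron2's stock `BottleneckBlock` is worth flagging: `stride_1x1`/`stride_3x3` are computed but every conv here is built with `stride=1`, so the stride registered with the block base must be realized inside the forward() that the snippet omits. A construction sketch under that assumption:

block = BottleneckBlock(
    in_channels=256,
    out_channels=512,
    bottleneck_channels=128,
    stride=2,            # registered on the block; the convs stay stride=1
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
)
# Unlike stock detectron2, downsampling is not done by conv1/conv2 here; it
# is presumably applied in forward() (not shown), e.g. around the DefemLayer.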
Example #7
    def __init__(self,
                 in_channels,
                 out_channels,
                 *,
                 bottleneck_channels,
                 conv_name,
                 conv_1x1_rot,
                 rot_1x1_out,
                 noise_var=0.0,
                 stride=1,
                 norm="BN"):
        """
        Args:
            norm (str or callable): a callable that takes the number of
                channels and return a `nn.Module`, or a pre-defined string
                (one of {"FrozenBN", "BN", "GN"}).
            stride_in_1x1 (bool): when stride==2, whether to put stride in the
                first 1x1 convolution or the bottleneck 3x3 convolution.
        """
        super().__init__(in_channels, out_channels, stride)

        GConv = CONV_DICT[conv_name]
        self.conv2 = GConv(bottleneck_channels,
                           bottleneck_channels,
                           rot_1x1_in=True,
                           rot_1x1_out=rot_1x1_out,
                           noise_var=noise_var,
                           stride=1,
                           padding=1,
                           norm=norm)

        self.kernel_rot = self.conv2.kernel_rot
        self.noise_var = noise_var
        self.defem_layer = DefemLayer()

        if in_channels != out_channels:
            self.shortcut = GConv1x1(rot_1x1=conv_1x1_rot,
                                     in_channels=in_channels,
                                     out_channels=out_channels,
                                     kernel_size=1,
                                     kernel_rot=self.kernel_rot,
                                     stride=1,
                                     padding=0,
                                     dilation=1,
                                     norm=get_norm(norm, out_channels))
            weight_init.c2_msra_fill(self.shortcut)
        else:
            self.shortcut = None

        self.conv1 = GConv1x1(rot_1x1=conv_1x1_rot,
                              in_channels=in_channels,
                              out_channels=bottleneck_channels,
                              kernel_size=1,
                              kernel_rot=self.kernel_rot,
                              stride=1,
                              padding=0,
                              dilation=1,
                              norm=get_norm(norm, bottleneck_channels))

        self.conv3 = GConv1x1(rot_1x1=conv_1x1_rot,
                              in_channels=bottleneck_channels,
                              out_channels=out_channels,
                              kernel_size=1,
                              kernel_rot=self.kernel_rot,
                              stride=1,
                              padding=0,
                              dilation=1,
                              norm=get_norm(norm, out_channels))

        weight_init.c2_msra_fill(self.conv1)
        weight_init.c2_msra_fill(self.conv3)
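The design choice to note: all three 1x1 projections go through `GConv1x1` with the `kernel_rot` taken from `conv2`, which keeps the rotation axis introduced by the group conv aligned across both the residual branch and the shortcut. A hedged construction sketch; the class name is not shown in the snippet, and `"prconv"` is a hypothetical CONV_DICT key:

block = RotBottleneckBlock(   # hypothetical name for the unnamed class above
    in_channels=256,
    out_channels=512,
    bottleneck_channels=128,
    conv_name="prconv",       # must be a key of the project's CONV_DICT
    conv_1x1_rot=True,
    rot_1x1_out=True,
    noise_var=0.1,
    stride=1,
    norm="BN",
)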