Example #1
    def __init__(self, dim_in, spatial_scale):
        """
        Arguments:
            num_classes (int): number of output classes
            input_size (int): number of channels of the input once it's flattened
            representation_size (int): size of the intermediate representation
        """
        super(roi_convx_head, self).__init__()
        self.dim_in = dim_in[-1]

        method = cfg.MRCNN.ROI_XFORM_METHOD
        resolution = cfg.MRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_lite = cfg.MRCNN.CONVX_HEAD.USE_LITE
        use_bn = cfg.MRCNN.CONVX_HEAD.USE_BN
        use_gn = cfg.MRCNN.CONVX_HEAD.USE_GN
        conv_dim = cfg.MRCNN.CONVX_HEAD.CONV_DIM
        num_stacked_convs = cfg.MRCNN.CONVX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.MRCNN.CONVX_HEAD.DILATION

        self.blocks = []
        for layer_idx in range(num_stacked_convs):
            layer_name = "mask_fcn{}".format(layer_idx + 1)
            module = make_conv(self.dim_in,
                               conv_dim,
                               kernel=3,
                               stride=1,
                               dilation=dilation,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.add_module(layer_name, module)
            self.dim_in = conv_dim
            self.blocks.append(layer_name)
        self.dim_out = self.dim_in

        if cfg.MRCNN.CONVX_HEAD.USE_WS:
            self = convert_conv2convws_model(self)

        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity="relu")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
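
    # NOTE: only the constructor appears above; the forward below is a hedged
    # sketch of the usual stacked-conv mask head, assuming `make_conv` does
    # not fuse a ReLU here (use_relu is not passed), so the activation is
    # applied explicitly per block.
    def forward(self, x, proposals):
        x = self.pooler(x, proposals)  # RoI features pooled over FPN levels
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x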
Example #2
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = sum(dim_in)
        self.spatial_scale = spatial_scale

        hrfpn_dim = cfg.FPN.HRFPN.DIM  # 256
        use_lite = cfg.FPN.HRFPN.USE_LITE
        use_bn = cfg.FPN.HRFPN.USE_BN
        use_gn = cfg.FPN.HRFPN.USE_GN
        if cfg.FPN.HRFPN.POOLING_TYPE == 'AVG':
            self.pooling = F.avg_pool2d
        else:
            self.pooling = F.max_pool2d
        self.num_extra_pooling = cfg.FPN.HRFPN.NUM_EXTRA_POOLING  # 1
        self.num_output = len(dim_in) + self.num_extra_pooling  # 5

        self.reduction_conv = make_conv(self.dim_in,
                                        hrfpn_dim,
                                        kernel=1,
                                        use_bn=use_bn,
                                        use_gn=use_gn)
        self.dim_in = hrfpn_dim

        self.fpn_conv = nn.ModuleList()
        for i in range(self.num_output):
            self.fpn_conv.append(
                make_conv(self.dim_in,
                          hrfpn_dim,
                          kernel=3,
                          use_dwconv=use_lite,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          suffix_1x1=use_lite))
            self.dim_in = hrfpn_dim

        if self.num_extra_pooling:
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)
        self.dim_out = [self.dim_in for _ in range(self.num_output)]
        self._init_weights()
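
    # NOTE: sketch of the matching forward (not shown above): upsample every
    # branch to the highest resolution, concatenate, reduce to hrfpn_dim,
    # then pool the fused map back down to build `num_output` pyramid levels.
    def forward(self, x):
        outs = [x[0]]
        outs.extend(F.interpolate(x[i], scale_factor=2 ** i, mode='bilinear',
                                  align_corners=False)
                    for i in range(1, len(x)))
        out = self.reduction_conv(torch.cat(outs, dim=1))
        outs = [out]
        for i in range(1, self.num_output):
            outs.append(self.pooling(out, kernel_size=2 ** i, stride=2 ** i))
        return [conv(o) for conv, o in zip(self.fpn_conv, outs)]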
Example #3
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = dim_in[-1]

        method = cfg.FAST_RCNN.ROI_XFORM_METHOD
        resolution = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_lite = cfg.FAST_RCNN.CONVFC_HEAD.USE_LITE
        use_bn = cfg.FAST_RCNN.CONVFC_HEAD.USE_BN
        use_gn = cfg.FAST_RCNN.CONVFC_HEAD.USE_GN
        conv_dim = cfg.FAST_RCNN.CONVFC_HEAD.CONV_DIM
        num_stacked_convs = cfg.FAST_RCNN.CONVFC_HEAD.NUM_STACKED_CONVS
        dilation = cfg.FAST_RCNN.CONVFC_HEAD.DILATION
        
        xconvs = []
        for ix in range(num_stacked_convs):
            xconvs.append(
                make_conv(self.dim_in, conv_dim, kernel=3, stride=1, dilation=dilation, use_dwconv=use_lite,
                          use_bn=use_bn, use_gn=use_gn, suffix_1x1=use_lite, use_relu=True)
            )
            self.dim_in = conv_dim
        self.add_module("xconvs", nn.Sequential(*xconvs))
        
        input_size = self.dim_in * resolution[0] * resolution[1]
        mlp_dim = cfg.FAST_RCNN.CONVFC_HEAD.MLP_DIM
        self.fc6 = make_fc(input_size, mlp_dim, use_bn=False, use_gn=False)
        self.dim_out = mlp_dim
        
        if cfg.FAST_RCNN.CONVFC_HEAD.USE_WS:
            self = convert_conv2convws_model(self)
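
    # NOTE: hypothetical forward for this conv+fc box head: RoI pooling, the
    # stacked convs, then a flatten into fc6.
    def forward(self, x, proposals):
        x = self.pooler(x, proposals)
        x = self.xconvs(x)
        x = x.view(x.size(0), -1)  # flatten to (N, C * H * W)
        x = F.relu(self.fc6(x))
        return x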
Example #4
    def __init__(self, dim_in):
        super(convx_head, self).__init__()

        self.dim_in = dim_in + cfg.PRCNN.NUM_PARSING
        num_stacked_convs = cfg.PRCNN.PARSINGIOU.NUM_STACKED_CONVS  # default = 2
        conv_dim = cfg.PRCNN.PARSINGIOU.CONV_DIM
        mlp_dim = cfg.PRCNN.PARSINGIOU.MLP_DIM
        use_bn = cfg.PRCNN.PARSINGIOU.USE_BN
        use_gn = cfg.PRCNN.PARSINGIOU.USE_GN

        convx = []
        for i in range(num_stacked_convs):
            layer_stride = 1 if i < num_stacked_convs - 1 else 2
            convx.append(
                make_conv(
                    self.dim_in, conv_dim, kernel=3, stride=layer_stride, use_bn=use_bn, use_gn=use_gn, use_relu=True
                )
            )
            self.dim_in = conv_dim
        self.convx = nn.Sequential(*convx)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.parsingiou_fc1 = make_fc(self.dim_in, mlp_dim, use_bn=False, use_gn=False)
        self.parsingiou_fc2 = make_fc(mlp_dim, mlp_dim, use_bn=False, use_gn=False)
        self.dim_out = mlp_dim

        # Initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
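
    # NOTE: sketch of a plausible forward, assuming the extra NUM_PARSING
    # input channels come from concatenating the predicted parsing maps with
    # the RoI features (the constructor alone does not confirm this).
    def forward(self, x, parsing_logits):
        x = torch.cat((x, parsing_logits), dim=1)
        x = self.convx(x)
        x = self.avgpool(x).view(x.size(0), -1)
        x = F.relu(self.parsingiou_fc1(x))
        x = F.relu(self.parsingiou_fc2(x))
        return x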
Example #5
    def __init__(self, dim_in, spatial_scale):
        super(roi_convx_head, self).__init__()
        self.dim_in = dim_in[-1]

        method = cfg.UVRCNN.ROI_XFORM_METHOD
        resolution = cfg.UVRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.UVRCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_lite = cfg.UVRCNN.CONVX_HEAD.USE_LITE
        use_bn = cfg.UVRCNN.CONVX_HEAD.USE_BN
        use_gn = cfg.UVRCNN.CONVX_HEAD.USE_GN
        conv_dim = cfg.UVRCNN.CONVX_HEAD.CONV_DIM
        num_stacked_convs = cfg.UVRCNN.CONVX_HEAD.NUM_STACKED_CONVS
        dilation = cfg.UVRCNN.CONVX_HEAD.DILATION

        self.blocks = []
        for layer_idx in range(num_stacked_convs):
            layer_name = "UV_fcn{}".format(layer_idx + 1)
            module = make_conv(self.dim_in,
                               conv_dim,
                               kernel=3,
                               stride=1,
                               dilation=dilation,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.add_module(layer_name, module)
            self.dim_in = conv_dim
            self.blocks.append(layer_name)
        self.dim_out = self.dim_in
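
    # NOTE: this head mirrors Example #1; the same sketched forward applies,
    # iterating the "UV_fcn*" blocks with explicit ReLUs.
    def forward(self, x, proposals):
        x = self.pooler(x, proposals)
        for layer_name in self.blocks:
            x = F.relu(getattr(self, layer_name)(x))
        return x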
Example #6
    def __init__(self, dim_in):
        super(Hier_output, self).__init__()

        num_classes = cfg.HRCNN.NUM_CLASSES
        num_convs = cfg.HRCNN.OUTPUT_NUM_CONVS
        conv_dim = cfg.HRCNN.OUTPUT_CONV_DIM
        use_lite = cfg.HRCNN.OUTPUT_USE_LITE
        use_bn = cfg.HRCNN.OUTPUT_USE_BN
        use_gn = cfg.HRCNN.OUTPUT_USE_GN
        use_dcn = cfg.HRCNN.OUTPUT_USE_DCN
        prior_prob = cfg.HRCNN.PRIOR_PROB

        self.norm_reg_targets = cfg.HRCNN.NORM_REG_TARGETS
        self.centerness_on_reg = cfg.HRCNN.CENTERNESS_ON_REG

        cls_tower = []
        bbox_tower = []
        for i in range(num_convs):
            conv_type = 'deform' if use_dcn and i == num_convs - 1 else 'normal'
            cls_tower.append(
                make_conv(dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))
            bbox_tower.append(
                make_conv(dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          dilation=1,
                          use_dwconv=use_lite,
                          conv_type=conv_type,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          kaiming_init=False,
                          suffix_1x1=use_lite))
            dim_in = conv_dim

        self.add_module('cls_tower', nn.Sequential(*cls_tower))
        self.add_module('bbox_tower', nn.Sequential(*bbox_tower))
        self.cls_deconv = ConvTranspose2d(conv_dim, conv_dim, 2, 2, 0)
        self.bbox_deconv = ConvTranspose2d(conv_dim, conv_dim, 2, 2, 0)
        self.cls_logits = Conv2d(conv_dim,
                                 num_classes,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)
        self.bbox_pred = Conv2d(conv_dim,
                                4,
                                kernel_size=3,
                                stride=1,
                                padding=1)
        self.centerness = Conv2d(conv_dim,
                                 1,
                                 kernel_size=3,
                                 stride=1,
                                 padding=1)

        # Initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                torch.nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    torch.nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # initialize the bias for focal loss
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        torch.nn.init.constant_(self.cls_logits.bias, bias_value)

        self.scales = nn.ModuleList([Scale(init_value=1.0) for _ in range(1)])
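
    # NOTE: sketch of an FCOS-style forward matching the modules above; the
    # exact deconv/activation ordering is an assumption.
    def forward(self, x):
        cls_tower = self.cls_deconv(self.cls_tower(x))
        box_tower = self.bbox_deconv(self.bbox_tower(x))
        logits = self.cls_logits(cls_tower)
        centerness = self.centerness(box_tower if self.centerness_on_reg
                                     else cls_tower)
        bbox_reg = self.scales[0](self.bbox_pred(box_tower))
        if self.norm_reg_targets:
            bbox_reg = F.relu(bbox_reg)
        else:
            bbox_reg = torch.exp(bbox_reg)
        return logits, bbox_reg, centerness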
Example #7
    def __init__(self, dim_in, spatial_scale):
        super(roi_gce_head, self).__init__()
        self.dim_in = dim_in[-1]

        method = cfg.HRCNN.ROI_XFORM_METHOD
        resolution = cfg.HRCNN.ROI_XFORM_RESOLUTION
        sampling_ratio = cfg.HRCNN.ROI_XFORM_SAMPLING_RATIO
        pooler = Pooler(
            method=method,
            output_size=resolution,
            scales=spatial_scale,
            sampling_ratio=sampling_ratio,
        )
        self.pooler = pooler

        use_nl = cfg.HRCNN.GCE_HEAD.USE_NL
        use_bn = cfg.HRCNN.GCE_HEAD.USE_BN
        use_gn = cfg.HRCNN.GCE_HEAD.USE_GN
        conv_dim = cfg.HRCNN.GCE_HEAD.CONV_DIM
        asppv3_dim = cfg.HRCNN.GCE_HEAD.ASPPV3_DIM
        num_convs_before_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_BEFORE_ASPPV3
        asppv3_dilation = cfg.HRCNN.GCE_HEAD.ASPPV3_DILATION
        num_convs_after_asppv3 = cfg.HRCNN.GCE_HEAD.NUM_CONVS_AFTER_ASPPV3

        # convx before asppv3 module
        before_asppv3_list = []
        for _ in range(num_convs_before_asppv3):
            before_asppv3_list.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
            self.dim_in = conv_dim
        self.conv_before_asppv3 = nn.Sequential(
            *before_asppv3_list) if len(before_asppv3_list) else None

        # asppv3 module
        self.asppv3 = []
        self.asppv3.append(
            make_conv(self.dim_in,
                      asppv3_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True))
        for dilation in asppv3_dilation:
            self.asppv3.append(
                make_conv(self.dim_in,
                          asppv3_dim,
                          kernel=3,
                          dilation=dilation,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
        self.asppv3 = nn.ModuleList(self.asppv3)
        self.im_pool = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            make_conv(self.dim_in,
                      asppv3_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True))
        self.dim_in = (len(asppv3_dilation) + 2) * asppv3_dim

        feat_list = []
        feat_list.append(
            make_conv(self.dim_in,
                      conv_dim,
                      kernel=1,
                      use_bn=use_bn,
                      use_gn=use_gn,
                      use_relu=True))
        if use_nl:
            feat_list.append(
                NonLocal2d(conv_dim,
                           int(conv_dim * cfg.HRCNN.GCE_HEAD.NL_RATIO),
                           conv_dim,
                           use_gn=True))
        self.feat = nn.Sequential(*feat_list)
        self.dim_in = conv_dim

        # convx after asppv3 module
        assert num_convs_after_asppv3 >= 1
        after_asppv3_list = []
        for _ in range(num_convs_after_asppv3):
            after_asppv3_list.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
            self.dim_in = conv_dim
        self.conv_after_asppv3 = nn.Sequential(
            *after_asppv3_list) if len(after_asppv3_list) else None
        self.dim_out = self.dim_in
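
    # NOTE: hedged sketch of the GCE forward: optional pre-convs, the
    # parallel ASPP-v3 branches plus the global image-pool branch (upsampled
    # back to the RoI size), concatenation, then the feature and post convs.
    def forward(self, x, proposals):
        x = self.pooler(x, proposals)
        if self.conv_before_asppv3 is not None:
            x = self.conv_before_asppv3(x)
        roi_size = x.shape[-2:]
        branches = [F.interpolate(self.im_pool(x), size=roi_size,
                                  mode='bilinear', align_corners=False)]
        branches.extend(stage(x) for stage in self.asppv3)
        x = self.feat(torch.cat(branches, dim=1))
        if self.conv_after_asppv3 is not None:
            x = self.conv_after_asppv3(x)
        return x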
Example #8
    def __init__(self, dim_in, spatial_scale):
        super().__init__()
        self.dim_in = dim_in[-1]  # 2048
        self.spatial_scale = spatial_scale

        fpn_dim = cfg.FPN.DIM  # 256
        use_lite = cfg.FPN.USE_LITE
        use_bn = cfg.FPN.USE_BN
        use_gn = cfg.FPN.USE_GN
        min_level, max_level = get_min_max_levels()  # 2, 6
        self.num_backbone_stages = len(dim_in) - (
            min_level - cfg.FPN.LOWEST_BACKBONE_LVL
        )  # 4 (cfg.FPN.LOWEST_BACKBONE_LVL=2)

        # P5 in
        self.p5_in = make_conv(self.dim_in,
                               fpn_dim,
                               kernel=1,
                               use_bn=use_bn,
                               use_gn=use_gn)

        # P5 out
        self.p5_out = make_conv(fpn_dim,
                                fpn_dim,
                                kernel=3,
                                use_dwconv=use_lite,
                                use_bn=use_bn,
                                use_gn=use_gn,
                                suffix_1x1=use_lite)

        # fpn module
        self.fpn_in = []
        self.fpn_out = []
        for i in range(self.num_backbone_stages - 1):  # skip the top layer
            px_in = make_conv(dim_in[-i - 2],
                              fpn_dim,
                              kernel=1,
                              use_bn=use_bn,
                              use_gn=use_gn)  # from P4 to P2
            px_out = make_conv(fpn_dim,
                               fpn_dim,
                               kernel=3,
                               use_dwconv=use_lite,
                               use_bn=use_bn,
                               use_gn=use_gn,
                               suffix_1x1=use_lite)
            self.fpn_in.append(px_in)
            self.fpn_out.append(px_out)
        self.fpn_in = nn.ModuleList(self.fpn_in)  # [P4, P3, P2]
        self.fpn_out = nn.ModuleList(self.fpn_out)
        self.dim_in = fpn_dim

        # P6. Original FPN P6 level implementation from CVPR'17 FPN paper.
        if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == cfg.FPN.HIGHEST_BACKBONE_LVL + 1:
            self.maxpool_p6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0)
            self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        # Coarser FPN levels introduced for RetinaNet
        if cfg.FPN.EXTRA_CONV_LEVELS and max_level > cfg.FPN.HIGHEST_BACKBONE_LVL:
            self.extra_pyramid_modules = nn.ModuleList()
            if cfg.FPN.USE_C5:
                self.dim_in = dim_in[-1]
            for i in range(cfg.FPN.HIGHEST_BACKBONE_LVL + 1, max_level + 1):
                self.extra_pyramid_modules.append(
                    make_conv(self.dim_in,
                              fpn_dim,
                              kernel=3,
                              stride=2,
                              use_dwconv=use_lite,
                              use_bn=use_bn,
                              use_gn=use_gn,
                              suffix_1x1=use_lite))
                self.dim_in = fpn_dim
                self.spatial_scale.append(self.spatial_scale[-1] * 0.5)

        # self.spatial_scale.reverse()  # [1/64, 1/32, 1/16, 1/8, 1/4]
        # self.dim_out = [self.dim_in]
        num_roi_levels = cfg.FPN.ROI_MAX_LEVEL - cfg.FPN.ROI_MIN_LEVEL + 1
        # Retain only the spatial scales that will be used for RoI heads. `self.spatial_scale`
        # may include extra scales that are used for RPN proposals, but not for RoI heads.
        self.spatial_scale = self.spatial_scale[:num_roi_levels]
        self.dim_out = [self.dim_in for _ in range(num_roi_levels)]

        if cfg.FPN.USE_WS:
            self = convert_conv2convws_model(self)

        self._init_weights()
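
    # NOTE: sketch of the standard top-down FPN forward for the modules
    # above; `x` lists the backbone stages from highest to lowest resolution.
    def forward(self, x):
        p = self.p5_in(x[-1])
        outs = [self.p5_out(p)]
        for i in range(self.num_backbone_stages - 1):
            lateral = self.fpn_in[i](x[-i - 2])
            p = lateral + F.interpolate(p, scale_factor=2, mode='nearest')
            outs.insert(0, self.fpn_out[i](p))  # prepend: finer levels first
        if hasattr(self, 'maxpool_p6'):
            outs.append(self.maxpool_p6(outs[-1]))
        if hasattr(self, 'extra_pyramid_modules'):
            p = x[-1] if cfg.FPN.USE_C5 else outs[-1]
            for i, module in enumerate(self.extra_pyramid_modules):
                p = module(p if i == 0 else F.relu(p))
                outs.append(p)
        return outs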
Example #9
    def __init__(self, dim_in, spatial_scale):
        super(fused_head, self).__init__()

        self.dim_in = dim_in[-1]
        self.fusion_level = cfg.SEMSEG.SEMSEG_HEAD.FUSION_LEVEL
        self.num_convs = cfg.SEMSEG.SEMSEG_HEAD.NUM_CONVS

        num_ins = cfg.SEMSEG.SEMSEG_HEAD.NUM_IN_STAGE
        conv_dim = cfg.SEMSEG.SEMSEG_HEAD.CONV_DIM
        use_bn = cfg.SEMSEG.SEMSEG_HEAD.USE_BN
        use_gn = cfg.SEMSEG.SEMSEG_HEAD.USE_GN

        lateral_convs = []
        for layer_idx in range(num_ins):
            lateral_convs.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=1,
                          stride=1,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True,
                          inplace=False))
        self.add_module('lateral_convs', nn.Sequential(*lateral_convs))
        self.dim_in = conv_dim

        convs = []
        for layer_idx in range(self.num_convs):
            convs.append(
                make_conv(self.dim_in,
                          conv_dim,
                          kernel=3,
                          stride=1,
                          use_bn=use_bn,
                          use_gn=use_gn,
                          use_relu=True))
            self.dim_in = conv_dim
        self.add_module('convs', nn.Sequential(*convs))

        self.conv_embedding = make_conv(self.dim_in,
                                        dim_in[-1],
                                        kernel=3,
                                        stride=1,
                                        use_bn=use_bn,
                                        use_gn=use_gn,
                                        use_relu=True)

        self.dim_out = self.dim_in

        # Initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode="fan_out",
                                        nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.kaiming_uniform_(m.weight, a=1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
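
    # NOTE: sketch of a plausible forward: project each input stage with its
    # lateral conv, resize everything to the fusion level, sum, then run the
    # conv stack; the embedding branch keeps dim_in[-1] channels for fusing
    # back into the main features. The exact fusion scheme is an assumption.
    def forward(self, x):
        feats = self.lateral_convs[self.fusion_level](x[self.fusion_level])
        fused_size = feats.shape[-2:]
        for i, feat in enumerate(x):
            if i != self.fusion_level:
                feat = F.interpolate(self.lateral_convs[i](feat),
                                     size=fused_size, mode='bilinear',
                                     align_corners=True)
                feats = feats + feat
        feats = self.convs(feats)
        return feats, self.conv_embedding(feats)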