Example #1
    def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
        print("NUM BLOCKS:", num_blocks, "STRIDE:", stride)
        if self._use_norm:
            BatchNorm2d = change_default_args(
                eps=1e-3, momentum=0.01)(ME.MinkowskiBatchNorm)
            Conv2d = change_default_args(bias=False, dimension=2)(ME.MinkowskiConvolution)
            SubMConv2d = change_default_args(bias=False, dimension=2)(ME.MinkowskiConvolution)
            ConvTranspose2d = change_default_args(bias=False, dimension=2)(
                ME.MinkowskiConvolutionTranspose)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True, dimension=2)(ME.MinkowskiConvolution)
            SubMConv2d = change_default_args(bias=True, dimension=2)(ME.MinkowskiConvolution)
            ConvTranspose2d = change_default_args(bias=True, dimension=2)(
                ME.MinkowskiConvolutionTranspose)
        ReLU = ME.MinkowskiReLU()

        block = Sequential(
            # PrintLayer(0),
            Conv2d(inplanes, planes, 2, stride=stride),
            BatchNorm2d(planes),
            ReLU,
            # PrintLayer(1),
        )
        for j in range(num_blocks):
            block.add(SubMConv2d(planes, planes, 3))
            block.add(BatchNorm2d(planes))
            block.add(ReLU)
            # block.add(PrintLayer(2 + j))

        return block, planes
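All of these snippets lean on the `change_default_args` helper from the SECOND/torchplus codebase. As a rough sketch of what it is assumed to do (rebind default keyword arguments of a wrapped layer class; not the verbatim implementation):

import functools

def change_default_args(**new_defaults):
    # Decorator factory: wrap a layer class so the given keyword arguments
    # become its defaults; kwargs passed at the call site still win.
    def decorate(layer_cls):
        @functools.wraps(layer_cls, updated=[])
        def wrapper(*args, **kwargs):
            for key, value in new_defaults.items():
                kwargs.setdefault(key, value)
            return layer_cls(*args, **kwargs)
        return wrapper
    return decorate

# Conv2d = change_default_args(bias=False)(nn.Conv2d)
# Conv2d(16, 32, 3) then behaves like nn.Conv2d(16, 32, 3, bias=False)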
Example #2
    def _make_layer(self, inplanes, planes, num_blocks, stride=1):
        if self._use_norm:
            if self._use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=self._num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        block = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(inplanes, planes, 3, stride=stride),
            BatchNorm2d(planes),
            nn.ReLU(),
        )
        for j in range(num_blocks):
            block.add(Conv2d(planes, planes, 3, padding=1))
            block.add(BatchNorm2d(planes))
            block.add(nn.ReLU())

        return block, planes
Example #3
    def _make_layer(self, inplanes, planes, num_blocks, idx, stride=1):
        if self._use_norm:
            if self._use_groupnorm:
                SparseBatchNorm2d = change_default_args(
                    num_groups=self._num_groups, eps=1e-3)(GroupNorm)
                DenseBatchNorm2d = change_default_args(
                    num_groups=self._num_groups, eps=1e-3)(GroupNorm)
            else:
                SparseBatchNorm2d = change_default_args(eps=1e-3,
                                                        momentum=0.01)(
                                                            nn.BatchNorm1d)
                DenseBatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            SparseConv2d = change_default_args(bias=False)(spconv.SparseConv2d)
            DenseConv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                spconv.SparseConvTranspose2d)
        else:
            SparseBatchNorm2d = Empty
            DenseBatchNorm2d = Empty
            SparseConv2d = change_default_args(bias=True)(spconv.SparseConv2d)
            DenseConv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                spconv.SparseConvTranspose2d)
        print("STRIDE:", stride)

        if idx <= LAST_SPARSE_IDX:
            block = spconv.SparseSequential(
                SparseZeroPad2d(1),
                SparseConv2d(inplanes, planes, 3, stride=stride),
                SparseBatchNorm2d(planes),
                nn.ReLU(),
            )
            for j in range(num_blocks):
                block.add(SparseConv2d(planes, planes, 3, padding=1))
                block.add(SparseBatchNorm2d(planes))
                block.add(nn.ReLU())
        else:
            block = Sequential(
                nn.ZeroPad2d(1),
                DenseConv2d(inplanes, planes, 3, stride=stride),
                DenseBatchNorm2d(planes),
                nn.ReLU(),
            )
            for j in range(num_blocks):
                block.add(DenseConv2d(planes, planes, 3, padding=1))
                block.add(DenseBatchNorm2d(planes))
                block.add(nn.ReLU())

        return block, planes
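Example #3 picks spconv or dense blocks per stage via `idx`. A hypothetical caller (names assumed) inside the model's `__init__` would thread the returned `planes` back in as the next `inplanes`:

# Hypothetical usage of the hybrid builder above, assuming LAST_SPARSE_IDX = 0,
# so stage 0 is built with spconv and the later stages run densely.
blocks = []
inplanes = 64
for idx, (planes, num_blocks, stride) in enumerate(
        [(64, 3, 2), (128, 5, 2), (256, 5, 2)]):
    block, inplanes = self._make_layer(inplanes, planes, num_blocks, idx,
                                       stride=stride)
    blocks.append(block)
self.blocks = nn.ModuleList(blocks)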
Example #4
class PSA(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='psa'):
        """
        :param use_norm:
        :param num_class:
        :param layer_nums:
        :param layer_strides:
        :param num_filters:
        :param upsample_strides:
        :param num_upsample_filters:
        :param num_input_filters:
        :param num_anchor_per_loc:
        :param encode_background_as_zeros:
        :param use_direction_classifier:
        :param use_groupnorm:
        :param num_groups:
        :param use_bev:
        :param box_code_size:
        :param name:
        """
        super(PSA, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc  ## 2
        self._use_direction_classifier = use_direction_classifier  # True
        self._use_bev = use_bev  # False
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:  # True
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64

        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

        ###################  refine
        self.bottle_conv = nn.Conv2d(sum(num_upsample_filters),
                                     sum(num_upsample_filters) // 3, 1)

        self.block1_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=64
        self.block1_dec4x = nn.MaxPool2d(kernel_size=4)  ### C=64

        self.block2_dec2x = nn.MaxPool2d(kernel_size=2)  ### C=128
        self.block2_inc2x = ConvTranspose2d(
            num_filters[1],
            num_filters[0] // 2,
            upsample_strides[1],
            stride=upsample_strides[1])  ### C=32

        self.block3_inc2x = ConvTranspose2d(
            num_filters[2],
            num_filters[1] // 2,
            upsample_strides[1],
            stride=upsample_strides[1])  #### C=64
        self.block3_inc4x = ConvTranspose2d(
            num_filters[2],
            num_filters[0] // 2,
            upsample_strides[2],
            stride=upsample_strides[2])  #### C=32

        self.fusion_block1 = nn.Conv2d(
            num_filters[0] + num_filters[0] // 2 + num_filters[0] // 2,
            num_filters[0], 1)
        self.fusion_block2 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[1] // 2,
            num_filters[1], 1)
        self.fusion_block3 = nn.Conv2d(
            num_filters[0] + num_filters[1] + num_filters[2], num_filters[2],
            1)

        self.refine_up1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.refine_up2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.refine_up3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )

        #######
        C_Bottle = cfg.PSA.C_Bottle
        C = cfg.PSA.C_Reudce

        self.RF1 = Sequential(  # 3*3
            Conv2d(C_Bottle * 2, C, kernel_size=1, stride=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C,
                   C_Bottle * 2,
                   kernel_size=3,
                   stride=1,
                   padding=1,
                   dilation=1),
            BatchNorm2d(C_Bottle * 2),
            nn.ReLU(inplace=True),
        )

        self.RF2 = Sequential(  # 5*5
            Conv2d(C_Bottle, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle, kernel_size=3, stride=1, padding=1,
                   dilation=1),
            BatchNorm2d(C_Bottle),
            nn.ReLU(inplace=True),
        )

        self.RF3 = Sequential(  # 7*7
            Conv2d(C_Bottle // 2, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C),
            nn.ReLU(inplace=True),
            Conv2d(C, C_Bottle // 2, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(C_Bottle // 2),
            nn.ReLU(inplace=True),
        )

        self.concat_conv1 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)  ## kernel_size=3
        self.concat_conv2 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)
        self.concat_conv3 = nn.Conv2d(num_filters[1],
                                      num_filters[1],
                                      kernel_size=3,
                                      padding=1)

        self.refine_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.refine_loc = nn.Conv2d(sum(num_upsample_filters),
                                    num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.refine_dir = nn.Conv2d(sum(num_upsample_filters),
                                        num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        x1 = self.block1(x)
        up1 = self.deconv1(x1)

        x2 = self.block2(x1)
        up2 = self.deconv2(x2)
        x3 = self.block3(x2)
        up3 = self.deconv3(x3)
        coarse_feat = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(coarse_feat)
        cls_preds = self.conv_cls(coarse_feat)

        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(coarse_feat)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds

        ###############Refine:
        bottle_conv = self.bottle_conv(coarse_feat)

        x1_dec2x = self.block1_dec2x(x1)
        x1_dec4x = self.block1_dec4x(x1)

        x2_dec2x = self.block2_dec2x(x2)
        x2_inc2x = self.block2_inc2x(x2)

        x3_inc2x = self.block3_inc2x(x3)
        x3_inc4x = self.block3_inc4x(x3)

        concat_block1 = torch.cat([x1, x2_inc2x, x3_inc4x], dim=1)
        fusion_block1 = self.fusion_block1(concat_block1)

        concat_block2 = torch.cat([x1_dec2x, x2, x3_inc2x], dim=1)
        fusion_block2 = self.fusion_block2(concat_block2)

        concat_block3 = torch.cat([x1_dec4x, x2_dec2x, x3], dim=1)
        fusion_block3 = self.fusion_block3(concat_block3)

        refine_up1 = self.RF3(fusion_block1)
        refine_up1 = self.refine_up1(refine_up1)
        refine_up2 = self.RF2(fusion_block2)
        refine_up2 = self.refine_up2(refine_up2)
        refine_up3 = self.RF1(fusion_block3)
        refine_up3 = self.refine_up3(refine_up3)

        branch1_sum_wise = refine_up1 + bottle_conv
        branch2_sum_wise = refine_up2 + bottle_conv
        branch3_sum_wise = refine_up3 + bottle_conv

        concat_conv1 = self.concat_conv1(branch1_sum_wise)
        concat_conv2 = self.concat_conv2(branch2_sum_wise)
        concat_conv3 = self.concat_conv3(branch3_sum_wise)

        PSA_output = torch.cat([concat_conv1, concat_conv2, concat_conv3],
                               dim=1)

        refine_cls_preds = self.refine_cls(PSA_output)
        refine_loc_preds = self.refine_loc(PSA_output)

        refine_loc_preds = refine_loc_preds.permute(0, 2, 3, 1).contiguous()
        refine_cls_preds = refine_cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict["Refine_loc_preds"] = refine_loc_preds
        ret_dict["Refine_cls_preds"] = refine_cls_preds

        if self._use_direction_classifier:
            refine_dir_preds = self.refine_dir(PSA_output)
            refine_dir_preds = refine_dir_preds.permute(0, 2, 3,
                                                        1).contiguous()
            ret_dict["Refine_dir_preds"] = refine_dir_preds

        return ret_dict
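For intuition, a shape walk-through of `PSA.forward` under the default arguments (input size assumed; the three deconv branches are sized to meet on a common H/2 x W/2 grid):

# x:  [N, 128, H, W]          (num_input_filters = 128)
# x1: [N, 128, H/2, W/2]  ->  up1: [N, 256, H/2, W/2]   (deconv stride 1)
# x2: [N, 128, H/4, W/4]  ->  up2: [N, 256, H/2, W/2]   (deconv stride 2)
# x3: [N, 256, H/8, W/8]  ->  up3: [N, 256, H/2, W/2]   (deconv stride 4)
# coarse_feat: [N, 768, H/2, W/2];  bottle_conv output: [N, 256, H/2, W/2],
# which is why each refined branch can be summed with it element-wise.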
Example #5
class RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 use_rc_net=False,
                 name='rpn'):
        """deprecated. exists for checkpoint backward compilability (SECOND v1.0)
        """
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_rc_net = use_rc_net
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                num_input_features, num_filters[0], 3,
                stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(
                num_filters[0],
                num_upsample_filters[0],
                upsample_strides[0],
                stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                block2_input_filters,
                num_filters[1],
                3,
                stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(
                num_filters[1],
                num_upsample_filters[1],
                upsample_strides[1],
                stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(
                num_filters[2],
                num_upsample_filters[2],
                upsample_strides[2],
                stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(
            sum(num_upsample_filters), num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters),
                num_anchor_per_loc * num_direction_bins, 1)

        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(
                sum(num_upsample_filters), num_anchor_per_loc * box_code_size,
                1)

    def forward(self, x):
        # t = time.time()
        # torch.cuda.synchronize()

        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)

        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        if self._use_rc_net:
            rc_preds = self.conv_rc(x)
            rc_preds = rc_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["rc_preds"] = rc_preds
        # torch.cuda.synchronize()
        # print("rpn forward time", time.time() - t)

        return ret_dict
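A minimal smoke test for this RPN (the 200x176 BEV grid is illustrative, not mandated by the class):

rpn = RPN(use_norm=True, num_class=2, num_input_features=128)
x = torch.zeros(1, 128, 200, 176)        # [N, C, H, W] BEV feature map
out = rpn(x)
# layer_strides (2, 2, 2) and upsample_strides (1, 2, 4) put all three
# branches on an H/2 x W/2 grid, so:
# out["box_preds"].shape == (1, 100, 88, 14)   # 14 = 2 anchors * 7 box terms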
Example #6
class RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_filters=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 name='rpn'):
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64

        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_filters,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

    def forward(self, x, bev=None):
        x = self.block1(x)
        up1 = self.deconv1(x)
        if self._use_bev:
            bev[:, -1] = torch.clamp(torch.log(1 + bev[:, -1]) / np.log(16.0),
                                     max=1.0)
            x = torch.cat([x, self.bev_extractor(bev)], dim=1)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = torch.cat([up1, up2, up3], dim=1)
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)]
        box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
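The functional difference from Example #5 is the optional BEV branch: before fusion, the last BEV channel is log-compressed, which (assuming it holds a point-count/density map) saturates at 1.0 once the count reaches 15:

# Standalone form of the clamp in forward() above:
# log(1 + d) / log(16) hits 1.0 exactly at d = 15, then clamps.
density = bev[:, -1]
bev[:, -1] = torch.clamp(torch.log1p(density) / np.log(16.0), max=1.0)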
Example #7
class RPN_refine(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(3, 5, 5),
                 layer_strides=(2, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 num_direction_bins=2,
                 name='rpn'):
        super(RPN_refine, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        self._box_code_size = box_code_size
        self._num_class = num_class
        self._num_direction_bins = num_direction_bins
        upsample_strides = [
            np.round(u).astype(np.int64) for u in upsample_strides
        ]
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_features,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls_coarse = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box_coarse = nn.Conv2d(num_upsample_filters[0],
                                         num_anchor_per_loc * box_code_size, 1)
        self.conv_cls = nn.Conv2d(num_upsample_filters[0], num_cls, 1)
        self.conv_box = nn.Conv2d(num_upsample_filters[0],
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                num_upsample_filters[0],
                num_anchor_per_loc * num_direction_bins, 1)

    def forward(self, x):
        H, W = x.shape[2:]
        box_refine = self.conv_box_coarse(x)
        box_refine = box_refine.view(-1, self._num_anchor_per_loc,
                                     self._box_code_size, H,
                                     W).permute(0, 1, 3, 4, 2).contiguous()
        cls_constraint = self.conv_cls_coarse(x)
        cls_constraint = cls_constraint.view(-1, self._num_anchor_per_loc,
                                             self._num_class, H,
                                             W).permute(0, 1, 3, 4,
                                                        2).contiguous()
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        x = up1 + up2 + up3
        box_preds = self.conv_box(x)
        cls_preds = self.conv_cls(x)
        # [N, C, y(H), x(W)]
        box_preds = box_preds.view(-1, self._num_anchor_per_loc,
                                   self._box_code_size, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(-1, self._num_anchor_per_loc,
                                   self._num_class, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        ret_dict = {
            "box_refine": box_refine,
            "cls_constraint": cls_constraint,
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x)
            dir_cls_preds = dir_cls_preds.view(-1, self._num_anchor_per_loc,
                                               self._num_direction_bins, H,
                                               W).permute(0, 1, 3, 4,
                                                          2).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds

        return x, ret_dict
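Two constraints are implicit in `RPN_refine.forward`: the coarse heads convolve the raw input, so `num_input_features` must equal `num_upsample_filters[0]`, and `H, W` are captured before the blocks, so the overall downsample/upsample factor must be 1 (the defaults `layer_strides=(2, 2, 2)` with `upsample_strides=(1, 2, 4)` would halve the resolution and make the final `view` fail). A consistent hedged example:

rpn = RPN_refine(num_input_features=128,
                 num_upsample_filters=(128, 128, 128),
                 layer_strides=(1, 2, 2))            # overall stride 1
feat, preds = rpn(torch.zeros(1, 128, 200, 176))
# preds["box_preds"].shape == (1, 2, 200, 176, 7)   # [N, anchor, H, W, code]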
Example #8
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 use_rc_net=False,
                 name='rpn'):
        super(RPNV2, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        self._use_rc_net = use_rc_net
        # assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        """
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        """
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        in_filters = [num_input_features, *num_filters[:-1]]
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        blocks = []
        deblocks = []

        for i, layer_num in enumerate(layer_nums):
            block = Sequential(
                nn.ZeroPad2d(1),
                Conv2d(in_filters[i],
                       num_filters[i],
                       3,
                       stride=layer_strides[i]),
                BatchNorm2d(num_filters[i]),
                nn.ReLU(),
            )
            for j in range(layer_num):
                block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
                block.add(BatchNorm2d(num_filters[i]))
                block.add(nn.ReLU())
            blocks.append(block)
            deblock = Sequential(
                ConvTranspose2d(num_filters[i],
                                num_upsample_filters[i],
                                upsample_strides[i],
                                stride=upsample_strides[i]),
                BatchNorm2d(num_upsample_filters[i]),
                nn.ReLU(),
            )
            deblocks.append(deblock)
        self.blocks = nn.ModuleList(blocks)
        self.deblocks = nn.ModuleList(deblocks)
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(sum(num_upsample_filters),
                                     num_anchor_per_loc * box_code_size, 1)
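This snippet shows only `__init__`; the matching RPNV2 `forward` in SECOND-style code pairs each block with its deblock and concatenates the upsampled maps, roughly as follows (a sketch, not the verbatim implementation):

def forward(self, x):
    ups = []
    for block, deblock in zip(self.blocks, self.deblocks):
        x = block(x)                      # downsample stage i
        ups.append(deblock(x))            # upsample back to the common grid
    x = torch.cat(ups, dim=1)             # channel concat feeds the 1x1 heads
    box_preds = self.conv_box(x).permute(0, 2, 3, 1).contiguous()
    cls_preds = self.conv_cls(x).permute(0, 2, 3, 1).contiguous()
    return {"box_preds": box_preds, "cls_preds": cls_preds}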
Example #9
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 use_rc_net=False,
                 name='rpn'):
        super(RPN_FUSION, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        self._use_rc_net = use_rc_net
        # assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        """
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        """
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        in_filters = [num_input_features, *num_filters[:-1]]
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        blocks = []
        deblocks = []

        for i, layer_num in enumerate(layer_nums):
            # in_f = 256 if i == 0 else in_filters[i]
            in_f = in_filters[i]
            block = Sequential(
                nn.ZeroPad2d(1),
                Conv2d(in_f, num_filters[i], 3, stride=layer_strides[i]),
                BatchNorm2d(num_filters[i]),
                nn.ReLU(),
            )
            for j in range(layer_num):
                block.add(Conv2d(num_filters[i], num_filters[i], 3, padding=1))
                block.add(BatchNorm2d(num_filters[i]))
                block.add(nn.ReLU())
            blocks.append(block)
            deblock = Sequential(
                ConvTranspose2d(num_filters[i],
                                num_upsample_filters[i],
                                upsample_strides[i],
                                stride=upsample_strides[i]),
                BatchNorm2d(num_upsample_filters[i]),
                nn.ReLU(),
            )
            deblocks.append(deblock)
        self.blocks = nn.ModuleList(blocks)
        self.deblocks = nn.ModuleList(deblocks)
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        #########################
        det_num = sum(num_upsample_filters)
        #########################
        self.conv_cls = nn.Conv2d(det_num, num_cls, 1)
        self.conv_box = nn.Conv2d(det_num, num_anchor_per_loc * box_code_size,
                                  1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(det_num, num_anchor_per_loc * 2, 1)

        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(det_num,
                                     num_anchor_per_loc * box_code_size, 1)
        ##########################################################
        self.f_in_planes_det = 64
        net_type = 'FPN18'
        if net_type == 'FPN50':
            num_blocks = [3, 4, 6, 3]
            bb_block = Bottleneck
        elif net_type == 'FPN18':
            num_blocks = [2, 2, 2, 2]
            bb_block = BasicBlock

        # For RGB Feature Network
        self.conv1 = nn.Conv2d(3,
                               64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer_det(bb_block,
                                           64,
                                           num_blocks[0],
                                           stride=1)
        self.layer2 = self._make_layer_det(bb_block,
                                           128,
                                           num_blocks[1],
                                           stride=2)
        self.layer3 = self._make_layer_det(bb_block,
                                           256,
                                           num_blocks[2],
                                           stride=2)
        self.layer4 = self._make_layer_det(bb_block,
                                           512,
                                           num_blocks[3],
                                           stride=2)
        if net_type == 'FPN18':
            fpn_sizes = [
                self.layer2[1].conv2.out_channels,
                self.layer3[1].conv2.out_channels,
                self.layer4[1].conv2.out_channels
            ]
        else:
            fpn_sizes = [
                self.layer2[num_blocks[1] - 1].conv3.out_channels,
                self.layer3[num_blocks[2] - 1].conv3.out_channels,
                self.layer4[num_blocks[3] - 1].conv3.out_channels
            ]

        self.fpn = PyramidFeatures(fpn_sizes[0], fpn_sizes[1], fpn_sizes[2])
        ####################################################################
        # Fusion Layer
        num_z_feat = 3
        n_feats = 128
        self.rgb_refine = Sequential(
            nn.Conv2d(256 * num_z_feat,
                      256,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, n_feats, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(n_feats),
            nn.ReLU(),
        )
        self.fusion_refine = Sequential(
            nn.Conv2d(n_feats * 2,
                      n_feats * 2,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.BatchNorm2d(n_feats * 2),
            nn.ReLU(),
            nn.Conv2d(n_feats * 2, n_feats, kernel_size=1, stride=1,
                      padding=0),
            nn.BatchNorm2d(n_feats),
            nn.ReLU(),
        )
        self.bev_gate = BasicGate(n_feats)
        self.crop_gate = BasicGate(n_feats)
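`_make_layer_det` is called above but not shown. A conventional ResNet/FPN-style builder consistent with `self.f_in_planes_det` and the `block.expansion` convention would look like this (assumed, following the common FPN reference implementation):

def _make_layer_det(self, block, planes, num_blocks, stride=1):
    # Only the first unit of a stage may downsample; the rest keep stride 1.
    strides = [stride] + [1] * (num_blocks - 1)
    layers = []
    for s in strides:
        layers.append(block(self.f_in_planes_det, planes, s))
        self.f_in_planes_det = planes * block.expansion
    return nn.Sequential(*layers)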
Example #10
class Sp2RPN(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=(1, 1),
                 layer_strides=(1, 2),
                 num_filters=(256, 256),
                 upsample_strides=(1, 2),
                 num_upsample_filters=(128, 128),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 num_direction_bins=2):
        super().__init__()
        self.name = 'Sp2RPN2'
        self._num_class = num_class
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._num_direction_bins = num_direction_bins
        self._box_code_size = box_code_size
        self._use_bev = use_bev

        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)

        # upsample_strides may arrive as floats (e.g. from a config), so
        # cast to int before use.
        upsample_strides = [int(s) for s in upsample_strides]

        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        self.block1 = Sequential()
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())

        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )

        self.block2 = Sequential()
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())

        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )

        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(sum(num_upsample_filters),
                                  num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(sum(num_upsample_filters),
                                          num_anchor_per_loc * 2, 1)

    def forward(self, x):
        x1, x2 = x
        x1 = self.block1(x1)
        up1 = self.deconv1(x1)
        x2 = self.block2(x2)
        up2 = self.deconv2(x2)
        x_cat = torch.cat([up1, up2], dim=1)
        box_preds = self.conv_box(x_cat)
        cls_preds = self.conv_cls(x_cat)

        # [N, C, y(H), x(W)]
        C, H, W = box_preds.shape[1:]
        box_preds = box_preds.view(-1, self._num_anchor_per_loc,
                                   self._box_code_size, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        cls_preds = cls_preds.view(-1, self._num_anchor_per_loc,
                                   self._num_class, H,
                                   W).permute(0, 1, 3, 4, 2).contiguous()
        # box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
        # cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()

        ret_dict = {
            "box_preds": box_preds,
            "cls_preds": cls_preds,
        }
        if self._use_direction_classifier:
            dir_cls_preds = self.conv_dir_cls(x_cat)
            dir_cls_preds = dir_cls_preds.view(-1, self._num_anchor_per_loc,
                                               self._num_direction_bins, H,
                                               W).permute(0, 1, 3, 4,
                                                          2).contiguous()
            # dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
            ret_dict["dir_cls_preds"] = dir_cls_preds
        return ret_dict
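Sp2RPN consumes a pair of feature maps one octave apart; a hedged smoke test (sizes illustrative):

rpn = Sp2RPN()                             # num_filters=(256, 256) by default
x1 = torch.zeros(1, 256, 200, 176)         # fine-resolution branch
x2 = torch.zeros(1, 256, 100, 88)          # coarse branch; deconv2 doubles it
out = rpn((x1, x2))
# out["box_preds"].shape == (1, 2, 200, 176, 7)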
Example #11
class res_fpn(nn.Module):
    def __init__(self,
                 in_channels=128,
                 num_of_convs=4,
                 prior_prob=0.01,
                 use_norm=True,
                 num_class=2,
                 num_convs=12,
                 layer_nums=(3, 5, 5),
                 layer_strides=(1, 2, 2),
                 num_filters=(128, 128, 256),
                 upsample_strides=(1, 2, 4),
                 num_upsample_filters=(256, 256, 256),
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 name='rpn'):
        """
        Arguments:
            input = (batch, channel, x, y)
            output = ret_dict
            in_channels (int): number of channels of the input feature
        """
        super(res_fpn, self).__init__()
        # convs_fpn head

        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(
                layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(
                np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note: when stride > 1, TensorFlow-style "SAME" padding pads
        # asymmetrically, so it is not equivalent to a symmetric
        # pad-then-conv; the explicit ZeroPad2d keeps the padding
        # behaviour unambiguous.
        block2_input_filters = num_filters[0]
        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_input_features,
                   num_filters[0],
                   3,
                   stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(num_filters[0],
                            num_upsample_filters[0],
                            upsample_strides[0],
                            stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(block2_input_filters,
                   num_filters[1],
                   3,
                   stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(num_filters[1],
                            num_upsample_filters[1],
                            upsample_strides[1],
                            stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(num_filters[2],
                            num_upsample_filters[2],
                            upsample_strides[2],
                            stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )

    def forward(self, x):
        fpn_head = []
        # fpn
        x = self.block1(x)
        up1 = self.deconv1(x)
        x = self.block2(x)
        up2 = self.deconv2(x)
        x = self.block3(x)
        up3 = self.deconv3(x)
        # fpn_head.append(up1)
        # fpn_head.append(up2)
        # fpn_head.append(up3)
        # single head: concatenate all upsampled scales into one map
        x = torch.cat([up1, up2, up3], dim=1)
        fpn_head.append(x)

        return fpn_head
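
The assert loop at the top of res_fpn.__init__ encodes the condition that makes the final torch.cat valid: each stage's cumulative downsampling factor divided by its upsample stride must come out to the same constant, so up1, up2 and up3 all share one resolution. A standalone check with the default strides (needs only numpy):

import numpy as np

layer_strides = (1, 2, 2)
upsample_strides = (1, 2, 4)
factors = [int(np.prod(layer_strides[:i + 1])) // upsample_strides[i]
           for i in range(len(layer_strides))]
print(factors)  # [1, 1, 1] -> all three upsampled maps line up
assert all(f == factors[0] for f in factors)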
Exemplo n.º 12
class img_extractor_VGG16(nn.Module):
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 img_input_channel=3,
                 img_extractor_layer_nums=[2, 3, 3],
                 layer_strides=[2, 2, 2],
                 # one entry per block is required: block3 below indexes
                 # [2] (the third entries here are assumed defaults)
                 num_filters=[32, 64, 128],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[128, 128, 128],
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 box_code_size=7,
                 name='img_extractor_SSD_like'):
        super(img_extractor_VGG16, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        assert len(layer_strides) == len(img_extractor_layer_nums)
        assert len(num_filters) == len(img_extractor_layer_nums)
        assert len(upsample_strides) == len(img_extractor_layer_nums)
        assert len(num_upsample_filters) == len(img_extractor_layer_nums)
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(num_groups=num_groups,
                                                  eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(eps=1e-3, momentum=0.01)(
                    nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)

        self.block1 = Sequential()
        for i in range(img_extractor_layer_nums[0]):
            if i == 0:
                block1_in = img_input_channel
            else:
                block1_in = num_filters[0]
            self.block1.add(Conv2d(block1_in, num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU(inplace=False))
        self.block1.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.block2 = Sequential()
        for i in range(img_extractor_layer_nums[1]):
            if i == 0:
                block2_in = num_filters[0]
            else:
                block2_in = num_filters[1]
            self.block2.add(Conv2d(block2_in, num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU(inplace=False))
        self.block2.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))

        self.block3 = Sequential()
        for i in range(img_extractor_layer_nums[2]):
            if i == 0:
                block3_in = num_filters[1]
            else:
                block3_in = num_filters[2]
            self.block3.add(Conv2d(block3_in, num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU(inplace=False))
        self.block3.add(torch.nn.MaxPool2d(kernel_size=2, stride=2))

    def forward(self, inputs, bev=None):  # inputs: [1, 3, 375, 1240]
        img_feat_block1 = self.block1(inputs)  # [1, 32, 187, 620] with the defaults above
        img_feat_block2 = self.block2(img_feat_block1)  # [1, 64, 93, 310]
        img_feat_block3 = self.block3(img_feat_block2)  # [1, 128, 46, 155]
        return img_feat_block3
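
Sequential, change_default_args and Empty are project-specific helpers, so img_extractor_VGG16 cannot be run standalone from this listing; the sketch below rebuilds the same conv-BN-ReLU-pool block pattern with stock torch.nn to verify the shape progression (the 32/64/128 filter counts and the 375x1240 input are the assumed defaults from above):

import torch
import torch.nn as nn

def vgg_block(in_ch, out_ch, num_convs):
    # num_convs conv-BN-ReLU stages followed by a 2x2 max-pool,
    # mirroring the blocks built in img_extractor_VGG16 above
    layers = []
    for i in range(num_convs):
        layers += [
            nn.Conv2d(in_ch if i == 0 else out_ch, out_ch, 3,
                      padding=1, bias=False),
            nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01),
            nn.ReLU(),
        ]
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

extractor = nn.Sequential(
    vgg_block(3, 32, 2),    # -> [1, 32, 187, 620]
    vgg_block(32, 64, 3),   # -> [1, 64, 93, 310]
    vgg_block(64, 128, 3),  # -> [1, 128, 46, 155]
)
out = extractor(torch.randn(1, 3, 375, 1240))
print(out.shape)  # torch.Size([1, 128, 46, 155])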