Exemplo n.º 1
0
    def __init__(self, phase, conf):
        """Build the backbones, attention groups and prediction heads.

        Args:
            phase: Run phase identifier; stored unchanged on ``self.phase``.
            conf: Configuration object. It is read by key (``conf['lbls']``,
                ``conf['anchors']``) and by attribute (``prev_num``,
                ``dropout_rate``, ``feat_stride``, ``crop_size``,
                ``anchors``). ``prev_num`` and ``dropout_rate`` may be
                ``None``, in which case 1 and 0.2 are used.

        Side effects:
            Reads ``./lib/anchor_motion_scale_multi_objects_split1.npy``
            relative to the current working directory.
        """
        super(RPN, self).__init__()

        def make_channel_attention():
            # Every call builds a new group with its own activation instance,
            # so the three attention branches share no modules.
            return ResidualGroup(RCAB,
                                 n_resblocks=12,
                                 n_feat=512,
                                 kernel_size=3,
                                 reduction=16,
                                 act=nn.LeakyReLU(0.2, True),
                                 norm=False)

        def make_head(in_channels, out_channels):
            # All prediction heads are 1x1 convolutions.
            return nn.Conv2d(in_channels, out_channels, 1)

        # Two separate ResNetDilate backbones.
        self.base = resnet.ResNetDilate(num_layer=50)
        self.depthnet = resnet.ResNetDilate(num_layer=50)

        # settings
        self.phase = phase
        self.num_classes = len(conf['lbls']) + 1
        self.num_anchors = conf['anchors'].shape[0]
        self.prev_num = conf.prev_num if conf.prev_num is not None else 1
        self.dropout_rate = (conf.dropout_rate
                             if conf.dropout_rate is not None else 0.2)

        self.prop_feats = nn.Sequential(
            nn.Conv2d(2048, 512, 3, padding=1),
            nn.ReLU(inplace=True),
        )

        self.channel_attention_1 = make_channel_attention()
        self.channel_attention_2 = make_channel_attention()
        self.channel_attention_3 = make_channel_attention()

        # Same as prop_feats but with three times as many input channels.
        self.prop_feats_multi = nn.Sequential(
            nn.Conv2d(2048 * 3, 512, 3, padding=1),
            nn.ReLU(inplace=True),
        )

        self.dropout = nn.Dropout(p=self.dropout_rate)
        self.dropout_channel = nn.Dropout2d(p=0.3)

        feat_channels = self.prop_feats[0].out_channels
        multi_channels = self.prop_feats_multi[0].out_channels
        num_anchors = self.num_anchors

        # outputs
        self.cls = make_head(feat_channels, self.num_classes * num_anchors)

        # motion (fed by the prop_feats_multi channel count)
        self.motion_x = make_head(multi_channels, num_anchors)
        self.motion_y = make_head(multi_channels, num_anchors)
        self.motion_z = make_head(multi_channels, num_anchors)

        # bbox 2d
        self.bbox_x = make_head(feat_channels, num_anchors)
        self.bbox_y = make_head(feat_channels, num_anchors)
        self.bbox_w = make_head(feat_channels, num_anchors)
        self.bbox_h = make_head(feat_channels, num_anchors)

        # bbox 3d
        self.bbox_x3d = make_head(feat_channels, num_anchors)
        self.bbox_y3d = make_head(feat_channels, num_anchors)
        self.bbox_z3d = make_head(feat_channels, num_anchors)
        self.bbox_w3d = make_head(feat_channels, num_anchors)
        self.bbox_h3d = make_head(feat_channels, num_anchors)
        self.bbox_l3d = make_head(feat_channels, num_anchors)
        self.bbox_rY3d = make_head(feat_channels, num_anchors)

        self.softmax = nn.Softmax(dim=1)

        self.feat_stride = conf.feat_stride
        self.feat_size = calc_output_size(np.array(conf.crop_size),
                                          self.feat_stride)
        rois = locate_anchors(conf.anchors,
                              self.feat_size,
                              conf.feat_stride,
                              convert_tensor=True)
        # Cast to a float tensor on the GPU when CUDA is available; fall back
        # to a CPU float tensor so construction does not fail without CUDA.
        rois_type = (torch.cuda.FloatTensor
                     if torch.cuda.is_available() else torch.FloatTensor)
        self.rois = rois.type(rois_type)
        self.anchors = conf.anchors

        motion_scale = np.load(
            './lib/anchor_motion_scale_multi_objects_split1.npy')
        # Insert singleton dimensions at positions 0, 2 and 3.
        # NOTE(review): presumably so the scale broadcasts against
        # [N, C, H, W] head outputs -- confirm against forward().
        self.motion_scale = (torch.from_numpy(motion_scale)
                             .unsqueeze(0).unsqueeze(2).unsqueeze(3))
Exemplo n.º 2
0
    def __init__(self, phase, conf):
        """Build the backbone, optional extra layers and prediction heads.

        Args:
            phase: Run phase identifier; stored unchanged on ``self.phase``.
            conf: Configuration object. It is read by key (``conf['lbls']``,
                ``conf['anchors']``) and by attribute (``base_model``,
                ``adaptive_diated``, ``dropout_position``, ``use_dropout``,
                ``drop_channel``, ``use_corner``, ``corner_in_3d``,
                ``deformable``, ``use_rcnn_pretrain``, ``dropout_rate``,
                ``feat_stride``, ``crop_size``, ``anchors``).

        Side effects:
            When ``conf.use_rcnn_pretrain`` is truthy, reads a Faster R-CNN
            checkpoint from the current working directory and loads it into
            ``self.base``.
        """
        super(RPN, self).__init__()

        def load_checkpoint(path):
            # Keep every storage on the CPU while loading so the checkpoint
            # can be read regardless of the device it was saved from;
            # load_state_dict() copies the values into the model afterwards.
            # NOTE: torch.load unpickles the file -- only load trusted
            # checkpoints.
            return torch.load(path,
                              map_location=lambda storage, loc: storage)

        self.base = resnet.ResNetDilate(conf.base_model)
        self.adaptive_diated = conf.adaptive_diated
        self.dropout_position = conf.dropout_position
        self.use_dropout = conf.use_dropout
        self.drop_channel = conf.drop_channel
        self.use_corner = conf.use_corner
        self.corner_in_3d = conf.corner_in_3d
        self.deformable = conf.deformable

        if conf.use_rcnn_pretrain:
            if conf.base_model == 101:
                checkpoint = load_checkpoint(
                    'faster_rcnn_1_10_14657.pth')['model']
                # Checkpoint key fragment -> name used by self.base.
                rename_dict = {
                    'RCNN_top.0': 'layer4',
                    'RCNN_base.0': 'conv1',
                    'RCNN_base.1': 'bn1',
                    'RCNN_base.2': 'relu',
                    'RCNN_base.3': 'maxpool',
                    'RCNN_base.4': 'layer1',
                    'RCNN_base.5': 'layer2',
                    'RCNN_base.6': 'layer3'
                }
                # Keep only entries that contain one of the fragments above;
                # the first matching fragment decides the new key.
                renamed = {}
                for key, value in checkpoint.items():
                    for rcnn_name, local_name in rename_dict.items():
                        if rcnn_name in key:
                            renamed[key.replace(rcnn_name,
                                                local_name)] = value
                            break
                self.base.load_state_dict(renamed)

            elif conf.base_model == 50:
                checkpoint = load_checkpoint(
                    'res50_faster_rcnn_iter_1190000.pth')
                # Keep only the 'resnet' entries and strip that prefix.
                stripped = {
                    key.replace('resnet.', ''): value
                    for key, value in checkpoint.items() if 'resnet' in key
                }
                self.base.load_state_dict(stripped)

        self.depthnet = resnet.ResNetDilate(50)

        if self.adaptive_diated:
            # Pooling to 3x3 followed by an unpadded 3x3 conv gives a
            # 1x1 spatial output with 512 * 3 channels.
            self.adaptive_layers = nn.Sequential(
                nn.AdaptiveMaxPool2d(3),
                nn.Conv2d(512, 512 * 3, 3, padding=0),
            )
            self.adaptive_softmax = nn.Softmax(dim=3)

        if self.deformable:
            self.deform_layer = DeformConv2d(512,
                                             512,
                                             3,
                                             padding=1,
                                             bias=False,
                                             modulation=True)

        # settings
        self.phase = phase
        self.num_classes = len(conf['lbls']) + 1
        self.num_anchors = conf['anchors'].shape[0]

        self.prop_feats = nn.Sequential(
            nn.Conv2d(2048, 512, 3, padding=1),
            nn.ReLU(inplace=True),
        )
        if self.use_dropout:
            self.dropout = nn.Dropout(p=conf.dropout_rate)

        if self.drop_channel:
            self.dropout_channel = nn.Dropout2d(p=0.3)

        feat_channels = self.prop_feats[0].out_channels
        num_anchors = self.num_anchors

        def make_head(out_channels):
            # All prediction heads are 1x1 convolutions over prop_feats.
            return nn.Conv2d(feat_channels, out_channels, 1)

        # outputs
        self.cls = make_head(self.num_classes * num_anchors)

        # bbox 2d
        self.bbox_x = make_head(num_anchors)
        self.bbox_y = make_head(num_anchors)
        self.bbox_w = make_head(num_anchors)
        self.bbox_h = make_head(num_anchors)

        # bbox 3d
        self.bbox_x3d = make_head(num_anchors)
        self.bbox_y3d = make_head(num_anchors)
        self.bbox_z3d = make_head(num_anchors)
        self.bbox_w3d = make_head(num_anchors)
        self.bbox_h3d = make_head(num_anchors)
        self.bbox_l3d = make_head(num_anchors)
        self.bbox_rY3d = make_head(num_anchors)

        if self.corner_in_3d:
            self.bbox_3d_corners = make_head(num_anchors * 18)  # 2 * 8 + 2
            self.bbox_vertices = make_head(num_anchors * 24)  # 3 * 8
        elif self.use_corner:
            self.bbox_vertices = make_head(num_anchors * 24)

        self.softmax = nn.Softmax(dim=1)

        self.feat_stride = conf.feat_stride
        self.feat_size = calc_output_size(np.array(conf.crop_size),
                                          self.feat_stride)
        rois = locate_anchors(conf.anchors,
                              self.feat_size,
                              conf.feat_stride,
                              convert_tensor=True)
        # Cast to a float tensor on the GPU when CUDA is available; fall back
        # to a CPU float tensor so construction does not fail without CUDA.
        rois_type = (torch.cuda.FloatTensor
                     if torch.cuda.is_available() else torch.FloatTensor)
        self.rois = rois.type(rois_type)
        self.anchors = conf.anchors
Exemplo n.º 3
0
    def __init__(self, phase, conf):
        """Build the backbone, optional extra layers and prediction heads.

        Args:
            phase: Run phase identifier; stored unchanged on ``self.phase``.
            conf: Configuration object supporting key lookup, attribute
                access and ``in`` membership tests. Required entries:
                ``lbls``, ``anchors``, ``base_model``, ``adaptive_diated``,
                ``dropout_position``, ``use_dropout``, ``drop_channel``,
                ``use_corner``, ``corner_in_3d``, ``deformable``,
                ``use_rcnn_pretrain``, ``feat_stride``, ``crop_size``
                (``dropout_rate`` is read when ``use_dropout`` is truthy).
                Optional entries: ``occlusion`` (default ``False``) and
                ``threshold`` (default ``1``).

        Side effects:
            When ``conf.use_rcnn_pretrain`` is truthy, reads a Faster R-CNN
            checkpoint from the current working directory and loads it into
            ``self.base``.
        """
        super(RPN, self).__init__()

        def load_checkpoint(path):
            # Keep every storage on the CPU while loading so the checkpoint
            # can be read regardless of the device it was saved from;
            # load_state_dict() copies the values into the model afterwards.
            # NOTE: torch.load unpickles the file -- only load trusted
            # checkpoints.
            return torch.load(path,
                              map_location=lambda storage, loc: storage)

        # Feature-extraction backbone.
        # NOTE(review): the original author describes ResNetDilate as a
        # pretrained ResNet without the final FC/pooling layers and with part
        # of layer4 modified -- confirm against the resnet module.
        self.base = resnet.ResNetDilate(conf.base_model)
        self.adaptive_diated = conf.adaptive_diated
        self.dropout_position = conf.dropout_position
        self.use_dropout = conf.use_dropout
        self.drop_channel = conf.drop_channel
        self.use_corner = conf.use_corner
        self.corner_in_3d = conf.corner_in_3d
        self.deformable = conf.deformable

        # Occlusion module settings (optional in conf).
        self.occlusion = conf.occlusion if "occlusion" in conf else False
        self.threshold = conf.threshold if "threshold" in conf else 1

        if conf.use_rcnn_pretrain:
            if conf.base_model == 101:
                checkpoint = load_checkpoint(
                    'faster_rcnn_1_10_14657.pth')['model']
                # Checkpoint key fragment -> name used by self.base.
                rename_dict = {
                    'RCNN_top.0': 'layer4',
                    'RCNN_base.0': 'conv1',
                    'RCNN_base.1': 'bn1',
                    'RCNN_base.2': 'relu',
                    'RCNN_base.3': 'maxpool',
                    'RCNN_base.4': 'layer1',
                    'RCNN_base.5': 'layer2',
                    'RCNN_base.6': 'layer3'
                }
                # Keep only entries that contain one of the fragments above;
                # the first matching fragment decides the new key.
                renamed = {}
                for key, value in checkpoint.items():
                    for rcnn_name, local_name in rename_dict.items():
                        if rcnn_name in key:
                            renamed[key.replace(rcnn_name,
                                                local_name)] = value
                            break
                self.base.load_state_dict(renamed)

            elif conf.base_model == 50:
                checkpoint = load_checkpoint(
                    'res50_faster_rcnn_iter_1190000.pth')
                # Keep only the 'resnet' entries and strip that prefix.
                stripped = {
                    key.replace('resnet.', ''): value
                    for key, value in checkpoint.items() if 'resnet' in key
                }
                self.base.load_state_dict(stripped)

        self.depthnet = resnet.ResNetDilate(50)

        if self.adaptive_diated:
            self.adaptive_softmax = nn.Softmax(dim=3)

            # AdaptiveMaxPool2d takes the *output* size, so the pooled map is
            # 3x3 for any input size; the unpadded 3x3 conv then yields a
            # 1x1 map with 512 * 3 channels.
            # NOTE(review): original comment says this sits between layer2
            # and layer3 -- confirm in forward().
            self.adaptive_layers = nn.Sequential(
                nn.AdaptiveMaxPool2d(3),
                nn.Conv2d(512, 512 * 3, 3, padding=0),
            )
            self.adaptive_bn = nn.BatchNorm2d(512)
            self.adaptive_relu = nn.ReLU(inplace=True)

            # Same construction for 1024 channels (1x1 map, 1024 * 3
            # channels).
            # NOTE(review): original comment says this sits between layer3
            # and layer4 -- confirm in forward().
            self.adaptive_layers1 = nn.Sequential(
                nn.AdaptiveMaxPool2d(3),
                nn.Conv2d(1024, 1024 * 3, 3, padding=0),
            )
            self.adaptive_bn1 = nn.BatchNorm2d(1024)
            self.adaptive_relu1 = nn.ReLU(inplace=True)

        if self.deformable:
            self.deform_layer = DeformConv2d(512,
                                             512,
                                             3,
                                             padding=1,
                                             bias=False,
                                             modulation=True)

        # settings
        self.phase = phase
        # NOTE(review): the +1 is presumably a "none of the labelled
        # classes" (background) slot -- confirm.
        self.num_classes = len(conf['lbls']) + 1
        self.num_anchors = conf['anchors'].shape[0]

        # [N, 2048, H, W] -> [N, 512, H, W]
        self.prop_feats = nn.Sequential(
            nn.Conv2d(2048, 512, 3, padding=1),
            nn.ReLU(inplace=True),
        )
        if self.use_dropout:
            self.dropout = nn.Dropout(p=conf.dropout_rate)

        if self.drop_channel:
            self.dropout_channel = nn.Dropout2d(p=0.3)

        feat_channels = self.prop_feats[0].out_channels
        num_anchors = self.num_anchors

        def make_head(out_channels):
            # All prediction heads are 1x1 convolutions over prop_feats:
            # [N, 512, H, W] -> [N, out_channels, H, W].
            return nn.Conv2d(feat_channels, out_channels, 1)

        # outputs
        self.cls = make_head(self.num_classes * num_anchors)

        # bbox 2d
        self.bbox_x = make_head(num_anchors)
        self.bbox_y = make_head(num_anchors)
        self.bbox_w = make_head(num_anchors)
        self.bbox_h = make_head(num_anchors)

        # bbox 3d
        self.bbox_x3d = make_head(num_anchors)
        self.bbox_y3d = make_head(num_anchors)
        self.bbox_z3d = make_head(num_anchors)
        self.bbox_w3d = make_head(num_anchors)
        self.bbox_h3d = make_head(num_anchors)
        self.bbox_l3d = make_head(num_anchors)
        self.bbox_rY3d = make_head(num_anchors)

        # Occlusion module.
        if self.occlusion:
            self.occ_correct = make_head(num_anchors)
            # NOTE(review): named avg_pool but this is a *max* pool; left
            # unchanged to preserve behavior.
            self.avg_pool = nn.AdaptiveMaxPool2d((1, 1))
            self.fc1 = nn.Linear(self.occ_correct.out_channels, 133)
            self.fc2 = nn.Linear(133, 7)

        if self.corner_in_3d:
            self.bbox_3d_corners = make_head(num_anchors * 18)  # 2 * 8 + 2
            self.bbox_vertices = make_head(num_anchors * 24)  # 3 * 8
        elif self.use_corner:
            self.bbox_vertices = make_head(num_anchors * 24)

        self.softmax = nn.Softmax(dim=1)

        self.feat_stride = conf.feat_stride
        self.feat_size = calc_output_size(np.array(conf.crop_size),
                                          self.feat_stride)
        rois = locate_anchors(conf.anchors,
                              self.feat_size,
                              conf.feat_stride,
                              convert_tensor=True)
        # Cast to a float tensor on the GPU when CUDA is available; fall back
        # to a CPU float tensor so construction does not fail without CUDA.
        rois_type = (torch.cuda.FloatTensor
                     if torch.cuda.is_available() else torch.FloatTensor)
        self.rois = rois.type(rois_type)
        self.anchors = conf.anchors