Example #1
    def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target,
                    rois_inside_ws, rois_outside_ws):

        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                         rois_inside_ws, rois_outside_ws)

        # classification loss
        RCNN_loss_cls = F.cross_entropy(
            cls_score, rois_label)  # cls_score: [N, 2], rois_label: [N]

        return RCNN_loss_cls, RCNN_loss_bbox
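
A quick note on the helper these examples keep calling: _smooth_l1_loss is not shown on this page. Below is a minimal sketch of what such a helper usually looks like in Faster R-CNN code (sigma-weighted smooth L1 gated by inside weights and normalized by outside weights); it is an assumption based on the standard formulation, not necessarily the repository's exact implementation.

import torch

def smooth_l1_loss_sketch(bbox_pred, bbox_targets, bbox_inside_weights,
                          bbox_outside_weights, sigma=1.0, dim=(1,)):
    # inside weights zero out non-positive samples; outside weights normalize the loss
    sigma_2 = sigma ** 2
    box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(box_diff)
    # quadratic branch below 1/sigma^2, linear branch above it
    smooth_sign = (abs_diff < 1.0 / sigma_2).float()
    loss_box = (box_diff ** 2) * (sigma_2 / 2.0) * smooth_sign \
        + (abs_diff - 0.5 / sigma_2) * (1.0 - smooth_sign)
    loss_box = bbox_outside_weights * loss_box
    for d in sorted(dim, reverse=True):   # sum over the requested dims, then average
        loss_box = loss_box.sum(d)
    return loss_box.mean()

# usage with toy tensors:
# pred, tgt = torch.randn(4, 8), torch.zeros(4, 8)
# w = torch.ones(4, 8)
# print(smooth_l1_loss_sketch(pred, tgt, w, w))
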
Example #2
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):

        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                            rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
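
The RPN examples also rely on a small self.reshape(x, d) helper to fold the per-anchor channels into a dimension that softmax can act on; a standalone sketch of that reshaping, assuming the (b, 2*9, h, w) <-> (b, 2, 9*h, w) layout described in the shape comments further down this page:

import torch
import torch.nn.functional as F

def reshape_scores(x, d):
    # (b, c, h, w) -> (b, d, c*h/d, w), keeping the element count unchanged
    b, c, h, w = x.size()
    return x.view(b, d, (c * h) // d, w)

scores = torch.randn(1, 2 * 9, 5, 7)          # 9 anchors, 2 fg/bg scores per location
scores_reshaped = reshape_scores(scores, 2)   # (1, 2, 45, 7)
probs = F.softmax(scores_reshaped, dim=1)     # fg/bg probabilities sum to 1
probs = reshape_scores(probs, 2 * 9)          # back to (1, 18, 5, 7)
print(probs.shape)
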
Example #3
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)  # (batch_size/5L, rois_nums/128L, 5L)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # pooled_feat: (batch_size*rois_nums/640L, channels/1024L, pooled_height/7L, pooled_width/7L)
        # pooled_feat: (batch_size*rois_nums/640L, channels/2048L)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
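
The torch.gather call above (selecting, for each roi, the 4 regression values of its target class when the head is not class-agnostic) is easier to see on a toy tensor; a tiny illustration with made-up sizes (8 rois, 21 classes):

import torch

num_rois, num_classes = 8, 21
bbox_pred = torch.randn(num_rois, num_classes * 4)        # per-class box deltas
rois_label = torch.randint(0, num_classes, (num_rois,))   # target class of each roi

bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)
index = rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4)
bbox_pred_select = torch.gather(bbox_pred_view, 1, index)  # (num_rois, 1, 4)
bbox_pred_class = bbox_pred_select.squeeze(1)              # (num_rois, 4)

# sanity check: row i holds the deltas of class rois_label[i]
assert torch.equal(bbox_pred_class[0], bbox_pred_view[0, rois_label[0]])
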
Example #4
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # Bottom-up
        c1 = self.RCNN_layer0(im_data)
        c2 = self.RCNN_layer1(c1)
        c3 = self.RCNN_layer2(c2)
        c4 = self.RCNN_layer3(c3)
        c5 = self.RCNN_layer4(c4)
        c6 = self.RCNN_layer5(c5)

        # Top-down
        p6 = self.RCNN_toplayer(c6)
        p5 = self.RCNN_latlayer1(c5) + p6
        p4 = self.RCNN_latlayer2(c4) + p5
        p3 = self._upsample_add(p4, self.RCNN_latlayer3(c3))
        p3 = self.RCNN_smooth1(p3)
        p2 = self._upsample_add(p3, self.RCNN_latlayer4(c2))
        p2 = self.RCNN_smooth2(p2)

        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            rpn_feature_maps, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        # print('before pooling, cfg', cfg.POOLING_MODE)
        # print('before pooling, get_cfg', get_cfg().POOLING_MODE)
        # pooling features based on rois, output 14x14 map
        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois,
                                              im_info)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(roi_pool_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.long().view(rois_label.size(0), 1,
                                       1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        # cls_prob = F.softmax(cls_score)   ---------------- not used ---------------

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # loss (cross entropy) for object classification
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # loss (l1-norm) for bounding box regression
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        # cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))   ---------------- not used ---------------
        bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        # 2nd-----------------------------
        # decode
        rois = bbox_decode(rois, bbox_pred, batch_size, self.class_agnostic,
                           self.n_classes, im_info, self.training)

        # proposal_target
        if self.training:
            roi_data = self.RCNN_proposal_target(rois,
                                                 gt_boxes,
                                                 num_boxes,
                                                 stage=2)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois,
                                              im_info)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail_2nd(roi_pool_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred_2nd(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.long().view(rois_label.size(0), 1,
                                       1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score_2nd(pooled_feat)
        # cls_prob_2nd = F.softmax(cls_score) ---------------- not used ---------------

        RCNN_loss_cls_2nd = 0
        RCNN_loss_bbox_2nd = 0

        if self.training:
            # loss (cross entropy) for object classification
            RCNN_loss_cls_2nd = F.cross_entropy(cls_score, rois_label)
            # loss (l1-norm) for bounding box regression
            RCNN_loss_bbox_2nd = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        # cls_prob_2nd = cls_prob_2nd.view(batch_size, -1, cls_prob_2nd.size(1))  ---------------- not used ---------
        bbox_pred_2nd = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        # 3rd---------------
        # decode
        rois = bbox_decode(rois, bbox_pred_2nd, batch_size,
                           self.class_agnostic, self.n_classes, im_info,
                           self.training)

        # proposal_target
        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois,
                                                 gt_boxes,
                                                 num_boxes,
                                                 stage=3)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:

            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois,
                                              im_info)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail_3rd(roi_pool_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred_3rd(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.long().view(rois_label.size(0), 1,
                                       1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score_3rd(pooled_feat)
        cls_prob_3rd = F.softmax(cls_score, 1)

        RCNN_loss_cls_3rd = 0
        RCNN_loss_bbox_3rd = 0

        if self.training:
            # loss (cross entropy) for object classification
            RCNN_loss_cls_3rd = F.cross_entropy(cls_score, rois_label)
            # loss (l1-norm) for bounding box regression
            RCNN_loss_bbox_3rd = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        cls_prob_3rd = cls_prob_3rd.view(batch_size, -1, cls_prob_3rd.size(1))
        bbox_pred_3rd = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        if not self.training:
            # 3rd_avg
            # 1st_3rd
            pooled_feat_1st_3rd = self._head_to_tail(roi_pool_feat)
            cls_score_1st_3rd = self.RCNN_cls_score(pooled_feat_1st_3rd)
            cls_prob_1st_3rd = F.softmax(cls_score_1st_3rd, 1)
            cls_prob_1st_3rd = cls_prob_1st_3rd.view(batch_size, -1,
                                                     cls_prob_1st_3rd.size(1))

            # 2nd_3rd
            pooled_feat_2nd_3rd = self._head_to_tail_2nd(roi_pool_feat)
            cls_score_2nd_3rd = self.RCNN_cls_score_2nd(pooled_feat_2nd_3rd)
            cls_prob_2nd_3rd = F.softmax(cls_score_2nd_3rd, 1)
            cls_prob_2nd_3rd = cls_prob_2nd_3rd.view(batch_size, -1,
                                                     cls_prob_2nd_3rd.size(1))

            cls_prob_3rd_avg = (cls_prob_1st_3rd + cls_prob_2nd_3rd +
                                cls_prob_3rd) / 3
        else:
            cls_prob_3rd_avg = cls_prob_3rd

        return rois, cls_prob_3rd_avg, bbox_pred_3rd, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, RCNN_loss_cls_2nd, RCNN_loss_bbox_2nd, RCNN_loss_cls_3rd, RCNN_loss_bbox_3rd, rois_label
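
Example #4 builds the FPN top-down pathway with _upsample_add, which is not shown here; a minimal sketch of what such a helper typically does (upsample the coarser map to the lateral map's spatial size and add them), given as an assumption rather than the repository's exact code:

import torch
import torch.nn.functional as F

def upsample_add_sketch(top, lateral):
    # resize the top-down map to the lateral map's (H, W) and sum element-wise
    _, _, h, w = lateral.size()
    return F.interpolate(top, size=(h, w), mode='bilinear', align_corners=False) + lateral

p4 = torch.randn(1, 256, 13, 19)        # coarser pyramid level
c3_lat = torch.randn(1, 256, 25, 38)    # lateral 1x1-conv output of the finer level
p3 = upsample_add_sketch(p4, c3_lat)
print(p3.shape)                         # torch.Size([1, 256, 25, 38])
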
Example #5
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        '''

        :param base_feat: shape=(b,1024,w,h), the output of the feature extractor
        :param im_info: shape=(b,3), 3=[W,H,2.2901]; the meaning of the last value (2.2901) is not entirely clear
        :param gt_boxes: shape=(b,20,5); not every image really has 20 objects -- 20 is likely just an upper bound, so not all 20 rows hold gt data
        :param num_boxes: shape=(b), b=[k,j,...]; presumably the first image of the batch has k gt boxes, the second has j, and so on
        :return:
        '''
        #base_feat.shape=(b,1024,w,h)
        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat),
                           inplace=True)  #shape=(b,512,w,h)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(
            rpn_conv1)  #shape=(b,2*9,w,h), i.e. the fg/bg score prediction for the 9 anchors at each location

        rpn_cls_score_reshape = self.reshape(rpn_cls_score,
                                             2)  #shape=(b,2,9*w,h)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape,
                                         1)  #shape=(b,2,9*w,h)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape,
                                    self.nc_score_out)  #shape=(b,2*9,w,h)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)  #shape=(b,4*9,w,h)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'
        '''
        Given the backbone (VGG) feature map, two separate convolutions predict, for every anchor, a fg/bg score (shape=(b,2*9,w,h)) and regression deltas (shape=(b,4*9,w,h)).
        '''

        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))
        # rois=output.shape=(b,2000,5)  5: [index of the image the box belongs to (0..batch_size-1), x1, y1, x2, y2]
        '''
        How the rois are produced:
        1: the RPN predicts a score and regression deltas for every anchor
        2: self.RPN_proposal first adjusts the initial anchors with the predicted deltas,
            keeps the 12000 adjusted anchors with the highest scores and runs NMS on them to get the indices of the surviving boxes,
            gathers the coordinates and foreground scores of those k surviving boxes,
            and finally keeps the 2000 highest-scoring of the k boxes
        3: the result is the RPN proposal output (2000 rois)
        '''

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))
            #rpn_cls_score.shape=(b,2*9,w,h), i.e. the fg/bg score prediction for the 9 anchors at each location
            #gt_boxes: shape=(b,20,5); 20 is just an upper bound, so not all rows hold gt data
            #im_info: shape=(b,3), 3=[W,H,2.2901]; the meaning of the last value is not entirely clear
            #num_boxes: shape=(b); number of gt boxes in each image of the batch
            '''
            rpn_data = outputs = [ labels.shape=(b,1,9*h,w),               label of every anchor: 1 positive, 0 negative, -1 ignore
                                   bbox_targets.shape=(b,9*4,h,w),         regression target of every anchor
                                   bbox_inside_weights.shape=(b,4*9,h,w),  regression inside weight of every anchor
                                   bbox_outside_weights.shape=(b,4*9,h,w)  regression outside weight of every anchor
                                 ]

            self.RPN_anchor_target assigns a label (1 positive, 0 negative, -1 ignore) and a regression target to every one of the
            9*w*h anchors (the original anchors, not the ones adjusted by the predictions), and it also limits the number of
            positives/negatives by marking the surplus as -1.
            '''

            # compute classification loss

            # rpn_cls_score_reshape.shape=(b,2,9*w,h)
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(
                    batch_size, -1,
                    2)  #shape=(b,9*w*h,2), i.e. the fg/bg scores of the 9 anchors at each location
            rpn_label = rpn_data[0].view(
                batch_size, -1)  #rpn_label.shape=(b,9*h*w): the label assigned to every anchor

            rpn_keep = Variable(
                rpn_label.view(-1).ne(-1).nonzero().view(-1))  #shape=
            '''
            rpn_label=[[ 1.,  1.,  0.,  1., -1.,  0., -1.,  1.,  0.],
                        [ 0.,  0.,  0., -1.,  1.,  1., -1.,  0.,  0.]]
            rpn_keep = [ 0,  1,  2,  3,  5,  7,  8,  9, 10, 11, 13, 14, 16, 17]  i.e. flatten rpn_label and take the indices of every entry that is not -1, which are exactly the indices of the positive and negative samples
            '''

            # below, gather the predicted scores and labels of the positive/negative samples and compute the fg/bg classification loss; only positives and negatives contribute
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                               rpn_keep)
            #rpn_cls_score.view(-1, 2).shape=(b*9*h*w,2); assuming rpn_keep.shape=(k), i.e. there are k positives+negatives in total, it holds their indices
            #rpn_cls_score.shape=(k,2): the predicted scores of all positive/negative samples in the batch (k samples in total)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            #rpn_label.shape=(k): the labels of all positive/negative samples in the batch, e.g. [1,1,0,1,0,0,...]
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            '''the rpn classification loss is computed at this point'''
            fg_cnt = torch.sum(
                rpn_label.data.ne(0))  # number of positive samples (count of non-zero entries in rpn_label)

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]
            #rpn_bbox_targets = bbox_targets.shape = (b, 9*4, h, w): regression targets of all anchors
            #rpn_bbox_inside_weights = bbox_inside_weights.shape = (b, 4*9, h, w): regression inside weights of all anchors,
            #    values 0 or 1 (1 for positives, 0 for negatives), so negatives do not contribute to the regression loss
            #rpn_bbox_outside_weights = bbox_outside_weights.shape = (b, 4*9, h, w): regression outside weights of all anchors,
            #    values 0 or 1/k, where k is the total number of positives+negatives in the batch

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            ##rpn_bbox_pred.shape=(b,4*9,w,h): predicted regression deltas for the anchors
            # compute the regression loss; only positives contribute (the loss of negatives and ignored anchors is zeroed out);
            # per image the positive losses are summed and divided by the number of positives+negatives, then averaged over the batch
            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])
            #self.rpn_loss_box is a scalar (e.g. 2.36): the regression loss averaged over the images in the batch

        return rois, self.rpn_loss_cls, self.rpn_loss_box
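
The comments in Example #5 walk through how rpn_keep drops the ignored (-1) anchors before the classification loss; the same steps as a standalone snippet, reusing the example label matrix from the comment and random scores in place of the network output:

import torch
import torch.nn.functional as F

rpn_label = torch.tensor([[1., 1., 0., 1., -1., 0., -1., 1., 0.],
                          [0., 0., 0., -1., 1., 1., -1., 0., 0.]])
rpn_cls_score = torch.randn(rpn_label.numel(), 2)   # fake per-anchor fg/bg scores

rpn_keep = rpn_label.view(-1).ne(-1).nonzero().view(-1)
print(rpn_keep.tolist())  # [0, 1, 2, 3, 5, 7, 8, 9, 10, 11, 13, 14, 16, 17]

kept_scores = torch.index_select(rpn_cls_score, 0, rpn_keep)
kept_labels = torch.index_select(rpn_label.view(-1), 0, rpn_keep).long()
loss_cls = F.cross_entropy(kept_scores, kept_labels)  # only positives/negatives contribute
print(loss_cls.item())
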
Example #6
    def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):

        n_feat_maps = len(rpn_feature_maps)

        rpn_cls_scores = []
        rpn_cls_probs = []
        rpn_bbox_preds = []
        rpn_shapes = []

        for i in range(n_feat_maps):
            feat_map = rpn_feature_maps[i]
            batch_size = feat_map.size(0)

            # return feature map after convrelu layer
            rpn_conv1 = F.relu(self.RPN_Conv(feat_map), inplace=True)
            # get rpn classification score
            rpn_cls_score = self.RPN_cls_score(rpn_conv1)

            rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
            rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
            rpn_cls_prob = self.reshape(rpn_cls_prob_reshape,
                                        self.nc_score_out)

            # get rpn offsets to the anchor boxes
            rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

            rpn_shapes.append(
                [rpn_cls_score.size()[2],
                 rpn_cls_score.size()[3]])
            rpn_cls_scores.append(
                rpn_cls_score.permute(0, 2, 3,
                                      1).contiguous().view(batch_size, -1, 2))
            rpn_cls_probs.append(
                rpn_cls_prob.permute(0, 2, 3,
                                     1).contiguous().view(batch_size, -1, 2))
            rpn_bbox_preds.append(
                rpn_bbox_pred.permute(0, 2, 3,
                                      1).contiguous().view(batch_size, -1, 4))

        rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
        rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
        rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)

        n_rpn_pred = rpn_cls_score_alls.size(1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data, im_info, cfg_key,
             rpn_shapes))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes,
                 rpn_shapes))

            # compute classification loss
            rpn_label = rpn_data[0].view(batch_size, -1)
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1, 2),
                                               0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3)

        return rois, self.rpn_loss_cls, self.rpn_loss_box
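
Example #6 flattens every pyramid level's predictions to (batch, anchors, C) before concatenating them across levels; a compact standalone illustration of that permute/view/cat pattern, with made-up level sizes:

import torch

batch_size, num_anchors = 1, 3
levels = [(32, 48), (16, 24), (8, 12)]   # (h, w) of each pyramid level
per_level_scores = [torch.randn(batch_size, num_anchors * 2, h, w) for h, w in levels]

flat = [s.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
        for s in per_level_scores]       # each: (batch, h*w*num_anchors, 2)
all_scores = torch.cat(flat, 1)          # (batch, total anchors over all levels, 2)
print([f.shape[1] for f in flat], all_scores.shape)
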
Example #7
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = self.RPN_Conv(base_feat)

        reg_loss = torch.FloatTensor([0.]).cuda()
        if self.reg_weight != 0.:
            reg_loss = (rpn_conv1 ** 2).mean() * self.reg_weight

        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # sample loc data
        bbox_deltas = rpn_bbox_pred.permute(0, 2, 3, 1).contiguous()
        ori_shape = bbox_deltas.shape
        bbox_deltas = bbox_deltas.view(batch_size, -1, 8)

        # sample loc data
        normal_dist = torch.randn(batch_size, bbox_deltas.size(1), 4).float().cuda()
        log_sigma_2 = bbox_deltas[:, :, :4]
        miu = bbox_deltas[:, :, 4:]
        sigma = torch.exp(log_sigma_2 / 2.)
        sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
        rpn_bbox_pred = sample_loc_data.view(batch_size, ori_shape[1], ori_shape[2], ori_shape[3] // 2)
        rpn_bbox_pred = rpn_bbox_pred.permute(0, 3, 1, 2).contiguous()

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        # scores is a list of foreground_scores after nms
        rois, scores = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                          im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights, sigma=3, dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box, scores, reg_loss
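
Example #7 predicts 8 values per anchor and treats them as (log sigma^2, mu) of a Gaussian over the box deltas, then samples the deltas with the reparameterization trick (noise * sigma + mu); the sampling step in isolation, with illustrative shapes:

import torch

batch_size, num_locations, sample_sigma = 2, 100, 1.0
bbox_deltas = torch.randn(batch_size, num_locations, 8)  # predicted (log sigma^2 | mu)

log_sigma_2 = bbox_deltas[:, :, :4]
mu = bbox_deltas[:, :, 4:]
sigma = torch.exp(log_sigma_2 / 2.0)                     # std dev from log-variance
noise = torch.randn(batch_size, num_locations, 4)
sampled_deltas = noise * sigma * sample_sigma + mu       # (batch, locations, 4)
print(sampled_deltas.shape)
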
Example #8
    def forward(self, data):
        #batch_size = im_data.size(0)
        if self.pathway == 'two_pathway':
            chan = data[1].shape[2]
            img_h = data[1].shape[3]
            img_w = data[1].shape[4]

            im_info = (data[0][3].view(-1, 3)).to(device="cuda")
            gt_boxes = (data[0][1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                        self.classes + 4)).to(device="cuda")
            num_boxes = (data[0][2].view(-1)).to(device="cuda")

            im_data1 = (data[0][0].view(-1, chan, img_h,
                                        img_w)).to(device="cuda")
            batch_size = im_data1.shape[0]
            im_data2 = (data[1].view(-1, chan, img_h, img_w)).to(device="cuda")

            # feed image data to base model to obtain base feature map
            #slow TSM way
            base_feat1 = self.RCNN_base1(im_data1)
            #fast non TSM way
            base_feat2 = self.RCNN_base2(im_data2)

            #changes
            base_feat = self.fuselayer(base_feat1, base_feat2)

        else:
            chan = data[0].shape[2]
            height = data[0].shape[3]
            width = data[0].shape[4]

            im_info = (data[3].view(-1, 3)).to(device="cuda")
            gt_boxes = (data[1].view(-1, cfg.MAX_NUM_GT_BOXES,
                                     self.classes + 4)).to(device="cuda")
            num_boxes = (data[2].view(-1)).to(device="cuda")

            im_data = (data[0].view(-1, chan, height, width)).to(device="cuda")
            batch_size = im_data.shape[0]

            base_feat = self.RCNN_base1(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois,
                                                 gt_boxes,
                                                 num_boxes,
                                                 val=0)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == "align":
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "crop":  #TODO
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == "pool":
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            if self.loss_type == 'focal' or self.loss_type == 'sigmoid':
                # select the corresponding columns according to roi labels
                rois_label = Variable(rois_label.view(
                    -1, self.classes))  #.long()) #modified
                rois_target = Variable(
                    rois_target.view(-1, rois_target.size(2)))
                rois_inside_ws = Variable(
                    rois_inside_ws.view(-1, rois_inside_ws.size(2)))
                rois_outside_ws = Variable(
                    rois_outside_ws.view(-1, rois_outside_ws.size(2)))
                proposal_num = torch.nonzero(rois_label)[:, 0]
                class_num = torch.nonzero(rois_label)[:, 1]
                bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                                int(bbox_pred.size(1) / 4), 4)
                bbox_pred_select = bbox_pred_view.new(bbox_pred.size(0), 1,
                                                      4).zero_()
                for i in range(proposal_num.shape[0]):
                    dup = torch.nonzero(proposal_num == proposal_num[i])
                    if (dup.shape[0] > 1):
                        bbox_pred_select[proposal_num[i]] = bbox_pred_view[
                            proposal_num[i], class_num[dup], :].mean(0)

                    else:
                        bbox_pred_select[proposal_num[i]] = bbox_pred_view[
                            proposal_num[i], class_num[i], :]

                bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        if self.loss_type == "sigmoid":
            cls_prob = torch.sigmoid(cls_score)
        if self.loss_type == "softmax":
            cls_prob = torch.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if self.loss_type == "sigmoid":
                RCNN_loss_cls = F.binary_cross_entropy_with_logits(
                    cls_score, rois_label)

            elif self.loss_type == "softmax":
                rois_label = Variable(rois_label.view(-1, self.classes).long())
                rois_label_select = rois_label.new(rois_label.size(0)).zero_()
                proposal_num = torch.nonzero(rois_label)[:, 0]
                class_num = (torch.nonzero(rois_label)[:, 1])
                rois_label_select[proposal_num] = class_num

                rois_target = Variable(
                    rois_target.view(-1, rois_target.size(2)))
                rois_inside_ws = Variable(
                    rois_inside_ws.view(-1, rois_inside_ws.size(2)))
                rois_outside_ws = Variable(
                    rois_outside_ws.view(-1, rois_outside_ws.size(2)))

                bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                                int(bbox_pred.size(1) / 4), 4)
                bbox_pred_select = torch.gather(
                    bbox_pred_view, 1,
                    rois_label_select.view(rois_label_select.size(0), 1,
                                           1).expand(rois_label_select.size(0),
                                                     1, 4))
                bbox_pred = bbox_pred_select.squeeze(1)
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label_select)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, rois_label
        else:
            return rois, cls_prob, bbox_pred
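
In the softmax branch of Example #8, rois_label arrives as a (num_rois, num_classes) indicator matrix and is converted to per-roi class indices via torch.nonzero before F.cross_entropy; that conversion in a standalone form, assuming exactly one non-zero entry per row:

import torch
import torch.nn.functional as F

num_rois, num_classes = 6, 5
rois_label = torch.zeros(num_rois, num_classes)
rois_label[torch.arange(num_rois), torch.tensor([0, 2, 1, 4, 3, 0])] = 1.0

nz = torch.nonzero(rois_label)              # (num_rois, 2): [roi index, class index]
proposal_num, class_num = nz[:, 0], nz[:, 1]
rois_label_select = rois_label.new_zeros(num_rois).long()
rois_label_select[proposal_num] = class_num

cls_score = torch.randn(num_rois, num_classes)
loss = F.cross_entropy(cls_score, rois_label_select)
print(rois_label_select.tolist(), loss.item())
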
Example #9
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        '''

        :param im_data: shape=(b,3,W,H)
        :param im_info: shape=(b,3), 3=[W,H,2.2901]; the meaning of the last value (2.2901) is not entirely clear
        :param gt_boxes: shape=(b,20,5); the shape is fixed at (b,20,5), so not every image really has 20 objects -- the first n rows hold each image's gt boxes and the remaining 20-n rows are all zero
        :param num_boxes: shape=(b), b=[k,j,...]; presumably the first image of the batch has k gt boxes, the second has j, and so on
        :return:
        '''
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        #base_feat.shape=(b,1024,w,h); w and h are 1/16 of the original image size
        '''at this point we have the output of the backbone feature extractor'''

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
        '''
        #rois=output.shape=(b,2000,5)  5: [index of the image the box belongs to (0..batch_size-1), x1, y1, x2, y2]
        #rpn_loss_cls: classification loss of the RPN, computed only on positive and negative anchors (use the assigned labels to get
        #        the indices of the positives/negatives, gather their predicted scores and their labels (1/0), then take the cross entropy)
        #rpn_loss_box: a scalar (e.g. 2.36), the regression loss averaged over the images in the batch

        At this point the RPN is done: it has produced 2000 proposals and the anchor classification and regression losses.
        '''



        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            #rois.shape=(b,2000,5) 5: [index of the image the box belongs to (0..batch_size-1), x1, y1, x2, y2]
            #gt_boxes: shape=(b,20,5); the shape is fixed and 20 is just an upper bound, so not all rows hold gt data
            #num_boxes: shape=(b), b=[k,j,...]; number of gt boxes in each image of the batch

            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            '''
            rois.shape=(b, 128, 5): the 128 roi boxes per image (picked from the 2000 proposals predicted by the RPN); the first of the 5 values is the index i of the image within the batch
            rois_label=labels.shape=(b, 128): class target of the 128 (positive+negative) samples per image, i.e. the class of the gt with the highest IoU (the concrete class, not just fg/bg)
            rois_target=bbox_targets.shape=(b,128,4): regression targets of the positive and negative samples (the targets of the negatives are set to 0 inside self._get_bbox_regression_labels_pytorch)
            rois_inside_ws=bbox_inside_weights.shape=(b,128,4): [1,1,1,1] for positives, [0,0,0,0] for negatives
            rois_outside_ws=bbox_outside_weights.shape=(b,128,4): [1,1,1,1] for positives, [0,0,0,0] for negatives
            '''

            rois_label = Variable(rois_label.view(-1).long())  # shape=(b*128): class labels of the 128 rois per image that are fed to the rcnn head
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))  # shape=(b*128,4): regression targets of the 128 rois per image
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))  # shape=(b*128,4): inside weights (see the loss formula); [1,1,1,1] for positives, [0,0,0,0] for negatives
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))  # shape=(b*128,4): outside weights (see the loss formula); [1,1,1,1] for positives, [0,0,0,0] for negatives
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        #train : rois.shape=(b, 128, 5): the 128 roi boxes per image picked from the 2000 RPN proposals; the first of the 5 values is the index i of the image within the batch
        #test : rois.shape=(b,2000,5)  5: [index of the image the box belongs to (0..batch_size-1), x1, y1, x2, y2]


        # do roi pooling based on predicted rois
        '''
        The roi features are extracted from the feature map with 'align' (even though cfg.POOLING_MODE defaults to 'crop', it is apparently overridden to 'align' somewhere earlier).
        '''
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            # base_feat.shape=(b,1024,w,h); w and h are 1/16 of the original image size;    base_feat.size()[2:] = [w,h]


            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':  # this is the branch that runs
            #base_feat.shape=(b,1024,w,h); w and h are 1/16 of the original image size
            #train : rois.view(-1,5).shape=(b*128,5)   test : rois.view(-1,5).shape=(b*2000,5)
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            #train : pooled_feat.shape=(b*128,1024,7,7)  test : pooled_feat.shape=(b*2000,1024,7,7)
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        '''considering training only: pooled_feat.shape=(b*128,1024,7,7)'''
        pooled_feat = self._head_to_tail(pooled_feat)  # the parent class (faster rcnn) calls the head function defined in the subclass (e.g. the VGG backbone class)
        ##pooled_feat.shape=(b*128,2048)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)  # bbox_pred.shape=(b*128,21*4)
        # in the training runs observed here self.class_agnostic is False, so regression values are predicted per class
        if self.training and not self.class_agnostic:  # self.class_agnostic=False
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)  # bbox_pred_view.shape=(b*128,21,4)
            # rois_label.shape=(b*128): class labels of the 128 rois per image that are fed to the rcnn head
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            # bbox_pred_select.shape=(b*128,1,4): from the per-class regression values (21,4), take the row for the class matched to this roi (i.e. its class target)
            bbox_pred = bbox_pred_select.squeeze(1)
            #bbox_pred.shape=(b*128,4)

        '''
        train: bbox_pred.shape=(b*128,4)
        test:  bbox_pred.shape=(b*2000, 21*4)
        '''

        '''at this point we have the rcnn regression predictions for the 128 rois (for their target class only: if a roi's class target is "car", the 4 values for the car class are taken out of the 21*4 predictions)'''


        # compute object classification probability

        ##pooled_feat.shape=(b*128,2048)
        cls_score = self.RCNN_cls_score(pooled_feat)  # shape=(b*128,21): predicted score for each class
        cls_prob = F.softmax(cls_score, 1)  # shape=(b*128,21): predicted probability of each class

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            ##bbox_pred.shape=(b*128,4): predicted regression values of each roi (for its class target)
            ##rois_target.shape=(b*128,4): regression targets of the 128 rois per image that are fed to the rcnn head
            #rois_inside_ws.shape=(b*128,4): inside weights (see the loss formula); [1,1,1,1] for positives, [0,0,0,0] for negatives
            #rois_outside_ws.shape=(b*128,4): outside weights (see the loss formula); [1,1,1,1] for positives, [0,0,0,0] for negatives


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)  # shape=(b,128,21): predicted class probabilities; at test time shape=(b,2000,21)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)  # shape=(b,128,4): predicted regression values of each roi (for its class target)
        # at test time: bbox_pred.shape=(b,2000,21*4)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
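
The comments in Example #9 note that pooled_feat comes out as (b*128, 1024, 7, 7) when the rois are given as (batch_index, x1, y1, x2, y2) rows on a 1/16-resolution feature map; the same kind of pooling can be reproduced independently with torchvision's roi_align (shown here as a sketch, not the repository's RCNN_roi_align module):

import torch
from torchvision.ops import roi_align

base_feat = torch.randn(2, 1024, 38, 50)            # (b, C, h, w), h and w are 1/16 of the image
rois = torch.tensor([[0., 16., 16., 160., 240.],    # [batch index, x1, y1, x2, y2] in image coords
                     [1., 32., 48., 320., 400.]])

pooled = roi_align(base_feat, rois, output_size=(7, 7), spatial_scale=1.0 / 16)
print(pooled.shape)                                 # torch.Size([2, 1024, 7, 7])
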
Example #10
    def forward(self, base_feat, gt_twins
                ):  # [(1, 512, 96, 7, 7), (1, 20, 3)]; the first two columns of gt_twins are the start/end frames, the third is the label

        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv1(base_feat), inplace=True)  # tx(1)
        rpn_conv2 = F.relu(self.RPN_Conv2(rpn_conv1), inplace=True)  # tx(1)
        rpn_output_pool = self.RPN_output_pool(
            rpn_conv2
        )  # (1,512,96,1,1); this is the second network, the Temporal Proposal Subnet (similar to an RPN)

        rpn_output_pool = F.relu(self.Conv_up(rpn_output_pool),
                                 inplace=True)  # tx(3)  (1,512,128,1,1)

        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(
            rpn_output_pool)  # (1,512,96,1,1)->(1,20,96,1,1) binary (fg/bg) classification
        rpn_cls_score_reshape = self.reshape(rpn_cls_score,
                                             2)  # (1,20,96,1,1)->(1,2,960,1,1)
        rpn_cls_prob_reshape = F.softmax(
            rpn_cls_score_reshape,
            dim=1)  # softmax over dim 1, so the two scores at each position sum to 1; (1,2,960,1,1)
        rpn_cls_prob = self.reshape(
            rpn_cls_prob_reshape,
            self.nc_score_out)  #(1,2,960,1,1)->(1,20,96,1,1)

        # get rpn offsets to the anchor twins
        rpn_twin_pred = self.RPN_twin_pred(
            rpn_output_pool)  # (1,512,96,1,1)->(1,20,96,1,1) regression (center, length)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        # rois = self.RPN_proposal((rpn_cls_prob.data, rpn_twin_pred.data, cfg_key))
        if self.out_scores:  # False
            rois, rois_score = self.RPN_proposal(
                (rpn_cls_prob.data, rpn_twin_pred.data, cfg_key))
        else:  # (regression)
            rois = self.RPN_proposal(
                (rpn_cls_prob.data, rpn_twin_pred.data, cfg_key)
            )  # (1,2000,3): the first (<960) rows are foreground proposals -- column 0 is all zeros (a placeholder for the later 21-class label) and the last two columns are the candidate start/end frames; rows 960..2000 are all zero, presumably background
        self.rpn_loss_cls = 0
        self.rpn_loss_twin = 0
        self.rpn_loss_mask = 0
        self.rpn_label = None

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_twins is not None
            # rpn_data = [label_targets, twin_targets, twin_inside_weights, twin_outside_weights]
            # label_targets: (batch_size, 1, A * length, height, width)
            # twin_targets: (batch_size, A*2, length, height, width), the same as twin_inside_weights and twin_outside_weights
            # (binary classification)
            #                                 (1,20,96,1,1) binary scores,  (1, 20, 3) gt_twins: first two columns are the true start/end frames, the third is the label
            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_twins)
            )  # rpn_data  0:(1, 1, 960, 1, 1) labels  1:(1, 20, 96, 1, 1) twin regression targets  2:(1,20,96,1,1) inside weights  3:(1,20,96,1,1) outside weights

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 4,
                1).contiguous().view(batch_size, -1,
                                     2)  # (1,960,2) binary classification; the two columns are the fg/bg scores

            self.rpn_label = rpn_data[0].view(
                batch_size,
                -1)  # (1, 960): the first 876 entries are the processed labels (1, 0, -1); the rest are padded with -1
            rpn_keep = Variable(
                self.rpn_label.view(-1).ne(-1).nonzero().view(
                    -1))  # (256): indices of all entries that are not -1, i.e. the kept positive and negative anchors
            rpn_cls_score = torch.index_select(
                rpn_cls_score.view(-1, 2), 0, rpn_keep
            )  # (256,2): fg/bg scores of the 256 kept anchors (rows of rpn_cls_score selected by the indices in rpn_keep)
            self.rpn_label = torch.index_select(
                self.rpn_label.view(-1), 0, rpn_keep.data
            )  # (256): labels of the 256 kept anchors (the -1 entries have been filtered out)
            self.rpn_label = Variable(
                self.rpn_label.long())  # (256): the kept 1/0 labels as long
            #          (256,2) predicted fg/bg scores of the kept anchors,  (256) their ground-truth 1/0 labels
            self.rpn_loss_cls = F.cross_entropy(
                rpn_cls_score, self.rpn_label
            )  # (binary classification loss) cross entropy: the closer the predictions are to the true labels, the smaller the loss
            fg_cnt = torch.sum(self.rpn_label.data.ne(0))
            # ground-truth window targets; inside weights are 1 only for the (few) foreground entries; outside weights are mostly close to 0 (background)
            rpn_twin_targets, rpn_twin_inside_weights, rpn_twin_outside_weights = rpn_data[
                1:]  # each (1, 20, 96, 1, 1), window regression

            # compute twin regression loss
            rpn_twin_inside_weights = Variable(rpn_twin_inside_weights)
            rpn_twin_outside_weights = Variable(rpn_twin_outside_weights)
            rpn_twin_targets = Variable(rpn_twin_targets)
            #                                    predicted window offsets,  ground-truth window targets,  inside weights (1 for the few foreground entries),  outside weights (mostly 0, background)
            self.rpn_loss_twin = _smooth_l1_loss(
                rpn_twin_pred,
                rpn_twin_targets,
                rpn_twin_inside_weights,
                rpn_twin_outside_weights,
                sigma=3,
                dim=[1, 2, 3, 4])  # (window regression loss) smooth L1; the smaller the loss, the more accurate the predictions

        if self.out_scores:  # False
            return rois, rois_score, rpn_cls_prob, rpn_twin_pred, self.rpn_loss_cls, self.rpn_loss_twin, self.rpn_label, self.rpn_loss_mask
        else:
            # (1,2000,3),(1,20,96,1,1),(1,20,96,1,1), (1),               (1),                (256),          (1) =0
            return rois, rpn_cls_prob, rpn_twin_pred, self.rpn_loss_cls, self.rpn_loss_twin, self.rpn_label, self.rpn_loss_mask
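
Example #10 applies the same reshape-then-softmax idea to 5-D temporal score maps, going from (1, 2*A, L, 1, 1) to (1, 2, A*L, 1, 1) and back; a standalone check of those shape transitions with A=10 anchors and L=96 temporal positions, matching the (1,20,96,1,1) comments:

import torch
import torch.nn.functional as F

def reshape_5d(x, d):
    # (b, c, l, h, w) -> (b, d, c*l/d, h, w), element count unchanged
    b, c, l, h, w = x.size()
    return x.view(b, d, (c * l) // d, h, w)

rpn_cls_score = torch.randn(1, 2 * 10, 96, 1, 1)   # (1, 20, 96, 1, 1)
score_reshape = reshape_5d(rpn_cls_score, 2)       # (1, 2, 960, 1, 1)
prob_reshape = F.softmax(score_reshape, dim=1)     # fg/bg sum to 1 at every position
rpn_cls_prob = reshape_5d(prob_reshape, 2 * 10)    # back to (1, 20, 96, 1, 1)
print(score_reshape.shape, rpn_cls_prob.shape)
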
Example #11
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        # shape of base_feat: batch_size, channel ,h , w

        batch_size = base_feat.size(0)

        # apply one convolution
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat),
                           inplace=True)  # shape : batch_size, 512 ,h , w

        rpn_cls_score = self.RPN_cls_score(
            rpn_conv1)  # shape : batch_size, 18(9*2) ,h , w

        rpn_cls_score_reshape = self.reshape(
            rpn_cls_score, 2)  # shape : batch_size, 2 ,h*9 , w
        rpn_cls_prob_reshape = F.softmax(
            rpn_cls_score_reshape,
            1)  # softmax over dim 1  # shape : batch_size, 2 ,h*9 , w
        rpn_cls_prob = self.reshape(
            rpn_cls_prob_reshape,
            self.nc_score_out)  # shape : batch_size, 9*2 ,h , w

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(
            rpn_conv1)  # shape : batch_size, 9*4 ,h , w

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key
             ))  # shape batch_size, post_nms_topN , 5 ; 0 of 5 is batch_idx

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                               rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
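The reshape call used above is typically a small helper that folds the anchor dimension into the height axis so the softmax can run over exactly two channels. A minimal sketch, assuming the usual (b, c, h, w) layout (the function name here is mine, not the repository's):

    import torch

    def reshape_sketch(x, d):
        # (b, c, h, w) -> (b, d, c*h/d, w): keep d channels and fold the rest into the height axis
        b, c, h, w = x.size()
        return x.view(b, int(d), (c * h) // int(d), w)

    rpn_cls_score = torch.randn(1, 18, 36, 57)                          # hypothetical 9-anchor score map
    print(reshape_sketch(rpn_cls_score, 2).shape)                       # torch.Size([1, 2, 324, 57])
    print(reshape_sketch(reshape_sketch(rpn_cls_score, 2), 18).shape)   # back to [1, 18, 36, 57]

So reshape(rpn_cls_score, 2) turns (b, 9*2, h, w) into (b, 2, 9*h, w) for the softmax, and reshape(..., self.nc_score_out) undoes it afterwards.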
Пример #12
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        # shape:  im_data [1,c,w,h]  im_info[1,3]   gt_boxes[1,20,5]  num_boxes[1]
        batch_size = im_data.size(0)
        # im_data is the raw image blob, e.g. [1, 3, 850, 600]

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # At test time rois is [1, 300, 5]; the first of the 5 columns is all zeros and is only
        # the batch index, not a roi label. gt_boxes is (x, 5), where x is the number of objects.
        # do roi pooling based on predicted rois
        # POOLING_MODE = align
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        # feed pooled features to the top model
        pooled_feat = self.head_to_tail(pooled_feat)

        # compute bbox offsets from the RoI-pooled features
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels (see the gather sketch after this example)
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)
        # at test time cls_score is [300, 21] and bbox_pred is [300, 84]
        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)  # at test time [1, 300, 21]
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)  # at test time [1, 300, 84]

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
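The torch.gather step above picks, for every RoI, only the 4 regression values belonging to its assigned class. A standalone illustration with hypothetical sizes (128 RoIs, 21 classes):

    import torch

    num_rois, num_classes = 128, 21                       # hypothetical sizes
    bbox_pred = torch.randn(num_rois, num_classes * 4)    # (128, 84)
    rois_label = torch.randint(0, num_classes, (num_rois,))

    bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)
    index = rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4)
    bbox_pred_cls = torch.gather(bbox_pred_view, 1, index).squeeze(1)   # (128, 4)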
Пример #13
0
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        #get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)
        # To ease the softmax, [1, 18, 36, 57] is reshaped to [1, 2, 36*9, 57]: the 9 anchors are stacked
        # along the height axis (36*9 = 324 rows, 57 columns, depth 2), the softmax runs over the 2 fg/bg values
        # at each position, and the result is reshaped back; the 18 channels are the 9 anchors' consecutive
        # foreground scores followed by their consecutive background scores.
        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)
        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)
        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'
        # _ProposalLayer takes the predicted rpn scores and box deltas together with im_info;
        # it generates anchors at every feature-map position, then applies the deltas, sorts, clips and NMSes them.
        # It returns only rois and no loss; losses are used only during training and come from _AnchorTargetLayer.
        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))
        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generate training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            # _AnchorTargetLayer takes the rpn cls scores, gt_boxes, im_info and num_boxes; it likewise
            # generates anchors at every feature-map position and assigns them labels and regression targets.
            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss  [1, 2, 36*9, 57]->[1, 36*9, 57, 2]->[1, 36*9*57, 2]
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)  # [1, 17649]
            # torch.ne(input, other) -> Tensor: 1 where input != other
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(
                -1))  # drop the -1 (ignored) labels; the kept indices cover all sampled fg/bg anchors (see the sketch after this example)
            rpn_cls_score = torch.index_select(rpn_cls_score.view(
                -1, 2), 0, rpn_keep)  # select the kept scores along dim 0 of the [17649, 2] tensor
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep)  # select the matching labels with the same indices
            rpn_label = Variable(rpn_label.long())
            # Loss: the RPN only predicts whether an anchor is foreground or background, so this is binary
            # classification over [b*9*w*h, 2] scores and [b*9*w*h] labels; F.cross_entropy applies softmax internally.
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]  # the remaining outputs of the anchor target layer

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
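The index_select bookkeeping above exists only to drop anchors labelled -1 ("don't care") before the cross-entropy. A self-contained illustration with made-up labels; on recent PyTorch the same effect can be had with ignore_index:

    import torch
    import torch.nn.functional as F

    scores = torch.randn(12, 2)                                      # hypothetical anchor scores
    labels = torch.tensor([-1, 0, 1, -1, 1, 0, -1, -1, 0, 1, -1, 0])

    keep = labels.ne(-1).nonzero().view(-1)                          # indices of sampled anchors
    loss_manual = F.cross_entropy(scores[keep], labels[keep])

    loss_ignore = F.cross_entropy(scores, labels, ignore_index=-1)   # equivalent shortcut
    assert torch.allclose(loss_manual, loss_ignore)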
Пример #14
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)  # softmax over classes

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
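RCNN_roi_pool above consumes rois flattened to (K, 5) rows of (batch_idx, x1, y1, x2, y2). The repository ships its own pooling op; as an assumption for illustration only, a present-day equivalent with torchvision's roi_pool would look like this:

    import torch
    from torchvision.ops import roi_pool

    base_feat = torch.randn(1, 512, 38, 50)               # hypothetical conv feature map (stride 16)
    rois = torch.tensor([[0., 32., 32., 160., 160.],      # (batch_idx, x1, y1, x2, y2) in image coordinates
                         [0., 64., 16., 320., 240.]])

    pooled = roi_pool(base_feat, rois, output_size=(7, 7), spatial_scale=1.0 / 16)
    print(pooled.shape)                                   # torch.Size([2, 512, 7, 7])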
Пример #15
0
    def forward(self, video_data, gt_twins):
        batch_size = video_data.size(0)
        # print(batch_size)

        gt_twins = gt_twins.data
        # prepare data
        video_data = self.prepare_data(video_data)  # video_data is transformed here, shape (1, 3, 768, 112, 112)
        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(video_data)  # the first five C3D blocks give a 512 x L/8 x H/16 x W/16 feature map, here (1, 512, 96, 7, 7)

        # feed base feature map to RPN to obtain rois
        # outputs: rois (1, 2000, 3), rpn_loss_cls (1), rpn_loss_twin (1); inputs: base_feat (1, 512, 96, 7, 7) and
        # gt_twins (1, 20, 3), whose first two columns are the start/end frames and whose third column is the label
        rois, _, _, rpn_loss_cls, rpn_loss_twin, _, _ = self.RCNN_rpn(base_feat, gt_twins)  # the RPN produces the rois
        # in rois, roughly the first ~960 rows are foreground: column 0 is all zeros (the slot for the class label)
        # and the last two columns are the proposal's start/end frames; rows up to 2000 are all zeros, presumably background
        # if it is training phase, then use ground truth twins for refining
        if self.training:   # taken during training (roughly: samples the rois and builds their targets)
            roi_data = self.RCNN_proposal_target(rois, gt_twins)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            # (1,128,3),(1,128),(1,128,2), (1,128,2),      (1,128,2)
            rois_label = Variable(rois_label.view(-1).long())   # (128)
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))   # (128,2)
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))  # (128,2)
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))   # (128,2)
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_twin = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'pool':  # True
            pooled_feat = self.RCNN_roi_temporal_pool(base_feat, rois.view(-1, 3))  # (128, 512, 4, 2, 2)
        if cfg.USE_ATTENTION:   # False
            pooled_feat = self.RCNN_attention(pooled_feat)
        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)   # fc head, output (128, 4096)

        # compute twin offsets; with 21 classes twin_pred is (128, 2 * 21) = (128, 42)
        twin_pred = self.RCNN_twin_pred(pooled_feat)    # nn.Linear(4096, 2 * 21)

        if self.training:   # taken during training
            # select the corresponding columns according to roi labels, twin_pred will be (128, 2)
            twin_pred_view = twin_pred.view(twin_pred.size(0), int(twin_pred.size(1) / 2), 2)
            twin_pred_select = torch.gather(twin_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 2))
            twin_pred = twin_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)    # nn.Linear(4096, 21)
        cls_prob = F.softmax(cls_score, dim=1)  # multi-class probabilities

        if DEBUG:   # False
            print("tdcnn.py--base_feat.shape {}".format(base_feat.shape))
            print("tdcnn.py--rois.shape {}".format(rois.shape))
            print("tdcnn.py--tdcnn_tail.shape {}".format(pooled_feat.shape))
            print("tdcnn.py--cls_score.shape {}".format(cls_score.shape))
            print("tdcnn.py--twin_pred.shape {}".format(twin_pred.shape))

        RCNN_loss_cls = 0
        RCNN_loss_twin = 0

        if self.training:   # taken during training
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)  # (multi-class classification loss)

            # bounding box regression L1 loss
            RCNN_loss_twin = _smooth_l1_loss(twin_pred, rois_target, rois_inside_ws, rois_outside_ws)   # (twin regression loss)

            # RuntimeError caused by mGPUs and higher pytorch version: https://github.com/jwyang/faster-rcnn.pytorch/issues/226
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_twin = torch.unsqueeze(rpn_loss_twin, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_twin = torch.unsqueeze(RCNN_loss_twin, 0)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)  # per-roi classification probabilities
        twin_pred = twin_pred.view(batch_size, rois.size(1), -1)   # per-roi twin regression outputs

        if self.training:   # taken during training
            return rois, cls_prob, twin_pred, rpn_loss_cls, rpn_loss_twin, RCNN_loss_cls, RCNN_loss_twin, rois_label
        else:
            return rois, cls_prob, twin_pred
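The rois_target values above are 2-value regression targets for temporal proposals ("twins"): a normalized center shift and a log-length ratio, analogous to the (dx, dy, dw, dh) encoding for boxes. A hedged sketch of that parameterization; the repository's own twin_transform may differ in detail:

    import torch

    def twin_transform_sketch(ex_twins, gt_twins):
        # ex_twins, gt_twins: (N, 2) tensors of (start_frame, end_frame)
        ex_len = ex_twins[:, 1] - ex_twins[:, 0] + 1.0
        ex_ctr = ex_twins[:, 0] + 0.5 * ex_len
        gt_len = gt_twins[:, 1] - gt_twins[:, 0] + 1.0
        gt_ctr = gt_twins[:, 0] + 0.5 * gt_len

        d_ctr = (gt_ctr - ex_ctr) / ex_len          # normalized center shift
        d_len = torch.log(gt_len / ex_len)          # log-length ratio
        return torch.stack((d_ctr, d_len), dim=1)   # (N, 2) regression targets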
Пример #16
0
    def _forward_train_joint(self, frame_1_box, frame_2, frame_2_box, num_box):
        """
        This function uses the rcnn_base_mv branch and the rcnn_base_residual branch. The following data are all Variables.
        We are trying to predict the offset between the boxes in frame 1 and frame 2 (i.e. frame_1_box and
        frame_2_box). We crop the feature using PSRoIPooling based frame_1_box to predict the offsets.

        :param frame_1_box: 3D tensor, bs x num_box x 6, each row is (x1, y1, x2, y2, class_id, target_id)
        :param frame_2: 4D tensor, bs x 5 x h x w, the 0:2 channel is motion vector, 2:5 channel is the residual
        :param frame_2_box: 3D tensor, bs x num_box x 6, each row is (x1, y1, x2, y2, class_id, target_id)
        :param num_box: 1D tensor, [bs], the number of gt boxes in different frames. Note that the boxes in
            the two frames of one pair are the same.

        """
        # set a trap: if the weights of this layer contain NaN, pause here
        # (uses the fact that nan != nan is true)
        feature_add_conv_weight = self.feature_add_conv.state_dict()['weight']
        if (feature_add_conv_weight != feature_add_conv_weight).sum() > 0:
            print('\n there is nan in the weight of one layer\n')
            pdb.set_trace()

        batch_size = frame_2.size()[0]

        # get the base features
        feat_mv = self.RCNN_base_mv(frame_2[:, 0:2, :, :].contiguous())
        feat_residual = self.RCNN_base_residual(frame_2[:, 2:5].contiguous())

        # concatenate the features
        base_feat = torch.cat((feat_mv, feat_residual), dim=1)
        base_feat = self.feature_add_conv(base_feat)
        base_feat_loc = self.RCNN_bbox_base(base_feat)

        # PSRoIPooling
        frame_1_box_tmp = frame_1_box.data.contiguous(
        )  # [bs, num_box, 6], each row is [x1, y1, x2, y2, class_id, target_id]
        frame_2_box_tmp = frame_2_box.data.contiguous()  # [bs, num_box, 6]

        # (1) generate rois
        rois_1 = frame_1_box_tmp.new(
            batch_size,
            frame_1_box_tmp.size()[1],
            5).zero_()  # each row is [batch_index, x1, y1, x2, y2]
        rois_1[:, :, 1:5] = frame_1_box_tmp[:, :, 0:4].clone()
        for bs_idx in range(batch_size):
            rois_1[bs_idx, :, 0] = bs_idx
        rois_1 = Variable(rois_1)

        # (2) pooling to get the offset
        pooled_feat_loc = self.RCNN_psroi_pool_loc(
            base_feat_loc,
            rois_1.view(-1, 5))  # [num_box, 4, pooled_size, pooled_size]
        bbox_pred = self.pooling(pooled_feat_loc)  # [num_box, 4, 1, 1]
        bbox_pred = bbox_pred.squeeze()  # [num_box, 4]

        bbox_pred = bbox_pred.view(batch_size, -1, 4)

        # compute the box regression target
        rois_1 = frame_1_box_tmp[:, :, 0:4].clone().contiguous()
        rois_2 = frame_2_box_tmp[:, :, 0:4].clone().contiguous()

        regression_targets = self._compute_bbox_targets(rois_1, rois_2)

        # compute the inside weights and outside weights
        num_box_1_tmp = num_box.data.int()
        # if (num_box_1_tmp == 0).sum() > 0:
        #     a = 1

        inside_weight = regression_targets.new(batch_size,
                                               regression_targets.size(1),
                                               4).zero_()
        outside_weight = regression_targets.new(batch_size,
                                                regression_targets.size(1),
                                                4).zero_()
        for bs_idx in range(batch_size):
            if num_box_1_tmp[bs_idx] > 0:
                inside_weight[bs_idx, 0:num_box_1_tmp[bs_idx], :] = 1
                outside_weight[
                    bs_idx,
                    0:num_box_1_tmp[bs_idx], :] = 1.0 / num_box_1_tmp[bs_idx]

        # get the loss
        regression_targets = Variable(regression_targets)
        inside_weight = Variable(inside_weight)
        outside_weight = Variable(outside_weight)
        loss_bbox = _smooth_l1_loss(bbox_pred,
                                    regression_targets,
                                    inside_weight,
                                    outside_weight,
                                    dim=[2, 1])

        # #
        # outside_weight = outside_weight.data
        # outside_weight[outside_weight > 0] = 1
        # outside_weight = Variable(outside_weight)
        # bbox_pred = bbox_pred.view(-1, 4)
        # regression_targets = regression_targets.view(-1, 4)
        # inside_weight = inside_weight.view(-1, 4)
        # outside_weight = outside_weight.view(-1, 4)
        # loss_bbox_1 = _smooth_l1_loss(bbox_pred, regression_targets, inside_weight, outside_weight)

        return bbox_pred, loss_bbox
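_compute_bbox_targets encodes the offset between the frame-1 and frame-2 boxes. The sketch below assumes the standard center/size parameterization (the actual helper may differ; padded all-zero boxes would yield invalid values here, which is presumably why the inside/outside weights above zero them out):

    import torch

    def compute_bbox_targets_sketch(rois_1, rois_2):
        # rois_1, rois_2: (bs, num_box, 4) tensors of (x1, y1, x2, y2)
        w1 = rois_1[..., 2] - rois_1[..., 0] + 1.0
        h1 = rois_1[..., 3] - rois_1[..., 1] + 1.0
        cx1 = rois_1[..., 0] + 0.5 * w1
        cy1 = rois_1[..., 1] + 0.5 * h1

        w2 = rois_2[..., 2] - rois_2[..., 0] + 1.0
        h2 = rois_2[..., 3] - rois_2[..., 1] + 1.0
        cx2 = rois_2[..., 0] + 0.5 * w2
        cy2 = rois_2[..., 1] + 0.5 * h2

        dx = (cx2 - cx1) / w1
        dy = (cy2 - cy1) / h1
        dw = torch.log(w2 / w1)
        dh = torch.log(h2 / h1)
        return torch.stack((dx, dy, dw, dh), dim=-1)   # (bs, num_box, 4)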
Пример #17
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, fg_scores, rpn_reg_loss = \
            self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        rpn_prior_loss = torch.FloatTensor([0.]).cuda()

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            if self.rpn_prior_weight != 0.:
                for i in range(batch_size):
                    gt_num = num_boxes[i].detach().cpu().item()
                    score = fg_scores[i]
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)  # p * log(p)
                    rpn_prior_loss += (-1. * log_score.sum() / float(gt_num))

                rpn_prior_loss /= batch_size
                rpn_prior_loss *= self.rpn_prior_weight
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = torch.FloatTensor([0.]).cuda()
            rpn_loss_bbox = torch.FloatTensor([0.]).cuda()

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        head_reg_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_reg_weight != 0.:
            head_reg_loss = (pooled_feat**2).mean() * self.head_reg_weight

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # sample loc data
        normal_dist = torch.randn(bbox_pred.size(0), 4).float().cuda()
        log_sigma_2 = bbox_pred[:, :4]
        miu = bbox_pred[:, 4:]
        sigma = torch.exp(log_sigma_2 / 2.)
        sample_loc_data = normal_dist * sigma * self.sample_sigma + miu
        bbox_pred = sample_loc_data

        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = torch.FloatTensor([0.]).cuda()
        RCNN_loss_bbox = torch.FloatTensor([0.]).cuda()

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        head_prior_loss = torch.FloatTensor([0.]).cuda()
        if self.training and self.head_prior_weight != 0.:
            scores = cls_prob.data  # [batch, num_rois, classes]
            scores_gradient = cls_prob  # [batch, num_rois, classes]
            boxes = rois.data[:, :, 1:5]  # [batch, num_rois, 4]
            if cfg.TRAIN.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # [batch, num_rois, 4]
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if self.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(batch_size, -1,
                                                     4 * len(self.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, batch_size)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, batch_size)
            else:
                # Simply repeat the boxes, once for each class
                print("no use bbox head in IB")
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            pred_boxes /= im_info[:, 2].data[:, None,
                                             None]  # [batch, num_rois, 4]
            loss_count = 0.
            gt_classes = gt_boxes[:, :, -1].data  # [batch, num(0 pad to 20)]
            for i in range(batch_size):
                for j in range(1, len(self.classes)):  # skip background class
                    if not (gt_classes[i] == j).any():  # no such class in gt
                        continue
                    # there are gt for this class
                    inds = torch.nonzero(
                        scores[i, :, j] > self.nms_threshold).view(-1)
                    if inds.numel() == 0:
                        continue
                    cls_scores = scores[i, :, j][inds]  # [num]
                    cls_scores_gradient = scores_gradient[i, :, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if self.class_agnostic:
                        cls_boxes = pred_boxes[i, inds, :]  # [num, 4]
                    else:
                        cls_boxes = pred_boxes[i, inds][:, j * 4:(j + 1) * 4]
                    cls_scores_gradient = cls_scores_gradient[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    score = cls_scores_gradient[keep.view(
                        -1).long()]  # [num_keep]
                    gt_num = (gt_classes[i] == j).sum().detach().cpu().item()
                    if score.size(0) <= gt_num:
                        continue
                    score_sum = score.sum().detach().cpu().item()
                    score = score / score_sum
                    log_score = score * torch.log(score + 1e-6)
                    head_prior_loss += (-1. * log_score.sum() / float(gt_num))
                    loss_count += 1.

            head_prior_loss /= loss_count
            head_prior_loss *= self.head_prior_weight

        return rois, cls_prob, bbox_pred, \
               rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
               rpn_prior_loss, rpn_reg_loss, head_prior_loss, head_reg_loss
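The head in this last example predicts a distribution over box offsets rather than point estimates: the first four columns of bbox_pred are read as log sigma^2, the last four as the mean, and the reparameterized sample eps * sigma * sample_sigma + mu replaces bbox_pred. A standalone illustration of that sampling step (sizes are hypothetical, and sample_sigma stands in for self.sample_sigma):

    import torch

    num_rois = 128
    head_out = torch.randn(num_rois, 8)       # hypothetical head output: [log(sigma^2) | mu]
    sample_sigma = 0.5                        # scaling factor, plays the role of self.sample_sigma

    log_sigma_2 = head_out[:, :4]
    mu = head_out[:, 4:]
    sigma = torch.exp(log_sigma_2 / 2.0)      # sigma = exp(log(sigma^2) / 2)
    eps = torch.randn_like(mu)                # eps ~ N(0, I)
    sampled_deltas = eps * sigma * sample_sigma + mu   # reparameterized sample used as the box offsets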