Example #1
    def detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        return RCNN_loss_cls, RCNN_loss_bbox
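Note: the `_smooth_l1_loss` helper used above (and throughout the examples below) is not defined in these snippets. For reference, a common implementation in popular PyTorch Faster R-CNN ports looks like the sketch below; the exact weighting and reduction are assumptions about this codebase.

import torch

def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=[1]):
    # smooth L1: 0.5 * (sigma * x)^2 if |x| < 1/sigma^2, else |x| - 0.5/sigma^2
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    in_box_diff = bbox_inside_weights * box_diff       # mask out non-foreground rois
    abs_in_box_diff = torch.abs(in_box_diff)
    smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
    in_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
        + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    out_loss_box = bbox_outside_weights * in_loss_box  # normalization weights
    loss_box = out_loss_box
    for i in sorted(dim, reverse=True):                # reduce over the given dims
        loss_box = loss_box.sum(i)
    return loss_box.mean()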
Example #2
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):

        batch_size = base_feat.size(0)

        # feature map after conv + relu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                 im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generate training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1,2), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                            rpn_bbox_outside_weights, sigma=3, dim=[1,2,3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
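The RPN forward above folds the per-anchor scores into a 2-channel axis with `self.reshape` before the softmax and unfolds them afterwards. The helper is not shown; in the codebase family these snippets resemble, it is typically defined as follows (an assumption, not taken from this snippet):

    @staticmethod
    def reshape(x, d):
        # reshape (N, C, H, W) so that dim 1 has size d, absorbing the rest into H
        input_shape = x.size()
        x = x.view(
            input_shape[0],
            int(d),
            int(float(input_shape[1] * input_shape[2]) / float(d)),
            input_shape[3]
        )
        return x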
Example #3
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
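The forward pass above returns the individual loss terms instead of a single scalar. A hypothetical training step that consumes these outputs (the `fasterRCNN` and `optimizer` names, and the `.mean()` reduction for multi-GPU loss tensors, are assumptions) might look like:

rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
    RCNN_loss_cls, RCNN_loss_bbox, rois_label = \
    fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

# combine the four loss heads into one scalar for backprop
loss = rpn_loss_cls.mean() + rpn_loss_bbox.mean() \
    + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()
optimizer.zero_grad()
loss.backward()
optimizer.step()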
Example #4
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                # pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
                # add different sample methods
                pooled_feat = F.max_pool2d(pooled_feat, cfg.ALIGN_SAMPLE_NUM,
                                           cfg.ALIGN_SAMPLE_NUM)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
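This variant switches its pooling behaviour on `cfg` flags, including the `cfg.ALIGN_SAMPLE_NUM` stride it feeds to `F.max_pool2d`. The snippets do not include the config; a minimal sketch of the relevant entries, assuming an easydict-style `cfg` as used in common Faster R-CNN ports:

from easydict import EasyDict as edict

cfg = edict()
cfg.POOLING_MODE = 'align'            # one of 'crop', 'align', 'pool'
cfg.CROP_RESIZE_WITH_MAX_POOL = True  # extra max-pool after roi-crop
cfg.ALIGN_SAMPLE_NUM = 2              # sampling stride used by the max-pool above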
Example #5
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):

        batch_size = base_feat.size(0)

        # feature map after conv + relu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generate training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                               rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
Example #6
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                domain=None,
                l=0,
                loss_start=False):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes,
                                                 domain, self.transfer)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws, domain_label = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        #-----------------------transfer learning----------------------------#
        #print(domain)
        dom_loss = 0
        valid_index = torch.zeros(0)

        #base line: transfer == False
        if self.training and self.transfer:
            if self.grl:
                dom_input = ReverseLayerF.apply(pooled_feat, l)
            else:
                dom_input = pooled_feat

            dom_pred = self._domain_classify(dom_input)
            domain_label = Variable(domain_label.cpu().cuda().view(-1).long())

            ############ Process Transfer Loss Weight #########
            if loss_start:
                p_target = F.softmax(dom_pred * self.transfer_gamma, dim=1)[:, 0]
                domain_label.data = domain_label.data.type(
                    torch.FloatTensor).cuda()
                l_target = domain_label

                self.weight = p_target**l_target

                #drop zero weight
                valid_index = torch.nonzero(self.weight.data).cuda()

                if len(valid_index.size()) == 0:
                    valid_index = torch.zeros(1,
                                              1).type(torch.LongTensor).cuda()

                valid_index = valid_index.squeeze(1)

            ###############################################

            ##############DOMAIN LOSS SELECTION##########

            else:
                ids = torch.LongTensor(1).cuda()

                # random select
                if self.transfer_select == 'RANDOM':
                    perm = torch.randperm(rois.size(1))
                    ids = perm[:rois.size(1) // 8].cuda()

                # select positive samples and predicted positive samples
                elif self.transfer_select == 'CONDITION':
                    ids = torch.arange(0, rois.size(1) // 8)
                    ids = ids.long().cuda()

                # select all positive samples
                elif self.transfer_select == 'POSITIVE':
                    ids = torch.nonzero(rois_label.data)
                    ids = torch.squeeze(ids).cuda()

                # select balanced positive and negative samples
                elif self.transfer_select == 'BALANCE':
                    ids_p = torch.nonzero(rois_label.data)
                    ids_p = torch.squeeze(ids_p).cuda()

                    ids_n = (rois_label.data == 0).nonzero()
                    ids_n = torch.squeeze(ids_n).cuda()
                    ids_n = ids_n[:ids_p.size(0)]

                    ids = torch.cat((ids_p, ids_n), 0).cuda()

                # select all samples
                if self.transfer_select == 'ALL':
                    dom_pred_loss = dom_pred
                    dom_label_loss = domain_label
                else:
                    dom_pred_loss = dom_pred[ids]
                    dom_label_loss = domain_label[ids]

                ##########DOMAIN LOSS SELECTION DONE##########

                dom_loss = F.cross_entropy(dom_pred_loss, dom_label_loss)

                dom_loss = dom_loss * (
                    self.transfer_weight.expand_as(dom_loss))
        #---------------------transfer learning done-------------------------#

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, dim=1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if self.transfer and loss_start:
                rois_label_loss = torch.eye(
                    self.n_classes)[rois_label.data.cpu()].type(
                        torch.FloatTensor)
                rois_label_loss = Variable(rois_label_loss.cuda())
                rois_label_loss = rois_label_loss[valid_index]

                weight_cls_loss = self.weight.view(rois_label.size(0),
                                                   1).repeat(
                                                       1, self.n_classes)
                weight_cls_loss = weight_cls_loss[valid_index]

                cls_score_loss = cls_score[valid_index]

                RCNN_loss_cls = F.binary_cross_entropy_with_logits(
                    cls_score_loss, rois_label_loss, weight_cls_loss)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws, True, True,
                                                 self.weight, valid_index)

            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                 rois_inside_ws,
                                                 rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, dom_loss
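`ReverseLayerF.apply(pooled_feat, l)` above is a gradient reversal layer: the domain classifier is trained normally while the feature extractor receives negated gradients, which pushes the features toward domain invariance. Its definition is not included in the snippet; a standard DANN-style implementation consistent with this call is:

from torch.autograd import Function

class ReverseLayerF(Function):
    # identity in the forward pass; multiplies the gradient by -l on the way back

    @staticmethod
    def forward(ctx, x, l):
        ctx.l = l
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # reversed, scaled gradient for x; no gradient for l
        return grad_output.neg() * ctx.l, None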
Example #7
    def forward(self, im_data, im_info, gt_boxes, num_boxes, tgt_im_data,
                tgt_im_info, tgt_gt_boxes, tgt_num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        tgt_im_info = tgt_im_info.data
        tgt_gt_boxes = tgt_gt_boxes.data
        tgt_num_boxes = tgt_num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)
        tgt_base_feat = self.RCNN_base(tgt_im_data)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:

            # feed base feature map to RPN to obtain rois
            rois, rpn_loss_cls, rpn_loss_bbox, rpn_cls_prob, rois_select = self.RCNN_rpn(
                base_feat, im_info, gt_boxes, num_boxes)
            tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox, tgt_rpn_cls_prob, tgt_rois_select = self.RCNN_rpn(
                tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))

            tgt_roi_data = self.RCNN_proposal_target(tgt_rois, tgt_gt_boxes,
                                                     tgt_num_boxes)
            tgt_rois, tgt_rois_label, tgt_rois_target, tgt_rois_inside_ws, tgt_rois_outside_ws = tgt_roi_data

            tgt_rois_label = Variable(tgt_rois_label.view(-1).long())
            tgt_rois_target = Variable(
                tgt_rois_target.view(-1, tgt_rois_target.size(2)))
            tgt_rois_inside_ws = Variable(
                tgt_rois_inside_ws.view(-1, tgt_rois_inside_ws.size(2)))
            tgt_rois_outside_ws = Variable(
                tgt_rois_outside_ws.view(-1, tgt_rois_outside_ws.size(2)))
        else:

            # feed base feature map to RPN to obtain rois
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                base_feat, im_info, gt_boxes, num_boxes)
            tgt_rois, tgt_rpn_loss_cls, tgt_rpn_loss_bbox = self.RCNN_rpn(
                tgt_base_feat, tgt_im_info, tgt_gt_boxes, tgt_num_boxes)

            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

            tgt_rois_label = None
            tgt_rois_target = None
            tgt_rois_inside_ws = None
            tgt_rois_outside_ws = None
            tgt_rpn_loss_cls = 0
            tgt_rpn_loss_bbox = 0

        rois = Variable(rois)
        tgt_rois = Variable(tgt_rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # for RCNN
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)

            tgt_grid_xy = _affine_grid_gen(tgt_rois.view(-1, 5),
                                           tgt_base_feat.size()[2:],
                                           self.grid_size)
            tgt_grid_yx = torch.stack(
                [tgt_grid_xy.data[:, :, :, 1], tgt_grid_xy.data[:, :, :, 0]],
                3).contiguous()
            tgt_pooled_feat = self.RCNN_roi_crop(
                tgt_base_feat,
                Variable(tgt_grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                tgt_pooled_feat = F.max_pool2d(tgt_pooled_feat, 2, 2)

            # for RPN adaptive loss
            if self.training:
                grid_xy_ = _affine_grid_gen(rois_select,
                                            base_feat.size()[2:],
                                            self.grid_size)
                grid_yx_ = torch.stack(
                    [grid_xy_.data[:, :, :, 1], grid_xy_.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat_ = self.RCNN_roi_crop(base_feat,
                                                  Variable(grid_yx_).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat_ = F.max_pool2d(pooled_feat_, 2, 2)

                tgt_grid_xy_ = _affine_grid_gen(tgt_rois_select,
                                                tgt_base_feat.size()[2:],
                                                self.grid_size)
                tgt_grid_yx_ = torch.stack([
                    tgt_grid_xy_.data[:, :, :, 1], tgt_grid_xy_.data[:, :, :,
                                                                     0]
                ], 3).contiguous()
                tgt_pooled_feat_ = self.RCNN_roi_crop(
                    tgt_base_feat,
                    Variable(tgt_grid_yx_).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    tgt_pooled_feat_ = F.max_pool2d(tgt_pooled_feat_, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            # for RCNN
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            tgt_pooled_feat = self.RCNN_roi_align(tgt_base_feat,
                                                  tgt_rois.view(-1, 5))

            # for RPN adaptive loss
            if self.training:
                pooled_feat_ = self.RCNN_roi_align(base_feat, rois_select)
                tgt_pooled_feat_ = self.RCNN_roi_align(tgt_base_feat,
                                                       tgt_rois_select)
        elif cfg.POOLING_MODE == 'pool':
            # for RCNN
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
            tgt_pooled_feat = self.RCNN_roi_pool(tgt_base_feat,
                                                 tgt_rois.view(-1, 5))

            # for RPN adaptive loss
            if self.training:
                pooled_feat_ = self.RCNN_roi_pool(base_feat, rois_select)
                tgt_pooled_feat_ = self.RCNN_roi_pool(tgt_base_feat,
                                                      tgt_rois_select)

        # get the adaptive feature for RPN
        if self.training:
            rpn_adapt_feat = self.rpn_adapt_feat(
                pooled_feat_.view(pooled_feat.size(0), -1))
            tgt_rpn_adapt_feat = self.rpn_adapt_feat(
                tgt_pooled_feat_.view(tgt_pooled_feat.size(0), -1))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        tgt_pooled_feat = self._head_to_tail(tgt_pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        tgt_bbox_pred = self.RCNN_bbox_pred(tgt_pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

            tgt_bbox_pred_view = tgt_bbox_pred.view(
                tgt_bbox_pred.size(0), int(tgt_bbox_pred.size(1) / 4), 4)
            tgt_bbox_pred_select = torch.gather(
                tgt_bbox_pred_view, 1,
                tgt_rois_label.view(tgt_rois_label.size(0), 1,
                                    1).expand(tgt_rois_label.size(0), 1, 4))
            tgt_bbox_pred = tgt_bbox_pred_select.squeeze(1)

        # compute object classification probability
        adapt_feat = self.RCNN_adapt_feat(pooled_feat)
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        tgt_adapt_feat = self.RCNN_adapt_feat(tgt_pooled_feat)
        tgt_cls_score = self.RCNN_cls_score(tgt_pooled_feat)
        tgt_cls_prob = F.softmax(tgt_cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        tgt_RCNN_loss_cls = 0
        tgt_RCNN_loss_bbox = 0
        RCNN_loss_intra = 0
        RCNN_loss_inter = 0
        RPN_loss_intra = 0
        RPN_loss_inter = 0
        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            tgt_RCNN_loss_cls = F.cross_entropy(tgt_cls_score, tgt_rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)
            tgt_RCNN_loss_bbox = _smooth_l1_loss(tgt_bbox_pred,
                                                 tgt_rois_target,
                                                 tgt_rois_inside_ws,
                                                 tgt_rois_outside_ws)

            # intra-class and inter-class adaptation loss
            # pull same classes and push away different classes of source and target domains
            if self.mode == 'adapt':
                RCNN_loss_intra, RCNN_loss_inter = self.adaptive_loss(
                    adapt_feat, cls_prob, tgt_adapt_feat, tgt_cls_prob,
                    batch_size)
            # use gcn to cluster the representation of every class
            elif self.mode == 'gcn_adapt':
                RCNN_loss_intra, RCNN_loss_inter = self.gcn_adaptive_loss(
                    adapt_feat, cls_prob, rois, tgt_adapt_feat, tgt_cls_prob,
                    tgt_rois, batch_size)

            # intra-class and inter-class losses for RPN
            # pull same classes and push away different classes of source and target domains
            if self.rpn_mode == 'adapt':
                RPN_loss_intra, RPN_loss_inter = self.adaptive_loss_rpn(
                    rpn_adapt_feat, rpn_cls_prob, tgt_rpn_adapt_feat,
                    tgt_rpn_cls_prob, batch_size)
            # use gcn to cluster the representation of every class
            elif self.rpn_mode == 'gcn_adapt':
                RPN_loss_intra, RPN_loss_inter = self.gcn_adaptive_loss(
                    rpn_adapt_feat, rpn_cls_prob, rois, tgt_rpn_adapt_feat,
                    tgt_rpn_cls_prob, tgt_rois, batch_size)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        tgt_cls_prob = tgt_cls_prob.view(batch_size, tgt_rois.size(1), -1)
        tgt_bbox_pred = tgt_bbox_pred.view(batch_size, tgt_rois.size(1), -1)

        if self.training:
            return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls.view(-1), tgt_rpn_loss_cls.view(-1), \
             rpn_loss_bbox.view(-1), tgt_rpn_loss_bbox.view(-1), RCNN_loss_cls.view(-1), tgt_RCNN_loss_cls.view(-1), RCNN_loss_bbox.view(-1), \
             tgt_RCNN_loss_bbox.view(-1), RCNN_loss_intra.view(-1), RCNN_loss_inter.view(-1), rois_label, tgt_rois_label, \
                   RPN_loss_intra.view(-1), RPN_loss_inter.view(-1)
        else:
            return rois, tgt_rois, cls_prob, tgt_cls_prob, bbox_pred, tgt_bbox_pred, rpn_loss_cls, tgt_rpn_loss_cls, rpn_loss_bbox, \
             tgt_rpn_loss_bbox, RCNN_loss_cls, tgt_RCNN_loss_cls, RCNN_loss_bbox, tgt_RCNN_loss_bbox, \
             RCNN_loss_intra, RCNN_loss_inter, rois_label, tgt_rois_label, RPN_loss_intra, RPN_loss_inter
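`self.adaptive_loss` and `self.gcn_adaptive_loss` are not shown. As a rough illustration of the intra/inter idea described in the comments (pull matching classes of the two domains together, push different classes apart), a minimal sketch follows; the prototype construction, the margin of 1.0, and all names are assumptions, not the actual method:

import torch
import torch.nn.functional as F

def adaptive_loss_sketch(src_feat, src_prob, tgt_feat, tgt_prob):
    # soft class prototypes: probability-weighted means of roi features, [C, D]
    src_proto = src_prob.t() @ src_feat / (src_prob.sum(0, keepdim=True).t() + 1e-6)
    tgt_proto = tgt_prob.t() @ tgt_feat / (tgt_prob.sum(0, keepdim=True).t() + 1e-6)

    # intra-class: pull same-class prototypes of the two domains together
    loss_intra = F.mse_loss(src_proto, tgt_proto)

    # inter-class: hinge that pushes different-class prototype pairs apart
    d = torch.cdist(src_proto, tgt_proto)                      # [C, C] distances
    mask = ~torch.eye(d.size(0), dtype=torch.bool, device=d.device)
    loss_inter = F.relu(1.0 - d[mask]).mean()                  # margin is assumed
    return loss_intra, loss_inter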
Example #8
    def forward(self, im_data, query, im_info, gt_boxes, num_boxes, alpha):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        detect_feat = self.RCNN_base(im_data)
        query_feat = self.RCNN_base_sketch(query)

        c_weight = None
        attention_map = None

        if self.model_type == "match_net":
            rpn_feat, act_feat, act_aim, c_weight = self.match_net(
                detect_feat, query_feat)

        if self.model_type == "attention":
            act_feat, act_aim, attention_map = self.attention_net(
                detect_feat, query_feat)
            act_feat = torch.cat([act_feat, detect_feat], dim=1)
            act_feat = self.projection(act_feat)

        if self.model_type == "basic":
            act_feat = detect_feat
            act_aim = query_feat

        if self.model_type in ["basic", "attention"]:
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                act_feat, im_info, gt_boxes, num_boxes)

        if self.model_type == "match_net":
            rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
                rpn_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            # if True:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            margin_loss = 0
            rpn_loss_bbox = 0
            score_label = None

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(act_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(act_feat, rois.view(-1, 5))

        pooled_feat = self._head_to_tail(pooled_feat)
        query_feat = self._head_to_tail(act_aim)

        batch_size = query_feat.shape[0]

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        pooled_feat = pooled_feat.view(batch_size, rois.size(1), -1)
        query_feat = query_feat.unsqueeze(1).repeat(1, rois.size(1), 1)

        pooled_feat = torch.cat(
            (pooled_feat.expand_as(query_feat), query_feat),
            dim=2).view(-1, 4096)

        # compute object classification probability
        score = self.RCNN_cls_score(pooled_feat)

        score_prob = F.softmax(score, 1)[:, 1]

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            score_label = rois_label.view(batch_size, -1).float()
            gt_map = torch.abs(
                score_label.unsqueeze(1) - score_label.unsqueeze(-1))

            score_prob = score_prob.view(batch_size, -1)
            pr_map = torch.abs(
                score_prob.unsqueeze(1) - score_prob.unsqueeze(-1))
            target = -((gt_map - 1)**2) + gt_map

            RCNN_loss_cls = F.cross_entropy(score, rois_label)

            margin_loss = 3 * self.triplet_loss(pr_map, gt_map, target)

            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = score_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, margin_loss, RCNN_loss_bbox, rois_label, c_weight, attention_map
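`self.triplet_loss(pr_map, gt_map, target)` matches the `(x1, x2, y)` signature of `nn.MarginRankingLoss`: `target` is +1 for roi pairs whose labels differ (`gt_map = 1`) and -1 for pairs with equal labels, so the loss pushes the predicted pairwise probability gap above the margin for mismatched pairs and toward zero for matched ones. A plausible definition (the margin value is an assumption):

import torch.nn as nn

triplet_loss = nn.MarginRankingLoss(margin=0.3)  # margin chosen for illustration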
Example #9
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0], and so on

        # part_threshold = 0.25
        #
        # # first, calculate the overlaps among rois and gt, get the max roi for each gt (node_cls)
        overlaps = bbox_overlaps_batch(rois, rois)[0]

        N_node, _ = overlaps.shape

        node_list = [i for i in range(N_node)]

        for j in range(N_node):
            for k in range(N_node):
                if overlaps[j][k] != 0:
                    overlaps[j][k] = 1
                if k == j:
                    overlaps[j][k] = 0

        idx_subgraph, vertex_subgraph = subgraph_split(overlaps)

        # max_overlaps_rois2gt, roi2gt_assignment = torch.max(overlaps, 1)
        #
        # # second, calculate the overlaps among rois and rois_select,
        # # using threshold to select roi for each rois_select (node_part)
        #
        # rois_cls_tmp = rois[:, roi2gt_assignment[0], :]
        # rois_cls_num = np.argwhere(gt_boxes[:, :, 4].cpu().data.numpy()[0] != 0).shape[0]
        # rois_cls_tmp = rois_cls_tmp[:,:rois_cls_num, :]
        # rois_cls = rois_cls_tmp.new(rois_cls_tmp.size(0), rois_cls_tmp.size(1), 5).zero_()
        # rois_cls[:, :, :4] = rois_cls_tmp[:, :, 1:5]
        # rois_cls[:, :, 4] = rois_cls_tmp[:, :, 0]
        #
        # # rois_cls_idx_list is the idx related from rois_cls to rois
        # roi_cls_idx_list = roi2gt_assignment[0][:rois_cls_num]
        #
        # overlaps = bbox_overlaps_batch(rois, rois_cls)
        # max_overlaps_rois2cls, roi2cls_assignment = torch.max(overlaps, 2)
        #
        # roi_part_match_overlap = max_overlaps_rois2cls.cpu().data.numpy()
        # roi_part_match = roi2cls_assignment.cpu().data.numpy()
        #
        # # roi_part_idx_list is the idx related from rois_part to rois
        # roi_part_idx_list = []
        # roi_part_match_idx = np.unique(roi_part_match[0])
        # for roi_cls_idx in roi_part_match_idx:
        #     match_idx_tmp = np.transpose(np.argwhere(roi_part_match[0] == roi_cls_idx))[0]
        #     match_overlap_tmp = roi_part_match_overlap[0][match_idx_tmp]
        #     # use threshold to select rois_part
        #     match_idx_tmp_select = np.transpose(np.argwhere(match_overlap_tmp > part_threshold))[0]
        #     match_idx_tmp = match_idx_tmp[match_idx_tmp_select]
        #     roi_part_idx_list.append(torch.from_numpy(match_idx_tmp))

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # # update 20191027: build graph for rois based on index (default: batch_size = 1)
        # adj_jud = np.zeros((0))
        # adj_rois = torch.zeros(0).cuda().long()
        # for i in range(roi_cls_idx_list.shape[0]):
        #     adj_jud = np.concatenate((adj_jud, [1]))
        #     adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i+1]))
        #     try:
        #         adj_jud = np.concatenate((adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
        #         adj_rois = torch.cat((adj_rois, roi_part_idx_list[i].cuda()))
        #     except IndexError:
        #         print ('IndexError happen, continue')
        #         continue
        #
        # node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]
        #
        # adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # # link edges for node_cls to node_cls
        # for k in range(len(node_cls_idx)-1):
        #     idx_node_cls_1 = node_cls_idx[k]
        #     idx_node_cls_2 = node_cls_idx[k + 1]
        #     adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
        #     adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1
        #
        # # link edges for node_cls to related node_part
        # for k in range(len(node_cls_idx)-1):
        #     idx_start = node_cls_idx[k]
        #     idx_end = node_cls_idx[k + 1]
        #     for s in range(idx_start, idx_end):
        #         for t in range(idx_start, idx_end):
        #             if s == t:
        #                 adj_matrix_bin[s, t] = 0
        #             else:
        #                 adj_matrix_bin[s, t] = 1

        # # calculate the adj_mat based on adj_matrix_bin, the weights on edges are the cosine distance between nodes
        # adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        #
        # for s in range(len(adj_jud)):
        #     for t in range(len(adj_jud)):
        #         if adj_matrix_bin[s, t] == 1:
        #             node_feat_s = pooled_feat[adj_rois[s], :]
        #             node_feat_t = pooled_feat[adj_rois[t], :]
        #             adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
        #         else:
        #             adj_matrix[s, t] = 0
        #
        # adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        #
        # pooled_feat[adj_rois, :] = F.relu(self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
        # pooled_feat[adj_rois, :] = F.relu(self.gcn2(pooled_feat[adj_rois, :], adj_matrix))

        # adj_jud = np.zeros((N_node, N_node))
        adj_matrix = np.zeros((N_node, N_node))
        #
        # for k in range(idx_subgraph):
        #     idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
        #     for s in range(idx_k.shape[0]):
        #         for t in range(idx_k.shape[0]):
        #             if s == t:
        #                 adj_jud[s, t] = 0
        #             else:
        #                 adj_jud[s, t] = 1
        #
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

        for s in range(N_node):
            for t in range(N_node):
                #if adj_jud[s,t] != 0:
                if s != t:
                    node_feat_s = pooled_feat[s, :]
                    node_feat_t = pooled_feat[t, :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
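`subgraph_split` is only used here through its return values: a subgraph count and a per-vertex subgraph id computed from the binarized overlap matrix. A connected-components sketch with that contract (the real helper may differ):

import numpy as np

def subgraph_split(adj):
    # label connected components of a binarized adjacency matrix;
    # returns (num_subgraphs, vertex_subgraph) with one component id per vertex
    n = adj.shape[0]
    vertex_subgraph = -np.ones(n, dtype=np.int64)
    current = 0
    for start in range(n):
        if vertex_subgraph[start] != -1:
            continue
        stack = [start]
        vertex_subgraph[start] = current
        while stack:                      # depth-first flood fill
            v = stack.pop()
            for u in range(n):
                if adj[v][u] != 0 and vertex_subgraph[u] == -1:
                    vertex_subgraph[u] = current
                    stack.append(u)
        current += 1
    return current, vertex_subgraph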
Example #10
    def forward(self, im_data_list, im_info_list, gt_boxes_list, num_boxes_list,
                average_shot=None, mean_class_attentions=None):
        # return attentions for testing
        if average_shot:
            prn_data = im_data_list[0]  # len(metaclass)*4*224*224
            attentions = self.prn_network(prn_data)
            return attentions
        # extract attentions for training
        if self.meta_train and self.training:
            prn_data = im_data_list[0]  # len(metaclass)*4*224*224
            # feed prn data to prn_network
            attentions = self.prn_network(prn_data)
            prn_cls = im_info_list[0]  # len(metaclass)

        im_data = im_data_list[-1]
        im_info = im_info_list[-1]
        gt_boxes = gt_boxes_list[-1]
        num_boxes = num_boxes_list[-1]

        batch_size = im_data.size(0)
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(self.rcnn_conv1(im_data))

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))  # (b*128)*1024*7*7
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)  # (b*128)*2048

        # meta training phase
        if self.meta_train:
            rcnn_loss_cls = []
            rcnn_loss_bbox = []

            # pooled feature maps are multiplied channel-wise with the
            # corresponding class attentions for every roi of the image
            for b in range(batch_size):
                zero = Variable(torch.FloatTensor([0]).cuda())
                proposal_labels = rois_label[b * 128:(b + 1) * 128].data.cpu().numpy()
                unique_labels = list(np.unique(proposal_labels))  # the unique rois labels of the input image

                for i in range(attentions.size(0)):  # attentions len(attentions)*2048
                    if prn_cls[i].numpy()[0] + 1 not in unique_labels:
                        rcnn_loss_cls.append(zero)
                        rcnn_loss_bbox.append(zero)
                        continue

                    roi_feat = pooled_feat[b * cfg.TRAIN.BATCH_SIZE:(b + 1) * cfg.TRAIN.BATCH_SIZE, :]  # 128*2048
                    cls_feat = attentions[i].view(1, -1, 1, 1)  # 1*2048*1*1

                    diff_feat = roi_feat - cls_feat.squeeze()
                    corr_feat = F.conv2d(roi_feat.unsqueeze(-1).unsqueeze(-1),
                                         cls_feat.permute(1, 0, 2, 3),
                                         groups=2048).squeeze()

                    # subtraction + correlation: [bs, 2048]
                    channel_wise_feat = torch.cat((self.corr_fc(corr_feat), self.diff_fc(diff_feat)), dim=1)

                    # combined with the roi feature: [bs, 2048 * 2]
                    channel_wise_feat = torch.cat((channel_wise_feat, roi_feat), dim=1)

                    # compute object bounding box regression
                    bbox_pred = self.RCNN_bbox_pred(channel_wise_feat)  # 128*4
                    if self.training and not self.class_agnostic:
                        # select the corresponding columns according to roi labels
                        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
                        batch_rois_label = rois_label[b * cfg.TRAIN.BATCH_SIZE:(b + 1) * cfg.TRAIN.BATCH_SIZE]
                        bbox_pred_select = torch.gather(
                            bbox_pred_view, 1, batch_rois_label.view(
                                batch_rois_label.size(0), 1, 1).expand(batch_rois_label.size(0), 1, 4))
                        bbox_pred = bbox_pred_select.squeeze(1)

                    # compute object classification probability
                    cls_score = self.RCNN_cls_score(channel_wise_feat)

                    if self.training:
                        # classification loss
                        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label[b * 128:(b + 1) * 128])
                        rcnn_loss_cls.append(RCNN_loss_cls)
                        # bounding box regression L1 loss
                        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target[b * 128:(b + 1) * 128],
                                                         rois_inside_ws[b * 128:(b + 1) * 128],
                                                         rois_outside_ws[b * 128:(b + 1) * 128])

                        rcnn_loss_bbox.append(RCNN_loss_bbox)

            # meta attentions loss
            if self.meta_loss:
                attentions_score = self.Meta_cls_score(attentions)
                meta_loss = F.cross_entropy(attentions_score, Variable(torch.cat(prn_cls, dim=0).cuda()))
            else:
                meta_loss = 0

            return rois, rpn_loss_cls, rpn_loss_bbox, rcnn_loss_cls, rcnn_loss_bbox, rois_label, 0, 0, meta_loss

        # meta testing phase
        elif self.meta_test:
            cls_prob_list = []
            bbox_pred_list = []
            for i in range(len(mean_class_attentions)):
                mean_attentions = mean_class_attentions[i]

                cls_feat = mean_attentions.view(1, -1, 1, 1)  # 1*2048*1*1

                diff_feat = pooled_feat - cls_feat.squeeze()
                corr_feat = F.conv2d(pooled_feat.unsqueeze(-1).unsqueeze(-1),
                                     cls_feat.permute(1, 0, 2, 3),
                                     groups=2048).squeeze()

                # subtraction + correlation: [bs, 2048]
                channel_wise_feat = torch.cat((self.corr_fc(corr_feat), self.diff_fc(diff_feat)), dim=1)

                # combined with the roi feature: [bs, 2048 * 2]
                channel_wise_feat = torch.cat((channel_wise_feat, pooled_feat), dim=1)

                # compute bbox offset
                bbox_pred = self.RCNN_bbox_pred(channel_wise_feat)
                if self.training and not self.class_agnostic:
                    # select the corresponding columns according to roi labels
                    bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
                    bbox_pred_select = torch.gather(
                        bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
                    bbox_pred = bbox_pred_select.squeeze(1)

                # compute object classification probability
                cls_score = self.RCNN_cls_score(channel_wise_feat)
                cls_prob = F.softmax(cls_score, dim=1)

                RCNN_loss_cls = 0
                RCNN_loss_bbox = 0

                cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
                bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
                cls_prob_list.append(cls_prob)
                bbox_pred_list.append(bbox_pred)

            return rois, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, cls_prob_list, bbox_pred_list, 0

        # original faster-rcnn implementation
        else:
            bbox_pred = self.RCNN_bbox_pred(pooled_feat)
            if self.training and not self.class_agnostic:
                # select the corresponding columns according to roi labels
                bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
                bbox_pred_select = torch.gather(
                    bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
                bbox_pred = bbox_pred_select.squeeze(1)

            # compute object classification probability
            cls_score = self.RCNN_cls_score(pooled_feat)  # 128 * 1001
            cls_prob = F.softmax(cls_score, dim=1)

            RCNN_loss_cls = 0
            RCNN_loss_bbox = 0

            if self.training:
                # classification loss
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

                # bounding box regression L1 loss
                RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

            cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
            bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, cls_prob, bbox_pred, 0
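Note: every example in this listing calls _smooth_l1_loss without defining it. Below is a minimal sketch consistent with the widely used faster-rcnn.pytorch helper; the exact version bundled with each project may differ slightly.

import torch

def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=[1]):
    # sigma sets where the loss switches from the quadratic to the linear regime
    sigma_2 = sigma ** 2
    box_diff = bbox_pred - bbox_targets
    # inside weights zero out regression targets of background rois
    in_box_diff = bbox_inside_weights * box_diff
    abs_in_box_diff = torch.abs(in_box_diff)
    # 1 on the quadratic branch (|x| < 1/sigma^2), 0 on the linear branch
    smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
    in_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
        + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
    # outside weights carry the normalization over the sampled rois
    loss_box = bbox_outside_weights * in_loss_box
    for i in sorted(dim, reverse=True):
        loss_box = loss_box.sum(i)
    return loss_box.mean()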
Example #11
    def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):

        n_feat_maps = len(rpn_feature_maps)

        rpn_cls_scores = []
        rpn_cls_probs = []
        rpn_bbox_preds = []
        rpn_shapes = []

        for i in range(n_feat_maps):
            feat_map = rpn_feature_maps[i]
            batch_size = feat_map.size(0)

            # return feature map after convrelu layer
            rpn_conv1 = F.relu(self.RPN_Conv(feat_map), inplace=True)
            # get rpn classification score
            rpn_cls_score = self.RPN_cls_score(rpn_conv1)

            rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
            rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, dim=1)
            rpn_cls_prob = self.reshape(rpn_cls_prob_reshape,
                                        self.nc_score_out)

            # get rpn offsets to the anchor boxes
            rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

            rpn_shapes.append(
                [rpn_cls_score.size()[2],
                 rpn_cls_score.size()[3]])
            rpn_cls_scores.append(
                rpn_cls_score.permute(0, 2, 3,
                                      1).contiguous().view(batch_size, -1, 2))
            rpn_cls_probs.append(
                rpn_cls_prob.permute(0, 2, 3,
                                     1).contiguous().view(batch_size, -1, 2))
            rpn_bbox_preds.append(
                rpn_bbox_pred.permute(0, 2, 3,
                                      1).contiguous().view(batch_size, -1, 4))

        rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
        rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
        rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)

        n_rpn_pred = rpn_cls_score_alls.size(1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data, im_info, cfg_key,
             rpn_shapes))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes,
                 rpn_shapes))

            # compute classification loss
            rpn_label = rpn_data[0].view(batch_size, -1)
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1, 2),
                                               0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3)

        return rois, self.rpn_loss_cls, self.rpn_loss_box
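Note: self.reshape(...) above is not shown. In common RPN implementations it regroups the 2*A score channels so that dim 1 has size 2 (fg/bg) before the softmax; a standalone sketch of that helper (an assumption about this project's exact version):

import torch

def reshape(x, d):
    # (B, C, H, W) -> (B, d, C*H/d, W): make dim 1 the fg/bg axis of size d
    b, c, h, w = x.size()
    return x.view(b, int(d), int(c * h / d), w)

x = torch.randn(1, 18, 50, 37)            # 2 scores x 9 anchors per location
assert reshape(x, 2).shape == (1, 2, 450, 37)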
Example #12
    def forward(self, im_data, im_info, gt_boxes, num_boxes, dl_data):
        # batch_size = im_data.size(0)
        batch_size, c, h, w = im_data.size()

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        start_tic = time.time()
        # Start add by Jie: use MobileNetV2 as the backbone network for feature extraction.
        if self.dlb:
            # padding

            if h % 32 != 0:  # 720*1280 -->736*1280
                m, n = divmod(h, 32)
                ph = int(((m+1)*32-h)/2)
                im_data = F.pad(im_data, (0, 0, ph, ph), "constant", 0)
            if w % 32 != 0:
                m, n = divmod(w, 32)
                pw = int(((m+1)*32-w)/2)
                im_data = F.pad(im_data, (pw, pw, 0, 0), "constant", 0)  # (padLeft, padRight, padTop, padBottom)
            # print('im_data', im_data.size())

            low_level_features = self.RCNN_low_base(im_data) #1/4
            # print('low_level_features', low_level_features.size())

            mid_level_features = self.RCNN_mid_base(low_level_features) #1/8

            base_feat = self.RCNN_base(mid_level_features) #1/16
            # print('base_feat', base_feat.size())

            base_toc = time.time()

            # ----- Do segmentation
            seg_feat = self.RCNN_top(base_feat)
            # print('seg_feat', seg_feat.size())

            # the previous implementation
            # drive_line = self.SegDecoder(seg_feat, low_level_features)

            # print('drive_line', drive_line.size())
            # TODO: here we need to pass all the features into the decoder
            drive_line = self.SegDecoder(low_level_features,
                                         mid_level_features,
                                         base_feat)

            # print("drive line size", drive_line.size())
            if h % 32 != 0:
                drive_line = drive_line[:, :, ph:h+ph, :]
            if w % 32 != 0:
                drive_line = drive_line[:, :, :, pw:w+pw]

            drive_toc = time.time()

        # End add
        else:
            low_level_features = self.RCNN_low_base(im_data)

            # feed image data to base model to obtain base feature map
            base_feat = self.RCNN_base(low_level_features)

            # print('base_feat.size()', base_feat.size())
            # print('drive_line = 0')
            # drive_line = 0

        # ------ No Detection
        """
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        drive_line_loss = 0
        rois = 0
        cls_prob = 0
        bbox_pred = 0
        """
        # ------ End: No Detection

        # ------ With Detection
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        det_toc = time.time()
        # print('base_time {:.3f}s  driveline {:.3f}s   detection {:.3f}s\r' \
        #                  .format( base_toc - start_tic, drive_toc - base_toc, det_toc - drive_toc))

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)


        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        drive_line_loss = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # Added by Jie. TODO: add resampling
            # print('Calc drive line segmentation loss')
            # print('faster rcnn: forward, drive_line.shape, dl_data.shape', drive_line.shape, dl_data.shape)
            neg_rate = 5
            resample = True if neg_rate < 100 else False
            # if resample:  # TODO, use torch instead of numpy
            #     target = dl_data
            #     bs, h, w = target.shape
            #     y_true = target.reshape(-1)
            #     y_true_0_dix = torch.where(y_true == 0)  # ???
            #     num_neg = torch.sum(y_true == 0)
            #     num_pos = torch.sum(y_true == 1)
            #     num_ign = min(max(int(num_neg - neg_rate * num_pos), 0), int((num_neg + num_pos) * 0.95))
            #     inds = torch.multinomial(y_true_0_dix[0], num_ign, replacement=False, out=None)
            #     # inds = np.random.choice(y_true_0_dix[0], num_ign, replace=False)
            #     y_true[inds] = 255  # ignore
            #     y_true = y_true.reshape(bs, h, w)
            #     y_true = torch.from_numpy(y_true).long().cuda()
            # else:
            #     y_true = dl_data

            if resample:
                target = dl_data.cpu().numpy()
                # print('target.shape', target.shape, np.amax(target))
                bs, h, w = target.shape
                y_true = target.reshape(-1)
                y_true_0_dix = np.where(y_true == 0)
                # ---
                num_neg = np.sum(np.array(y_true == 0))
                num_pos = np.sum(np.array(y_true == 1))
                # count = np.bincount(y_true)
                # num_neg = count[0]
                # num_pos = count[1]  # when only have neg sample, count[1] outof index
                # ---
                num_ign = min(max(int(num_neg - neg_rate * num_pos), 0), int((num_neg + num_pos) * 0.95))
                inds = np.random.choice(y_true_0_dix[0], num_ign, replace=False)

                y_true[inds] = 255  # ignore
                y_true = y_true.reshape(bs, h, w)
                y_true = torch.from_numpy(y_true).long().cuda()
            else:
                y_true = dl_data

            # use the resampled labels; pixels marked 255 are ignored in the loss
            drive_line_loss = F.cross_entropy(drive_line, y_true, ignore_index=255)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:  # for python 2.7
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
            drive_line_loss = torch.unsqueeze(drive_line_loss, 0)

        # Drive Line Segmentation
        # print('torch.max(drive_line)', torch.max(drive_line), drive_line.size())
        # ------ END: With Detection

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, drive_line, drive_line_loss
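Note: the padding logic above relies on F.pad's (left, right, top, bottom) convention for 4-D inputs. A small standalone check of the round trip (sizes illustrative only):

import torch
import torch.nn.functional as F

x = torch.zeros(1, 3, 720, 1280)           # H=720 is not a multiple of 32
ph = ((720 // 32 + 1) * 32 - 720) // 2     # 8 rows added on top and bottom
x_pad = F.pad(x, (0, 0, ph, ph))           # (padLeft, padRight, padTop, padBottom)
assert x_pad.shape == (1, 3, 736, 1280)
x_back = x_pad[:, :, ph:720 + ph, :]       # mirrors the crop in the forward pass
assert x_back.shape == x.shape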
Example #13
File: meta.py  Project: Tung-I/FRCNN
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                support_ims,
                all_cls_gt_boxes=None):
        if self.training:
            self.num_of_rois = cfg.TRAIN.BATCH_SIZE
        else:
            self.num_of_rois = cfg.TEST.RPN_POST_NMS_TOP_N
        batch_size = im_data.size(0)
        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        all_cls_gt_boxes = all_cls_gt_boxes.data

        # feature extraction
        base_feat = self.RCNN_base(im_data)
        if self.training:
            support_ims = support_ims.view(-1, support_ims.size(2),
                                           support_ims.size(3),
                                           support_ims.size(4))
            support_feats = self.prn_network(support_ims)
            support_feats = support_feats.view(-1, self.n_way * self.n_shot,
                                               support_feats.size(1))
            pos_support_feat = support_feats[:, :self.n_shot, :].mean(1)
            neg_support_feat = support_feats[:, self.n_shot:self.n_way *
                                             self.n_shot, :].mean(1)
        else:
            support_ims = support_ims.view(-1, support_ims.size(2),
                                           support_ims.size(3),
                                           support_ims.size(4))
            support_feats = self.prn_network(support_ims)
            support_feats = support_feats.view(-1, self.n_shot,
                                               support_feats.size(1))
            pos_support_feat = support_feats[:, :self.n_shot, :].mean(1)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, all_cls_gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            ## rois [B, rois_per_image(128), 5]
            ### 5 is [batch_num, x1, y1, x2, y2]
            ## rois_label [B, 128]
            ## rois_target [B, 128, 4]
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
        rois = Variable(rois)

        # do roi pooling based on predicted rois, pooled_feat = [B*128, 1024, 7, 7]
        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)  # [B*128, 2048]

        # rcnn head
        if self.training:
            bbox_pred, cls_prob, cls_score_all = self.rcnn_head(
                pooled_feat, pos_support_feat)
            _, neg_cls_prob, neg_cls_score_all = self.rcnn_head(
                pooled_feat, neg_support_feat)
            cls_prob = torch.cat([cls_prob, neg_cls_prob], dim=0)
            cls_score_all = torch.cat([cls_score_all, neg_cls_score_all],
                                      dim=0)
            neg_rois_label = torch.zeros_like(rois_label)
            rois_label = torch.cat([rois_label, neg_rois_label], dim=0)
        else:
            bbox_pred, cls_prob, cls_score_all = self.rcnn_head(
                pooled_feat, pos_support_feat)
        # losses
        if self.training:
            ## bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)
            ## classification loss, 2-way, 1:2:1
            fg_inds = (rois_label == 1).nonzero().squeeze(-1)
            bg_inds = (rois_label == 0).nonzero().squeeze(-1)
            cls_score_softmax = torch.nn.functional.softmax(cls_score_all,
                                                            dim=1)
            bg_cls_score_softmax = cls_score_softmax[bg_inds, :]
            bg_num_0 = max(
                1, min(fg_inds.shape[0] * 2, int(rois_label.shape[0] * 0.25)))
            bg_num_1 = max(1, min(fg_inds.shape[0], bg_num_0))
            _sorted, sorted_bg_inds = torch.sort(bg_cls_score_softmax[:, 1],
                                                 descending=True)
            real_bg_inds = bg_inds[sorted_bg_inds]  # sort the real_bg_inds
            real_bg_topk_inds_0 = real_bg_inds[real_bg_inds < int(
                rois_label.shape[0] * 0.5)][:bg_num_0]  # pos support
            real_bg_topk_inds_1 = real_bg_inds[real_bg_inds >= int(
                rois_label.shape[0] * 0.5)][:bg_num_1]  # neg_support
            topk_inds = torch.cat(
                [fg_inds, real_bg_topk_inds_0, real_bg_topk_inds_1], dim=0)
            RCNN_loss_cls = F.cross_entropy(cls_score_all[topk_inds],
                                            rois_label[topk_inds])
        else:
            RCNN_loss_cls = 0
            RCNN_loss_bbox = 0

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
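Note: the torch.gather pattern used throughout these examples selects, for each roi, only the 4 box deltas of its ground-truth class. A toy demonstration of the indexing:

import torch

num_rois, num_classes = 3, 5
bbox_pred = torch.arange(num_rois * num_classes * 4, dtype=torch.float32)
bbox_pred = bbox_pred.view(num_rois, num_classes * 4)            # (3, 20)
rois_label = torch.tensor([2, 0, 4])                             # class id per roi

bbox_pred_view = bbox_pred.view(num_rois, num_classes, 4)        # (3, 5, 4)
index = rois_label.view(num_rois, 1, 1).expand(num_rois, 1, 4)   # (3, 1, 4)
selected = torch.gather(bbox_pred_view, 1, index).squeeze(1)     # (3, 4)

assert torch.equal(selected[0], bbox_pred_view[0, 2])            # roi 0 -> class 2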
Example #14
    def forward(self,
                im_data,
                im_info,
                meta_data,
                gt_boxes,
                num_boxes,
                run_partial=False):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        meta_data = meta_data.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        avg_feat = self.spatial_pool(
            base_feat,
            [base_feat.size()[2], base_feat.size()[3]], 14)

        weather_label = Variable(meta_data[:, 0].view(-1).long())
        altitude_label = Variable(meta_data[:, 1].view(-1).long())
        angle_label = Variable(meta_data[:, 2].view(-1).long())
        softmax = nn.Softmax(dim=1)

        altitude_score = self.RCNN_altitude_score(
            self.RCNN_altitude(avg_feat).mean(-1).mean(-1))
        RCNN_loss_altitude = F.cross_entropy(altitude_score, altitude_label)
        # RCNN_loss_altitude_adv = torch.mean(torch.sum(- altitude_score.new_full(altitude_score.size(), 1 / 3.0) * torch.log(torch.clamp(softmax(altitude_score), min=1e-10, max=1.0)), 1))
        RCNN_loss_altitude_adv = torch.mean(
            torch.sum(
                softmax(altitude_score) * torch.log(
                    torch.clamp(softmax(altitude_score), min=1e-10, max=1.0)),
                1))
        correct = altitude_score.max(1)[1].type_as(altitude_label).eq(
            altitude_label)
        correct = correct.sum().type(torch.FloatTensor).cuda()
        RCNN_acc_altitude = correct / altitude_label.size(0)

        if run_partial:
            if self.training:
                RCNN_loss_altitude = torch.unsqueeze(RCNN_loss_altitude, 0)
                RCNN_loss_altitude_adv = torch.unsqueeze(
                    RCNN_loss_altitude_adv, 0)
                RCNN_acc_altitude = torch.unsqueeze(RCNN_acc_altitude, 0)
            return RCNN_loss_altitude, RCNN_loss_altitude_adv, RCNN_acc_altitude
        # feed base feature map tp RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is training phrase, then use ground trubut bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)
            RCNN_loss_altitude = torch.unsqueeze(RCNN_loss_altitude, 0)
            RCNN_loss_altitude_adv = torch.unsqueeze(RCNN_loss_altitude_adv, 0)
            RCNN_acc_altitude = torch.unsqueeze(RCNN_acc_altitude, 0)

            return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, \
                   RCNN_loss_altitude, RCNN_loss_altitude_adv, RCNN_acc_altitude, \
                   rois_label
        return rois, cls_prob, bbox_pred, RCNN_acc_altitude
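Note: RCNN_loss_altitude_adv above is the negative entropy of the altitude prediction, mean(sum(p * log p)); minimizing it drives the distribution toward uniform, which is presumably how the adversarial branch makes features altitude-agnostic (the commented-out variant does the same with a uniform-target cross-entropy). A compact restatement:

import torch
import torch.nn.functional as F

def negative_entropy(logits, eps=1e-10):
    # mean over the batch of sum_i p_i * log(p_i) == -H(p)
    p = F.softmax(logits, dim=1)
    return torch.mean(torch.sum(p * torch.log(p.clamp(min=eps, max=1.0)), dim=1))

logits = torch.randn(4, 3)            # e.g. 3 altitude bins
loss_adv = negative_entropy(logits)   # smallest when p is uniform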
Example #15
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            # rois_label is the sub-class label

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # return roi_data
        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(
                rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(
                base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pspool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        # (256,2048,7,7)

        # nongt_dim = cfg.TRAIN.RPN_POST_NMS_TOP_N if self.training else cfg.TEST.RPN_POST_NMS_TOP_N
        nongt_dim = 300 if self.training else cfg.TEST.RPN_POST_NMS_TOP_N

        position_matrix = self.extract_position_matrix(
            rois.view(-1, 5)[:, :4].clone(), nongt_dim=nongt_dim)
        position_embedding = self.extract_position_embedding(
            position_matrix, feat_dim=64)

        pooled_feat = self.fc1(pooled_feat)
        attention_feat_1 = self.attention_1(pooled_feat, position_embedding)
        pooled_feat = pooled_feat + attention_feat_1
        pooled_feat = self.fc2(pooled_feat)
        attention_feat_2 = self.attention_2(pooled_feat, position_embedding)
        pooled_feat = pooled_feat + attention_feat_2

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(
                bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(
                rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(
                bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
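Note: the torch.unsqueeze(loss, 0) wrapping above exists because nn.DataParallel concatenates per-GPU outputs along dim 0, and 0-dim scalars cannot be concatenated. A minimal illustration:

import torch

loss = torch.tensor(0.37)           # 0-dim scalar, as F.cross_entropy returns
loss = torch.unsqueeze(loss, 0)     # shape (1,): safe to gather across replicas
merged = torch.cat([loss, loss])    # what DataParallel's gather effectively does
assert merged.shape == (2,)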
Example #16
    def forward(self,
                base_feat,
                im_info,
                gt_boxes,
                num_boxes,
                crowdsourced_classes=None,
                alpha_con=None):
        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        # rpn_conv1 torch.Size([1, 512, 50, 37])
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
        # get rpn classification score
        # rpn_cls_score torch.Size([1, 18, 50, 37])
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        # rpn_cls_prob_reshape torch.Size([1, 2, 450, 37])
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        # rpn_cls_prob torch.Size([1, 18, 50, 37])
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes
        # rpn_bbox_pred torch.Size([1, 36, 50, 37])
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            # rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))
            # rpn_label = rpn_data[0].view(batch_size, -1)
            # rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            # rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
            # rpn_label = Variable(rpn_label.long())
            # print('before rpn_label: ', rpn_label);

            # Aggregation Layer
            if self.label_source == 2:
                gt_boxes = self.RPN_aggregation(
                    (rpn_cls_prob.data, gt_boxes, num_boxes, im_info,
                     crowdsourced_classes, alpha_con))

            # generate anchor labels (rpn_cls_score only provides the size for reference)
            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            # exclude anchors labeled -1 from the classification cross-entropy loss
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0,
                                               rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            if DEBUG:
                print('after rpn_label: ', rpn_label)

            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            # torch.Size([1, 36, 50, 37])
            rpn_bbox_targets = Variable(rpn_bbox_targets)
            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
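Note: the -1-label filtering above can also be expressed with F.cross_entropy's ignore_index, which skips those anchors internally; the explicit index_select form used in these examples additionally makes fg_cnt easy to compute. A toy equivalence check:

import torch
import torch.nn.functional as F

scores = torch.randn(6, 2)                      # (num_anchors, fg/bg)
labels = torch.tensor([1, -1, 0, -1, 1, 0])     # -1 marks "don't care" anchors

keep = labels.ne(-1).nonzero().view(-1)         # explicit filtering, as above
loss_a = F.cross_entropy(scores[keep], labels[keep])

loss_b = F.cross_entropy(scores, labels, ignore_index=-1)
assert torch.allclose(loss_a, loss_b)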
Example #17
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, num_proposal = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider to change roi2gt_assignment[0]
        # roi_part_match[0] and  roi_part_match_overlap[0] and so on

        iou_threshold = 0.7
        dis_threshold = 0.01
        # part_size = 10
        # relation_size = 5
        iou_size = 6
        edge_size = 4
        child_size = 4
        batch = 0
        if True:
            if not self.training:
                rois = rois[:, :num_proposal, :]
                pooled_feat = pooled_feat[:num_proposal, :]

            # first, calculate the overlaps among rois, set weights in edges between nodes iou>0.7 to 1
            overlaps = bbox_overlaps_batch(rois, rois)
            # overlaps_bin = overlaps.cpu().data.numpy().copy()

            _, N_node, _ = overlaps.shape
            # second, calculate the distances among rois; connect non-overlapping nodes whose distance is below dis_threshold
            distances = bbox_distances_batch(rois, rois)
            # update 20191115: build graph for rois based on index (default: batch_size = 1)
            # feature cosine similarity

            # similarity in PGCN
            dot_product_mat = torch.mm(pooled_feat,
                                       torch.transpose(pooled_feat, 0, 1))
            len_vec = torch.unsqueeze(torch.sqrt(
                torch.sum(pooled_feat * pooled_feat, dim=1)),
                                      dim=0)
            len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
            pooled_feat_sim_mat = dot_product_mat / len_mat

            # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

            # calculate the adj_mat based on iou and distance, the weights on edges are the cosine similarity between nodes
            mask = torch.eye(N_node, N_node).cuda()
            for s in range(N_node):

                overlap_node_index = (overlaps[batch][s] >=
                                      iou_threshold).nonzero()
                overlap_node_size = iou_size if overlap_node_index.shape[
                    0] > iou_size else overlap_node_index.shape[0]
                overlap_node_random = torch.randperm(
                    overlap_node_index.shape[0])[0:overlap_node_size]
                overlap_node_index_select = overlap_node_index[
                    overlap_node_random]

                # TODO(junjie) remove the iou box in distance box.

                distance_node_index = (distances[batch][s] <
                                       dis_threshold).nonzero()
                distance_node_size = iou_size if distance_node_index.shape[
                    0] > iou_size else distance_node_index.shape[0]
                distance_node_random = torch.randperm(
                    distance_node_index.shape[0])[0:distance_node_size]
                distance_node_index_select = distance_node_index[
                    distance_node_random]

                _node_index_select = torch.cat(
                    (overlap_node_index_select, distance_node_index_select),
                    dim=0)
                if _node_index_select.shape[0] == 0:
                    continue
                else:
                    _node_index_select = _node_index_select.squeeze(dim=1)
                _node_size = child_size if _node_index_select.shape[
                    0] > child_size else _node_index_select.shape[0]
                _node_index_select_random = torch.randperm(
                    _node_index_select.shape[0])[0:_node_size]
                node_index_select = _node_index_select[
                    _node_index_select_random]

                mask[s, node_index_select] = 1
                # print("test ")

            adj_matrix = torch.mul(mask, pooled_feat_sim_mat)

            pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
            pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
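Note: the pairwise similarity block above computes cosine similarity by hand (a dot-product matrix divided by the outer product of row norms). F.normalize gives the same matrix more compactly and is safer near zero-norm rows; a sketch:

import torch
import torch.nn.functional as F

pooled_feat = torch.randn(8, 2048)

dot = pooled_feat @ pooled_feat.t()                  # manual version, as above
lens = pooled_feat.norm(dim=1, keepdim=True)
sim_manual = dot / (lens @ lens.t())

unit = F.normalize(pooled_feat, dim=1)               # rows scaled to unit length
sim = unit @ unit.t()
assert torch.allclose(sim, sim_manual, atol=1e-5)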
Example #18
File: fpn.py  Project: nguyenvantui/myocr
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        c1 = self.rcnn_layer0(im_data)
        c2 = self.rcnn_layer1(c1)
        c3 = self.rcnn_layer2(c2)
        c4 = self.rcnn_layer3(c3)
        c5 = self.rcnn_layer4(c4)


        # p4 = self.pyramid(p5, c4)
        # p3 = self.pyramid(p4, c3)
        # p2 = self.pyramid(p3, c2)
        # =====================================================
        p5 = self.rcnn_toplayer(c5)
        p4 = self.merge(p5, self.rcnn_latlayer1(c4))
        p4 = self.rcnn_smooth1(p4)
        p3 = self.merge(p4, self.rcnn_latlayer2(c3))
        p3 = self.rcnn_smooth2(p3)
        p2 = self.merge(p3, self.rcnn_latlayer3(c2))
        p2 = self.rcnn_smooth3(p2)
        p6 = self.maxpool2d(p5)
        # ==========================================================
        # c6 = self.rcnn_layer5(c5)
        # p6 = self.rcnn_toplayer(c6)
        # p5 = self.rcnn_latlayer1(c5) + p6
        # p4 = self.rcnn_latlayer2(c4) + p5
        # p3 = self.merge(p4, self.rcnn_latlayer3(c3))
        # p3 = self.rcnn_smooth1(p3)
        # p2 = self.merge(p3, self.rcnn_latlayer4(c2))
        # p2 = self.rcnn_smooth2(p2)
        # =============================================================

        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        rois, rpn_loss_cls, rpn_loss_bbox = self.rcnn_rpn(rpn_feature_maps, im_info, gt_boxes, num_boxes)

        if self.training:
            roi_data = self.rcnn_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:

            rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = None, None, None, None, None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        roi_pool_feat = self.pyramid_roi(mrcnn_feature_maps, rois, im_info)
        pooled_feat = self._head_to_tail(roi_pool_feat)
        bbox_pred = self.rcnn_bbox_pred(pooled_feat)

        if self.training:

            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.long().view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)


        cls_score = self.rcnn_cls_score(pooled_feat)
        objectness = F.softmax(cls_score, dim=1)

        rcnn_loss_cls = 0
        rcnn_loss_bbox = 0

        if self.training:
            rcnn_loss_cls = F.cross_entropy(cls_score, rois_label)
            rcnn_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        objectness = objectness.view(batch_size, -1, objectness.size(1))
        bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        loss = rpn_loss_cls + rpn_loss_bbox + rcnn_loss_cls + rcnn_loss_bbox
        return rois, objectness, bbox_pred, rois_label, loss
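Note: self.merge(...) used to build p4..p2 is not shown. In typical FPN code it upsamples the coarser level to the lateral feature's spatial size and adds them; a sketch of such a helper (an assumption about this project's version):

import torch.nn.functional as F

def merge(top, lateral):
    # upsample the coarser pyramid level to the finer one's size, then add
    _, _, H, W = lateral.size()
    return F.interpolate(top, size=(H, W), mode='bilinear',
                         align_corners=False) + lateral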
Example #19
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # ========= Union Box ==========
        whole_box = union_box_layer(rois, im_info)
        whole_box = whole_box.reshape(whole_box.shape[0], 1, 5)
        whole = torch.from_numpy(whole_box)
        whole = whole.type(torch.cuda.FloatTensor)
        # whole = whole_box.view([-1, 5])

        # edges = edge_box_layer(rois, im_info)
        # edges = torch.from_numpy(edges)
        # edge = edges.view([-1, 12])

        edges_all = edge_whole_layer(rois, im_info)
        edges_all = torch.from_numpy(edges_all)

        # whole_rois = torch.cat((whole, rois), 1)

        rois = Variable(rois)

        # print rois.size()
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            whole_pool_feat = self.RCNN_roi_align_whole(
                base_feat, whole.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))
            whole_pool_feat = self.RCNN_roi_pool(base_feat, whole.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        whole_pool_feat = self._head_to_tail(whole_pool_feat)

        ##########structure_inference_spmm#################

        # pooled_feat = structure_inference_spmm(pooled_feat , whole_pool_feat, edges, rois.size()[1])
        pooled_feat = self.Structure_inference(edges_all, pooled_feat,
                                               whole_pool_feat,
                                               rois.size()[1])

        # print 'pooled_feat.shape:   ',  pooled_feat.shape
        # print 'rois.shape:   ', rois.shape
        # print 'edges.shape: ', edges.shape

        #coordinate = self.coor_fc( rois[:,:,1:].reshape(rois.shape[1], 4) )
        #pooled_feat = torch.cat(( coordinate ,pooled_feat),1)
        #pooled_feat = torch.add(coordinate, pooled_feat)

        # #########  external_dim ###########
        #
        # external_feature = rois[:,:,3:].view([128,2])
        # pooled_feat = self.External(pooled_feat,external_feature)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
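Note: union_box_layer above is project-specific; conceptually it returns, per image, the box enclosing all rois, used as the 'whole' context node for structure inference. A hedged pure-torch sketch of that idea, assuming rois has shape (B, N, 5) as [batch_idx, x1, y1, x2, y2]:

import torch

def union_box(rois):
    # smallest box covering every roi in each image of the batch
    x1 = rois[:, :, 1].min(dim=1).values
    y1 = rois[:, :, 2].min(dim=1).values
    x2 = rois[:, :, 3].max(dim=1).values
    y2 = rois[:, :, 4].max(dim=1).values
    batch_idx = rois[:, 0, 0]
    return torch.stack([batch_idx, x1, y1, x2, y2], dim=1)    # (B, 5)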
Example #20
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):

        batch_size = base_feat.size(0)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)

        # get rpn classification score, i.e. the foreground/background score for each anchor
        rpn_cls_score = self.RPN_cls_score(rpn_conv1)

        rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
        rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

        # get rpn offsets to the anchor boxes, i.e. the predicted offsets for each anchor
        rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        # rois has shape (batch, post_top_n, 5): the post_top_n anchors kept after scoring and NMS
        # (the original anchors corrected by the network-predicted deltas). They are mapped back to
        # the MxN image and clipped to its bounds; each roi is one placeholder index plus x1, y1, x2, y2
        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size, -1)

            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
            rpn_cls_score = torch.index_select(
                rpn_cls_score.view(-1, 2).cpu(), 0, rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
Example #21
    def forward(self, im_data1, im_data2, im_info, gt_boxes, num_boxes):

        batch_size = im_data1.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        if self.fusion_mode == "early":
            im_data = torch.cat((im_data1,im_data2), dim=1)
            im_data = self.NIN(im_data)
            base_feat = self.RCNN_base(im_data)


        if self.fusion_mode == "half":
            base_feat1 = self.RCNN_base_half(im_data1)
            base_feat2 = self.RCNN_base_half(im_data2)

            base_feat = torch.cat((base_feat1, base_feat2), dim=1)
            # add a depth dim so self.NIN can fuse the two streams, then squeeze it back
            base_feat = torch.unsqueeze(base_feat, 1)
            base_feat = self.NIN(base_feat)
            base_feat = torch.squeeze(base_feat, 1)
            base_feat = self.RCNN_base_fusion(base_feat)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
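
For quick experimentation, a hedged usage sketch of the two-stream forward above. `model`, the tensor shapes, and CUDA availability are assumptions for illustration, not part of the original example.

    import torch

    # hypothetical aligned two-modality inputs (e.g. RGB + thermal)
    im_data1 = torch.randn(1, 3, 600, 800).cuda()
    im_data2 = torch.randn(1, 3, 600, 800).cuda()
    im_info = torch.tensor([[600., 800., 1.]]).cuda()
    gt_boxes = torch.zeros(1, 20, 5).cuda()
    num_boxes = torch.zeros(1).long().cuda()

    model.eval()  # 'model' is an assumed, already-constructed fusion detector
    with torch.no_grad():
        rois, cls_prob, bbox_pred, *losses, rois_label = model(
            im_data1, im_data2, im_info, gt_boxes, num_boxes)
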
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1))
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # get the rpn loss.
        rpn_loss = rpn_loss_cls + rpn_loss_bbox

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            label = rois_label.long()
            self.fg_cnt = torch.sum(label.data.ne(0))
            self.bg_cnt = label.data.numel() - self.fg_cnt

            self.RCNN_loss_cls = F.cross_entropy(cls_score, label)

            # bounding box regression L1 loss
            self.RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                                  rois_inside_ws,
                                                  rois_outside_ws)

        rcnn_loss = self.RCNN_loss_cls + self.RCNN_loss_bbox

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss, rcnn_loss
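
Because this variant returns the already-summed `rpn_loss` and `rcnn_loss`, a training step reduces to a sketch like the one below; `model`, `optimizer`, and the input tensors are assumed to exist and are not part of the original example.

    # hypothetical training step for the variant above
    optimizer.zero_grad()
    rois, cls_prob, bbox_pred, rpn_loss, rcnn_loss = model(
        im_data, im_info, gt_boxes, num_boxes)
    total_loss = rpn_loss + rcnn_loss
    total_loss.backward()
    optimizer.step()
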
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())

            # TODO

            rois_main_label = Variable(rois_label.view(-1).long())
            rois_sub_class = list(map(
                lambda x: self.sub_classes[x], rois_main_label))
            rois_main_class = list(
                map(lambda x: sub2main_dict[x], rois_sub_class))
            rois_main_label = list(map(
                lambda x: self.main_classes.index(x), rois_main_class))
            rois_main_label = torch.cuda.LongTensor(rois_main_label)
            rois_main_label = Variable(rois_main_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_main_label = None
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        # return roi_data
        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(
                rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled_feat = self.RCNN_roi_crop(
                base_feat, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        elif cfg.POOLING_MODE == 'pspool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # main Rcnn branch
        # feed pooled features to top model
        pooled_feat_main = self._head_to_tail_main(pooled_feat)
        # compute bbox offset
        bbox_pred_main = self.RCNN_bbox_pred_main(pooled_feat_main)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view_main = bbox_pred_main.view(
                bbox_pred_main.size(0), int(bbox_pred_main.size(1) / 4), 4)
            bbox_pred_select_main = torch.gather(bbox_pred_view_main, 1, rois_main_label.view(
                rois_main_label.size(0), 1, 1).expand(rois_main_label.size(0), 1, 4))
            bbox_pred_main = bbox_pred_select_main.squeeze(1)

        # compute object classification probability
        cls_score_main = self.RCNN_cls_score_main(pooled_feat_main)
        cls_prob_main = F.softmax(cls_score_main, 1)


        # sub Rcnn branch

        pooled_feat_sub = self._head_to_tail_sub(pooled_feat)
        bbox_pred_sub = self.RCNN_bbox_pred_sub(pooled_feat_sub)
        if self.training and not self.class_agnostic:
            bbox_pred_view_sub = bbox_pred_sub.view(
                bbox_pred_sub.size(0), int(bbox_pred_sub.size(1) / 4), 4)
            bbox_pred_select_sub = torch.gather(bbox_pred_view_sub, 1, rois_label.view(
                rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred_sub = bbox_pred_select_sub.squeeze(1)

        cls_score_sub = self.RCNN_cls_score_sub(pooled_feat_sub)

        #pdb.set_trace()
        # process weight of main classes to sub score
        if 'score' in self.casecade_type:
            main_cls_weight = torch.cuda.FloatTensor(
                cls_score_main.size()[0], len(self.sub_classes))
            for key, val in self.main2sub_idx_dict.items():
                for column_idx in val:
                    main_cls_weight[:, column_idx] = cls_score_main[:, key]
            if self.casecade_type == 'add_score':
                cls_score_sub += main_cls_weight
            elif self.casecade_type == 'mul_score':
                cls_score_sub *= main_cls_weight

        cls_prob_sub = F.softmax(cls_score_sub, 1)

        # process weight of main classes to sub prob
        if 'prob' in self.casecade_type:
            main_cls_weight = torch.cuda.FloatTensor(
                cls_prob_main.size()[0], len(self.sub_classes))
            for key, val in self.main2sub_idx_dict.items():
                for column_idx in val:
                    main_cls_weight[:, column_idx] = cls_prob_main[:, key]
            if self.casecade_type == 'add_prob':
                # TODO: normalize
                cls_prob_sub = cls_prob_sub * self.alpha + (1 - self.alpha) * main_cls_weight

        RCNN_loss_cls_main = 0
        RCNN_loss_bbox_main = 0

        RCNN_loss_cls_sub = 0
        RCNN_loss_bbox_sub = 0

        if self.training:
            # classification loss
            RCNN_loss_cls_main = F.cross_entropy(
                cls_score_main, rois_main_label)

            # TODO roi_label should
            RCNN_loss_cls_sub = F.cross_entropy(cls_score_sub, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox_main = _smooth_l1_loss(
                bbox_pred_main, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox_sub = _smooth_l1_loss(
                bbox_pred_sub, rois_target, rois_inside_ws, rois_outside_ws)

        cls_prob_main = cls_prob_main.view(batch_size, rois.size(1), -1)
        bbox_pred_main = bbox_pred_main.view(batch_size, rois.size(1), -1)

        cls_prob_sub = cls_prob_sub.view(batch_size, rois.size(1), -1)
        bbox_pred_sub = bbox_pred_sub.view(batch_size, rois.size(1), -1)

        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls_main = torch.unsqueeze(RCNN_loss_cls_main, 0)
            RCNN_loss_bbox_main = torch.unsqueeze(RCNN_loss_bbox_main, 0)
            RCNN_loss_cls_sub = torch.unsqueeze(RCNN_loss_cls_sub, 0)
            RCNN_loss_bbox_sub = torch.unsqueeze(RCNN_loss_bbox_sub, 0)

        return rois, cls_prob_main, bbox_pred_main, cls_prob_sub, bbox_pred_sub, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls_sub, RCNN_loss_bbox_sub, RCNN_loss_cls_main, RCNN_loss_bbox_main, rois_label
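
The main/sub cascade above relies on mappings between a coarse and a fine label space (`sub2main_dict`, `main2sub_idx_dict`, `self.main_classes`, `self.sub_classes`) that the example does not define. A hedged sketch of how such mappings could be built; all class names here are purely illustrative.

    # illustrative construction of the class mappings assumed above
    main_classes = ['__background__', 'vehicle', 'animal']
    sub_classes = ['__background__', 'car', 'truck', 'cat', 'dog']
    sub2main_dict = {'__background__': '__background__',
                     'car': 'vehicle', 'truck': 'vehicle',
                     'cat': 'animal', 'dog': 'animal'}

    # main-class index -> list of sub-class column indices
    main2sub_idx_dict = {}
    for sub_idx, sub_name in enumerate(sub_classes):
        main_idx = main_classes.index(sub2main_dict[sub_name])
        main2sub_idx_dict.setdefault(main_idx, []).append(sub_idx)
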
示例#24
0
    def forward(self, rpn_feature_maps, im_info, gt_boxes, num_boxes):

        n_feat_maps = len(rpn_feature_maps)

        rpn_cls_scores = []
        rpn_cls_probs = []
        rpn_bbox_preds = []
        rpn_shapes = []
        rpn_rank_inds = []
        level_ids = []

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'
        nms_pre = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        batch_size = rpn_feature_maps[0].size(0)
        for i in range(n_feat_maps):
            feat_map = rpn_feature_maps[i]
            # batch_size = feat_map.size(0)

            # return feature map after convrelu layer
            rpn_conv1 = F.relu(self.RPN_Conv(feat_map), inplace=True)
            # get rpn classification score
            rpn_cls_score = self.RPN_cls_score(rpn_conv1)
            rpn_cls_prob = rpn_cls_score.sigmoid()

            # get rpn offsets to the anchor boxes
            rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

            rpn_shapes.append(
                [rpn_cls_score.size()[2],
                 rpn_cls_score.size()[3]])

            rpn_cls_score = rpn_cls_score.permute(0, 2, 3,
                                                  1).contiguous().view(
                                                      batch_size, -1, 1)
            rpn_cls_prob = rpn_cls_prob.permute(0, 2, 3, 1).contiguous().view(
                batch_size, -1, 1)
            rpn_bbox_pred = rpn_bbox_pred.permute(0, 2, 3,
                                                  1).contiguous().view(
                                                      batch_size, -1, 4)

            ranked_scores, rank_inds = rpn_cls_prob.sort(dim=1,
                                                         descending=True)
            rank_inds = rank_inds.view(-1)

            if rpn_cls_score.shape[1] > nms_pre:
                rank_inds = rank_inds[:nms_pre]
                rpn_cls_score = rpn_cls_score[:, rank_inds, :]
                rpn_bbox_pred = rpn_bbox_pred[:, rank_inds, :]
                rpn_cls_prob = rpn_cls_prob[:, rank_inds, :]

            rpn_rank_inds.append(rank_inds)
            rpn_cls_scores.append(rpn_cls_score)
            rpn_cls_probs.append(rpn_cls_prob)
            rpn_bbox_preds.append(rpn_bbox_pred)
            level_ids.append((rpn_cls_score[0].view(-1)).new_full(
                ((rpn_cls_score[0].view(-1)).size(0), ), i, dtype=torch.long))

        rpn_cls_score_alls = torch.cat(rpn_cls_scores, 1)
        rpn_cls_prob_alls = torch.cat(rpn_cls_probs, 1)
        rpn_bbox_pred_alls = torch.cat(rpn_bbox_preds, 1)
        ids = torch.cat(level_ids)

        n_rpn_pred = rpn_cls_score_alls.size(1)

        rois = self.RPN_proposal(
            (rpn_cls_prob_alls.data, rpn_bbox_pred_alls.data, im_info, cfg_key,
             rpn_shapes, rpn_rank_inds, ids))

        self.rpn_loss_cls = torch.zeros(1).cuda()
        self.rpn_loss_cls_neg = torch.zeros(1).cuda()
        self.rpn_loss_box = torch.zeros(1).cuda()

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None
            BCE = nn.BCEWithLogitsLoss()
            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score_alls.data, gt_boxes, im_info, num_boxes,
                 rpn_shapes, rpn_rank_inds))
            # compute classification loss
            rpn_label = rpn_data[0].view(batch_size, -1)
            rpn_keep = rpn_label.view(-1).ne(-1).nonzero().view(-1)
            rpn_cls_score = torch.index_select(rpn_cls_score_alls.view(-1), 0,
                                               rpn_keep)
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            self.rpn_loss_cls = BCE(rpn_cls_score, rpn_label)
            # print('rpn_loss_cls', self.rpn_loss_cls)
            # rpn_label = rpn_label.view(batch_size,-1)
            # rpn_cls_score = rpn_cls_score.view(batch_size,-1)
            # for i in range(batch_size):
            #     rpn_label_t = rpn_label[i]
            #     rpn_cls_score_t = rpn_cls_score[i]
            #     rpn_loss_cls_t = BCE(rpn_cls_score_t, rpn_label_t)
            #     print('rpn_loss_cls_t',rpn_loss_cls_t)

            # self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

            fg_cnt = torch.sum(rpn_label.data.ne(0))
            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]
            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_inside_weights.size(1), 4))
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights.unsqueeze(2) \
                    .expand(batch_size, rpn_bbox_outside_weights.size(1), 4))
            rpn_bbox_targets = Variable(rpn_bbox_targets)
            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred_alls,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3)

        return rois, self.rpn_loss_cls, self.rpn_loss_box, self.rpn_loss_cls_neg
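
Note that the pre-NMS selection above flattens `rank_inds` across the batch, which implicitly assumes `batch_size == 1`. A hedged per-image variant using `topk` might look like the sketch below (`nms_pre` as defined above); this is an alternative formulation, not the author's code.

    # hypothetical per-image top-k selection that keeps the batch dimension
    scores = rpn_cls_prob.squeeze(-1)                   # [B, N]
    k = min(nms_pre, scores.size(1))
    topk_scores, topk_inds = scores.topk(k, dim=1)      # [B, k]
    rpn_bbox_topk = torch.gather(
        rpn_bbox_pred, 1, topk_inds.unsqueeze(-1).expand(-1, -1, 4))
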
示例#25
0
    def forward(self, im_data, im_info, gt_boxes, gt_boxes_sens, num_boxes):
        batch_size = im_data[0].size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        gt_boxes_sens = gt_boxes_sens.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat_c = self.RCNN_base_c(im_data[0])
        base_feat_t = self.RCNN_base_t(im_data[1])
        base_feat_fused = 0.5 * (base_feat_c + base_feat_t)
        base_feat_fused = self.RCNN_base_fused(base_feat_fused)
        conv5_c = self.RCNN_base_f1(base_feat_c)
        conv5_t = self.RCNN_base_f2(base_feat_t)

        # feed fused base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat_fused, im_info, gt_boxes, num_boxes)

        # if it is training phase, then use ground truth bboxes for refining
        if self.training:
            # 50% jitter probability
            if np.random.rand(1)[0] > 0.5:
                jitter = (torch.randn(1, 256, 4) / 20).cuda()
            else:
                jitter = torch.zeros(1, 256, 4).cuda()
            # feed jitter to obtain rois_align_target
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, gt_boxes_sens, num_boxes, jitter, im_info)
            rois, rois_jittered, rois_label, rois_target, rois_align_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_align_target = Variable(rois_align_target.view(-1, rois_align_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_jittered = copy.deepcopy(rois)
            rois_label = None
            rois_target = None
            rois_align_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0


        # Region Feature Alignment module
        ctx_rois = bbox_contextual_batch(rois)
        clip_boxes(ctx_rois[:,:,1:], im_info, batch_size)
        ctx_rois = Variable(ctx_rois)
        ctx_rois_jittered = bbox_contextual_batch(rois_jittered)
        clip_boxes(ctx_rois_jittered[:,:,1:], im_info, batch_size)
        ctx_rois_jittered = Variable(ctx_rois_jittered)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(ctx_rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(ctx_rois_jittered.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, ctx_rois.view(-1, 5))    
            pooled_feat_t = self.RCNN_roi_align(conv5_t, ctx_rois_jittered.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, ctx_rois.view(-1,5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, ctx_rois_jittered.view(-1,5))
        
        pooled_feat_res = pooled_feat_t - pooled_feat_c

        # feed pooled features to top model
        pooled_feat_res = self._head_to_tail_align(pooled_feat_res)
        bbox_align_pred = self.RCNN_bbox_align_pred(pooled_feat_res)

        RCNN_loss_bbox_align = 0
        
        # Apply bounding-box regression deltas
        box_deltas = bbox_align_pred.data
        box_deltas_zeros = torch.zeros(box_deltas.shape).cuda()
        box_deltas = torch.cat((box_deltas, box_deltas_zeros), 1)


        # Optionally normalize targets by a precomputed mean and stdev
        # The roi alignment process is class_agnostic
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(batch_size, -1, 4)

        rois_sens = rois_jittered.new(rois_jittered.size()).zero_()
        rois_sens[:,:,1:5] = bbox_transform_inv(rois_jittered[:,:,1:5], box_deltas, batch_size)

        clip_boxes(rois_sens[:,:,1:5], im_info, batch_size)
        


        rois = Variable(rois)
        rois_sens = Variable(rois_sens)

        if cfg.POOLING_MODE == 'crop':
            grid_xy = _affine_grid_gen(rois.view(-1, 5), conv5_c.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_c = self.RCNN_roi_crop(conv5_c, Variable(grid_yx).detach())
            grid_xy = _affine_grid_gen(rois_sens.view(-1, 5), conv5_t.size()[2:], self.grid_size)
            grid_yx = torch.stack([grid_xy.data[:,:,:,1], grid_xy.data[:,:,:,0]], 3).contiguous()
            pooled_feat_t = self.RCNN_roi_crop(conv5_t, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat_c = F.max_pool2d(pooled_feat_c, 2, 2)
                pooled_feat_t = F.max_pool2d(pooled_feat_t, 2, 2)

        elif cfg.POOLING_MODE == 'align':
            pooled_feat_c = self.RCNN_roi_align(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_align(conv5_t, rois_sens.view(-1, 5))

        elif cfg.POOLING_MODE == 'pool':
            pooled_feat_c = self.RCNN_roi_pool(conv5_c, rois.view(-1, 5))
            pooled_feat_t = self.RCNN_roi_pool(conv5_t, rois_sens.view(-1, 5))
                                                        
        cls_score_ref = self.confidence_ref(self.RCNN_top_ref(pooled_feat_c.view(pooled_feat_c.size(0), -1)))
        cls_score_sens = self.confidence_sens(self.RCNN_top_sens(pooled_feat_t.view(pooled_feat_t.size(0), -1)))
        cls_prob_ref = F.softmax(cls_score_ref, 1)
        cls_prob_sens = F.softmax(cls_score_sens, 1)

        confidence_ref = torch.abs(cls_prob_ref[:,1]-cls_prob_ref[:,0])
        confidence_sens = torch.abs(cls_prob_sens[:,1]-cls_prob_sens[:,0])
        confidence_ref = confidence_ref.unsqueeze(1).unsqueeze(2).unsqueeze(3)
        confidence_sens = confidence_sens.unsqueeze(1).unsqueeze(2).unsqueeze(3)

        pooled_feat_c = confidence_ref * pooled_feat_c
        pooled_feat_t = confidence_sens * pooled_feat_t
        pooled_feat = pooled_feat_c + pooled_feat_t


        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_cls_ref = 0
        RCNN_loss_cls_sens = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            RCNN_loss_cls_ref = F.cross_entropy(cls_score_ref, rois_label)
            RCNN_loss_cls_sens = F.cross_entropy(cls_score_sens, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)
            RCNN_loss_bbox_align = _smooth_l1_loss(bbox_align_pred, rois_align_target[:,:2], rois_inside_ws[:,:2], rois_outside_ws[:,:2])


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, rois_sens, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_cls_ref, RCNN_loss_cls_sens, RCNN_loss_bbox, RCNN_loss_bbox_align, rois_label
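
For context, `bbox_transform_inv` applies the predicted deltas to boxes. A minimal single-image sketch of the standard transform; the repo's version additionally handles the batch dimension, so this is a simplified reference, not a drop-in replacement.

    import torch

    def bbox_transform_inv_sketch(boxes, deltas):
        # boxes: [N, 4] as (x1, y1, x2, y2); deltas: [N, 4] as (dx, dy, dw, dh)
        widths = boxes[:, 2] - boxes[:, 0] + 1.0
        heights = boxes[:, 3] - boxes[:, 1] + 1.0
        ctr_x = boxes[:, 0] + 0.5 * widths
        ctr_y = boxes[:, 1] + 0.5 * heights

        pred_ctr_x = deltas[:, 0] * widths + ctr_x
        pred_ctr_y = deltas[:, 1] * heights + ctr_y
        pred_w = torch.exp(deltas[:, 2]) * widths
        pred_h = torch.exp(deltas[:, 3]) * heights

        return torch.stack([pred_ctr_x - 0.5 * pred_w,
                            pred_ctr_y - 0.5 * pred_h,
                            pred_ctr_x + 0.5 * pred_w,
                            pred_ctr_y + 0.5 * pred_h], dim=1)
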
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0] and so on

        part_threshold = 0.5

        # the shape of rois is [1, 300, 5]; there may be fewer than 300 proposals after NMS,
        # so the tail of rois is zero-padded
        rois_none_idx = 300
        for i in range(rois.shape[1]):
            if rois[:, i, :].sum() <= 0:
                rois_none_idx = i
                break

        # first, calculate the pairwise overlaps among the rois (used to pick node_cls)
        overlaps = bbox_overlaps_batch(rois[:, :rois_none_idx, :],
                                       rois[:, :rois_none_idx, :])[0]

        N_node, _ = overlaps.shape

        overlaps_bin = overlaps.cpu().data.numpy().copy()

        for j in range(N_node):
            for k in range(N_node):
                if overlaps_bin[j][k] >= part_threshold:
                    overlaps_bin[j][k] = 1
                else:
                    overlaps_bin[j][k] = 0
                if k == j:
                    overlaps_bin[j][k] = 0

        idx_subgraph, vertex_subgraph = subgraph_split(overlaps_bin)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # # update 20191105: build graph for rois based on index (default: batch_size = 1)

        roi_all_idx_list = []
        roi_cls_idx_list = []
        roi_part_idx_list = []

        adj_jud = np.zeros((0))
        adj_rois = torch.zeros(0).cuda().long()

        for k in range(idx_subgraph):
            idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
            roi_all_idx_list.append(idx_k)

        overlaps = overlaps.cpu().data.numpy()

        # pick the roi with the largest degree as node_cls
        for i in range(len(roi_all_idx_list)):
            rois_idx = roi_all_idx_list[i]

            # only consider clusters with at least 5 rois; such a cluster is likely a real object
            if rois_idx.shape[0] < 5:
                continue

            overlaps_once = overlaps[rois_idx][:, rois_idx]
            overlaps_once_bin = overlaps_bin[rois_idx][:, rois_idx]

            N_node_once, _ = overlaps_once.shape

            ########## update 20191104: select IoU > threshold
            # for j in range(N_node_once):
            #     for k in range(N_node_once):
            #         if overlaps_once[j][k] >= part_threshold:
            #             overlaps_once[j][k] = 1
            #         else:
            #             overlaps_once[j][k] = 0
            #         if k == j:
            #             overlaps_once[j][k] = 0

            # overlaps_once = np.sum(overlaps_once, axis=1)
            #
            # rois_once_max_idx = np.argmax(overlaps_once)
            # roi_cls_idx_list.append(rois_idx[rois_once_max_idx])
            #
            # roi_part_tmp = []
            # for k in range(rois_idx.shape[0]):
            #     if overlaps[rois_idx[rois_once_max_idx]][k] == 0:
            #         continue
            #     roi_part_tmp.append(rois_idx[k])
            # roi_part_tmp = torch.from_numpy(np.array(roi_part_tmp))
            # roi_part_idx_list.append(roi_part_tmp)

            ########## update 20191107: all proposal

            overlaps_once_bin = np.sum(overlaps_once_bin, axis=1)

            rois_once_max_idx = np.argmax(overlaps_once_bin)
            roi_cls_idx_list.append(rois_idx[rois_once_max_idx])

            roi_part_tmp = []
            roi_iou = overlaps_once[rois_once_max_idx]
            roi_part_num_threshold = 10
            if roi_iou.shape[0] >= roi_part_num_threshold:
                roi_order = np.argsort(roi_iou)[::-1]
                for ii in range(roi_part_num_threshold):
                    roi_part_tmp.append(rois_idx[roi_order[ii]])
            else:
                for k in range(rois_idx.shape[0]):
                    if overlaps[rois_idx[rois_once_max_idx]][k] == 0:
                        continue
                    roi_part_tmp.append(rois_idx[k])
            roi_part_tmp = torch.from_numpy(np.array(roi_part_tmp))
            roi_part_idx_list.append(roi_part_tmp)

        roi_cls_idx_list = torch.from_numpy(np.array(roi_cls_idx_list)).cuda()

        for i in range(roi_cls_idx_list.shape[0]):
            adj_jud = np.concatenate((adj_jud, [1]))
            adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i + 1]))
            try:
                if roi_part_idx_list[i].shape[0] != 0:
                    adj_jud = np.concatenate(
                        (adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
                    adj_rois = torch.cat(
                        (adj_rois, roi_part_idx_list[i].cuda()))
            except IndexError:
                print('IndexError happened, continuing')
                continue

        node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]

        adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))

        # link edges for node_cls to node_cls
        for k in range(len(node_cls_idx) - 1):
            idx_node_cls_1 = node_cls_idx[k]
            idx_node_cls_2 = node_cls_idx[k + 1]
            adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
            adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1

        # link edges for node_cls to related node_part
        for k in range(len(node_cls_idx) - 1):
            idx_start = node_cls_idx[k]
            idx_end = node_cls_idx[k + 1]
            for s in range(idx_start, idx_end):
                for t in range(idx_start, idx_end):
                    if s == t:
                        adj_matrix_bin[s, t] = 0
                    else:
                        adj_matrix_bin[s, t] = 1

        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))

        for s in range(len(adj_jud)):
            for t in range(len(adj_jud)):
                if adj_matrix_bin[s, t] == 1:
                    node_feat_s = pooled_feat[adj_rois[s], :]
                    node_feat_t = pooled_feat[adj_rois[t], :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                else:
                    adj_matrix[s, t] = 0

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        try:
            pooled_feat[adj_rois, :] = F.relu(
                self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
            pooled_feat[adj_rois, :] = F.relu(
                self.gcn2(pooled_feat[adj_rois, :], adj_matrix))
        except RuntimeError:
            print(pooled_feat[adj_rois, :].size())
            print(adj_matrix.size())

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
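
`self.gcn1`/`self.gcn2` above are graph-convolution modules that the example does not show. One plausible minimal implementation is a vanilla GCN layer like the sketch below; this is an assumption about their structure, not necessarily the author's exact version.

    import torch
    import torch.nn as nn

    class SimpleGCNLayer(nn.Module):
        # vanilla GCN layer: H' = D^-1 (A + I) H W
        def __init__(self, in_dim, out_dim):
            super().__init__()
            self.linear = nn.Linear(in_dim, out_dim, bias=False)

        def forward(self, x, adj):
            # add self-loops, then row-normalize the adjacency
            adj = adj + torch.eye(adj.size(0), device=adj.device)
            deg = adj.sum(dim=1, keepdim=True).clamp(min=1e-6)
            return (adj / deg) @ self.linear(x)
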
示例#27
0
    def forward(self, feat, gt_boxes, num_boxes, stage):
        # feat: acoustic features (we use STFT) [batch_size, seq_len, feat_dim], default [8, 1000, 257]
        # gt_boxes: ground truth speech segments, the last dimension is (start_frame, end_frame, speaker index)
        # [batch_size, padded_len, 3], default [8, 20, 3]
        # num_boxes: number of speech segments in each audio [batch_size], default [8]
        # stage: specify the stage (can be train, dev or test)
        batch_size, seq_len, feat_dim = feat.size(0), feat.size(1), feat.size(2)

        feat = torch.unsqueeze(feat, 1)
        feat = torch.transpose(feat, 2, 3)
        im_info = torch.from_numpy(np.array([[feat_dim, seq_len]]))
        im_info = im_info.expand(batch_size, im_info.size(1))

        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # base_feat: deep features after backbone (ResNet101)
        # [batch_size, num_channels, h, w], default [8, 1024, 16, 63]
        base_feat = self.RCNN_base(feat)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes, stage)
        # rois: regions of interest (ROIs), the selected speech segments
        # The last dimension is (batch_idx, start_t, end_t)
        # [batch_size, number of rois, 3] default: [8, 100, 3]

        # if it is the training phase, then use ground truth bboxes for refining
        if stage == "train" or stage == "dev":
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            # rois: selected ROIs to compute loss, the last dimension is (batch_idx, start_t, end_t)
            # [batch_size, number of rois, 3], default [8, 64, 3]

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        elif stage == "test":
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
        else:
            raise ValueError("Condition not defined.")

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        rois_tmp = rois.new(rois.size(0), rois.size(1), 5).zero_()
        rois_tmp[:, :, np.array([0, 1, 3]).astype(int)] = rois
        rois_tmp[:, :, 4] = feat_dim - 1

        # default is 'align'
        if cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois_tmp.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois_tmp.view(-1, 5))
        else:
            raise ValueError("Pooling mode not supported.")
        # pooled_feat: the pooled feature for speech segments
        # [batch_size * number of rois, number of channels, 7, 7], default [512, 1024, 7, 7]

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)

        # compute object classification probability
        bg_cls_score = self.RCNN_bg_cls_score(pooled_feat)
        bg_cls_prob = F.softmax(bg_cls_score, 1)
        seg_embed = self.RCNN_embed(pooled_feat)
        cls_score = self.RCNN_cls_score(F.relu(seg_embed))
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_bg_cls = 0
        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0
        RCNN_loss_cls_spk = 0

        if stage == "train" or stage == "dev":
            # RCNN_loss_cls is the loss to classify fg/bg
            rois_bg_label = (rois_label > 0).long()
            RCNN_loss_cls = F.cross_entropy(bg_cls_score, rois_bg_label)
            cls_score_nonzero, rois_label_nonzero = cls_score[
                rois_label != 0, :], rois_label[rois_label != 0]

            # RCNN_loss_cls_spk is the loss to classify different speakers
            RCNN_loss_cls_spk = F.cross_entropy(cls_score_nonzero,
                                                rois_label_nonzero)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        bg_cls_prob = bg_cls_prob.view(batch_size, rois.size(1), -1)
        return rois, bg_cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_cls_spk, RCNN_loss_bbox, rois_label, seg_embed
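
To turn the returned rois of this speech example back into time stamps, a hedged post-processing sketch follows; `frame_shift` (the STFT hop in seconds) is an assumed value, not part of the example.

    # hypothetical conversion of rois (batch_idx, start_frame, end_frame) to seconds
    frame_shift = 0.01  # assumed 10 ms STFT hop
    segments = []
    for b in range(rois.size(0)):
        for r in range(rois.size(1)):
            _, start_f, end_f = rois[b, r]
            if float(end_f) <= float(start_f):
                continue  # skip padded / empty rois
            segments.append((float(start_f) * frame_shift,
                             float(end_f) * frame_shift))
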
示例#28
0
    def forward(self, base_feat, im_info, gt_boxes, num_boxes):
        # base_feat size: [nBatch, nChan, H, W], usually nChan = 1024

        batch_size = base_feat.size(0)
        if self.K > 1:
            assert batch_size == self.K
            # stack channels from all images making nBatch = 1
            bs, nc, h, w = base_feat.shape
            base_feat = base_feat.view(1, bs * nc, h, w)

        # return feature map after convrelu layer
        rpn_conv1 = F.relu(
            self.RPN_Conv(base_feat),
            inplace=True)  # size: [nBatch, nChan, H, W], usually nChan = 512
        # get rpn classification score
        rpn_cls_score = self.RPN_cls_score(
            rpn_conv1)  # size: [nBatch, 2 x nAnchors, H, W]
        if self.K > 1:
            # we predict same anchor score for all stack images, so we duplicate score making nBatch back to K
            rpn_cls_score = rpn_cls_score.repeat(self.K, 1, 1, 1)

        # reshape to perform softmax on bg/fg by sending bg/fg on dim=1
        rpn_cls_score_reshape = self.reshape(
            rpn_cls_score, 2)  # size [nBatch, 2, nAnchors x H, W]
        rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape,
                                         dim=1)  # same size as above
        rpn_cls_prob = self.reshape(
            rpn_cls_prob_reshape,
            self.nc_score_out)  # back to [nBatch, 2 x nAnchors, H, W]

        # get rpn offsets to the anchor boxes
        rpn_bbox_pred = self.RPN_bbox_pred(
            rpn_conv1)  # size [nBatch, 4 x nAnchors, H, W]
        if self.K > 1:
            # get the deviation for each stacked image making nBatch back to K
            rpn_bbox_pred = rpn_bbox_pred.view(self.K,
                                               self.nc_bbox_out // self.K, h, w)

        # proposal layer
        cfg_key = 'TRAIN' if self.training else 'TEST'

        # rois size: [nBatch, numTopProps, 1+4] (last dim: batch_id + 4 coords.), usually numTopProps = 2000
        # NOTE for K > 1: ensure that if filtering, NMS, sorting, etc... in RPN_proposal select one proposal
        # coming from a given anchor in an image, it is also selected in the other images.
        rois = self.RPN_proposal(
            (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key))

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0

        # generating training labels and build the rpn loss
        if self.training:
            assert gt_boxes is not None

            # gt_boxes size: [nBatch, maxGT, 4+class], usually maxGT = 20
            rpn_data = self.RPN_anchor_target(
                (rpn_cls_score.data, gt_boxes, im_info, num_boxes))

            # compute classification loss
            rpn_cls_score = rpn_cls_score_reshape.permute(
                0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
            rpn_label = rpn_data[0].view(batch_size,
                                         -1)  # size [nBatch, nAnchors x H x W]
            if self.K > 1:
                for k in range(self.K - 1):
                    assert not rpn_label[0].ne(rpn_label[k]).any()

            # get index we keep for classification: rpn_keep size [nBatch x rpnBatch] (this is a vector)
            rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))

            # select proposal we keep
            rpn_cls_score = torch.index_select(rpn_cls_score.view(
                -1, 2), 0, rpn_keep)  # size [nBatch x rpnBatch, 2]
            rpn_label = torch.index_select(rpn_label.view(-1), 0,
                                           rpn_keep.data)
            rpn_label = Variable(rpn_label.long())  # size [nBatch x rpnBatch]
            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
            fg_cnt = torch.sum(rpn_label.data.ne(0))

            # targets and associated weights have sizes: [nBacth, 4 x nAnchors, H, W]
            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]

            # compute bbox regression loss
            rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
            rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
            rpn_bbox_targets = Variable(rpn_bbox_targets)

            self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=3,
                                                dim=[1, 2, 3])

        return rois, self.rpn_loss_cls, self.rpn_loss_box
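
The `self.reshape` helper used by this RPN is the standard bg/fg reshape found in common Faster R-CNN implementations; a sketch for reference, assumed from how it is called above since it is not shown here.

    @staticmethod
    def reshape(x, d):
        # [N, C, H, W] -> [N, d, (C*H)/d, W] so softmax can run over bg/fg on dim=1
        n, c, h, w = x.size()
        return x.view(n, int(d), int(c * h / d), w)
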
示例#29
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # Bottom-up
        c1 = self.RCNN_layer0(im_data)
        c2 = self.RCNN_layer1(c1)
        c3 = self.RCNN_layer2(c2)
        c4 = self.RCNN_layer3(c3)
        c5 = self.RCNN_layer4(c4)
        # Top-down
        p5 = self.RCNN_toplayer(c5)  # a 1x1 convolution yields the M5 feature
        p4 = self._upsample_add(p5, self.RCNN_latlayer1(c4))
        p4 = self.RCNN_smooth1(p4)
        p3 = self._upsample_add(p4, self.RCNN_latlayer2(c3))
        p3 = self.RCNN_smooth2(p3)
        p2 = self._upsample_add(p3, self.RCNN_latlayer3(c2))
        p2 = self.RCNN_smooth3(p2)

        p6 = self.maxpool2d(p5)
        # at this point, all of the pyramid feature maps have been obtained

        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            rpn_feature_maps, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        # pooling features based on rois, output 14x14 map
        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois,
                                              im_info)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(roi_pool_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.long().view(rois_label.size(0), 1,
                                       1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # loss (cross entropy) for object classification
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # loss (l1-norm) for bounding box regression
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))
        bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
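
`_upsample_add` merges a top-down map with a lateral map in the FPN examples above. A hedged sketch of the usual implementation; modern PyTorch uses F.interpolate where older code used F.upsample, and the repo's version may differ in interpolation flags.

    import torch.nn.functional as F

    def _upsample_add(x, y):
        # upsample x to y's spatial size, then add (standard FPN top-down merge)
        _, _, H, W = y.size()
        return F.interpolate(x, size=(H, W), mode='bilinear',
                             align_corners=False) + y
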
示例#30
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0, is_sup=False):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data
        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            if not target:
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # , diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # , diff
        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, mask_batch = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes, is_sup)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        #
        # if cfg.POOLING_MODE == 'align':
        #     pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        # elif cfg.POOLING_MODE == 'pool':
        #     pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1,5))
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)


        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)


        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p  # , diff
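
`grad_reverse` in this domain-adaptation example is a gradient reversal layer. A minimal sketch using a modern `autograd.Function`; the author's version may differ in signature, but the identity-forward / negated-backward behavior is the standard technique.

    import torch

    class GradReverse(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x, lambd):
            ctx.lambd = lambd
            return x.view_as(x)

        @staticmethod
        def backward(ctx, grad_output):
            # identity on the forward pass, negated and scaled gradient backward
            return grad_output.neg() * ctx.lambd, None

    def grad_reverse(x, lambd=1.0):
        return GradReverse.apply(x, lambd)
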
示例#31
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        # Bottom-up
        c1 = self.RCNN_layer0(im_data)
        c2 = self.RCNN_layer1(c1)
        c3 = self.RCNN_layer2(c2)
        c4 = self.RCNN_layer3(c3)
        c5 = self.RCNN_layer4(c4)
        c6 = self.RCNN_layer5(c5)

        # Top-down
        p6 = self.RCNN_toplayer(c6)
        p5 = self.RCNN_latlayer1(c5) + p6
        p4 = self.RCNN_latlayer2(c4) + p5
        p3 = self._upsample_add(p4, self.RCNN_latlayer3(c3))
        p3 = self.RCNN_smooth1(p3)
        p2 = self._upsample_add(p3, self.RCNN_latlayer4(c2))
        p2 = self.RCNN_smooth2(p2)

        rpn_feature_maps = [p2, p3, p4, p5, p6]
        mrcnn_feature_maps = [p2, p3, p4, p5]

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feature_maps, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, gt_assign, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois = rois.view(-1, 5)
            rois_label = rois_label.view(-1).long()
            gt_assign = gt_assign.view(-1).long()
            pos_id = rois_label.nonzero().squeeze()
            gt_assign_pos = gt_assign[pos_id]
            rois_label_pos = rois_label[pos_id]
            rois_label_pos_ids = pos_id

            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)
            rois_label = Variable(rois_label)

            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            ## NOTE: additionally, normalize proposals to range [0, 1],
            #        this is necessary so that the following roi pooling
            #        is correct on different feature maps
            # rois[:, :, 1::2] /= im_info[0][1]
            # rois[:, :, 2::2] /= im_info[0][0]

            rois_label = None
            gt_assign = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0
            rois = rois.view(-1, 5)
            pos_id = torch.arange(0, rois.size(0)).long().type_as(rois).long()
            rois_label_pos_ids = pos_id
            rois_pos = Variable(rois[pos_id])
            rois = Variable(rois)

        # print('before pooling, cfg', cfg.POOLING_MODE)
        # print('before pooling, get_cfg', get_cfg().POOLING_MODE)
        # pooling features based on rois, output 14x14 map
        roi_pool_feat = self._PyramidRoI_Feat(mrcnn_feature_maps, rois, im_info)

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(roi_pool_feat)


        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1, rois_label.long().view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # loss (cross entropy) for object classification
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
            # loss (l1-norm) for bounding box regression
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        rois = rois.view(batch_size, -1, rois.size(1))
        cls_prob = cls_prob.view(batch_size, -1, cls_prob.size(1))
        bbox_pred = bbox_pred.view(batch_size, -1, bbox_pred.size(1))

        if self.training:
            rois_label = rois_label.view(batch_size, -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
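Two merge styles appear in the top-down path above: p5 and p4 add the lateral maps directly (which only works if the adjacent stages share spatial size), while p3 and p2 go through _upsample_add. That helper is not shown in this listing; a minimal sketch, assuming the usual FPN definition of upsample-then-add:

import torch.nn.functional as F

def _upsample_add(x, y):
    # Upsample the coarser map x to the lateral map y's spatial size, then add.
    _, _, H, W = y.size()
    return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False) + y

The bilinear interpolation mode is an assumption; older ports use the deprecated F.upsample with the same setting.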
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel.mean())
            if not target:
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        base_feat = self.RCNN_base2(base_feat1)
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # , diff
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p  # ,diff
        # feed the base feature map to the RPN to obtain rois

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p  # ,diff
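All of these heads delegate box regression to _smooth_l1_loss with inside/outside weights, a sigma, and a list of reduction dims. A reference reconstruction in the common py-faster-rcnn style that matches the call sites above; treat it as a sketch, not the authors' exact code:

import torch

def _smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights,
                    bbox_outside_weights, sigma=1.0, dim=[1]):
    # Inside weights select which coordinates are regressed at all;
    # outside weights rebalance the loss across samples.
    sigma_2 = sigma ** 2
    box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(box_diff)
    # Quadratic below 1/sigma^2, linear above: the smooth-L1 switch point.
    smooth_sign = (abs_diff < 1. / sigma_2).detach().float()
    loss = (box_diff ** 2) * (sigma_2 / 2.) * smooth_sign \
           + (abs_diff - 0.5 / sigma_2) * (1. - smooth_sign)
    loss = bbox_outside_weights * loss
    for i in sorted(dim, reverse=True):
        loss = loss.sum(i)
    return loss.mean()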
    def forward(self,
                im_data,
                im_info,
                gt_boxes,
                num_boxes,
                target=False,
                eta=1.0):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # build the image-level multi-hot class-label vector
        if self.training and target:
            cls_label_ind = torch.unique(gt_boxes[:, :, 4].cpu())
            cls_label = torch.zeros(self.n_classes)
            cls_label[cls_label_ind.long()] = 1
            # assume the background category is always present
            cls_label[0] = 1
            cls_label = cls_label.cuda()
            cls_label.requires_grad = False

        # feed image data to base model to obtain base feature map
        base_feat1 = self.RCNN_base1(im_data)
        if self.lc:
            d_pixel, _ = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            if not target:
                _, feat_pixel = self.netD_pixel_1(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))

        base_feat2 = self.RCNN_base2(base_feat1)
        if self.lc:
            d_pixel_2, _ = self.netD_pixel_2(
                grad_reverse(base_feat2, lambd=eta))
        else:
            d_pixel_2 = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))

        base_feat3 = self.RCNN_base3(base_feat2)
        if self.lc:
            d_pixel_3, _ = self.netD_pixel_3(
                grad_reverse(base_feat3, lambd=eta))
        else:
            d_pixel_3 = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
            # print(d_pixel_3.mean())

        base_feat4 = self.RCNN_base4(base_feat3)
        if self.gc:
            d_pixel_4, _ = self.netD_1(grad_reverse(base_feat4, lambd=eta))
        else:
            d_pixel_4 = self.netD_1(grad_reverse(base_feat4, lambd=eta))

        # something wrong
        base_feat = self.RCNN_base5(base_feat4)
        # for target-domain training, return only the domain discriminator outputs
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p

        # feed the base feature map to the RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training and not target:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois

        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)
        #feat_pixel = torch.zeros(feat_pixel.size()).cuda()
        if self.lc:
            feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.gc:
            feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
            pooled_feat = torch.cat((feat, pooled_feat), 1)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic and not target:
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        # image-level weak-supervision loss (disabled): max-pool the per-ROI
        # scores over ROIs, then multi-label BCE against the image-level labels
        if False:
            # cls_prob_sum = torch.sum(cls_prob, 0)
            # x = max(1, x)
            # cls_prob_sum = cls_prob_sum.repeat(2, 1)
            # cls_prob_sum = torch.min(cls_prob_sum, 0)[0]
            max_roi_cls_prob = torch.max(cls_prob, 0)[0]
            # sanity checks: probabilities and labels must lie in [0, 1]
            if not ((max_roi_cls_prob.data >= 0.).all()
                    and (max_roi_cls_prob.data <= 1.).all()):
                pdb.set_trace()
            if not ((cls_label.data >= 0.).all()
                    and (cls_label.data <= 1.).all()):
                pdb.set_trace()
            BCE_loss = F.binary_cross_entropy(max_roi_cls_prob, cls_label)
            return d_pixel, domain_p, BCE_loss

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        # for weakly-supervised detection, aggregate the cls_score and compute the loss

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p  # ,diff
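For reference, the cls_label construction at the top of this forward plus the disabled if False: branch amount to a max-pooled, multi-label, image-level classification loss. A self-contained sketch of that idea; the wrapper function and its signature are my own, only the inner steps come from the snippet:

import torch
import torch.nn.functional as F

def image_level_class_loss(cls_prob, gt_boxes, n_classes):
    # cls_prob: (num_rois, n_classes) per-ROI class probabilities
    # gt_boxes: (B, N, 5) with the class index stored in column 4
    cls_label = torch.zeros(n_classes, device=cls_prob.device)
    cls_label[torch.unique(gt_boxes[:, :, 4]).long()] = 1
    cls_label[0] = 1  # background assumed always present, as in the snippet

    # Image-level score per class = max over all ROIs, then multi-label BCE.
    max_roi_cls_prob = torch.max(cls_prob, 0)[0]
    return F.binary_cross_entropy(max_roi_cls_prob, cls_label)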
    def forward(self, im_data, gt):
        batch_size = im_data.size(0)

        gt_boxes = gt['boxes']
        # for the Jacquard dataset the bounding-box labels are set to -1; for
        # training we flip them to 1, which does not affect the training process.
        if self.training:
            if gt_boxes[:, :, -1].sum().item() < 0:
                gt_boxes[:, :, -1] = -gt_boxes[:, :, -1]
        gt_grasps = gt['grasps']
        gt_grasp_inds = gt['grasp_inds']
        num_boxes = gt['num_boxes']
        num_grasps = gt['num_grasps']
        im_info = gt['im_info']

        for i in range(batch_size):
            if torch.sum(gt_grasp_inds[i]).item() == 0:
                gt_grasp_inds[i, :num_grasps[i].item()] = 1

        # features
        base_feat = self.base(im_data)

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if in the training phase, use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
        else:
            rois_label = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if cfg.MGN.USE_FIXED_SIZE_ROI:
            _rois = rois.view(-1, 5)
            rois_cx = (_rois[:, 1:2] + _rois[:, 3:4]) / 2
            rois_cy = (_rois[:, 2:3] + _rois[:, 4:5]) / 2
            rois_xmin = torch.clamp(rois_cx - 100, min=1, max=600)
            rois_ymin = torch.clamp(rois_cy - 100, min=1, max=600)
            rois_xmax = rois_xmin + 200
            rois_ymax = rois_ymin + 200
            rois_for_grasp = torch.cat(
                [_rois[:, :1], rois_xmin, rois_ymin, rois_xmax, rois_ymax],
                dim=1)
            if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois_for_grasp,
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat,
                                                 Variable(grid_yx).detach())
                if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois_for_grasp)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois_for_grasp)

        else:
            if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat,
                                                 Variable(grid_yx).detach())
                if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        # grasp top
        if self.training:
            if self._ROIGN_USE_POOLED_FEATS:
                rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
                # bs x N_{rois}
                _, rois_inds = torch.max(rois_overlaps, dim=2)
                rois_inds += 1
                grasp_rois_mask = rois_label.view(-1) > 0
            else:
                raise NotImplementedError

        if self.training:
            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._ROIGN_head_to_tail(
                    pooled_feat[grasp_rois_mask])
            else:
                # when there are no positive rois:
                grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_bbox_loss = Variable(
                    torch.Tensor([0]).type_as(gt_grasps))
                grasp_cls_loss = Variable(torch.Tensor([0]).type_as(gt_grasps))
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, rpn_loss_cls, rpn_loss_bbox, rois_label, \
                    grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._ROIGN_head_to_tail(pooled_feat)

        grasp_pred = self.ROIGN_classifier(grasp_feat)
        # bs*N x K*A x 5, bs*N x K*A x 2
        grasp_loc, grasp_conf = grasp_pred

        # generate anchors
        # bs*N x K*A x 5
        grasp_all_anchors = self._generate_anchors(grasp_conf.size(1),
                                                   grasp_conf.size(2), rois)
        # filter out negative samples
        grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
        if self.training:
            grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
            # bs*N x 1 x 1
            rois_w = (rois[:, :, 3] -
                      rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_h = (rois[:, :, 4] -
                      rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_w = rois_w[grasp_rois_mask]
            rois_h = rois_h[grasp_rois_mask]
            # bs*N x 1 x 1
            fsx = rois_w / grasp_conf.size(1)
            fsy = rois_h / grasp_conf.size(2)
            # bs*N x 1 x 1
            xleft = rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
            ytop = rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
            xleft = xleft[grasp_rois_mask]
            ytop = ytop[grasp_rois_mask]

        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_batch_size = grasp_loc.size(0)

        # bs*N x K*A x 2
        grasp_prob = F.softmax(grasp_conf, 2)

        grasp_bbox_loss = 0
        grasp_cls_loss = 0
        grasp_conf_label = None
        if self.training:
            # inside weights indicate which bounding boxes should be regressed
            # outside weights indicate two things:
            # 1. which bounding boxes should contribute to the classification loss
            # 2. how to balance the cls loss and the bbox loss
            grasp_gt_xywhc = points2labels(gt_grasps)
            # bs*N x N_{Gr_gt} x 5
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc,
                                                      gt_grasp_inds, rois_inds)
            # filter out negative samples
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

            # absolute coords to relative coords
            grasp_gt_xywhc[:, :, 0:1] -= xleft
            grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1],
                                                    min=0)
            grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1],
                                                  rois_w)
            grasp_gt_xywhc[:, :, 1:2] -= ytop
            grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2],
                                                    min=0)
            grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2],
                                                  rois_h)

            # grasp training data
            grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.ROIGN_proposal_target(
                grasp_conf,
                grasp_gt_xywhc,
                grasp_all_anchors,
                xthresh=fsx / 2,
                ythresh=fsy / 2)

            grasp_keep = Variable(
                grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
            grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0,
                                            grasp_keep.data)
            grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0,
                                                  grasp_keep.data)
            grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)

            grasp_iw = Variable(grasp_iw)
            grasp_ow = Variable(grasp_ow)
            grasp_loc_label = Variable(grasp_loc_label)
            grasp_bbox_loss = _smooth_l1_loss(grasp_loc,
                                              grasp_loc_label,
                                              grasp_iw,
                                              grasp_ow,
                                              dim=[2, 1])

        return rois, rpn_loss_cls, rpn_loss_bbox, rois_label, \
            grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
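The absolute-to-relative conversion of grasp centers above (subtract the ROI's top-left corner, then clamp into the ROI) is the subtlest part of this head. A small standalone sketch of the same transformation; the function name is my own:

import torch

def grasps_to_roi_relative(grasp_xywhc, xleft, ytop, rois_w, rois_h):
    # grasp_xywhc: (num_rois, num_grasps, 5) grasp labels in image coordinates
    # xleft, ytop, rois_w, rois_h: (num_rois, 1, 1) ROI corners and sizes
    out = grasp_xywhc.clone()
    # Shift the centers into the ROI frame, then clamp to [0, roi size].
    out[:, :, 0:1] = torch.min(torch.clamp(out[:, :, 0:1] - xleft, min=0), rois_w)
    out[:, :, 1:2] = torch.min(torch.clamp(out[:, :, 1:2] - ytop, min=0), rois_h)
    return out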