Example #1
    def _match_priors_gt(self, priors, gt, thresh, num_boxes):
        batch_size = gt.size(0)
        num_priors = priors.size(0)
        overlaps = bbox_overlaps_batch(priors, gt)
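        # overlaps: [b, num_priors, num_objects] pairwise IoU between priors and gt boxes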

        # [b, num_objects] best prior for each ground truth
        best_prior_overlap, best_prior_idx = overlaps.max(1)
        # [b, num_priors] best ground truth for each prior
        best_truth_overlap, best_truth_idx = overlaps.max(2)

        matches = torch.zeros(batch_size, num_priors, 5).type_as(priors)
        for num in range(batch_size):
            # select valid best prior idx
            best_prior_idx_valid = best_prior_idx[num][:num_boxes[num]]
            best_truth_overlap[num].index_fill_(0, best_prior_idx_valid,
                                                2)  # ensure best prior
            # TODO refactor: index  best_prior_idx with long tensor
            # ensure every gt matches with its prior of max overlap
            for j in range(best_prior_idx_valid.size(0)):
                best_truth_idx[num][best_prior_idx_valid[j]] = j
            matches[num] = gt[num][best_truth_idx[num]]

        loc = matches[:, :, :-1]  # Shape: [bs, num_priors,4]
        conf = matches[:, :, -1]  # Shape: [bs, num_priors]
        conf[best_truth_overlap < thresh] = 0  # label as background
        encoded_loc = bbox_transform_batch(priors, loc)
        if cfg.TRAIN.COMMON.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            encoded_loc = ((encoded_loc -
                            self.BBOX_NORMALIZE_MEANS.expand_as(encoded_loc)) /
                           self.BBOX_NORMALIZE_STDS.expand_as(encoded_loc))
        return encoded_loc, conf
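Every example in this section is built around bbox_overlaps_batch. For reference, below is a minimal sketch of a batched pairwise-IoU function with the calling convention used above (anchors of shape [N, 4] against ground truth of shape [b, K, 5], returning IoU of shape [b, N, K]). The actual utility in these repositories also accepts rois of shape [b, N, 5] and handles padded ground-truth rows, so treat this only as an illustration.

import torch

def bbox_overlaps_batch_sketch(anchors, gt_boxes):
    # anchors:  [N, 4]    (x1, y1, x2, y2)
    # gt_boxes: [b, K, 5] (x1, y1, x2, y2, label)
    # returns:  [b, N, K] IoU between every anchor and every gt box
    batch_size, K = gt_boxes.size(0), gt_boxes.size(1)
    N = anchors.size(0)
    anchors = anchors.view(1, N, 4).expand(batch_size, N, 4)
    gt = gt_boxes[:, :, :4]

    anchors_area = ((anchors[:, :, 2] - anchors[:, :, 0] + 1) *
                    (anchors[:, :, 3] - anchors[:, :, 1] + 1)).view(batch_size, N, 1)
    gt_area = ((gt[:, :, 2] - gt[:, :, 0] + 1) *
               (gt[:, :, 3] - gt[:, :, 1] + 1)).view(batch_size, 1, K)

    # intersection rectangle for every (anchor, gt) pair
    ix1 = torch.max(anchors[:, :, 0].unsqueeze(2), gt[:, :, 0].unsqueeze(1))
    iy1 = torch.max(anchors[:, :, 1].unsqueeze(2), gt[:, :, 1].unsqueeze(1))
    ix2 = torch.min(anchors[:, :, 2].unsqueeze(2), gt[:, :, 2].unsqueeze(1))
    iy2 = torch.min(anchors[:, :, 3].unsqueeze(2), gt[:, :, 3].unsqueeze(1))
    iw = (ix2 - ix1 + 1).clamp(min=0)
    ih = (iy2 - iy1 + 1).clamp(min=0)

    inter = iw * ih
    return inter / (anchors_area + gt_area - inter)

Example #1 then takes overlaps.max(1) to find the best prior for every ground-truth box and overlaps.max(2) to find the best ground-truth box for every prior.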
Example #2
    def forward(self, data_batch):
        im_data = data_batch[0]
        im_info = data_batch[1]
        gt_boxes = data_batch[2]
        gt_grasps = data_batch[3]
        num_boxes = data_batch[4]
        num_grasps = data_batch[5]
        gt_grasp_inds = data_batch[6]
        batch_size = im_data.size(0)
        if self.training:
            self.iter_counter += 1

        # for the Jacquard dataset, the bounding box labels are set to -1. For training, we set them to 1, which does not
        # affect the training process.
        if self.training:
            if gt_boxes[:, :, -1].sum().item() < 0:
                gt_boxes[:, :, -1] = 1

        for i in range(batch_size):
            if torch.sum(gt_grasp_inds[i]).item() == 0:
                gt_grasp_inds[i, :num_grasps[i].item()] = 1

        # features
        base_feat = self.FeatExt(im_data)

        # generate rois of RCNN
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)
        if not self.use_objdet_branch:
            rois_scores = rois[:, :, 5:].clone()
            rois = rois[:, :, :5].clone()

        if self.training:
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = \
                self._get_header_train_data(rois, gt_boxes, num_boxes)
        else:
            rois_label, rois_target, rois_inside_ws, rois_outside_ws = None, None, None, None
        pooled_feat = self._roi_pooling(base_feat, rois)

        cls_prob, bbox_pred, RCNN_loss_bbox, RCNN_loss_cls = \
            None, None, torch.Tensor([0]).type_as(rois), torch.Tensor([0]).type_as(rois)
        if self.use_objdet_branch:
            # object detection branch
            cls_score, cls_prob, bbox_pred = self._get_obj_det_result(
                pooled_feat)
            if self.training:
                RCNN_loss_bbox, RCNN_loss_cls = self._obj_det_loss_comp(
                    cls_score, cls_prob, bbox_pred, rois_label, rois_target,
                    rois_inside_ws, rois_outside_ws)
            cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
            bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
        else:
            cls_prob = torch.cat([1 - rois_scores, rois_scores], dim=-1)

        # grasp detection branch
        # 1. obtain grasp features of the positive ROIs and prepare grasp training data
        if self.training:
            rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
            # bs x N_{rois}
            _, rois_inds = torch.max(rois_overlaps, dim=2)
            rois_inds += 1
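            # rois_inds: 1-based index of the highest-IoU gt box for each roi (1-based to match the object indices in gt_grasp_inds)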
            grasp_rois_mask = rois_label.view(-1) > 0

            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._MGN_head_to_tail(
                    pooled_feat[grasp_rois_mask])
                grasp_rois = rois.view(-1, 5)[grasp_rois_mask]
                # process grasp ground truth, return: N_{gr_rois} x N_{Gr_gt} x 5
                grasp_gt_xywhc = points2labels(gt_grasps)
                grasp_gt_xywhc = self._assign_rois_grasps(
                    grasp_gt_xywhc, gt_grasp_inds, rois_inds)
                grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]
            else:
                # when there are no positive rois, return dummy results
                grasp_loc = torch.Tensor([]).type_as(gt_grasps)
                grasp_prob = torch.Tensor([]).type_as(gt_grasps)
                grasp_bbox_loss = torch.Tensor([0]).type_as(gt_grasps)
                grasp_cls_loss = torch.Tensor([0]).type_as(gt_grasps)
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,\
                   grasp_loc, grasp_prob, grasp_bbox_loss , grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._MGN_head_to_tail(pooled_feat)

        # N_{gr_rois} x W x H x A*5, N_{gr_rois} x W x H x A*2
        grasp_loc, grasp_conf = self.FCGN_classifier(grasp_feat)
        feat_height, feat_width = grasp_conf.size(1), grasp_conf.size(2)
        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_prob = F.softmax(grasp_conf, 2)

        # 2. calculate grasp loss
        grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = 0, 0, None
        if self.training:
            # N_{gr_rois} x K*A x 5
            grasp_all_anchors = self._generate_anchors(feat_height, feat_width,
                                                       grasp_rois)
            grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = self._grasp_loss_comp(
                grasp_rois, grasp_conf, grasp_loc, grasp_gt_xywhc,
                grasp_all_anchors, feat_height, feat_width)
        else:
            # bs*N x K*A x 5
            grasp_all_anchors = self._generate_anchors(feat_height, feat_width,
                                                       rois.view(-1, 5))

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,\
               grasp_loc, grasp_prob, grasp_bbox_loss , grasp_cls_loss, grasp_conf_label, grasp_all_anchors
Example #3
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in the graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0], and so on

        if True:

            iou_threshold = 0.8
            dis_threshold = 0.2

            # first, calculate the overlaps among rois and set the edge weight between nodes with IoU >= iou_threshold to 1
            overlaps = bbox_overlaps_batch(rois, rois)
            overlaps_bin = overlaps.cpu().data.numpy().copy()

            _, N_node, _ = overlaps.shape

            overlaps_bin1 = torch.unsqueeze(torch.eye(N_node, N_node).cuda(),
                                            dim=0)
            overlaps_bin1[overlaps >= iou_threshold] = 1
            overlaps_bin1[overlaps < iou_threshold] = 0

            for j in range(N_node):
                for k in range(N_node):
                    if overlaps_bin[0][j][k] >= iou_threshold:
                        overlaps_bin[0][j][k] = 1
                    else:
                        overlaps_bin[0][j][k] = 0
                    if k == j:
                        overlaps_bin[0][j][k] = 0

            # second, calculate the distances among rois and set the edge weight between non-overlapping nodes with distance <= dis_threshold to 1
            distances = bbox_distances_batch(rois, rois)
            distances_bin = distances.cpu().data.numpy().copy()

            for j in range(N_node):
                for k in range(N_node):
                    if distances_bin[0][j][k] <= dis_threshold:
                        distances_bin[0][j][k] = 1
                    else:
                        distances_bin[0][j][k] = 0
                    if k == j:
                        distances_bin[0][j][k] = 0

            #adj_matrix_bin = overlaps_bin + distances_bin

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        dot_product_mat = torch.mm(pooled_feat,
                                   torch.transpose(pooled_feat, 0, 1))
        len_vec = torch.unsqueeze(torch.sqrt(
            torch.sum(pooled_feat * pooled_feat, dim=1)),
                                  dim=0)
        len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
        pooled_feat_sim_mat = dot_product_mat / len_mat
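        # pooled_feat_sim_mat[s, t]: cosine similarity between the pooled features of rois s and t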

        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

        # update 20191027: build graph for rois based on index (default: batch_size = 1)
        part_size = 10
        relation_size = 5
        if True:
            cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

            # calculate adj_matrix based on the binary adjacency; the edge weights are the cosine similarity between node features
            adj_matrix = np.zeros((N_node, N_node))

            for s in range(N_node):
                row_idx = [t for t in range(N_node)]
                random.shuffle(row_idx)
                part_cnt = 0
                relation_cnt = 0
                for t in row_idx:
                    if part_cnt <= part_size:
                        if overlaps_bin[0, s, t] == 1:
                            node_feat_s = pooled_feat[s, :]
                            node_feat_t = pooled_feat[t, :]
                            adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                            part_cnt = part_cnt + 1
                            continue
                for t in row_idx:
                    if part_cnt <= part_size:
                        if overlaps_bin[0, s, t] == 1:
                            node_feat_s = pooled_feat[s, :]
                            node_feat_t = pooled_feat[t, :]
                            adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                            part_cnt = part_cnt + 1
                            continue
                    # if relation_cnt <= relation_size:
                    #     if distances_bin[0, s, t] == 1:
                    #         node_feat_s = pooled_feat[s, :]
                    #         node_feat_t = pooled_feat[t, :]
                    #         adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                    #         relation_cnt = relation_cnt + 1
                    #         continue

                    # if part_cnt > part_size and relation_cnt > relation_size:
                    #     break
                    if part_cnt > part_size:
                        break

            adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
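            # refine the pooled RoI features with two graph-convolution layers over adj_matrix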

            pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
            pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17:fix the bug for dimension specified as 0...
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
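The graph layers self.gcn1 and self.gcn2 applied above are not defined in this snippet. A minimal graph-convolution layer consistent with their call signature (node features plus an adjacency matrix in, refined node features out) might look like the following sketch; it uses a plain adj @ x @ W propagation without the symmetric normalization some GCN variants apply, so the layer actually used in the repository may differ.

import torch
import torch.nn as nn

class GraphConvolutionSketch(nn.Module):
    # hypothetical stand-in for self.gcn1 / self.gcn2
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.empty(in_features, out_features))
        self.bias = nn.Parameter(torch.zeros(out_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x, adj):
        # x:   [N_node, in_features]  pooled RoI features
        # adj: [N_node, N_node]       edge weights (cosine similarity above)
        support = torch.mm(x, self.weight)         # per-node feature transform
        return torch.mm(adj, support) + self.bias  # aggregate neighbor features

With pooled_feat of shape [N_node, D], gcn1 and gcn2 would be instantiated with matching input and output sizes so the downstream classification and bbox heads stay unchanged.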
Example #4
    def forward(self, input):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors

        scores = input[0]
        gt_boxes = input[1]
        im_info = input[2]
        num_boxes = input[3]
        feat_shapes = input[4]

        # NOTE: need to change
        # height, width = scores.size(2), scores.size(3)
        height, width = 0, 0

        batch_size = gt_boxes.size(0)

        anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios, 
                feat_shapes, self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)    
        total_anchors = anchors.size(0)
        
        #keep = ((anchors[:, 0] >= -self._allowed_border) &
        #        (anchors[:, 1] >= -self._allowed_border) &
        #        (anchors[:, 2] < long(im_info[0][1]) + self._allowed_border) &
        #        (anchors[:, 3] < long(im_info[0][0]) + self._allowed_border))
        keep = ((anchors[:, 0] >= -self._allowed_border) &
                (anchors[:, 1] >= -self._allowed_border) &
                (anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
                (anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

        inds_inside = torch.nonzero(keep).view(-1)

        # keep only inside anchors
        anchors = anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
        bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
        bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

        overlaps = bbox_overlaps_batch(anchors, gt_boxes)
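        # overlaps: [batch_size, num_inside_anchors, num_gt] IoU between the kept anchors and gt boxes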

        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        gt_max_overlaps[gt_max_overlaps==0] = 1e-5
        keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size,1,-1).expand_as(overlaps)), 2)

        if torch.sum(keep) > 0:
            labels[keep>0] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

        sum_fg = torch.sum((labels == 1).int(), 1)
        sum_bg = torch.sum((labels == 0).int(), 1)

        for i in range(batch_size):
            # subsample positive labels if we have too many
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug in the multi-gpu setting that causes a segfault.
                # See https://github.com/pytorch/pytorch/issues/1868 for more details.
                # use numpy instead.
                #rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
                #rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long() 
                rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).type(torch.long)
                disable_inds = fg_inds[rand_num[:fg_inds.size(0)-num_fg]]
                labels[i][disable_inds] = -1

            num_bg = cfg.TRAIN.RPN_BATCHSIZE - sum_fg[i]

            # subsample negative labels if we have too many
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                #rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()

                #rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
                rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).type(torch.long)
#                 print('bg_inds.size(0):', bg_inds.size(0))
#                 print('bg_inds.size:', bg_inds.size())
#                 print('num_bg:', num_bg)
#                 print('rand_num:', rand_num.shape)
#                 print('bg_inds:', bg_inds.shape)
#                 print('rand_num[:bg_inds.size(0)-num_bg] shape:', rand_num[:bg_inds.size(0)-num_bg].shape)
#                 print('rand_num[:bg_inds.size(0)-num_bg]', rand_num[:bg_inds.size(0)-num_bg])
                disable_inds = bg_inds[rand_num[:bg_inds.size(0)-num_bg]]
                labels[i][disable_inds] = -1

        offset = torch.arange(0, batch_size)*gt_boxes.size(1)

        argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
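        # gather, for every anchor, the gt box it overlaps most and compute regression targets against it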
        bbox_targets = _compute_targets_batch(anchors, gt_boxes.view(-1,5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

        # use a single value instead of 4 values for easy index.
        bbox_inside_weights[labels==1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            num_examples = torch.sum(labels[i] >= 0).item()
            positive_weights = 1.0 / num_examples
            negative_weights = 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))

        bbox_outside_weights[labels == 1] = positive_weights
        bbox_outside_weights[labels == 0] = negative_weights

        labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

        outputs = []

        # labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
        # labels = labels.view(batch_size, 1, A * height, width)
        outputs.append(labels)
        # bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
        outputs.append(bbox_targets)

        # anchors_count = bbox_inside_weights.size(1)
        # bbox_inside_weights = bbox_inside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
        # bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            # .permute(0,3,1,2).contiguous()

        outputs.append(bbox_inside_weights)

        # bbox_outside_weights = bbox_outside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
        # bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            # .permute(0,3,1,2).contiguous()
        outputs.append(bbox_outside_weights)

        return outputs
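_unmap above scatters the labels and box targets computed on the inside anchors back to the full anchor set. The following is a sketch consistent with how it is called here (2-D input for labels, 3-D input for box targets, removed anchors filled with the fill value); the repository's own helper may differ in detail.

import torch

def _unmap_sketch(data, count, inds, batch_size, fill=0):
    # data: [batch_size, n_inside(, C)] values computed for the anchors indexed by inds
    # returns a tensor covering all count anchors, with fill everywhere else
    if data.dim() == 2:
        ret = data.new_full((batch_size, count), fill)
        ret[:, inds] = data
    else:
        ret = data.new_full((batch_size, count, data.size(2)), fill)
        ret[:, inds, :] = data
    return ret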
Example #5
def context_anchor(rois, features, hh, hw):
    # topleft =     [2*x1 - x2, 2*y1 - y2, x1,        y1]
    # top =         [x1,        2*y1 - y2, x2,        y1]
    # topright =    [x2,        2*y1 - y2, 2*x2 - x1, y1]
    # left =        [2*x1 - x2, y1,        x1,        y2]
    # right =       [x2,        y1,        2*x2 - x1, y2]
    # bottomleft =  [2*x1 - x2, y2,        x1,        2*y2 - y1]
    # bottom =      [x1,        y2,        x2,        2*y2 - y1]
    # bottomright = [x2,        y2,        2*x2 - x1, 2*y2 - y1]
    """
    rois[:,0],rois[:,1],rois[:,2],rois[:,3]
    x1,       y1,       x2,       y2

    """
    batch_size = features.size(0)
    num_channels = features.size(1)
    H = features.size(2)
    W = features.size(3)
    #rois:[128,5]
    x1 = rois[:, 1].cpu().numpy().reshape(rois.size(0), 1)  #[128,1]
    y1 = rois[:, 2].cpu().numpy().reshape(rois.size(0), 1)
    x2 = rois[:, 3].cpu().numpy().reshape(rois.size(0), 1)
    y2 = rois[:, 4].cpu().numpy().reshape(rois.size(0), 1)
    _w = (x2 - x1)  #[128,1]
    _h = (y2 - y1)  #[128,1]
    # cell center
    shift_x = (x1 - _w) + _w * np.arange(0, 3) + _w / 2  #[128,3]
    shift_y = (y1 - _h) + _h * np.arange(0, 3) + _h / 2
    offset = torch.from_numpy(np.hstack(
        (-_w / 4, -_h / 4, _w / 4, _h / 4)))  #[128,4]
    offset = offset.type_as(rois).float()
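    # shift_x/shift_y give a 3x3 grid of cell centers (stride = roi width/height) centered on each roi;
    # offset turns each center into a box half the roi's width and height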

    #rois[0] anchor
    shift_xx, shift_yy = np.meshgrid(shift_x[0], shift_y[0])  #[3,3]

    offset0 = offset[0]  #[1,4]
    shifts0 = torch.from_numpy(
        np.vstack((shift_xx.ravel(), shift_yy.ravel(), shift_xx.ravel(),
                   shift_yy.ravel())).transpose())  #[4,9]----->[9,4]
    shifts0 = shifts0.contiguous().type_as(rois).float()
    gt = offset0 + shifts0  #[9,4]

    import ipdb
    #ipdb.set_trace()
    ww = gt[:, 2] - gt[:, 0] + 1
    hhh = gt[:, 3] - gt[:, 1] + 1
    min_size = 16

    keep = ((gt[:, 0] < 0) | (gt[:, 1] < 0) | (gt[:, 2] >= hw) |
            (gt[:, 3] >= hh) | (ww < min_size) | (hhh < min_size))
    if torch.sum(keep) > 0:
        gt[keep] = rois[0][1:5]

    gt = torch.cat((gt[:4, :], gt[5:, :]), 0)

    #gt = np.delete(gt,4,0)#[8,4]
    A = rois.size(0)  #128
    K = gt.shape[0]  #8
    for i in range(1, rois.size(0)):
        shift_xx, shift_yy = np.meshgrid(shift_x[i], shift_y[i])  #[3,3]

        shifts =  torch.from_numpy(np.vstack((shift_xx.ravel(),shift_yy.ravel(),\
                                        shift_xx.ravel(),shift_yy.ravel())).transpose())#[4,9]----->[9,4]
        shifts = shifts.contiguous().type_as(rois).float()
        gti = offset[i] + shifts  #[9,4]
        ww = gti[:, 2] - gti[:, 0] + 1
        hhh = gti[:, 3] - gti[:, 1] + 1

        keep = ((gti[:, 0] < 0) | (gti[:, 1] < 0) | (gti[:, 2] >= hw) |
                (gti[:, 3] >= hh) | (ww < min_size) | (hhh < min_size))
        if torch.sum(keep) > 0:
            gti[keep] = rois[i][1:5]
        gti = torch.cat((gti[:4, :], gti[5:, :]), 0)
        gt = torch.cat((gt, gti), 0)  #[1024,4]

    gt = gt.view(batch_size, -1, 4)  #[1,1024,4]
    # check if surpass the bound

    all_anchors = rois[:, 1:5]  #[128,4]
    total_anchors = int(K * A)  #128*9=1024

    overlaps = bbox_overlaps_batch(all_anchors, gt)  #[1,128,1024]
    max_overlaps, argmax_overlaps = torch.max(overlaps, 2)  #[1,128]
    gt_max_overlaps, _ = torch.max(overlaps, 1)  #[1,1024]

    inds_inside = torch.zeros(gt.size(1)).view(-1)  #[1024,1]
    labels = gt.new(batch_size, inds_inside.size(0)).fill_(0)  #[1,1024]
    gt_max_overlaps[gt_max_overlaps == 0] = 1e-5
    keep = torch.sum(
        overlaps.eq(
            gt_max_overlaps.view(batch_size, 1, -1).expand_as(overlaps)), 2)

    width_cell = gt[:, :, 2] - gt[:, :, 0]
    height_cell = gt[:, :, 3] - gt[:, :, 1]
    max_cell = np.maximum(width_cell, height_cell)
    min_cell = np.minimum(width_cell, height_cell)
    # intersect with rois >0.3
    # width height cell

    labels[gt_max_overlaps >= 0.3] = 1

    import ipdb
    #    ipdb.set_trace()
    if torch.sum(labels == 1) > 0:
        width = gt.new(batch_size, inds_inside.size(0)).fill_(0)
        height = gt.new(batch_size, inds_inside.size(0)).fill_(0)
        width[labels ==
              1] = rois[_[labels == 1]][:, 3] - rois[_[labels == 1]][:, 1]
        height[labels ==
               1] = rois[_[labels == 1]][:, 4] - rois[_[labels == 1]][:, 2]
        max = np.maximum(width, height)
        min = np.minimum(width, height)
        labels[max >= max_cell] = 0
        labels[min < 1 / 3 * min_cell] = 0
    if torch.sum(labels == 1) > 0:
        gt[labels == 1] = rois[_[labels == 1]][:, 1:5]
    labels[:] = 0
    #gt = np.insert(gt,0,values = labels,axis = 2).view(-1,5)
    gt = torch.cat((labels.view(batch_size, -1, 1), gt), -1)
    return gt
Example #6
    def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image,
                             rois_per_image, num_classes):
        """Generate a random sample of RoIs comprising foreground and background
        examples.
        """
        # overlaps: (rois x gt_boxes)

        overlaps = bbox_overlaps_batch(all_rois, gt_boxes)

        max_overlaps, gt_assignment = torch.max(overlaps, 2)
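        # max_overlaps[i, r]: highest IoU of roi r with any gt box; gt_assignment[i, r]: index of that gt box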

        batch_size = overlaps.size(0)
        num_proposal = overlaps.size(1)
        num_boxes_per_img = overlaps.size(2)

        offset = torch.arange(0, batch_size) * gt_boxes.size(1)
        offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

        # labels = gt_boxes[:,:,4].contiguous().view(-1).index(offset.view(-1)).view(batch_size, -1)
        labels = gt_boxes[:, :, 4].contiguous().view(-1).index(
            (offset.view(-1), )).view(batch_size, -1)

        labels_batch = labels.new(batch_size, rois_per_image).zero_()
        rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
        gt_assign_batch = all_rois.new(batch_size, rois_per_image).zero_()
        gt_rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
        # Guard against the case when an image has fewer than max_fg_rois_per_image
        # foreground RoIs
        for i in range(batch_size):

            fg_inds = torch.nonzero(
                max_overlaps[i] >= cfg.TRAIN.FG_THRESH).view(-1)
            fg_num_rois = fg_inds.numel()

            # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
            bg_inds = torch.nonzero(
                (max_overlaps[i] < cfg.TRAIN.BG_THRESH_HI)
                & (max_overlaps[i] >= cfg.TRAIN.BG_THRESH_LO)).view(-1)
            bg_num_rois = bg_inds.numel()

            if fg_num_rois > 0 and bg_num_rois > 0:
                # sampling fg
                fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)

                # torch.randperm seems to have a bug in the multi-gpu setting that causes a segfault.
                # See https://github.com/pytorch/pytorch/issues/1868 for more details.
                # use numpy instead.
                #rand_num = torch.randperm(fg_num_rois).long().cuda()
                rand_num = torch.from_numpy(np.random.permutation(
                    fg_num_rois)).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

                # sampling bg
                bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

                # torch.rand seems to have a bug: it can generate very large numbers and cause an error.
                # We use numpy rand instead.
                #rand_num = (torch.rand(bg_rois_per_this_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(bg_rois_per_this_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                bg_inds = bg_inds[rand_num]

            elif fg_num_rois > 0 and bg_num_rois == 0:
                # sampling fg
                #rand_num = torch.floor(torch.rand(rois_per_image) * fg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(rois_per_image) * fg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num]
                fg_rois_per_this_image = rois_per_image
                bg_rois_per_this_image = 0
            elif bg_num_rois > 0 and fg_num_rois == 0:
                # sampling bg
                #rand_num = torch.floor(torch.rand(rois_per_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(rois_per_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()

                bg_inds = bg_inds[rand_num]
                bg_rois_per_this_image = rois_per_image
                fg_rois_per_this_image = 0
            else:
                raise ValueError(
                    "bg_num_rois = 0 and fg_num_rois = 0, this should not happen!"
                )

            # The indices that we're selecting (both fg and bg)
            keep_inds = torch.cat([fg_inds, bg_inds], 0)

            # Select sampled values from various arrays:
            labels_batch[i].copy_(labels[i][keep_inds])

            # Clamp labels for the background RoIs to 0
            labels_batch[i][fg_rois_per_this_image:] = 0

            rois_batch[i] = all_rois[i][keep_inds]
            rois_batch[i, :, 0] = i

            # TODO: check the below line when batch_size > 1, no need to add offset here
            gt_assign_batch[i] = gt_assignment[i][keep_inds]

            gt_rois_batch[i] = gt_boxes[i][gt_assignment[i][keep_inds]]

        bbox_target_data = self._compute_targets_pytorch(
            rois_batch[:, :, 1:5], gt_rois_batch[:, :, :4])

        bbox_targets, bbox_inside_weights = \
                self._get_bbox_regression_labels_pytorch(bbox_target_data, labels_batch, num_classes)

        return labels_batch, rois_batch, gt_assign_batch, bbox_targets, bbox_inside_weights
Example #7
    def forward(self, input):
        # [1, 256, 5]
        rois = input[1]
        batch_size = rois.size(0)
        # [1, 256, 21]
        cls_prob = input[0].view(batch_size, -1, input[0].size(1))
        # [1, 20, 5]
        gt_boxes = input[2]
        # [1, 20, 3]
        crowdsourced_classes = input[3]
        num_annotator = crowdsourced_classes.size(2)
        # [3, 21, 21]
        alpha_con = input[4]
        rois_label = input[5]

        overlaps = bbox_overlaps_batch(rois, gt_boxes)

        max_overlaps, gt_assignment = torch.max(overlaps, 2)

        alpha = get_alpha(alpha_con)

        # TODO: there may be a bug when batch_size != 1
        # indices of foreground rois
        fg_rois_ix = torch.nonzero(rois_label != 0).view(-1)
        for i in range(batch_size):
            # indices of foreground gt boxes
            fg_gt_boxes_ix = torch.nonzero(gt_boxes[i, :, 4] != 0).view(-1)
            for ix in fg_gt_boxes_ix:
                # collect all foreground rois whose highest-IoU gt box is this one
                rois_ix = torch.nonzero(
                    gt_assignment[i, fg_rois_ix] == ix).view(-1)

                # none of the kept rois are matched to this gt box
                if rois_ix.size(0) == 0:
                    continue

                # map back to indices over all rois
                rois_ix = fg_rois_ix[rois_ix]
                # infer the class of this fg gt box
                # p: for now use the mean class probability of the matched rois (could later try IoU-weighted averaging); size [21]
                mean_cls_prob = torch.mean(cls_prob[i, rois_ix, :], dim=0)
                tmp = torch.ones(mean_cls_prob.size()).type_as(mean_cls_prob)

                for c in range(tmp.size(0)):
                    tmp[c] *= mean_cls_prob[c]
                    for j in range(num_annotator):
                        tmp[c] *= alpha[j, c, crowdsourced_classes[i, ix, j]]

                tmp = tmp / tmp.sum()

                # the inferred class must not be 0 (background)
                _, mu = torch.max(tmp[1:], dim=0)
                mu += 1

                # update rois_label
                rois_label[rois_ix] = mu
                # update alpha_con
                for j in range(num_annotator):
                    alpha_con[j, mu, crowdsourced_classes[i, ix, j]] += 1

        return rois_label
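For reference, the per-class loop above can be written in a form vectorized over classes and annotators. This is only a sketch of the same arithmetic, assuming (as the indexing above implies) that alpha[j, c, k] is annotator j's probability of reporting class k when the true class is c; infer_box_class is a hypothetical helper name.

import torch

def infer_box_class(mean_cls_prob, alpha, observed):
    # mean_cls_prob: [C]       mean class probability of the rois matched to this gt box
    # alpha:         [J, C, C] per-annotator confusion matrices
    # observed:      [J]       long tensor: class reported by each annotator for this gt box
    # posterior over true class c: p(c) proportional to mean_cls_prob[c] * prod_j alpha[j, c, observed[j]]
    J = alpha.size(0)
    per_annotator = alpha[torch.arange(J, device=alpha.device), :, observed]  # [J, C]
    posterior = mean_cls_prob * per_annotator.prod(dim=0)
    posterior = posterior / posterior.sum()
    mu = torch.argmax(posterior[1:]) + 1  # class 0 (background) is excluded, as in the loop above
    return mu, posterior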
Example #8
    def forward(self, im_data, gt):
        # object detection
        if self.training:
            self._train_iter_conter += 1

        self.batch_size = im_data.size(0)

        gt_boxes = gt['boxes']
        gt_grasps = gt['grasps']
        gt_grasp_inds = gt['grasp_inds']
        num_boxes = gt['num_boxes']
        num_grasps = gt['num_grasps']
        im_info = gt['im_info']
        rel_mat = gt['rel_mat']

        # feed image data to base model to obtain base feature map
        base_feat = self.VMRN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.VMRN_obj_rpn(base_feat, im_info, gt_boxes, num_boxes)

        # rois preprocess
        if self.training:
            obj_det_rois = rois[:,:cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
            roi_data = self.VMRN_obj_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            grasp_rois = rois.clone()
            rois = torch.cat([obj_det_rois,rois],1)
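            # the first TOP_N_ROIS_FOR_OBJ_DET rois drive the online object-detection branch;
            # the sampled rois appended after them are used for the supervised heads below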

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        pooled_feat = self._roi_pooing(base_feat, rois)

        if self.training:
            pooled_feat_shape = pooled_feat.size()
            pooled_feat = pooled_feat.contiguous().view((self.batch_size, -1) + pooled_feat_shape[1:])
            grasp_feat = pooled_feat[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:].\
                contiguous().view((-1,) + pooled_feat_shape[1:])
            pooled_feat = pooled_feat.view(pooled_feat_shape)
            if self._MGN_USE_POOLED_FEATS:
                rois_overlaps = bbox_overlaps_batch(rois[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:], gt_boxes)
                # bs x N_{rois}
                _, rois_inds = torch.max(rois_overlaps, dim=2)
                rois_inds += 1
                grasp_rois_mask = rois_label.view(-1) > 0
            else:
                raise NotImplementedError

        ############################################
        # OBJECT DETECTION
        ############################################
        # feed pooled features to top model
        VMRN_feat = self._head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.VMRN_obj_bbox_pred(VMRN_feat)
        if self.training:
            if self.class_agnostic:
                bbox_pred = bbox_pred.contiguous().view(self.batch_size, -1, 4)
            else:
                bbox_pred = bbox_pred.contiguous().view(self.batch_size, -1, 4 * self.n_classes)
            obj_det_bbox_pred = bbox_pred[:,:cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
            bbox_pred = bbox_pred[:,cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
            if self.class_agnostic:
                obj_det_bbox_pred = obj_det_bbox_pred.contiguous().view(-1, 4)
                bbox_pred = bbox_pred.contiguous().view(-1, 4)
            else:
                obj_det_bbox_pred = obj_det_bbox_pred.contiguous().view(-1, 4 * self.n_classes)
                bbox_pred = bbox_pred.contiguous().view(-1, 4 * self.n_classes)

        # compute object classification probability
        cls_score = self.VMRN_obj_cls_score(VMRN_feat)
        cls_prob = F.softmax(cls_score)
        if self.training:
            cls_score = cls_score.contiguous().view(self.batch_size, -1, self.n_classes)
            obj_det_cls_score = cls_score[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
            cls_score = cls_score[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
            obj_det_cls_score = obj_det_cls_score.contiguous().view(-1, self.n_classes)
            cls_score = cls_score.contiguous().view(-1, self.n_classes)

            cls_prob = cls_prob.contiguous().view(self.batch_size, -1, self.n_classes)
            obj_det_cls_prob = cls_prob[:, :cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET]
            cls_prob = cls_prob[:, cfg.TRAIN.VMRN.TOP_N_ROIS_FOR_OBJ_DET:]
            obj_det_cls_prob = obj_det_cls_prob.contiguous().view(-1, self.n_classes)
            cls_prob = cls_prob.contiguous().view(-1, self.n_classes)

        VMRN_obj_loss_cls = 0
        VMRN_obj_loss_bbox = 0

        # compute object detector loss
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(bbox_pred_view, 1,
                                            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        if self.training:
            # classification loss
            VMRN_obj_loss_cls = F.cross_entropy(cls_score, rois_label)
            # bounding box regression L1 loss
            VMRN_obj_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

        ############################################
        # VISUAL MANIPULATION RELATIONSHIP
        ############################################
        # online data
        if self.training:
            if self._train_iter_conter > cfg.TRAIN.VMRN.ONLINEDATA_BEGIN_ITER:
                obj_rois, obj_num = self._obj_det(obj_det_rois,
                        obj_det_cls_prob.contiguous().view(self.batch_size, -1, self.n_classes),
                        obj_det_bbox_pred.contiguous().view(self.batch_size,
                                -1, 4 if self.class_agnostic else 4 * self.n_classes),
                        self.batch_size, im_info)
                obj_rois = obj_rois.type_as(gt_boxes)
                obj_num = obj_num.type_as(num_boxes)
            else:
                obj_rois = torch.FloatTensor([]).type_as(gt_boxes)
                obj_num = torch.LongTensor([]).type_as(num_boxes)
            obj_labels = None
        else:
            # when testing, this is object detection results
            # TODO: SUPPORT MULTI-IMAGE BATCH
            obj_rois, obj_num = self._obj_det(rois,
                    cls_prob.contiguous().view(self.batch_size, -1, self.n_classes),
                    bbox_pred.contiguous().view(self.batch_size,
                                -1, 4 if self.class_agnostic else 4 * self.n_classes),
                    self.batch_size, im_info)
            if obj_rois.numel() > 0:
                obj_labels = obj_rois[:,5]
                obj_rois = obj_rois[:,:5]
                obj_rois = obj_rois.type_as(gt_boxes)
                obj_num = obj_num.type_as(num_boxes)
            else:
                # there is no object detected
                obj_labels = torch.Tensor([]).type_as(gt_boxes).long()
                obj_rois = obj_rois.type_as(gt_boxes)
                obj_num = obj_num.type_as(num_boxes)

        # offline data
        if self.training:
            for i in range(self.batch_size):
                obj_rois = torch.cat([obj_rois,
                                  torch.cat([(i * torch.ones(num_boxes[i].item(),1)).type_as(gt_boxes),
                                             (gt_boxes[i][:num_boxes[i]][:,0:4])],1)
                                  ])
                obj_num = torch.cat([obj_num,torch.Tensor([num_boxes[i]]).type_as(obj_num)])

        obj_rois = Variable(obj_rois)

        if obj_rois.size(0)>1:
            # relationship detection only runs when more than one object instance is detected
            obj_pair_feat = self.VMRN_rel_op2l(base_feat, obj_rois, self.batch_size, obj_num)
            # obj_pair_feat = obj_pair_feat.detach()
            obj_pair_feat = self._rel_head_to_tail(obj_pair_feat)
            rel_cls_score = self.VMRN_rel_cls_score(obj_pair_feat)

            rel_cls_prob = F.softmax(rel_cls_score)

            VMRN_rel_loss_cls = 0
            if self.training:
                self.rel_batch_size = rel_cls_prob.size(0)

                obj_pair_rel_label = self._generate_rel_labels(obj_rois, gt_boxes, obj_num, rel_mat)
                obj_pair_rel_label = obj_pair_rel_label.type_as(gt_boxes).long()

                rel_not_keep = (obj_pair_rel_label == 0)
                rel_keep = torch.nonzero(rel_not_keep == 0).view(-1)

                rel_cls_score = rel_cls_score[rel_keep]
                obj_pair_rel_label = obj_pair_rel_label[rel_keep]

                obj_pair_rel_label -= 1

                VMRN_rel_loss_cls = F.cross_entropy(rel_cls_score, obj_pair_rel_label)
            else:
                if (not cfg.TEST.VMRN.ISEX) and cfg.TRAIN.VMRN.ISEX:
                    rel_cls_prob = rel_cls_prob[::2,:]

        else:
            VMRN_rel_loss_cls = 0
            # no detected relationships
            rel_cls_prob = Variable(torch.Tensor([]).type_as(obj_labels))

        rel_result = None
        if not self.training:
            if obj_rois.numel() > 0:
                pred_boxes = obj_rois.data[:,1:5]
                pred_boxes[:, 0::2] /= im_info[0][3].item()
                pred_boxes[:, 1::2] /= im_info[0][2].item()
                rel_result = (pred_boxes, obj_labels, rel_cls_prob.data)
            else:
                rel_result = (obj_rois.data, obj_labels, rel_cls_prob.data)

        ############################################
        # ROI-BASED GRASP DETECTION
        ############################################
        if self.training:
            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._MGN_head_to_tail(grasp_feat[grasp_rois_mask])
            else:
                # when there are no positive rois:
                grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_bbox_loss = Variable(torch.Tensor([0]).type_as(VMRN_obj_loss_bbox))
                grasp_cls_loss = Variable(torch.Tensor([0]).type_as(VMRN_obj_loss_cls))
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
                    VMRN_obj_loss_cls, VMRN_obj_loss_bbox, VMRN_rel_loss_cls, rois_label, \
                   grasp_loc, grasp_prob, grasp_bbox_loss , grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._MGN_head_to_tail(pooled_feat)

        grasp_pred = self.MGN_classifier(grasp_feat)
        # bs*N x K*A x 5, bs*N x K*A x 2
        grasp_loc, grasp_conf = grasp_pred

        # generate anchors
        # bs*N x K*A x 5
        if self.training:
            grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), grasp_rois)
        else:
            grasp_all_anchors = self._generate_anchors(grasp_conf.size(1), grasp_conf.size(2), rois)
        # filter out negative samples
        grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
        if self.training:
            grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
            # bs*N x 1 x 1
            rois_w = (grasp_rois[:, :, 3] - grasp_rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_h = (grasp_rois[:, :, 4] - grasp_rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_w = rois_w[grasp_rois_mask]
            rois_h = rois_h[grasp_rois_mask]
            # bs*N x 1 x 1
            fsx = rois_w / grasp_conf.size(1)
            fsy = rois_h / grasp_conf.size(2)
            # bs*N x 1 x 1
            xleft = grasp_rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
            ytop = grasp_rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
            xleft = xleft[grasp_rois_mask]
            ytop = ytop[grasp_rois_mask]

        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_batch_size = grasp_loc.size(0)

        # bs*N x K*A x 2
        grasp_prob = F.softmax(grasp_conf, 2)

        grasp_bbox_loss = 0
        grasp_cls_loss = 0
        grasp_conf_label = None
        if self.training:
            # inside weights indicate which bounding box should be regressed
            # outside weights indicate two things:
            # 1. Which bounding box should contribute to the classification loss,
            # 2. Balance cls loss and bbox loss
            grasp_gt_xywhc = points2labels(gt_grasps)
            # bs*N x N_{Gr_gt} x 5
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
            # filter out negative samples
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

            # absolute coords to relative coords
            grasp_gt_xywhc[:, :, 0:1] -= xleft
            grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1], min = 0)
            grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1], rois_w)
            grasp_gt_xywhc[:, :, 1:2] -= ytop
            grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2], min = 0)
            grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2], rois_h)

            # grasp training data
            grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.MGN_proposal_target(grasp_conf,
                                        grasp_gt_xywhc, grasp_all_anchors,xthresh = fsx/2, ythresh = fsy/2)

            grasp_keep = Variable(grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
            grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0, grasp_keep.data)
            grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0, grasp_keep.data)
            grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)

            grasp_iw = Variable(grasp_iw)
            grasp_ow = Variable(grasp_ow)
            grasp_loc_label = Variable(grasp_loc_label)
            grasp_bbox_loss = _smooth_l1_loss(grasp_loc, grasp_loc_label, grasp_iw, grasp_ow, dim = [2,1])

        return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
                VMRN_obj_loss_cls, VMRN_obj_loss_bbox, VMRN_rel_loss_cls, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss , grasp_cls_loss, grasp_conf_label, grasp_all_anchors
Example #9
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground-truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in the graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0], and so on

        # part_threshold = 0.25
        #
        # # first, calculate the overlaps among rois and gt, get the max roi for each gt (node_cls)
        overlaps = bbox_overlaps_batch(rois, rois)[0]

        N_node, _ = overlaps.shape

        node_list = [i for i in range(N_node)]

        for j in range(N_node):
            for k in range(N_node):
                if overlaps[j][k] != 0:
                    overlaps[j][k] = 1
                if k == j:
                    overlaps[j][k] = 0

        idx_subgraph, vertex_subgraph = subgraph_split(overlaps)

        # max_overlaps_rois2gt, roi2gt_assignment = torch.max(overlaps, 1)
        #
        # # second, calculate the overlaps among rois and rois_select,
        # # using threshold to select roi for each rois_select (node_part)
        #
        # rois_cls_tmp = rois[:, roi2gt_assignment[0], :]
        # rois_cls_num = np.argwhere(gt_boxes[:, :, 4].cpu().data.numpy()[0] != 0).shape[0]
        # rois_cls_tmp = rois_cls_tmp[:,:rois_cls_num, :]
        # rois_cls = rois_cls_tmp.new(rois_cls_tmp.size(0), rois_cls_tmp.size(1), 5).zero_()
        # rois_cls[:, :, :4] = rois_cls_tmp[:, :, 1:5]
        # rois_cls[:, :, 4] = rois_cls_tmp[:, :, 0]
        #
        # # rois_cls_idx_list is the idx related from rois_cls to rois
        # roi_cls_idx_list = roi2gt_assignment[0][:rois_cls_num]
        #
        # overlaps = bbox_overlaps_batch(rois, rois_cls)
        # max_overlaps_rois2cls, roi2cls_assignment = torch.max(overlaps, 2)
        #
        # roi_part_match_overlap = max_overlaps_rois2cls.cpu().data.numpy()
        # roi_part_match = roi2cls_assignment.cpu().data.numpy()
        #
        # # roi_part_idx_list is the idx related from rois_part to rois
        # roi_part_idx_list = []
        # roi_part_match_idx = np.unique(roi_part_match[0])
        # for roi_cls_idx in roi_part_match_idx:
        #     match_idx_tmp = np.transpose(np.argwhere(roi_part_match[0] == roi_cls_idx))[0]
        #     match_overlap_tmp = roi_part_match_overlap[0][match_idx_tmp]
        #     # use threshold to select rois_part
        #     match_idx_tmp_select = np.transpose(np.argwhere(match_overlap_tmp > part_threshold))[0]
        #     match_idx_tmp = match_idx_tmp[match_idx_tmp_select]
        #     roi_part_idx_list.append(torch.from_numpy(match_idx_tmp))

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # # update 20191027: build graph for rois based on index (default: batch_size = 1)
        # adj_jud = np.zeros((0))
        # adj_rois = torch.zeros(0).cuda().long()
        # for i in range(roi_cls_idx_list.shape[0]):
        #     adj_jud = np.concatenate((adj_jud, [1]))
        #     adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i+1]))
        #     try:
        #         adj_jud = np.concatenate((adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
        #         adj_rois = torch.cat((adj_rois, roi_part_idx_list[i].cuda()))
        #     except IndexError:
        #         print ('IndexError happen, continue')
        #         continue
        #
        # node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]
        #
        # adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # # link edges for node_cls to node_cls
        # for k in range(len(node_cls_idx)-1):
        #     idx_node_cls_1 = node_cls_idx[k]
        #     idx_node_cls_2 = node_cls_idx[k + 1]
        #     adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
        #     adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1
        #
        # # link edges for node_cls to related node_part
        # for k in range(len(node_cls_idx)-1):
        #     idx_start = node_cls_idx[k]
        #     idx_end = node_cls_idx[k + 1]
        #     for s in range(idx_start, idx_end):
        #         for t in range(idx_start, idx_end):
        #             if s == t:
        #                 adj_matrix_bin[s, t] = 0
        #             else:
        #                 adj_matrix_bin[s, t] = 1

        # # calculate the adj_mat based on adj_matrix_bin, the weights on edges are the cosine distance between nodes
        # adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))
        #
        # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        #
        # for s in range(len(adj_jud)):
        #     for t in range(len(adj_jud)):
        #         if adj_matrix_bin[s, t] == 1:
        #             node_feat_s = pooled_feat[adj_rois[s], :]
        #             node_feat_t = pooled_feat[adj_rois[t], :]
        #             adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
        #         else:
        #             adj_matrix[s, t] = 0
        #
        # adj_matrix = torch.from_numpy(adj_matrix).float().cuda()
        #
        # pooled_feat[adj_rois, :] = F.relu(self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
        # pooled_feat[adj_rois, :] = F.relu(self.gcn2(pooled_feat[adj_rois, :], adj_matrix))

        # adj_jud = np.zeros((N_node, N_node))
        adj_matrix = np.zeros((N_node, N_node))
        #
        # for k in range(idx_subgraph):
        #     idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
        #     for s in range(idx_k.shape[0]):
        #         for t in range(idx_k.shape[0]):
        #             if s == t:
        #                 adj_jud[s, t] = 0
        #             else:
        #                 adj_jud[s, t] = 1
        #
        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

        for s in range(N_node):
            for t in range(N_node):
                #if adj_jud[s,t] != 0:
                if s != t:
                    node_feat_s = pooled_feat[s, :]
                    node_feat_t = pooled_feat[t, :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
        pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: unsqueeze the scalar losses (fix for the "dimension specified as 0" error)
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
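
The nested loop above fills the cosine-similarity adjacency one pair at a time through a NumPy array and then copies it back to the GPU. A minimal vectorized sketch of the same construction (an illustration, not part of the original model; it assumes pooled_feat is an (N, D) tensor as above and reproduces the zeroed diagonal of the loop):

import torch
import torch.nn.functional as F

def cosine_adjacency(feats, eps=1e-6):
    # feats: (N, D) RoI features (pooled_feat above). Returns an (N, N) matrix
    # whose (s, t) entry is the cosine similarity between node s and node t,
    # with the diagonal zeroed out (no self-loops), while staying on the
    # tensor's own device instead of round-tripping through NumPy.
    normed = F.normalize(feats, p=2, dim=1, eps=eps)
    adj = torch.mm(normed, normed.t())
    adj = adj * (1 - torch.eye(feats.size(0)).type_as(adj))
    return adj

For a few hundred RoIs this is a single matrix multiply instead of N * N Python-level calls.
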
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)

        # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0] and so on

        part_threshold = 0.5

        # rois has shape (1, 300, 5); there may be fewer than 300 proposals after NMS, so the trailing rois are all zeros
        rois_none_idx = 300
        for i in range(rois.shape[1]):
            if rois[:, i, :].sum() <= 0:
                rois_none_idx = i
                break

        # first, calculate the pairwise overlaps among rois (rois vs. rois) to cluster them into nodes
        overlaps = bbox_overlaps_batch(rois[:, :rois_none_idx, :],
                                       rois[:, :rois_none_idx, :])[0]

        N_node, _ = overlaps.shape

        overlaps_bin = overlaps.cpu().data.numpy().copy()

        for j in range(N_node):
            for k in range(N_node):
                if overlaps_bin[j][k] >= part_threshold:
                    overlaps_bin[j][k] = 1
                else:
                    overlaps_bin[j][k] = 0
                if k == j:
                    overlaps_bin[j][k] = 0

        idx_subgraph, vertex_subgraph = subgraph_split(overlaps_bin)

        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # update 20191105: build graph for rois based on index (default: batch_size = 1)

        roi_all_idx_list = []
        roi_cls_idx_list = []
        roi_part_idx_list = []

        adj_jud = np.zeros((0))
        adj_rois = torch.zeros(0).cuda().long()

        for k in range(idx_subgraph):
            idx_k = np.transpose(np.argwhere(vertex_subgraph == k))[0]
            roi_all_idx_list.append(idx_k)

        overlaps = overlaps.cpu().data.numpy()

        # pick the node with the largest degree as node_cls
        for i in range(len(roi_all_idx_list)):
            rois_idx = roi_all_idx_list[i]

            # only consider clusters with at least 5 rois; such a cluster is probably an object
            if rois_idx.shape[0] < 5:
                continue

            overlaps_once = overlaps[rois_idx][:, rois_idx]
            overlaps_once_bin = overlaps_bin[rois_idx][:, rois_idx]

            N_node_once, _ = overlaps_once.shape

            ########## update 20191104: select IoU > threshold
            # for j in range(N_node_once):
            #     for k in range(N_node_once):
            #         if overlaps_once[j][k] >= part_threshold:
            #             overlaps_once[j][k] = 1
            #         else:
            #             overlaps_once[j][k] = 0
            #         if k == j:
            #             overlaps_once[j][k] = 0

            # overlaps_once = np.sum(overlaps_once, axis=1)
            #
            # rois_once_max_idx = np.argmax(overlaps_once)
            # roi_cls_idx_list.append(rois_idx[rois_once_max_idx])
            #
            # roi_part_tmp = []
            # for k in range(rois_idx.shape[0]):
            #     if overlaps[rois_idx[rois_once_max_idx]][k] == 0:
            #         continue
            #     roi_part_tmp.append(rois_idx[k])
            # roi_part_tmp = torch.from_numpy(np.array(roi_part_tmp))
            # roi_part_idx_list.append(roi_part_tmp)

            ########## update 20191107: all proposal

            overlaps_once_bin = np.sum(overlaps_once_bin, axis=1)

            rois_once_max_idx = np.argmax(overlaps_once_bin)
            roi_cls_idx_list.append(rois_idx[rois_once_max_idx])

            roi_part_tmp = []
            roi_iou = overlaps_once[rois_once_max_idx]
            roi_part_num_threshold = 10
            if roi_iou.shape[0] >= roi_part_num_threshold:
                roi_order = np.argsort(roi_iou)[::-1]
                for ii in range(roi_part_num_threshold):
                    roi_part_tmp.append(rois_idx[roi_order[ii]])
            else:
                for k in range(rois_idx.shape[0]):
                    if overlaps[rois_idx[rois_once_max_idx]][k] == 0:
                        continue
                    roi_part_tmp.append(rois_idx[k])
            roi_part_tmp = torch.from_numpy(np.array(roi_part_tmp))
            roi_part_idx_list.append(roi_part_tmp)

        roi_cls_idx_list = torch.from_numpy(np.array(roi_cls_idx_list)).cuda()

        for i in range(roi_cls_idx_list.shape[0]):
            adj_jud = np.concatenate((adj_jud, [1]))
            adj_rois = torch.cat((adj_rois, roi_cls_idx_list[i:i + 1]))
            try:
                if roi_part_idx_list[i].shape[0] != 0:
                    adj_jud = np.concatenate(
                        (adj_jud, np.zeros((roi_part_idx_list[i].shape[0]))))
                    adj_rois = torch.cat(
                        (adj_rois, roi_part_idx_list[i].cuda()))
            except IndexError:
                print('IndexError happened, continuing')
                continue

        node_cls_idx = np.transpose(np.argwhere(adj_jud == 1))[0]

        adj_matrix_bin = np.zeros((len(adj_jud), len(adj_jud)))

        # link edges for node_cls to node_cls
        for k in range(len(node_cls_idx) - 1):
            idx_node_cls_1 = node_cls_idx[k]
            idx_node_cls_2 = node_cls_idx[k + 1]
            adj_matrix_bin[idx_node_cls_1, idx_node_cls_2] = 1
            adj_matrix_bin[idx_node_cls_2, idx_node_cls_1] = 1

        # link edges for node_cls to related node_part
        for k in range(len(node_cls_idx) - 1):
            idx_start = node_cls_idx[k]
            idx_end = node_cls_idx[k + 1]
            for s in range(idx_start, idx_end):
                for t in range(idx_start, idx_end):
                    if s == t:
                        adj_matrix_bin[s, t] = 0
                    else:
                        adj_matrix_bin[s, t] = 1

        cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)
        adj_matrix = np.zeros((len(adj_jud), len(adj_jud)))

        for s in range(len(adj_jud)):
            for t in range(len(adj_jud)):
                if adj_matrix_bin[s, t] == 1:
                    node_feat_s = pooled_feat[adj_rois[s], :]
                    node_feat_t = pooled_feat[adj_rois[t], :]
                    adj_matrix[s, t] = cos(node_feat_s, node_feat_t)
                else:
                    adj_matrix[s, t] = 0

        adj_matrix = torch.from_numpy(adj_matrix).float().cuda()

        try:
            pooled_feat[adj_rois, :] = F.relu(
                self.gcn1(pooled_feat[adj_rois, :], adj_matrix))
            pooled_feat[adj_rois, :] = F.relu(
                self.gcn2(pooled_feat[adj_rois, :], adj_matrix))
        except RuntimeError:
            print(pooled_feat[adj_rois, :].size())
            print(adj_matrix.size())

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: unsqueeze the scalar losses (fix for the "dimension specified as 0" error)
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
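
subgraph_split is defined elsewhere in the repository; judging only from how its outputs are consumed above (a subgraph count plus a per-RoI subgraph id), it behaves like connected-component labelling on the binarised IoU graph. A hedged sketch of an equivalent helper, assuming SciPy is available (the real implementation may differ):

from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

def subgraph_split_sketch(adj_bin):
    # adj_bin: (N, N) binary NumPy adjacency obtained by thresholding the
    # RoI-vs-RoI IoU matrix, as in overlaps_bin above. Returns
    # (n_subgraphs, labels) with labels[i] in [0, n_subgraphs), which is the
    # interface the surrounding code expects from subgraph_split.
    n_subgraphs, labels = connected_components(csr_matrix(adj_bin), directed=False)
    return n_subgraphs, labels
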
示例#11
0
    def forward(self, im_data, im_info, gt_boxes, num_boxes):
        batch_size = im_data.size(0)

        im_info = im_info.data
        gt_boxes = gt_boxes.data
        num_boxes = num_boxes.data

        # feed image data to base model to obtain base feature map
        base_feat = self.RCNN_base(im_data)

        # feed base feature map to RPN to obtain rois
        rois, rpn_loss_cls, rpn_loss_bbox, num_proposal = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        rois = Variable(rois)
        # do roi pooling based on predicted rois
        if cfg.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        pooled_feat = self._head_to_tail(pooled_feat)

        # update 20191026: get the index of nodes in graph for rois (default: batch_size = 1)
        # if we want to change batch_size, we should consider changing roi2gt_assignment[0],
        # roi_part_match[0], roi_part_match_overlap[0] and so on

        iou_threshold = 0.7
        dis_threshold = 0.01
        # part_size = 10
        # relation_size = 5
        iou_size = 6
        edge_size = 4
        child_size = 4
        batch = 0
        if True:
            if not self.training:
                rois = rois[:, :num_proposal, :]
                pooled_feat = pooled_feat[:num_proposal, :]

            # first, calculate the overlaps among rois; set the edge weight between nodes with IoU > 0.7 to 1
            overlaps = bbox_overlaps_batch(rois, rois)
            # overlaps_bin = overlaps.cpu().data.numpy().copy()

            _, N_node, _ = overlaps.shape
            # second, calculate the distances among rois; also connect nodes whose distance is below dis_threshold
            distances = bbox_distances_batch(rois, rois)
            # update 20191115: build graph for rois based on index (default: batch_size = 1)
            # feature cosine similarity

            # similarity in PGCN
            dot_product_mat = torch.mm(pooled_feat,
                                       torch.transpose(pooled_feat, 0, 1))
            len_vec = torch.unsqueeze(torch.sqrt(
                torch.sum(pooled_feat * pooled_feat, dim=1)),
                                      dim=0)
            len_mat = torch.mm(torch.transpose(len_vec, 0, 1), len_vec)
            pooled_feat_sim_mat = dot_product_mat / len_mat

            # cos = torch.nn.CosineSimilarity(dim=0, eps=1e-6)

            # calculate the adj_mat based on iou and distance, the weights on edges are the cosine similarity between nodes
            mask = torch.eye(N_node, N_node).cuda()
            for s in range(N_node):

                overlap_node_index = (overlaps[batch][s] >=
                                      iou_threshold).nonzero()
                overlap_node_size = iou_size if overlap_node_index.shape[
                    0] > iou_size else overlap_node_index.shape[0]
                overlap_node_random = torch.randperm(
                    overlap_node_index.shape[0])[0:overlap_node_size]
                overlap_node_index_select = overlap_node_index[
                    overlap_node_random]

                # TODO(junjie): remove the IoU boxes from the distance boxes.

                distance_node_index = (distances[batch][s] <
                                       dis_threshold).nonzero()
                distance_node_size = iou_size if distance_node_index.shape[
                    0] > iou_size else distance_node_index.shape[0]
                distance_node_random = torch.randperm(
                    distance_node_index.shape[0])[0:distance_node_size]
                distance_node_index_select = distance_node_index[
                    distance_node_random]

                _node_index_select = torch.cat(
                    (overlap_node_index_select, distance_node_index_select),
                    dim=0)
                if _node_index_select.shape[0] == 0:
                    continue
                else:
                    _node_index_select = _node_index_select.squeeze(dim=1)
                _node_size = child_size if _node_index_select.shape[
                    0] > child_size else _node_index_select.shape[0]
                _node_index_select_random = torch.randperm(
                    _node_index_select.shape[0])[0:_node_size]
                node_index_select = _node_index_select[
                    _node_index_select_random]

                mask[s, node_index_select] = 1
                # print("test ")

            adj_matrix = torch.mul(mask, pooled_feat_sim_mat)

            pooled_feat = F.relu(self.gcn1(pooled_feat, adj_matrix))
            pooled_feat = F.relu(self.gcn2(pooled_feat, adj_matrix))

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(pooled_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(pooled_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        # update 2019-6-17: unsqueeze the scalar losses (fix for the "dimension specified as 0" error)
        if self.training:
            rpn_loss_cls = torch.unsqueeze(rpn_loss_cls, 0)
            rpn_loss_bbox = torch.unsqueeze(rpn_loss_bbox, 0)
            RCNN_loss_cls = torch.unsqueeze(RCNN_loss_cls, 0)
            RCNN_loss_bbox = torch.unsqueeze(RCNN_loss_bbox, 0)

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
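
Both GCN variants above feed the raw (masked) cosine-similarity matrix straight into self.gcn1 / self.gcn2. Whether that matrix should first be renormalised depends on how those GraphConvolution layers are implemented, which is not shown here; as a reference only, a sketch of the standard Kipf-and-Welling renormalisation that many GCN layers expect as input (an assumption about the layer's input contract, not something the original code does):

import torch

def normalize_adjacency(adj):
    # A_hat = D^{-1/2} (A + I) D^{-1/2}; adj is the (N, N) similarity matrix
    # built above, and the clamp guards against isolated nodes whose degree
    # is (near) zero.
    n = adj.size(0)
    a_hat = adj + torch.eye(n).type_as(adj)
    deg = a_hat.sum(1)
    d_inv_sqrt = deg.clamp(min=1e-12).pow(-0.5)
    return d_inv_sqrt.unsqueeze(1) * a_hat * d_inv_sqrt.unsqueeze(0)
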
    def forward(self, im_data, gt):
        batch_size = im_data.size(0)

        gt_boxes = gt['boxes']
        # for jacquard dataset, the bounding box labels are set to -1. For training, we set them to 1, which does not
        # affect the training process.
        if self.training:
            if gt_boxes[:, :, -1].sum().item() < 0:
                gt_boxes[:, :, -1] = -gt_boxes[:, :, -1]
        gt_grasps = gt['grasps']
        gt_grasp_inds = gt['grasp_inds']
        num_boxes = gt['num_boxes']
        num_grasps = gt['num_grasps']
        im_info = gt['im_info']

        for i in range(batch_size):
            if torch.sum(gt_grasp_inds[i]).item() == 0:
                gt_grasp_inds[i, :num_grasps[i].item()] = 1

        # features
        base_feat = self.base(im_data)

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
            rois_label = Variable(rois_label.view(-1).long())
        else:
            rois_label = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if cfg.MGN.USE_FIXED_SIZE_ROI:
            _rois = rois.view(-1, 5)
            rois_cx = (_rois[:, 1:2] + _rois[:, 3:4]) / 2
            rois_cy = (_rois[:, 2:3] + _rois[:, 4:5]) / 2
            rois_xmin = torch.clamp(rois_cx - 100, min=1, max=600)
            rois_ymin = torch.clamp(rois_cy - 100, min=1, max=600)
            rois_xmax = rois_xmin + 200
            rois_ymax = rois_ymin + 200
            rois_for_grasp = torch.cat(
                [_rois[:, :1], rois_xmin, rois_ymin, rois_xmax, rois_ymax],
                dim=1)
            if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois_for_grasp,
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat,
                                                 Variable(grid_yx).detach())
                if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois_for_grasp)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois_for_grasp)

        else:
            if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
                # pdb.set_trace()
                # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
                grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                           base_feat.size()[2:],
                                           self.grid_size)
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                    3).contiguous()
                pooled_feat = self.RCNN_roi_crop(base_feat,
                                                 Variable(grid_yx).detach())
                if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                    pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
            elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
                pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
            elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
                pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        # grasp top
        if self.training:
            if self._ROIGN_USE_POOLED_FEATS:
                rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
                # bs x N_{rois}
                _, rois_inds = torch.max(rois_overlaps, dim=2)
                rois_inds += 1
                grasp_rois_mask = rois_label.view(-1) > 0
            else:
                raise NotImplementedError

        if self.training:
            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._ROIGN_head_to_tail(
                    pooled_feat[grasp_rois_mask])
            else:
                # when there are no positive rois:
                grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_bbox_loss = Variable(
                    torch.Tensor([0]).type_as(gt_grasps))
                grasp_cls_loss = Variable(torch.Tensor([0]).type_as(gt_grasps))
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, rpn_loss_cls, rpn_loss_bbox, rois_label,\
                   grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._ROIGN_head_to_tail(pooled_feat)

        grasp_pred = self.ROIGN_classifier(grasp_feat)
        # bs*N x K*A x 5, bs*N x K*A x 2
        grasp_loc, grasp_conf = grasp_pred

        # generate anchors
        # bs*N x K*A x 5
        grasp_all_anchors = self._generate_anchors(grasp_conf.size(1),
                                                   grasp_conf.size(2), rois)
        # filter out negative samples
        grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
        if self.training:
            grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
            # bs*N x 1 x 1
            rois_w = (rois[:, :, 3] -
                      rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_h = (rois[:, :, 4] -
                      rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_w = rois_w[grasp_rois_mask]
            rois_h = rois_h[grasp_rois_mask]
            # bs*N x 1 x 1
            fsx = rois_w / grasp_conf.size(1)
            fsy = rois_h / grasp_conf.size(2)
            # bs*N x 1 x 1
            xleft = rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
            ytop = rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
            xleft = xleft[grasp_rois_mask]
            ytop = ytop[grasp_rois_mask]

        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_batch_size = grasp_loc.size(0)

        # bs*N x K*A x 2
        grasp_prob = F.softmax(grasp_conf, 2)

        grasp_bbox_loss = 0
        grasp_cls_loss = 0
        grasp_conf_label = None
        if self.training:
            # inside weights indicate which bounding boxes should be regressed
            # outside weights indicate two things:
            # 1. which bounding boxes should contribute to the classification loss,
            # 2. how to balance the cls loss and the bbox loss
            grasp_gt_xywhc = points2labels(gt_grasps)
            # bs*N x N_{Gr_gt} x 5
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc,
                                                      gt_grasp_inds, rois_inds)
            # filter out negative samples
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

            # absolute coords to relative coords
            grasp_gt_xywhc[:, :, 0:1] -= xleft
            grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1],
                                                    min=0)
            grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1],
                                                  rois_w)
            grasp_gt_xywhc[:, :, 1:2] -= ytop
            grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2],
                                                    min=0)
            grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2],
                                                  rois_h)

            # grasp training data
            grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.ROIGN_proposal_target(
                grasp_conf,
                grasp_gt_xywhc,
                grasp_all_anchors,
                xthresh=fsx / 2,
                ythresh=fsy / 2)

            grasp_keep = Variable(
                grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
            grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0,
                                            grasp_keep.data)
            grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0,
                                                  grasp_keep.data)
            grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)

            grasp_iw = Variable(grasp_iw)
            grasp_ow = Variable(grasp_ow)
            grasp_loc_label = Variable(grasp_loc_label)
            grasp_bbox_loss = _smooth_l1_loss(grasp_loc,
                                              grasp_loc_label,
                                              grasp_iw,
                                              grasp_ow,
                                              dim=[2, 1])

        return rois, rpn_loss_cls, rpn_loss_bbox, rois_label,\
               grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
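
The inside/outside-weight comment above is essentially the whole contract of _smooth_l1_loss: inside weights gate which regression targets are active at all, and outside weights rescale each term (for example to balance the cls and bbox losses). A generic sketch of such a weighted smooth-L1 loss; the repository's helper may use a different sigma, normalisation and dim handling:

import torch

def weighted_smooth_l1(pred, target, inside_w, outside_w, sigma=1.0):
    # Element-wise smooth-L1 on inside_w * (pred - target), rescaled by
    # outside_w and averaged over the batch dimension (the normalisation
    # here is an assumption).
    sigma2 = sigma ** 2
    diff = inside_w * (pred - target)
    abs_diff = diff.abs()
    smooth_sign = (abs_diff < 1.0 / sigma2).type_as(abs_diff)
    loss = smooth_sign * 0.5 * sigma2 * diff.pow(2) \
        + (1.0 - smooth_sign) * (abs_diff - 0.5 / sigma2)
    return (outside_w * loss).sum() / max(pred.size(0), 1)
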
    def sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image,
                            rois_per_image):
        """Generate a random sample of template RoIs comprising foreground and background
        examples.
        """
        # overlaps: (rois x gt_boxes)
        overlaps = bbox_overlaps_batch(all_rois, gt_boxes)

        max_overlaps, gt_assignment = torch.max(overlaps, 2)

        batch_size = overlaps.size(0)
        num_proposal = overlaps.size(1)
        num_boxes_per_img = overlaps.size(2)

        offset = torch.arange(0, batch_size) * gt_boxes.size(1)
        offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

        labels = gt_boxes[:, :,
                          4].contiguous().view(-1)[(offset.view(-1), )].view(
                              batch_size, -1)
        track_id = gt_boxes[:, :,
                            5].contiguous().view(-1)[(offset.view(-1), )].view(
                                batch_size, -1)

        labels_batch = labels.new(batch_size, rois_per_image).zero_()
        track_id_batch = track_id.new(batch_size, rois_per_image).zero_() - 1

        rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()

        # Guard against the case when an image has fewer than max_fg_rois_per_image
        # foreground RoIs
        for i in range(batch_size):

            fg_inds = torch.nonzero(
                max_overlaps[i] >= cfg.SIAMESE.TEMPLATE_SEL_FG_THRESH).view(-1)
            fg_num_rois = fg_inds.numel()

            # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
            bg_inds = torch.nonzero(
                (max_overlaps[i] < cfg.SIAMESE.TEMPLATE_SEL_BG_THRESH_HI)
                & (max_overlaps[i] >= cfg.SIAMESE.TEMPLATE_SEL_BG_THRESH_LO)
                | (max_overlaps[i] < 0)).view(-1)
            bg_num_rois = bg_inds.numel()
            if fg_num_rois > 0 and bg_num_rois > 0:
                # sampling fg
                fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)

                # torch.randperm seems to have a bug in the multi-GPU setting that causes a segfault.
                # See https://github.com/pytorch/pytorch/issues/1868 for more details.
                # Use numpy instead.
                # rand_num = torch.randperm(fg_num_rois).long().cuda()
                rand_num = torch.from_numpy(np.random.permutation(
                    fg_num_rois)).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

                # sampling bg
                bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

                # torch.rand seems to have a bug: it can generate very large numbers and cause an error.
                # We use numpy's rand instead.
                # rand_num = (torch.rand(bg_rois_per_this_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(bg_rois_per_this_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                bg_inds = bg_inds[rand_num]

            elif fg_num_rois > 0 and bg_num_rois == 0:
                # sampling fg
                # rand_num = torch.floor(torch.rand(rois_per_image) * fg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(rois_per_image) * fg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
                fg_inds = fg_inds[rand_num]
                fg_rois_per_this_image = rois_per_image
                bg_rois_per_this_image = 0
            elif bg_num_rois > 0 and fg_num_rois == 0:
                # sampling bg
                # rand_num = torch.floor(torch.rand(rois_per_image) * bg_num_rois).long().cuda()
                rand_num = np.floor(
                    np.random.rand(rois_per_image) * bg_num_rois)
                rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()

                bg_inds = bg_inds[rand_num]
                bg_rois_per_this_image = rois_per_image
                fg_rois_per_this_image = 0
            else:
                print('overlaps:', overlaps.shape)
                print('max_overlaps:', max_overlaps)
                raise ValueError(
                    "template proposal layer bg_num_rois = 0 and fg_num_rois = 0, this should not happen!"
                )

            # The indices that we're selecting (both fg and bg)
            keep_inds = torch.cat([fg_inds, bg_inds], 0)
            # Select sampled values from various arrays:
            labels_batch[i].copy_(labels[i][keep_inds])
            track_id_batch[i].copy_(track_id[i][keep_inds])

            # Clamp labels for the background RoIs to 0
            if fg_rois_per_this_image < rois_per_image:
                labels_batch[i][fg_rois_per_this_image:] = 0
                track_id_batch[i][fg_rois_per_this_image:] = -1

            rois_batch[i] = all_rois[i][keep_inds]
            rois_batch[i, :, 0] = i

        return rois_batch, labels_batch, track_id_batch
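
A stripped-down sketch of the per-image sampling branch above, keeping only the common case where both foreground and background RoIs exist (the threshold arguments are placeholders for the cfg.SIAMESE values used in the real method, which also handles the fg-only, bg-only and padded-overlap cases):

import numpy as np
import torch

def sample_fg_bg(max_overlaps, fg_thresh, bg_hi, bg_lo, fg_per_image, rois_per_image):
    # max_overlaps: 1-D tensor of each RoI's best IoU with any gt box.
    fg_inds = torch.nonzero(max_overlaps >= fg_thresh).view(-1)
    bg_inds = torch.nonzero((max_overlaps < bg_hi) & (max_overlaps >= bg_lo)).view(-1)
    fg_this = min(fg_per_image, fg_inds.numel())
    perm = torch.from_numpy(np.random.permutation(fg_inds.numel())).long()
    fg_inds = fg_inds[perm[:fg_this]]                     # subsample foreground
    bg_pick = np.floor(np.random.rand(rois_per_image - fg_this) * bg_inds.numel())
    bg_inds = bg_inds[torch.from_numpy(bg_pick).long()]   # sample background with replacement
    return torch.cat([fg_inds, bg_inds], 0), fg_this
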
    def forward(self, im_data, gt):

        batch_size = im_data.size(0)

        gt_boxes = gt['boxes']
        gt_grasps = gt['grasps']
        gt_grasp_inds = gt['grasp_inds']
        num_boxes = gt['num_boxes']
        num_grasps = gt['num_grasps']
        im_info = gt['im_info']

        # features
        base_feat = self.base(im_data)

        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(
            base_feat, im_info, gt_boxes, num_boxes)

        # if it is the training phase, then use ground truth bboxes for refining
        if self.training:
            roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

            rois_label = Variable(rois_label.view(-1).long())
            rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
            rois_inside_ws = Variable(
                rois_inside_ws.view(-1, rois_inside_ws.size(2)))
            rois_outside_ws = Variable(
                rois_outside_ws.view(-1, rois_outside_ws.size(2)))
        else:
            rois_label = None
            rois_target = None
            rois_inside_ws = None
            rois_outside_ws = None
            rpn_loss_cls = 0
            rpn_loss_bbox = 0

        if cfg.RCNN_COMMON.POOLING_MODE == 'crop':
            # pdb.set_trace()
            # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
            grid_xy = _affine_grid_gen(rois.view(-1, 5),
                                       base_feat.size()[2:], self.grid_size)
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]],
                3).contiguous()
            pooled_feat = self.RCNN_roi_crop(base_feat,
                                             Variable(grid_yx).detach())
            if cfg.RCNN_COMMON.CROP_RESIZE_WITH_MAX_POOL:
                pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
        elif cfg.RCNN_COMMON.POOLING_MODE == 'align':
            pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
        elif cfg.RCNN_COMMON.POOLING_MODE == 'pool':
            pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

        # feed pooled features to top model
        # grasp top
        if self.training:
            if self._MGN_USE_POOLED_FEATS:
                rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
                # bs x N_{rois}
                _, rois_inds = torch.max(rois_overlaps, dim=2)
                rois_inds += 1
                grasp_rois_mask = rois_label.view(-1) > 0
            else:
                raise NotImplementedError
        # bbox top
        bbox_feat = self._RCNN_head_to_tail(pooled_feat)

        # compute bbox offset
        bbox_pred = self.RCNN_bbox_pred(bbox_feat)
        if self.training and not self.class_agnostic:
            # select the corresponding columns according to roi labels
            bbox_pred_view = bbox_pred.view(bbox_pred.size(0),
                                            int(bbox_pred.size(1) / 4), 4)
            bbox_pred_select = torch.gather(
                bbox_pred_view, 1,
                rois_label.view(rois_label.size(0), 1,
                                1).expand(rois_label.size(0), 1, 4))
            bbox_pred = bbox_pred_select.squeeze(1)

        # compute object classification probability
        cls_score = self.RCNN_cls_score(bbox_feat)
        cls_prob = F.softmax(cls_score, 1)

        RCNN_loss_cls = 0
        RCNN_loss_bbox = 0

        if self.training:
            # classification loss
            if cfg.TRAIN.COMMON.USE_FOCAL_LOSS:
                RCNN_loss_cls = F.cross_entropy(cls_score,
                                                rois_label,
                                                reduce=False)
                focal_loss_factor = torch.pow(
                    (1 - cls_prob[range(int(cls_prob.size(0))), rois_label]),
                    cfg.TRAIN.COMMON.FOCAL_LOSS_GAMMA)
                RCNN_loss_cls = torch.mean(RCNN_loss_cls * focal_loss_factor)
            else:
                RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)

            # bounding box regression L1 loss
            RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target,
                                             rois_inside_ws, rois_outside_ws)

        cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

        if self.training:
            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._MGN_head_to_tail(
                    pooled_feat[grasp_rois_mask])
            else:
                # when there are no positive rois:
                grasp_loc = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_prob = Variable(torch.Tensor([]).type_as(gt_grasps))
                grasp_bbox_loss = Variable(
                    torch.Tensor([0]).type_as(RCNN_loss_bbox))
                grasp_cls_loss = Variable(
                    torch.Tensor([0]).type_as(RCNN_loss_cls))
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,\
                   grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._MGN_head_to_tail(pooled_feat)

        grasp_pred = self.MGN_classifier(grasp_feat)
        # bs*N x K*A x 5, bs*N x K*A x 2
        grasp_loc, grasp_conf = grasp_pred

        # generate anchors
        # bs*N x K*A x 5
        grasp_all_anchors = self._generate_anchors(grasp_conf.size(1),
                                                   grasp_conf.size(2), rois)
        # filter out negative samples
        grasp_all_anchors = grasp_all_anchors.type_as(gt_grasps)
        if self.training:
            grasp_all_anchors = grasp_all_anchors[grasp_rois_mask]
            # bs*N x 1 x 1
            rois_w = (rois[:, :, 3] -
                      rois[:, :, 1]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_h = (rois[:, :, 4] -
                      rois[:, :, 2]).data.view(-1).unsqueeze(1).unsqueeze(2)
            rois_w = rois_w[grasp_rois_mask]
            rois_h = rois_h[grasp_rois_mask]
            # bs*N x 1 x 1
            fsx = rois_w / grasp_conf.size(1)
            fsy = rois_h / grasp_conf.size(2)
            # bs*N x 1 x 1
            xleft = rois[:, :, 1].data.view(-1).unsqueeze(1).unsqueeze(2)
            ytop = rois[:, :, 2].data.view(-1).unsqueeze(1).unsqueeze(2)
            xleft = xleft[grasp_rois_mask]
            ytop = ytop[grasp_rois_mask]

        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_batch_size = grasp_loc.size(0)

        # bs*N x K*A x 2
        grasp_prob = F.softmax(grasp_conf, 2)

        grasp_bbox_loss = 0
        grasp_cls_loss = 0
        grasp_conf_label = None
        if self.training:
            # inside weights indicate which bounding boxes should be regressed
            # outside weights indicate two things:
            # 1. which bounding boxes should contribute to the classification loss,
            # 2. how to balance the cls loss and the bbox loss
            grasp_gt_xywhc = points2labels(gt_grasps)
            # bs*N x N_{Gr_gt} x 5
            grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc,
                                                      gt_grasp_inds, rois_inds)
            # filter out negative samples
            grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]

            # absolute coords to relative coords
            grasp_gt_xywhc[:, :, 0:1] -= xleft
            grasp_gt_xywhc[:, :, 0:1] = torch.clamp(grasp_gt_xywhc[:, :, 0:1],
                                                    min=0)
            grasp_gt_xywhc[:, :, 0:1] = torch.min(grasp_gt_xywhc[:, :, 0:1],
                                                  rois_w)
            grasp_gt_xywhc[:, :, 1:2] -= ytop
            grasp_gt_xywhc[:, :, 1:2] = torch.clamp(grasp_gt_xywhc[:, :, 1:2],
                                                    min=0)
            grasp_gt_xywhc[:, :, 1:2] = torch.min(grasp_gt_xywhc[:, :, 1:2],
                                                  rois_h)

            # grasp training data
            grasp_loc_label, grasp_conf_label, grasp_iw, grasp_ow = self.MGN_proposal_target(
                grasp_conf,
                grasp_gt_xywhc,
                grasp_all_anchors,
                xthresh=fsx / 2,
                ythresh=fsy / 2)

            grasp_keep = Variable(
                grasp_conf_label.view(-1).ne(-1).nonzero().view(-1))
            grasp_conf = torch.index_select(grasp_conf.view(-1, 2), 0,
                                            grasp_keep.data)
            grasp_conf_label = torch.index_select(grasp_conf_label.view(-1), 0,
                                                  grasp_keep.data)
            grasp_cls_loss = F.cross_entropy(grasp_conf, grasp_conf_label)

            grasp_iw = Variable(grasp_iw)
            grasp_ow = Variable(grasp_ow)
            grasp_loc_label = Variable(grasp_loc_label)
            grasp_bbox_loss = _smooth_l1_loss(grasp_loc,
                                              grasp_loc_label,
                                              grasp_iw,
                                              grasp_ow,
                                              dim=[2, 1])

        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,\
               grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
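
The USE_FOCAL_LOSS branch above builds the focal loss by hand from an unreduced cross-entropy and the predicted probability of the true class. The same computation as a standalone function, using the modern reduction='none' spelling instead of the deprecated reduce=False; gamma stands in for cfg.TRAIN.COMMON.FOCAL_LOSS_GAMMA (2.0 is a common choice, not necessarily the configured one):

import torch
import torch.nn.functional as F

def focal_cls_loss(cls_score, rois_label, gamma=2.0):
    # Per-RoI cross-entropy, down-weighted by (1 - p_correct)^gamma so that
    # easy, confidently classified RoIs contribute less to the loss.
    ce = F.cross_entropy(cls_score, rois_label, reduction='none')
    cls_prob = F.softmax(cls_score, dim=1)
    p_correct = cls_prob[torch.arange(cls_score.size(0)), rois_label]
    return torch.mean((1.0 - p_correct) ** gamma * ce)
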
示例#15
0
    def forward(self, input):
        # torch.Size([1, 18, 50, 37])
        # input (rpn_cls_prob.data, gt_boxes, num_boxes, crowdsourced_classes, alpha_con)
        rpn_cls_prob = input[0]
        gt_boxes = input[1]
        num_boxes = input[2]
        im_info = input[3]
        crowdsourced_classes = input[4]
        alpha_con = input[5]
        batch_size = gt_boxes.size(0)

        # enumerate the coordinates of every anchor
        feat_height, feat_width = rpn_cls_prob.size(2), rpn_cls_prob.size(3)
        # _feat_stride (16) is the ratio between the input image and the feature map
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(rpn_cls_prob).float()
        A = self._num_anchors
        K = shifts.size(0)
        self._anchors = self._anchors.type_as(
            gt_boxes)  # move to specific gpu.
        # all_anchors torch.Size([1850, 9, 4])
        all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        all_anchors = all_anchors.view(K * A, 4)

        # remove anchors that fall outside the image
        keep = ((all_anchors[:, 0] >= 0) & (all_anchors[:, 1] >= 0) &
                (all_anchors[:, 2] < long(im_info[0][1])) &
                (all_anchors[:, 3] < long(im_info[0][0])))

        # indices of the kept anchors
        inds_inside = torch.nonzero(keep).view(-1)
        anchors = all_anchors[inds_inside, :]

        # from rpn_cls_prob, look up the fg probability of the anchor that best matches each gt_box
        overlaps = bbox_overlaps_batch(anchors, gt_boxes)

        # arggt_max_overlaps size([1, 20])
        _, arggt_max_overlaps = torch.max(overlaps, 1)
        index = inds_inside[arggt_max_overlaps]

        reshape_rpn_cls_prob = rpn_cls_prob.view(batch_size, 2, -1)
        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        # gt_boxes_cls <=> p
        gt_boxes_cls = torch.gather(reshape_rpn_cls_prob[:, 1], 1, index)

        alpha = get_alpha(alpha_con)

        sensitivity = get_sensitivity(alpha)
        specificity = get_specificity(alpha)
        if DEBUG:
            print('sensitivity: ', sensitivity)
            print('specificity: ', specificity)
        a = get_a(sensitivity, crowdsourced_classes)
        b = get_b(specificity, crowdsourced_classes)

        # print('a: ', a)
        # print('b: ', b)
        # print('p: ', gt_boxes_cls)

        # Size [1, 20]
        if DEBUG:
            print('a: ', a)
            print('b: ', b)
            print('gt_boxes_cls: ', gt_boxes_cls)
        mu = binary_get_mu(a, b, gt_boxes_cls)
        if DEBUG:
            print('mu: ', mu)

        bg_index = torch.nonzero(mu < 0.5)
        # update alpha_con for the entries [i, 0, j]
        update_alpha_con_rpn(alpha_con, bg_index, crowdsourced_classes)
        if bg_index.size() != torch.Size([0]):
            bg_index = bg_index.t()
            gt_boxes[bg_index[0], bg_index[1], :] = 0

        return gt_boxes
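
The shift/anchor broadcast used above (and again in the anchor target layer below) expands A base anchors to every feature-map location. A tiny self-contained shape check with made-up sizes and zero base anchors, just to make the K x A x 4 broadcast explicit:

import numpy as np
import torch

# Assume a 3x4 feature map, a feature stride of 16 and A = 9 base anchors;
# only the shapes matter here.
feat_height, feat_width, feat_stride, A = 3, 4, 16, 9
shift_x = np.arange(0, feat_width) * feat_stride
shift_y = np.arange(0, feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).float()
base_anchors = torch.zeros(A, 4)       # placeholder base anchors
K = shifts.size(0)                     # K = 3 * 4 = 12 spatial positions
all_anchors = (base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
print(all_anchors.shape)               # torch.Size([108, 4])
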
    def forward(self, data_batch):
        im_data = data_batch[0]
        im_info = data_batch[1]
        gt_boxes = data_batch[2]
        gt_grasps = data_batch[3]
        num_boxes = data_batch[4]
        num_grasps = data_batch[5]
        rel_mat = data_batch[6]
        gt_grasp_inds = data_batch[7]

        # object detection
        if self.training:
            self.iter_counter += 1
        self.batch_size = im_data.size(0)

        # feed image data to base model to obtain base feature map
        base_feat = self.FeatExt(im_data)
        ### GENERATE ROIs
        rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)
        if self.training:
            rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = \
                self._get_header_train_data(rois, gt_boxes, num_boxes)
            pos_rois_labels = [(rois_label[i * rois.size(1): (i + 1) * rois.size(1)] > 0) for i in range(self.batch_size)]
            od_rois = [rois[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
        else:
            rois_label, rois_target, rois_inside_ws, rois_outside_ws = None, None, None, None
            od_rois = rois.data
        pooled_feat = self._roi_pooling(base_feat, rois)

        ### OBJECT DETECTION
        cls_score, cls_prob, bbox_pred = self._get_obj_det_result(pooled_feat)
        RCNN_loss_bbox, RCNN_loss_cls = 0, 0
        if self.training:
            RCNN_loss_bbox, RCNN_loss_cls = self._obj_det_loss_comp(cls_score, cls_prob, bbox_pred, rois_label, rois_target,
                                                                    rois_inside_ws, rois_outside_ws)
        cls_prob = cls_prob.contiguous().view(self.batch_size, rois.size(1), -1)
        bbox_pred = bbox_pred.contiguous().view(self.batch_size, rois.size(1), -1)

        ### VISUAL MANIPULATION RELATIONSHIP DETECTION
        # for object detection before relationship detection
        if self.training:
            od_cls_prob = [cls_prob[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
            od_bbox_pred = [bbox_pred[i][pos_rois_labels[i]].data for i in range(self.batch_size)]
        else:
            od_cls_prob = cls_prob.data
            od_bbox_pred = bbox_pred.data

        # generate object RoIs.
        obj_rois, obj_num = torch.Tensor([]).type_as(rois), torch.Tensor([]).type_as(num_boxes)
        # online data
        if not self.training or cfg.TRAIN.VMRN.TRAINING_DATA in ('all', 'online'):
            obj_rois, obj_num = self._object_detection(od_rois, od_cls_prob, od_bbox_pred, self.batch_size, im_info.data)
        # offline data
        if self.training and cfg.TRAIN.VMRN.TRAINING_DATA in ('all', 'offline'):
            for i in range(self.batch_size):
                img_ind = (i * torch.ones(num_boxes[i].item(),1)).type_as(gt_boxes)
                obj_rois = torch.cat([obj_rois, torch.cat([img_ind, (gt_boxes[i][:num_boxes[i]])],1)])
            obj_num = torch.cat([obj_num, num_boxes])

        obj_labels = torch.Tensor([]).type_as(gt_boxes).long()
        if obj_rois.size(0) > 0:
            obj_labels = obj_rois[:, 5]
            obj_rois = obj_rois[:, :5]

        VMRN_rel_loss_cls = 0
        if (obj_num > 1).sum().item() > 0:
            rel_cls_score, rel_cls_prob = self._get_rel_det_result(base_feat, obj_rois, obj_num)
            if self.training:
                obj_pair_rel_label = self._generate_rel_labels(obj_rois, gt_boxes, obj_num, rel_mat, rel_cls_prob.size(0))
                VMRN_rel_loss_cls = self._rel_det_loss_comp(obj_pair_rel_label.type_as(gt_boxes).long(), rel_cls_score)
            else:
                rel_cls_prob = self._rel_cls_prob_post_process(rel_cls_prob)
        else:
            rel_cls_prob = torch.Tensor([]).type_as(cls_prob)

        rel_result = None
        if not self.training:
            if obj_rois.numel() > 0:
                pred_boxes = obj_rois.data[:,1:5]
                pred_boxes[:, 0::2] /= im_info[0][3].item()
                pred_boxes[:, 1::2] /= im_info[0][2].item()
                rel_result = (pred_boxes, obj_labels, rel_cls_prob.data)
            else:
                rel_result = (obj_rois.data, obj_labels, rel_cls_prob.data)

        ### ROI-BASED GRASP DETECTION
        if self.training:
            rois_overlaps = bbox_overlaps_batch(rois, gt_boxes)
            # bs x N_{rois}
            _, rois_inds = torch.max(rois_overlaps, dim=2)
            rois_inds += 1
            grasp_rois_mask = rois_label.view(-1) > 0

            if (grasp_rois_mask > 0).sum().item() > 0:
                grasp_feat = self._MGN_head_to_tail(pooled_feat[grasp_rois_mask])
                grasp_rois = rois.view(-1, 5)[grasp_rois_mask]
                # process grasp ground truth, return: N_{gr_rois} x N_{Gr_gt} x 5
                grasp_gt_xywhc = points2labels(gt_grasps)
                grasp_gt_xywhc = self._assign_rois_grasps(grasp_gt_xywhc, gt_grasp_inds, rois_inds)
                grasp_gt_xywhc = grasp_gt_xywhc[grasp_rois_mask]
            else:
                # when there are no positive rois, return dummy results
                grasp_loc = torch.Tensor([]).type_as(gt_grasps)
                grasp_prob = torch.Tensor([]).type_as(gt_grasps)
                grasp_bbox_loss = torch.Tensor([0]).type_as(gt_grasps)
                grasp_cls_loss = torch.Tensor([0]).type_as(gt_grasps)
                grasp_conf_label = torch.Tensor([-1]).type_as(rois_label)
                grasp_all_anchors = torch.Tensor([]).type_as(gt_grasps)
                return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label,\
                   grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
        else:
            grasp_feat = self._MGN_head_to_tail(pooled_feat)

        # N_{gr_rois} x W x H x A*5, N_{gr_rois} x W x H x A*2
        grasp_loc, grasp_conf = self.FCGN_classifier(grasp_feat)
        feat_height, feat_width = grasp_conf.size(1), grasp_conf.size(2)
        # reshape grasp_loc and grasp_conf
        grasp_loc = grasp_loc.contiguous().view(grasp_loc.size(0), -1, 5)
        grasp_conf = grasp_conf.contiguous().view(grasp_conf.size(0), -1, 2)
        grasp_prob = F.softmax(grasp_conf, 2)

        # 2. calculate grasp loss
        grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = 0, 0, None
        if self.training:
            # N_{gr_rois} x K*A x 5
            grasp_all_anchors = self._generate_anchors(feat_height, feat_width, grasp_rois)
            grasp_bbox_loss, grasp_cls_loss, grasp_conf_label = self._grasp_loss_comp(grasp_rois,
                grasp_conf, grasp_loc, grasp_gt_xywhc, grasp_all_anchors, feat_height, feat_width)
        else:
            # bs*N x K*A x 5
            grasp_all_anchors = self._generate_anchors(feat_height, feat_width, rois.view(-1, 5))

        return rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_bbox, \
                RCNN_loss_cls, RCNN_loss_bbox, VMRN_rel_loss_cls, rois_label, \
                grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors
Example #17
    def forward(self, input):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors

        rpn_cls_score = input[0]
        gt_boxes = input[1]
        im_info = input[2]
        num_boxes = input[3]

        # map of shape (..., H, W)
        height, width = rpn_cls_score.size(2), rpn_cls_score.size(3)

        batch_size = gt_boxes.size(0)

        feat_height, feat_width = rpn_cls_score.size(2), rpn_cls_score.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(rpn_cls_score).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(gt_boxes) # move to specific gpu.
        all_anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        all_anchors = all_anchors.view(K * A, 4)

        total_anchors = int(K * A)

        keep = ((all_anchors[:, 0] >= -self._allowed_border) &
                (all_anchors[:, 1] >= -self._allowed_border) &
                (all_anchors[:, 2] < int(im_info[0][1]) + self._allowed_border) &
                (all_anchors[:, 3] < int(im_info[0][0]) + self._allowed_border))

        inds_inside = torch.nonzero(keep).view(-1)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is don't care
        labels = gt_boxes.new(batch_size, inds_inside.size(0)).fill_(-1)
        bbox_inside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()
        bbox_outside_weights = gt_boxes.new(batch_size, inds_inside.size(0)).zero_()

        overlaps = bbox_overlaps_batch(anchors, gt_boxes)

        max_overlaps, argmax_overlaps = torch.max(overlaps, 2)
        gt_max_overlaps, _ = torch.max(overlaps, 1)

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[(max_overlaps<=cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_HI) & (max_overlaps>=cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_LO)] = 0

        gt_max_overlaps[gt_max_overlaps==0] = 1e-5
        keep = torch.sum(overlaps.eq(gt_max_overlaps.view(batch_size,1,-1).expand_as(overlaps)), 2)

        if torch.sum(keep) > 0:
            labels[keep>0] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.SIAMESE.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            labels[(max_overlaps<=cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_HI) & (max_overlaps>=cfg.SIAMESE.RPN_NEGATIVE_OVERLAP_LO)] = 0

        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)

        sum_fg = torch.sum((labels == 1).int(), 1)
        sum_bg = torch.sum((labels == 0).int(), 1)

        for i in range(batch_size):
            # subsample positive labels if we have too many
            if sum_fg[i] > num_fg:
                fg_inds = torch.nonzero(labels[i] == 1).view(-1)
                # torch.randperm seems to have a bug in multi-gpu settings that causes a segfault.
                # See https://github.com/pytorch/pytorch/issues/1868 for more details.
                # use numpy instead.
                #rand_num = torch.randperm(fg_inds.size(0)).type_as(gt_boxes).long()
                rand_num = torch.from_numpy(np.random.permutation(fg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = fg_inds[rand_num[:fg_inds.size(0)-num_fg]]
                labels[i][disable_inds] = -1

            # recompute the foreground count after subsampling so the background budget
            # reflects the labels actually kept
            num_bg = cfg.TRAIN.RPN_BATCHSIZE - torch.sum((labels == 1).int(), 1)[i]

            # subsample negative labels if we have too many
            if sum_bg[i] > num_bg:
                bg_inds = torch.nonzero(labels[i] == 0).view(-1)
                #rand_num = torch.randperm(bg_inds.size(0)).type_as(gt_boxes).long()
                rand_num = torch.from_numpy(np.random.permutation(bg_inds.size(0))).type_as(gt_boxes).long()
                disable_inds = bg_inds[rand_num[:bg_inds.size(0)-num_bg]]
                labels[i][disable_inds] = -1

        offset = torch.arange(0, batch_size)*gt_boxes.size(1)

        argmax_overlaps = argmax_overlaps + offset.view(batch_size, 1).type_as(argmax_overlaps)
        bbox_targets = _compute_targets_batch(anchors, gt_boxes.view(-1,5)[argmax_overlaps.view(-1), :].view(batch_size, -1, 5))

        # use a single value instead of 4 values for easier indexing.
        bbox_inside_weights[labels==1] = cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS[0]

        # default to None so the guard below is well-defined even when a fixed
        # RPN_POSITIVE_WEIGHT is configured and this branch assigns nothing
        positive_weights, negative_weights = None, None
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform example weighting; note this reuses the batch index `i`
            # left over from the last iteration of the sampling loop above
            num_examples = torch.sum(labels[i] >= 0)
            if num_examples.item() > 0:
                positive_weights = 1.0 / num_examples.item()
                negative_weights = 1.0 / num_examples.item()
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))

        if positive_weights is not None and negative_weights is not None:
            bbox_outside_weights[labels == 1] = positive_weights
            bbox_outside_weights[labels == 0] = negative_weights

        labels = _unmap(labels, total_anchors, inds_inside, batch_size, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, batch_size, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, batch_size, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, batch_size, fill=0)

        outputs = []

        labels = labels.view(batch_size, height, width, A).permute(0,3,1,2).contiguous()
        labels = labels.view(batch_size, 1, A * height, width)
        outputs.append(labels)

        bbox_targets = bbox_targets.view(batch_size, height, width, A*4).permute(0,3,1,2).contiguous()
        outputs.append(bbox_targets)

        anchors_count = bbox_inside_weights.size(1)
        bbox_inside_weights = bbox_inside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)

        bbox_inside_weights = bbox_inside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()

        outputs.append(bbox_inside_weights)

        bbox_outside_weights = bbox_outside_weights.view(batch_size,anchors_count,1).expand(batch_size, anchors_count, 4)
        bbox_outside_weights = bbox_outside_weights.contiguous().view(batch_size, height, width, 4*A)\
                            .permute(0,3,1,2).contiguous()
        outputs.append(bbox_outside_weights)

        return outputs
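
For reference, the anchor grid built near the top of this forward pass is just the A base anchors broadcast against one (x, y, x, y) shift per feature-map cell, giving K*A anchors for a K-cell map. The snippet below is a small self-contained illustration of that shift/broadcast step using a toy 2x3 feature map and two made-up base anchors; the names and sizes are illustrative and not taken from the repository's anchor generator.

import numpy as np
import torch

feat_stride = 16
feat_height, feat_width = 2, 3                      # tiny feature map for illustration
base_anchors = torch.tensor([[-8., -8., 8., 8.],    # A = 2 toy anchors centred at (0, 0)
                             [-16., -8., 16., 8.]])

# one (x, y, x, y) offset per feature-map cell, in image coordinates
shift_x = np.arange(0, feat_width) * feat_stride
shift_y = np.arange(0, feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = torch.from_numpy(
    np.vstack((shift_x.ravel(), shift_y.ravel(),
               shift_x.ravel(), shift_y.ravel())).transpose()).float()   # (K, 4), K = H*W

A, K = base_anchors.size(0), shifts.size(0)
all_anchors = (base_anchors.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
print(all_anchors.shape)   # torch.Size([12, 4]) == (K*A, 4)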