Example #1
    def build_cost_matrix(self, instances: Instances,
                          prev_instances: Instances) -> np.ndarray:
        """
        Build the cost matrix for assignment problem
        (https://en.wikipedia.org/wiki/Assignment_problem)

        Args:
            instances: D2 Instances, for current frame predictions
            prev_instances: D2 Instances, for previous frame predictions

        Returns:
            the cost matrix as a numpy array
        """
        assert instances is not None and prev_instances is not None
        # calculate IoU of all bbox pairs
        iou_all = pairwise_iou(
            boxes1=instances.pred_boxes,
            boxes2=prev_instances.pred_boxes,
        )
        bbox_pairs = create_prediction_pairs(
            instances,
            prev_instances,
            iou_all,
            threshold=self._track_iou_threshold)
        # assign a large cost so pairs below the IoU threshold are never matched
        cost_matrix = np.full((len(instances), len(prev_instances)),
                              LARGE_COST_VALUE)
        return self.assign_cost_matrix_values(cost_matrix, bbox_pairs)
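The returned matrix is meant to be handed to a Hungarian solver. Below is a minimal consumption sketch, assuming SciPy's linear_sum_assignment; the LARGE_COST_VALUE here is a stand-in for the imported constant, and the negated-IoU entries mirror how matchable pairs are typically scored:

import numpy as np
from scipy.optimize import linear_sum_assignment

LARGE_COST_VALUE = 100000  # stand-in for the imported constant

# Toy cost matrix: 2 current boxes x 3 previous boxes; matchable pairs carry
# a negated IoU so the min-cost solver prefers high-overlap pairs.
cost_matrix = np.full((2, 3), LARGE_COST_VALUE, dtype=np.float64)
cost_matrix[0, 1] = -0.9
cost_matrix[1, 2] = -0.7

row_idx, col_idx = linear_sum_assignment(cost_matrix)
for r, c in zip(row_idx, col_idx):
    if cost_matrix[r, c] < LARGE_COST_VALUE:
        print(f"current box {r} keeps the ID of previous box {c}")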
def non_maximum_regression_loss_stage1(box_reg_delta,
                                       proposals,
                                       box_score,
                                       score_gap=0.2,
                                       reg_iou=0.6,
                                       box2box_translation=None):

    device = box_score.device  # follow the inputs rather than hard-coding 'cuda'
    loss = torch.zeros(1, device=device)
    box_score_topk, box_score_index = torch.topk(box_score, k=1, dim=1)
    box_score_topk, box_score_index = box_score_topk.reshape(
        -1), box_score_index.reshape(-1)
    proposals_max = proposals[box_score_index]  # (M, 4): top-scoring proposal per row
    proposals_ious = pairwise_iou(proposals_max, proposals)
    proposals_ious[proposals_ious == 1] = 0  # drop self-matches
    proposals_ious = (proposals_ious >= reg_iou).float()
    if proposals_ious.sum():
        vx, vy = proposals_ious.nonzero().transpose(0, 1)
        proposals_reg_target = proposals_max[vx]
        box_score_targets = box_score_topk[vx]
        box_score_reg = box_score[vx, vy].reshape(-1)
        # only regress proposals whose score trails the top score by score_gap
        score_att = (box_score_targets - box_score_reg) >= score_gap
        proposals_need_reg = proposals[vy]
        proposal_target = box2box_translation.get_deltas(
            proposals_need_reg.tensor, proposals_reg_target.tensor)
        reg_delta = box_reg_delta[vx, vy]
        loss += weighted_smooth_l1_loss(reg_delta,
                                        proposal_target,
                                        beta=1,
                                        weight=score_att.float(),
                                        reduction='mean')

    return loss
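For intuition, here is a self-contained sketch of the core step this loss performs: pick a top box, find its high-IoU neighbours, and compute regression deltas toward it. Box2BoxTransform is Detectron2's box-delta codec and stands in for the box2box_translation argument; weighted_smooth_l1_loss is repo-specific, so plain smooth_l1_loss is used instead:

import torch
import torch.nn.functional as F
from detectron2.structures import Boxes, pairwise_iou
from detectron2.modeling.box_regression import Box2BoxTransform

box2box = Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0))
proposals = Boxes(torch.tensor([[0., 0., 10., 10.],
                                [1., 1., 11., 11.],
                                [50., 50., 60., 60.]]))

# Treat proposal 0 as the top-scoring box; regress its >=0.6-IoU
# neighbours (excluding itself) toward it.
ious = pairwise_iou(proposals[[0]], proposals)      # (1, 3)
neighbour_mask = (ious >= 0.6) & (ious < 1.0)
vy = neighbour_mask.nonzero()[:, 1]
target_deltas = box2box.get_deltas(
    proposals.tensor[vy], proposals.tensor[0].expand(len(vy), 4))
# An untrained head predicts zero deltas; the loss pulls it toward target_deltas.
loss = F.smooth_l1_loss(torch.zeros_like(target_deltas), target_deltas, beta=1.0)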
Example #3
 def update(self, instances: Instances) -> Instances:
     """
     See BaseTracker description
     """
     if instances.has("pred_keypoints"):
         raise NotImplementedError("Need to add support for keypoints")
     instances = self._initialize_extra_fields(instances)
     if self._prev_instances is not None:
         # calculate IoU of all bbox pairs
         iou_all = pairwise_iou(
             boxes1=instances.pred_boxes,
             boxes2=self._prev_instances.pred_boxes,
         )
         # sort IoU in descending order
         bbox_pairs = self._create_prediction_pairs(instances, iou_all)
         # assign previous ID to current bbox if IoU > track_iou_threshold
         self._reset_fields()
         for bbox_pair in bbox_pairs:
             if bbox_pair["IoU"] < self._track_iou_threshold:
                 continue
             idx = bbox_pair["idx"]
             prev_id = bbox_pair["prev_id"]
             if idx in self._matched_idx or prev_id in self._matched_ID:
                 continue
             instances.ID[idx] = prev_id
             instances.ID_period[idx] = bbox_pair["prev_period"] + 1
             instances.lost_frame_count[idx] = 0
             self._matched_idx.add(idx)
             self._matched_ID.add(prev_id)
             self._untracked_prev_idx.remove(bbox_pair["prev_idx"])
         instances = self._assign_new_id(instances)
         instances = self._merge_untracked_instances(instances)
     self._prev_instances = copy.deepcopy(instances)
     return instances
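A hedged usage sketch for this method; `tracker` stands in for an instance of the (unnamed) tracker class above, and the per-frame boxes are hypothetical:

import torch
from detectron2.structures import Boxes, Instances

def track_video(tracker, per_frame_boxes, image_size=(480, 640)):
    """Feed per-frame detections through tracker.update, collecting IDs.

    `tracker` is assumed to be an instance of the tracker class above;
    `per_frame_boxes` is a list of (N_t, 4) XYXY box arrays, one per frame.
    """
    tracked = []
    for frame_boxes in per_frame_boxes:
        inst = Instances(image_size)
        inst.pred_boxes = Boxes(torch.as_tensor(frame_boxes, dtype=torch.float32))
        tracked.append(tracker.update(inst))  # IDs persist across frames via IoU
    return tracked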
 def match_targets_to_proposals(self, proposal, target):
     match_quality_matrix = pairwise_iou(target, proposal)
     matched_idxs = self.proposal_matcher(match_quality_matrix)
     # Mask R-CNN needs the "labels", "masks" (and here "char_masks") fields
     # for creating the targets
     target = target.copy_with_fields(["labels", "masks", "char_masks"])
     # get the targets corresponding GT for each proposal
     # NB: need to clamp the indices because we can have a single
     # GT in the image, and matched_idxs can be -2, which goes
     # out of bounds
     matched_targets = target[matched_idxs.clamp(min=0)]
     matched_targets.add_field("matched_idxs", matched_idxs)
     return matched_targets
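The clamping trick described in the comment can be shown in isolation. A plain-PyTorch sketch, using the -1/-2 sentinel convention of maskrcnn-benchmark's Matcher (which this snippet appears to follow):

import torch

# Sentinels from the proposal matcher: -1 = below low threshold (background),
# -2 = between thresholds (ignored during sampling).
matched_idxs = torch.tensor([0, -1, 0, -2])
gt_labels = torch.tensor([7])  # a single ground-truth box in the image

# Without clamp(min=0), the negative sentinels would wrap around (or error)
# when indexing; the clamped rows get relabelled immediately afterwards.
labels = gt_labels[matched_idxs.clamp(min=0)].clone()
labels[matched_idxs == -1] = 0    # background class
labels[matched_idxs == -2] = -1   # ignored in loss sampling
print(labels)  # tensor([ 7,  0,  7, -1])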
def non_maximum_regression_loss_stage2_ada(box_reg_delta,
                                           proposals,
                                           box_score,
                                           score_gap=0.2,
                                           reg_iou=0.6,
                                           box2box_translation=None,
                                           atten_mask=None):

    device = box_score.device  # follow the inputs rather than hard-coding 'cuda'
    loss = torch.zeros(1, device=device)
    box_score = box_score * atten_mask
    box_reg_delta = box_reg_delta.reshape(-1, 4)
    num_phrase, num_boxes = box_score.shape
    box_score_topk, box_score_index = torch.topk(box_score, k=1, dim=1)
    box_score_topk, box_score_index = box_score_topk.reshape(
        -1), box_score_index.reshape(-1)
    box_score = box_score.reshape(-1)
    # turn each per-phrase argmax into an index into the flat proposal list
    box_score_index = box_score_index + torch.arange(num_phrase).to(
        device) * num_boxes
    proposals_max = proposals[box_score_index]  # (M, 4): top proposal per phrase
    proposals_ious = pairwise_iou(proposals_max, proposals)
    proposals_ious[proposals_ious == 1] = 0  # drop self-matches
    # block-diagonal mask: phrase p may only match its own num_boxes proposals
    keep_mat = torch.zeros_like(proposals_ious)
    for pid in range(num_phrase):
        keep_mat[pid, pid * num_boxes:(pid + 1) * num_boxes] = 1

    keep_mat = keep_mat * atten_mask.reshape(-1)

    proposals_ious = proposals_ious * keep_mat
    proposals_ious_flag = (proposals_ious >= reg_iou).float()

    if proposals_ious_flag.sum():
        vx, vy = proposals_ious_flag.nonzero().transpose(0, 1)
        proposals_reg_target = proposals_max[vx]
        proposals_need_reg = proposals[vy]
        box_score_target = box_score_topk[vx]
        box_score_reg = box_score[vy]
        # only regress proposals whose score trails the top score by score_gap
        score_att = (box_score_target - box_score_reg) >= score_gap
        proposal_target = box2box_translation.get_deltas(
            proposals_need_reg.tensor, proposals_reg_target.tensor)
        reg_delta = box_reg_delta[vy]
        loss += weighted_smooth_l1_loss(reg_delta,
                                        proposal_target,
                                        beta=1,
                                        weight=score_att.float(),
                                        reduction='mean')

    return loss
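The per-phrase loop that builds keep_mat is equivalent to a block-diagonal mask; a vectorized sketch of the same construction:

import torch

num_phrase, num_boxes = 3, 4
# Row p keeps only the num_boxes columns belonging to phrase p.
keep_mat = torch.block_diag(
    *[torch.ones(1, num_boxes) for _ in range(num_phrase)])
assert keep_mat.shape == (num_phrase, num_phrase * num_boxes)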
def generate_attention_entropy_loss(pred_sim,
                                    pred_topk_sim,
                                    targets,
                                    precomp_boxes,
                                    s2_topk=5):

    # clamp so the logs below never see zero probabilities
    pred_sim = torch.clamp(pred_sim, min=1e-5)
    pred_topk_sim = torch.clamp(pred_topk_sim, min=1e-5)

    ious = pairwise_iou(targets, precomp_boxes)
    atten_topk = torch.topk(pred_sim, k=s2_topk, dim=1)[1]
    ious_topk = torch.gather(ious, dim=1, index=atten_topk)
    gt_score = F.normalize(ious * ious.ge(0.5).float(), p=1, dim=1)
    gt_score_topk = F.normalize(ious_topk * ious_topk.ge(0.5).float(),
                                p=1,
                                dim=1)

    cls_loss = -(gt_score * pred_sim.log()).mean()
    cls_loss_topk = -(gt_score_topk * pred_topk_sim.log()).mean()

    return cls_loss, cls_loss_topk
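A small worked example of how the soft target gt_score comes out, with toy boxes (values here are illustrative):

import torch
import torch.nn.functional as F
from detectron2.structures import Boxes, pairwise_iou

targets = Boxes(torch.tensor([[0., 0., 10., 10.]]))
precomp_boxes = Boxes(torch.tensor([[0., 0., 10., 10.],
                                    [1., 1., 11., 11.],
                                    [40., 40., 50., 50.]]))

ious = pairwise_iou(targets, precomp_boxes)  # (1, 3): ~[1.00, 0.68, 0.00]
# Zero out proposals under 0.5 IoU, then L1-normalize: each row becomes a
# probability distribution that the cross-entropy above pushes pred_sim toward.
gt_score = F.normalize(ious * ious.ge(0.5).float(), p=1, dim=1)
print(gt_score)  # ~tensor([[0.595, 0.405, 0.000]])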
    def evaluate(self):
        """
        Evaluates Referring Segmentation IoU:
        """

        if self._distributed:
            synchronize()

            self._predictions = all_gather(self._predictions)

            if not is_main_process():
                return

            all_prediction = {}
            for p in self._predictions:
                all_prediction.update(p)
        else:
            all_prediction = self._predictions

        image_unique_ids = list(all_prediction.keys())

        total_num = 0
        recall_num = 0
        recall_t2_num = 0
        recall_t2_fusion_num = 0
        num_type = {}
        recall_type = {}
        acc_type = {}
        recall_topk_num = {5: 0, 10: 0}
        point_recall_num = 0
        point_recall_t2_num = 0
        point_recall_fusion_t2_num = 0

        for img_sent_id in image_unique_ids:

            result = all_prediction[img_sent_id]
            phrase_ids = result[0]
            phrase_types = result[1]
            pred_boxes = result[2]
            pred_similarity = result[3]
            targets = result[4]
            precomp_boxes = result[5]

            topk_pred_boxes = result[6]
            topk_fusion_pred_boxes = result[8]

            pred_boxes.clip()
            ious = pairwise_iou(
                targets, pred_boxes
            )  # note: pairwise_iou moves the target boxes onto the CUDA device
            iou = ious.numpy().diagonal()
            total_num += iou.shape[0]
            recall_num += int((iou >= cfg.MODEL.VG.EVAL_THRESH).sum())  # 0.5

            pred_boxes_tensor = pred_boxes.tensor
            pred_center = (pred_boxes_tensor[:, :2] +
                           pred_boxes_tensor[:, 2:]) / 2.0
            pred_center = pred_center.repeat(1, 2)  ## x_c, y_c, x_c, y_c
            targets_tensor = targets.tensor
            fall_tensor = targets_tensor - pred_center
            fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + (
                fall_tensor[:, 2:] >= 0).float().sum(1)
            point_recall_num += (fall_tensor == 4).float().numpy().sum()

            topk_pred_boxes.clip()
            ious_topk = pairwise_iou(targets, topk_pred_boxes)
            recall_t2_num += int((ious_topk.numpy().diagonal() >=
                                  cfg.MODEL.VG.EVAL_THRESH).sum())

            topk_boxes_tensor = topk_pred_boxes.tensor
            pred_center = (topk_boxes_tensor[:, :2] +
                           topk_boxes_tensor[:, 2:]) / 2.0
            pred_center = pred_center.repeat(1, 2)  ## x_c, y_c, x_c, y_c
            fall_tensor = targets_tensor - pred_center
            fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + (
                fall_tensor[:, 2:] >= 0).float().sum(1)
            point_recall_t2_num += (fall_tensor == 4).float().numpy().sum()

            topk_fusion_pred_boxes.clip()
            ious_fusion_topk = pairwise_iou(targets, topk_fusion_pred_boxes)
            recall_t2_fusion_num += int((ious_fusion_topk.numpy().diagonal() >=
                                         cfg.MODEL.VG.EVAL_THRESH).sum())

            topk_fusion_boxes_tensor = topk_fusion_pred_boxes.tensor
            pred_center = (topk_fusion_boxes_tensor[:, :2] +
                           topk_fusion_boxes_tensor[:, 2:]) / 2.0
            pred_center = pred_center.repeat(1, 2)  ## x_c, y_c, x_c, y_c
            fall_tensor = targets_tensor - pred_center
            fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + (
                fall_tensor[:, 2:] >= 0).float().sum(1)
            point_recall_fusion_t2_num += (
                fall_tensor == 4).float().numpy().sum()

            for pid, p_type in enumerate(phrase_types):
                p_type = p_type[0]
                num_type[p_type] = num_type.setdefault(p_type, 0) + 1
                recall_type[p_type] = recall_type.setdefault(
                    p_type, 0) + (iou[pid] >= cfg.MODEL.VG.EVAL_THRESH)

            precomp_boxes.clip()
            ious_top = pairwise_iou(targets, precomp_boxes).cpu()

            for k in [5, 10]:
                top_k = torch.topk(pred_similarity, k=k, dim=1)[0][:, [-1]]
                pred_similarity_topk = (pred_similarity >= top_k).float()
                ious_top_k = (ious_top * pred_similarity_topk).numpy()
                recall_topk_num[k] += int(
                    ((ious_top_k >= cfg.MODEL.VG.EVAL_THRESH).sum(1) >
                     0).sum())

        acc = recall_num / total_num
        acc_top5 = recall_topk_num[5] / total_num
        acc_top10 = recall_topk_num[10] / total_num

        acc_s2 = recall_t2_num / total_num
        acc_s2_fusion = recall_t2_fusion_num / total_num

        point_acc = point_recall_num / total_num
        point_acc_s2 = point_recall_t2_num / total_num
        point_acc_s2_fusion = point_recall_fusion_t2_num / total_num

        for p_type, type_num in num_type.items():
            acc_type[p_type] = recall_type[p_type] / type_num

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(
                self._output_dir,
                "prediction_{}.pkl".format(str(acc).replace('.', '_')[:6]))
            with PathManager.open(file_path, "wb") as f:
                pickle.dump(all_prediction, f)
        del all_prediction
        self._logger.info(
            'evaluation on {} expression instances, per-type accuracy: {}'.format(
                len(image_unique_ids), acc_type))
        self._logger.info(
            'Evaluate Pointing Accuracy: PointAcc:{}, PointAccS2:{}, PointAccS2Fusion:{}'
            .format(point_acc, point_acc_s2, point_acc_s2_fusion))
        results = OrderedDict({
            "acc": acc,
            "acc_top5": acc_top5,
            "acc_top10": acc_top10,
            'acc_s2': acc_s2,
            'acc_s2_fusion': acc_s2_fusion
        })
        return results
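The pointing-accuracy computation above is repeated verbatim for each box set; it reduces to testing whether each predicted box center falls inside its target box. An equivalent compact sketch:

import torch

def center_hits(pred_boxes: torch.Tensor, target_boxes: torch.Tensor) -> torch.Tensor:
    """True where each predicted box center lies inside its target box.

    Both arguments are (N, 4) tensors in (x1, y1, x2, y2) format; the sum of
    the per-instance hits is what the loop accumulates as point_recall_num.
    """
    centers = (pred_boxes[:, :2] + pred_boxes[:, 2:]) / 2.0
    return ((target_boxes[:, :2] <= centers) &
            (centers <= target_boxes[:, 2:])).all(dim=1)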
Example #8
 def topk_iou_boxes(self, candidates: Boxes, targets: Boxes, k=1):
     iou_matrix = pairwise_iou(candidates, targets)
     # top-k along dim=0: the k best-overlapping candidates for each target
     _, topk_idxs = iou_matrix.topk(k, dim=0)
     return candidates.tensor[topk_idxs], topk_idxs
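A quick usage sketch with toy boxes (the shapes follow from pairwise_iou returning a candidates-by-targets matrix):

import torch
from detectron2.structures import Boxes, pairwise_iou

candidates = Boxes(torch.tensor([[0., 0., 10., 10.],
                                 [2., 2., 12., 12.],
                                 [30., 30., 40., 40.]]))
targets = Boxes(torch.tensor([[1., 1., 11., 11.]]))

iou_matrix = pairwise_iou(candidates, targets)  # (3, 1)
_, topk_idxs = iou_matrix.topk(2, dim=0)        # 2 best candidates per target
best_boxes = candidates.tensor[topk_idxs]       # (2, 1, 4)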
    def extend_after(self, frame_index, proposal_instances,
                     prior_selection_mask):
        result_iou_mask = torch.zeros(prior_selection_mask.shape,
                                      dtype=torch.bool)
        if self.is_dead(frame_index):
            return result_iou_mask
        if not torch.any(prior_selection_mask):
            return result_iou_mask

        last_key_instance_index = self.last_key_instance_index[-1]
        if self.config.perform_projection:
            projected_last_instance = self.project_proposal_instance(
                frame_index)
        else:
            projected_last_instance = self.proposal_instances[
                last_key_instance_index]

        iou_per_proposal = pairwise_iou(
            proposal_instances.pred_boxes,
            projected_last_instance.pred_boxes).squeeze()
        result_iou_mask = (iou_per_proposal >
                           self.config.tubelet_iou_threshold)
        selection_mask = prior_selection_mask & result_iou_mask

        proj_tensor = projected_last_instance.pred_boxes.tensor
        projection_dim = proj_tensor[:, 2:] - proj_tensor[:, :2]
        prop_tensor = proposal_instances.pred_boxes.tensor
        proposals_dim = prop_tensor[:, 2:] - prop_tensor[:, :2]
        selection_mask_rel = selection_mask_abs = selection_mask
        if self.config.max_dimension_change_ratio is not None:
            dim_ratios = proposals_dim / projection_dim
            selection_mask_rel = torch.all(
                (dim_ratios > 1 - self.config.max_dimension_change_ratio) &
                (dim_ratios < 1 + self.config.max_dimension_change_ratio),
                dim=1)
        if self.config.max_dimension_change_abs is not None:
            dim_diffs = torch.abs(proposals_dim - projection_dim)
            selection_mask_abs = torch.all(
                dim_diffs < self.config.max_dimension_change_abs, dim=1)
        selection_mask &= (selection_mask_rel | selection_mask_abs)

        if self.config.extend_class_only:
            # Ignore all proposals of different classes
            selection_mask &= (proposal_instances.pred_classes ==
                               self.config.class_index_to_detect)

        class_probs = proposal_instances.class_distributions[
            :, self.config.class_index_to_detect]
        if self.config.min_extension_probability is not None:
            selection_mask &= (
                class_probs > self.config.min_extension_probability)

        last_index = self.frame_ids[-1]
        if (last_index + 1 != frame_index and
                self.config.min_extension_probability_after_skipped_frames
                is not None):
            # stricter probability gate when frames were skipped in between
            selection_mask &= (
                class_probs >
                self.config.min_extension_probability_after_skipped_frames)

        iou_per_proposal[~selection_mask] = 0
        if not torch.any(iou_per_proposal > 0):
            # Not a single proposal survived the selection process
            return result_iou_mask
        extension_candidate_proposal_index = int(
            iou_per_proposal.argmax())  # TODO: Add class probabilities?

        for j in range(last_index + 1, frame_index):
            self.frame_ids.append(j)
            self.proposal_instances.append(None)
            self.proposal_instances_ious.append(0)
            self.last_key_instance_index.append(last_key_instance_index)
            self.projected_proposal_instances.append(projected_last_instance)

        pi = proposal_instances[extension_candidate_proposal_index]
        pi.generation_process = ["T+"] if frame_index > 0 else ["T-"]

        self.frame_ids.append(frame_index)
        self.proposal_instances.append(pi)
        self.proposal_instances_ious.append(
            iou_per_proposal[extension_candidate_proposal_index])
        self.last_key_instance_index.append(len(self.last_key_instance_index))
        self.projected_proposal_instances.append(projected_last_instance)
        return result_iou_mask
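A standalone sketch of the width/height gate used above; the limit values are hypothetical, and projected_box plays the role of projected_last_instance.pred_boxes.tensor:

import torch

def dimension_gate(proposal_boxes, projected_box, max_ratio=0.3, max_abs=40.0):
    """Keep proposals whose dimensions stay close to the projected box.

    proposal_boxes: (N, 4), projected_box: (1, 4), both in XYXY format.
    Mirrors the tubelet code: a proposal passes if either the relative or
    the absolute dimension-change test succeeds.
    """
    prop_dim = proposal_boxes[:, 2:] - proposal_boxes[:, :2]
    proj_dim = projected_box[:, 2:] - projected_box[:, :2]
    ratios = prop_dim / proj_dim
    ratio_ok = torch.all((ratios > 1 - max_ratio) & (ratios < 1 + max_ratio), dim=1)
    abs_ok = torch.all(torch.abs(prop_dim - proj_dim) < max_abs, dim=1)
    return ratio_ok | abs_ok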