def build_cost_matrix(self, instances: Instances, prev_instances: Instances) -> np.ndarray:
    """
    Build the cost matrix for the assignment problem
    (https://en.wikipedia.org/wiki/Assignment_problem).

    Args:
        instances: D2 Instances, for current frame predictions
        prev_instances: D2 Instances, for previous frame predictions
    Return:
        the cost matrix in numpy array
    """
    assert instances is not None and prev_instances is not None
    # calculate IoU of all bbox pairs
    iou_all = pairwise_iou(
        boxes1=instances.pred_boxes,
        boxes2=prev_instances.pred_boxes,
    )
    bbox_pairs = create_prediction_pairs(
        instances,
        prev_instances,
        iou_all,
        threshold=self._track_iou_threshold,
    )
    # assign a large cost value so that pairs below the IoU threshold won't be matched
    cost_matrix = np.full((len(instances), len(prev_instances)), LARGE_COST_VALUE)
    return self.assign_cost_matrix_values(cost_matrix, bbox_pairs)
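# A minimal usage sketch for the cost matrix above, assuming a `tracker` object
# exposing build_cost_matrix() and using SciPy's linear_sum_assignment as the
# Hungarian solver. The function and argument names below are illustrative, not
# part of the original tracker.
from scipy.optimize import linear_sum_assignment

def solve_assignment_sketch(tracker, curr_instances, prev_instances, large_cost=1e5):
    cost_matrix = tracker.build_cost_matrix(curr_instances, prev_instances)
    # Hungarian matching: minimizes the total cost over one-to-one pairs
    row_idx, col_idx = linear_sum_assignment(cost_matrix)
    # drop pairs that were only "matched" through the LARGE_COST_VALUE filler
    keep = cost_matrix[row_idx, col_idx] < large_cost
    return row_idx[keep], col_idx[keep]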
def non_maximum_regression_loss_stage1(box_reg_delta, proposals, box_score,
                                       score_gap=0.2, reg_iou=0.6,
                                       box2box_translation=None):
    device = box_score.device  # follow the input tensor's device rather than hard-coding CUDA
    loss = torch.zeros(1, device=device)
    # top-1 scored proposal per row of box_score
    box_score_topk, box_score_index = torch.topk(box_score, k=1, dim=1)
    box_score_topk = box_score_topk.reshape(-1)
    box_score_index = box_score_index.reshape(-1)
    proposals_max = proposals[box_score_index]  # (M, 4)
    proposals_ious = pairwise_iou(proposals_max, proposals)
    # exclude each top proposal from acting as its own regression target
    proposals_ious[proposals_ious == 1] = 0
    proposals_ious = (proposals_ious >= reg_iou).float()
    if proposals_ious.sum():
        vx, vy = proposals_ious.nonzero().transpose(0, 1)
        proposals_reg_target = proposals_max[vx]
        box_score_targets = box_score_topk[vx]
        box_score_reg = box_score[vx, vy].reshape(-1)
        # only regress proposals whose score trails the top score by at least score_gap
        score_att = (box_score_targets - box_score_reg) >= score_gap
        proposals_need_reg = proposals[vy]
        proposal_target = box2box_translation.get_deltas(
            proposals_need_reg.tensor, proposals_reg_target.tensor)
        reg_delta = box_reg_delta[vx, vy]
        loss += weighted_smooth_l1_loss(reg_delta, proposal_target, beta=1,
                                        weight=score_att.float(), reduction='mean')
    return loss
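# `weighted_smooth_l1_loss` is called above but not defined in this file. A minimal
# sketch of a per-row-weighted smooth L1 consistent with the call sites (beta,
# weight, reduction) might look like the following; this is an assumption about the
# helper's behavior, not the repository's actual implementation.
import torch

def weighted_smooth_l1_loss_sketch(input, target, beta=1.0, weight=None, reduction='mean'):
    diff = torch.abs(input - target)
    # classic smooth L1: quadratic below beta, linear above
    loss = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    if weight is not None:
        # broadcast a per-row weight over the 4 box coordinates
        loss = loss * weight.reshape(-1, 1)
    if reduction == 'mean':
        return loss.mean()
    if reduction == 'sum':
        return loss.sum()
    return loss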
def update(self, instances: Instances) -> Instances:
    """
    See BaseTracker description
    """
    if instances.has("pred_keypoints"):
        raise NotImplementedError("Need to add support for keypoints")
    instances = self._initialize_extra_fields(instances)
    if self._prev_instances is not None:
        # calculate IoU of all bbox pairs
        iou_all = pairwise_iou(
            boxes1=instances.pred_boxes,
            boxes2=self._prev_instances.pred_boxes,
        )
        # sort IoU in descending order
        bbox_pairs = self._create_prediction_pairs(instances, iou_all)
        # assign previous ID to current bbox if IoU > track_iou_threshold
        self._reset_fields()
        for bbox_pair in bbox_pairs:
            if bbox_pair["IoU"] < self._track_iou_threshold:
                continue
            idx = bbox_pair["idx"]
            prev_id = bbox_pair["prev_id"]
            if idx in self._matched_idx or prev_id in self._matched_ID:
                continue
            instances.ID[idx] = prev_id
            instances.ID_period[idx] = bbox_pair["prev_period"] + 1
            instances.lost_frame_count[idx] = 0
            self._matched_idx.add(idx)
            self._matched_ID.add(prev_id)
            self._untracked_prev_idx.remove(bbox_pair["prev_idx"])
        instances = self._assign_new_id(instances)
        instances = self._merge_untracked_instances(instances)
    self._prev_instances = copy.deepcopy(instances)
    return instances
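# A minimal usage sketch for the tracker above, assuming a detectron2-style
# predictor whose output dict carries "instances"; the predictor/tracker/frames
# names are illustrative, not from the original code.
def track_frames_sketch(predictor, tracker, frames):
    results = []
    for frame in frames:
        instances = predictor(frame)["instances"].to("cpu")
        # update() matches detections to the previous frame by IoU and maintains
        # the ID / ID_period / lost_frame_count fields per detection
        results.append(tracker.update(instances))
    return results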
def match_targets_to_proposals(self, proposal, target):
    match_quality_matrix = pairwise_iou(target, proposal)
    matched_idxs = self.proposal_matcher(match_quality_matrix)
    # Mask RCNN needs "labels" and "masks" fields for creating the targets
    target = target.copy_with_fields(["labels", "masks", "char_masks"])
    # get the corresponding GT target for each proposal
    # NB: need to clamp the indices because we can have a single
    # GT in the image, and matched_idxs can be -2, which goes
    # out of bounds
    matched_targets = target[matched_idxs.clamp(min=0)]
    matched_targets.add_field("matched_idxs", matched_idxs)
    return matched_targets
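# Toy illustration of why the clamp above is needed: in maskrcnn-benchmark's
# Matcher, unmatched proposals are marked -1 (below the low threshold) or -2
# (between thresholds). With a single GT box, indexing target[-2] raises an
# out-of-bounds error, so the indices are clamped and the raw matched_idxs is kept
# as a field for later filtering. The values below are made up for illustration.
import torch

matched_idxs = torch.tensor([0, -1, -2, 0])
safe_idxs = matched_idxs.clamp(min=0)  # tensor([0, 0, 0, 0])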
def non_maximum_regression_loss_stage2_ada(box_reg_delta, proposals, box_score,
                                           score_gap=0.2, reg_iou=0.6,
                                           box2box_translation=None, atten_mask=None):
    device = box_score.device  # follow the input tensor's device rather than hard-coding CUDA
    loss = torch.zeros(1, device=device)
    box_score = box_score * atten_mask
    box_reg_delta = box_reg_delta.reshape(-1, 4)
    num_phrase, num_boxes = box_score.shape
    box_score = box_score.reshape(-1)
    box_score_topk, box_score_index = torch.topk(
        box_score.reshape(num_phrase, num_boxes), k=1, dim=1)
    box_score_topk = box_score_topk.reshape(-1)
    box_score_index = box_score_index.reshape(-1)
    # shift the per-phrase indices into the flattened (num_phrase * num_boxes) space
    box_score_index = box_score_index + torch.arange(num_phrase).to(device) * num_boxes
    proposals_max = proposals[box_score_index]  # (M, 4)
    proposals_ious = pairwise_iou(proposals_max, proposals)
    proposals_ious[proposals_ious == 1] = 0
    # block-diagonal mask: each phrase's top box only targets its own proposals
    keep_mat = torch.zeros_like(proposals_ious)
    for pid in range(num_phrase):
        keep_mat[pid, pid * num_boxes:(pid + 1) * num_boxes] = 1
    keep_mat = keep_mat * atten_mask.reshape(-1)
    proposals_ious = proposals_ious * keep_mat
    proposals_ious_flag = (proposals_ious >= reg_iou).float()
    if proposals_ious_flag.sum():
        vx, vy = proposals_ious_flag.nonzero().transpose(0, 1)
        proposals_reg_target = proposals_max[vx]
        proposals_need_reg = proposals[vy]
        box_score_target = box_score_topk[vx]
        box_score_reg = box_score[vy]
        score_att = (box_score_target - box_score_reg) >= score_gap
        proposal_target = box2box_translation.get_deltas(
            proposals_need_reg.tensor, proposals_reg_target.tensor)
        reg_delta = box_reg_delta[vy]
        loss += weighted_smooth_l1_loss(reg_delta, proposal_target, beta=1,
                                        weight=score_att.float(), reduction='mean')
    return loss
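# Tiny sketch of the block-diagonal keep_mat built above, with illustrative sizes
# num_phrase=2, num_boxes=3: row p is 1 only over columns [p*3, (p+1)*3).
import torch

num_phrase, num_boxes = 2, 3
keep_mat = torch.zeros(num_phrase, num_phrase * num_boxes)
for pid in range(num_phrase):
    keep_mat[pid, pid * num_boxes:(pid + 1) * num_boxes] = 1
# keep_mat:
# tensor([[1., 1., 1., 0., 0., 0.],
#         [0., 0., 0., 1., 1., 1.]])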
def generate_attention_entropy_loss(pred_sim, pred_topk_sim, targets,
                                    precomp_boxes, s2_topk=5):
    pred_sim = torch.clamp(pred_sim, min=1e-5)
    pred_topk_sim = torch.clamp(pred_topk_sim, min=1e-5)
    ious = pairwise_iou(targets, precomp_boxes)
    atten_topk = torch.topk(pred_sim, k=s2_topk, dim=1)[1]
    ious_topk = torch.gather(ious, dim=1, index=atten_topk)
    # soft ground-truth distribution: L1-normalized IoUs, zeroed below 0.5
    gt_score = F.normalize(ious * ious.ge(0.5).float(), p=1, dim=1)
    gt_score_topk = F.normalize(ious_topk * ious_topk.ge(0.5).float(), p=1, dim=1)
    # cross-entropy between the soft GT distribution and the predicted similarities
    cls_loss = -(gt_score * pred_sim.log()).mean()
    cls_loss_topk = -(gt_score_topk * pred_topk_sim.log()).mean()
    return cls_loss, cls_loss_topk
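# Toy invocation of the loss above on random data, assuming detectron2-style Boxes
# and 2 phrases with 8 precomputed proposals; all shapes and values are illustrative.
import torch
from detectron2.structures import Boxes

pred_sim = torch.softmax(torch.randn(2, 8), dim=1)       # phrase-to-box similarity
pred_topk_sim = torch.softmax(torch.randn(2, 5), dim=1)  # similarity over the top-5 boxes
targets = Boxes(torch.tensor([[0., 0., 10., 10.], [5., 5., 20., 20.]]))
xy = torch.rand(8, 2) * 10
wh = torch.rand(8, 2) * 10 + 1.0
precomp_boxes = Boxes(torch.cat([xy, xy + wh], dim=1))
# cls_loss, cls_loss_topk = generate_attention_entropy_loss(
#     pred_sim, pred_topk_sim, targets, precomp_boxes, s2_topk=5)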
def evaluate(self):
    """
    Evaluates Referring Segmentation IoU.
    """
    if self._distributed:
        synchronize()
        self._predictions = all_gather(self._predictions)
        if not is_main_process():
            return
        all_prediction = {}
        for p in self._predictions:
            all_prediction.update(p)
    else:
        all_prediction = self._predictions

    image_unique_ids = list(all_prediction.keys())
    total_num = 0
    recall_num = 0
    recall_t2_num = 0
    recall_t2_fusion_num = 0
    num_type = {}
    recall_type = {}
    acc_type = {}
    recall_topk_num = {5: 0, 10: 0}
    point_recall_num = 0
    point_recall_t2_num = 0
    point_recall_fusion_t2_num = 0

    for img_sent_id in image_unique_ids:
        result = all_prediction[img_sent_id]
        phrase_ids = result[0]
        phrase_types = result[1]
        pred_boxes = result[2]
        pred_similarity = result[3]
        targets = result[4]
        precomp_boxes = result[5]
        topk_pred_boxes = result[6]
        topk_fusion_pred_boxes = result[8]

        pred_boxes.clip()
        # NB: this call moves target_boxes to CUDA
        ious = pairwise_iou(targets, pred_boxes)
        iou = ious.numpy().diagonal()
        total_num += iou.shape[0]
        recall_num += int((iou >= cfg.MODEL.VG.EVAL_THRESH).sum())  # 0.5

        # pointing accuracy: predicted box center must fall inside the target box
        pred_boxes_tensor = pred_boxes.tensor
        pred_center = (pred_boxes_tensor[:, :2] + pred_boxes_tensor[:, 2:]) / 2.0
        pred_center = pred_center.repeat(1, 2)  # x_c, y_c, x_c, y_c
        targets_tensor = targets.tensor
        fall_tensor = targets_tensor - pred_center
        fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + \
            (fall_tensor[:, 2:] >= 0).float().sum(1)
        point_recall_num += (fall_tensor == 4).float().numpy().sum()

        topk_pred_boxes.clip()
        ious_topk = pairwise_iou(targets, topk_pred_boxes)
        recall_t2_num += int(
            (ious_topk.numpy().diagonal() > cfg.MODEL.VG.EVAL_THRESH).sum())
        topk_boxes_tensor = topk_pred_boxes.tensor
        pred_center = (topk_boxes_tensor[:, :2] + topk_boxes_tensor[:, 2:]) / 2.0
        pred_center = pred_center.repeat(1, 2)  # x_c, y_c, x_c, y_c
        fall_tensor = targets_tensor - pred_center
        fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + \
            (fall_tensor[:, 2:] >= 0).float().sum(1)
        point_recall_t2_num += (fall_tensor == 4).float().numpy().sum()

        topk_fusion_pred_boxes.clip()
        ious_fusion_topk = pairwise_iou(targets, topk_fusion_pred_boxes)
        recall_t2_fusion_num += int(
            (ious_fusion_topk.numpy().diagonal() > cfg.MODEL.VG.EVAL_THRESH).sum())
        topk_fusion_boxes_tensor = topk_fusion_pred_boxes.tensor
        pred_center = (topk_fusion_boxes_tensor[:, :2] +
                       topk_fusion_boxes_tensor[:, 2:]) / 2.0
        pred_center = pred_center.repeat(1, 2)  # x_c, y_c, x_c, y_c
        fall_tensor = targets_tensor - pred_center
        fall_tensor = (fall_tensor[:, :2] <= 0).float().sum(1) + \
            (fall_tensor[:, 2:] >= 0).float().sum(1)
        point_recall_fusion_t2_num += (fall_tensor == 4).float().numpy().sum()

        for pid, p_type in enumerate(phrase_types):
            p_type = p_type[0]
            num_type[p_type] = num_type.setdefault(p_type, 0) + 1
            recall_type[p_type] = recall_type.setdefault(p_type, 0) + \
                (iou[pid] >= cfg.MODEL.VG.EVAL_THRESH)

        precomp_boxes.clip()
        ious_top = pairwise_iou(targets, precomp_boxes).cpu()
        for k in [5, 10]:
            top_k = torch.topk(pred_similarity, k=k, dim=1)[0][:, [-1]]
            pred_similarity_topk = (pred_similarity >= top_k).float()
            ious_top_k = (ious_top * pred_similarity_topk).numpy()
            recall_topk_num[k] += int(
                ((ious_top_k >= cfg.MODEL.VG.EVAL_THRESH).sum(1) > 0).sum())

    acc = recall_num / total_num
    acc_top5 = recall_topk_num[5] / total_num
    acc_top10 = recall_topk_num[10] / total_num
    acc_s2 = recall_t2_num / total_num
    acc_s2_fusion = recall_t2_fusion_num / total_num
    point_acc = point_recall_num / total_num
    point_acc_s2 = point_recall_t2_num / total_num
    point_acc_s2_fusion = point_recall_fusion_t2_num / total_num
    for type, type_num in num_type.items():
        acc_type[type] = recall_type[type] / type_num

    if self._output_dir:
        PathManager.mkdirs(self._output_dir)
        file_path = os.path.join(
            self._output_dir,
            "prediction_{}.pkl".format(str(acc).replace('.', '_')[:6]))
        with PathManager.open(file_path, "wb") as f:
            pickle.dump(all_prediction, f)
    del all_prediction

    self._logger.info(
        'evaluation on {} expression instances, detailed_iou: {}'.format(
            len(image_unique_ids), acc_type))
    self._logger.info(
        'Evaluate Pointing Accuracy: PointAcc:{}, PointAccS2:{}, PointAccS2Fusion:{}'
        .format(point_acc, point_acc_s2, point_acc_s2_fusion))

    results = OrderedDict({
        "acc": acc,
        "acc_top5": acc_top5,
        "acc_top10": acc_top10,
        "acc_s2": acc_s2,
        "acc_s2_fusion": acc_s2_fusion,
    })
    return results
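# The fall_tensor arithmetic in evaluate() is somewhat opaque; a minimal standalone
# sketch of the same "pointing accuracy" test (predicted box center falls inside
# the target box), written with explicit comparisons. Function name is illustrative.
import torch

def center_in_box_sketch(pred_boxes_tensor, targets_tensor):
    # boxes are (x1, y1, x2, y2); compute the center of each predicted box
    cx = (pred_boxes_tensor[:, 0] + pred_boxes_tensor[:, 2]) / 2.0
    cy = (pred_boxes_tensor[:, 1] + pred_boxes_tensor[:, 3]) / 2.0
    inside = (targets_tensor[:, 0] <= cx) & (cx <= targets_tensor[:, 2]) & \
             (targets_tensor[:, 1] <= cy) & (cy <= targets_tensor[:, 3])
    return inside  # bool tensor; inside.float().sum() matches the (fall_tensor == 4) count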
def topk_iou_boxes(self, candidates: Boxes, targets: Boxes, k=1):
    # for each target, select the k candidate boxes with the highest IoU
    iou_matrix = pairwise_iou(candidates, targets)
    _, topk_idxs = iou_matrix.topk(k, dim=0)
    return candidates.tensor[topk_idxs], topk_idxs
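# Quick usage sketch for topk_iou_boxes, assuming detectron2-style Boxes; the call
# site below is illustrative, not from the original repository.
import torch
from detectron2.structures import Boxes

candidates = Boxes(torch.tensor([[0., 0., 10., 10.],
                                 [1., 1., 9., 9.],
                                 [50., 50., 60., 60.]]))
targets = Boxes(torch.tensor([[0., 0., 10., 10.]]))
# boxes, idxs = self.topk_iou_boxes(candidates, targets, k=2)
# idxs has shape (k, num_targets); here it would be [[0], [1]], the two
# candidates overlapping the single target most.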
def extend_after(self, frame_index, proposal_instances, prior_selection_mask):
    result_iou_mask = torch.zeros(prior_selection_mask.shape, dtype=torch.bool)
    if self.is_dead(frame_index):
        return result_iou_mask
    if not torch.any(prior_selection_mask):
        return result_iou_mask

    last_key_instance_index = self.last_key_instance_index[-1]
    if self.config.perform_projection:
        projected_last_instance = self.project_proposal_instance(frame_index)
    else:
        projected_last_instance = self.proposal_instances[last_key_instance_index]

    iou_per_proposal = pairwise_iou(
        proposal_instances.pred_boxes,
        projected_last_instance.pred_boxes).squeeze()
    result_iou_mask = (iou_per_proposal > self.config.tubelet_iou_threshold)
    selection_mask = prior_selection_mask & result_iou_mask

    # width/height of the projected box and of every proposal
    projection_dim = (projected_last_instance.pred_boxes.tensor[:, 2:]
                      - projected_last_instance.pred_boxes.tensor[:, :2])
    proposals_dim = (proposal_instances.pred_boxes.tensor[:, 2:]
                     - proposal_instances.pred_boxes.tensor[:, :2])
    selection_mask_rel = selection_mask_abs = selection_mask
    if self.config.max_dimension_change_ratio is not None:
        dim_ratios = proposals_dim / projection_dim
        selection_mask_rel = torch.all(
            (dim_ratios > 1 - self.config.max_dimension_change_ratio)
            & (dim_ratios < 1 + self.config.max_dimension_change_ratio),
            dim=1)
    if self.config.max_dimension_change_abs is not None:
        dim_diffs = torch.abs(proposals_dim - projection_dim)
        selection_mask_abs = torch.all(
            dim_diffs < self.config.max_dimension_change_abs, dim=1)
    selection_mask &= (selection_mask_rel | selection_mask_abs)

    if self.config.extend_class_only:
        # Ignore all proposals of different classes
        selection_mask &= (proposal_instances.pred_classes
                           == self.config.class_index_to_detect)
    if self.config.min_extension_probability is not None:
        selection_mask &= (
            proposal_instances.class_distributions[:, self.config.class_index_to_detect]
            > self.config.min_extension_probability)
    last_index = self.frame_ids[-1]
    if (last_index + 1 != frame_index
            and self.config.min_extension_probability_after_skipped_frames is not None):
        selection_mask &= (
            proposal_instances.class_distributions[:, self.config.class_index_to_detect]
            > self.config.min_extension_probability_after_skipped_frames)

    iou_per_proposal[~selection_mask] = 0
    if not torch.any(iou_per_proposal > 0):
        # Not a single proposal survived the selection process
        return result_iou_mask

    extension_candidate_proposal_index = int(iou_per_proposal.argmax())
    # TODO: Add class probabilities?

    # fill any skipped frames with placeholders before appending the new key instance
    for j in range(last_index + 1, frame_index):
        self.frame_ids.append(j)
        self.proposal_instances.append(None)
        self.proposal_instances_ious.append(0)
        self.last_key_instance_index.append(last_key_instance_index)
        self.projected_proposal_instances.append(projected_last_instance)

    pi = proposal_instances[extension_candidate_proposal_index]
    pi.generation_process = ["T+"] if frame_index > 0 else ["T-"]
    self.frame_ids.append(frame_index)
    self.proposal_instances.append(pi)
    self.proposal_instances_ious.append(
        iou_per_proposal[extension_candidate_proposal_index])
    self.last_key_instance_index.append(len(self.last_key_instance_index))
    self.projected_proposal_instances.append(projected_last_instance)
    return result_iou_mask
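# Every function above leans on pairwise_iou. A minimal reference sketch of
# detectron2-style pairwise IoU between two box sets, producing an (N, M) matrix;
# this mirrors the standard definition, not necessarily the library's exact code.
import torch

def pairwise_iou_sketch(boxes1, boxes2):
    # boxes1: (N, 4), boxes2: (M, 4), both in (x1, y1, x2, y2) format
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, None, :2], boxes2[None, :, :2])  # (N, M, 2) top-left
    rb = torch.min(boxes1[:, None, 2:], boxes2[None, :, 2:])  # (N, M, 2) bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, :, 0] * wh[:, :, 1]  # (N, M) intersection areas
    union = area1[:, None] + area2[None, :] - inter
    # guard against empty boxes producing a zero union
    return torch.where(union > 0, inter / union, torch.zeros_like(inter))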