def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Produce sampled anchor labels and regression offsets for a batch.

    All tensors in ``anchors_list`` are concatenated along axis 0. For every
    image the anchors are matched to that image's ground-truth boxes with
    ``self.matcher``, regression offsets are encoded with ``self.box_coder``,
    and the labels go through two ``layers.sample_labels`` passes.

    Args:
        anchors_list: sequence of anchor tensors to concatenate along axis 0.
        batched_gt_boxes: ground-truth tensor indexed by image first; the
            first four columns of each row are used as the box.
        batched_num_gts: per-image number of leading ground-truth rows to use.

    Returns:
        ``(labels, offsets)``: per-image results concatenated along axis 0,
        both detached.
    """
    all_anchors = F.concat(anchors_list, axis=0)
    per_image_labels = []
    per_image_offsets = []

    num_images = batched_gt_boxes.shape[0]
    for image_idx in range(num_images):
        image_gt = batched_gt_boxes[image_idx, :batched_num_gts[image_idx]]

        iou_matrix = layers.get_iou(image_gt[:, :4], all_anchors)
        gt_index_per_anchor, anchor_labels = self.matcher(iou_matrix)

        matched_boxes = image_gt[gt_index_per_anchor, :4]
        reg_targets = self.box_coder.encode(all_anchors, matched_boxes)

        # First pass: sample label 1 with the configured positive quota.
        # NOTE(review): presumably sample_labels resets surplus anchors to
        # the last argument (-1) -- confirm against layers.sample_labels.
        positive_quota = int(
            self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio
        )
        anchor_labels = layers.sample_labels(anchor_labels, positive_quota, 1, -1)

        # Second pass: the quota for label 0 is whatever the positives that
        # survived the first pass leave of the total sample budget.
        surviving_positive = (anchor_labels == 1).sum().astype(np.int32)
        negative_quota = self.cfg.num_sample_anchors - surviving_positive
        anchor_labels = layers.sample_labels(anchor_labels, negative_quota, 0, -1)

        per_image_labels.append(anchor_labels)
        per_image_offsets.append(reg_targets)

    labels = F.concat(per_image_labels, axis=0).detach()
    offsets = F.concat(per_image_offsets, axis=0).detach()
    return labels, offsets
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample RoIs per image and build their labels and box targets.

    Outside training mode the proposals are passed through untouched.

    Args:
        rpn_rois: proposals; column 0 is compared with the image index and
            columns 1:5 are used as the box.
        im_info: per-image info; entry ``[bid, 4]`` is cast to int32 and used
            as the number of valid ground-truth rows.
        gt_boxes: ground truth indexed by image first; columns 0:4 are the
            box and column 4 is read as the label.

    Returns:
        ``(rois, labels, bbox_targets)`` concatenated over images along
        axis 0 and detached, or ``(rpn_rois, None, None)`` when
        ``self.training`` is false.
    """
    if not self.training:
        return rpn_rois, None, None

    cfg = self.cfg
    rois_per_image = []
    labels_per_image = []
    targets_per_image = []

    for image_idx in range(gt_boxes.shape[0]):
        valid_count = im_info[image_idx, 4].astype("int32")
        image_gt = gt_boxes[image_idx, :valid_count, :]

        # Ground-truth boxes join the candidate pool as extra proposals,
        # laid out like the proposals: [batch_id, x1, y1, x2, y2].
        gt_batch_col = F.full((image_gt.shape[0], 1), image_idx)
        gt_as_rois = F.concat([gt_batch_col, image_gt[:, :4]], axis=1)
        belongs_to_image = rpn_rois[:, 0] == image_idx
        candidates = F.concat([rpn_rois[belongs_to_image], gt_as_rois])

        iou = layers.get_iou(candidates[:, 1:5], image_gt)
        best_iou = iou.max(axis=1)
        best_gt = F.argmax(iou, axis=1).astype("int32")
        roi_labels = image_gt[best_gt, 4]

        # Foreground needs a high overlap and a non-negative matched label;
        # background is the overlap band [bg_threshold_low, bg_threshold_high).
        is_fg = (best_iou >= cfg.fg_threshold) & (roi_labels >= 0)
        is_bg = (best_iou >= cfg.bg_threshold_low) & (
            best_iou < cfg.bg_threshold_high
        )

        # NOTE(review): sample_mask_from_labels presumably returns a 0/1 mask
        # with at most the requested number of entries set -- confirm.
        fg_quota = int(cfg.num_rois * cfg.fg_ratio)
        fg_sampled = layers.sample_mask_from_labels(is_fg, fg_quota, 1)
        bg_quota = int(cfg.num_rois - fg_sampled.sum())
        bg_sampled = layers.sample_mask_from_labels(is_bg, bg_quota, 1)

        # Everything not sampled as foreground gets label 0.
        roi_labels = roi_labels * fg_sampled

        selected = fg_sampled + bg_sampled
        _, keep_inds = F.cond_take(selected == 1, selected)
        # Cap the kept indices at num_rois to avoid exceeding memory.
        keep_inds = keep_inds[:min(cfg.num_rois, keep_inds.shape[0])]

        kept_labels = roi_labels[keep_inds].astype("int32")
        kept_rois = candidates[keep_inds]
        matched_boxes = image_gt[best_gt[keep_inds], :4]
        targets = self.box_coder.encode(kept_rois[:, 1:5], matched_boxes)
        targets = targets.reshape(-1, 4)

        rois_per_image.append(kept_rois)
        labels_per_image.append(kept_labels)
        targets_per_image.append(targets)

    rois = F.concat(rois_per_image, axis=0).detach()
    labels = F.concat(labels_per_image, axis=0).detach()
    bbox_targets = F.concat(targets_per_image, axis=0).detach()
    return rois, labels, bbox_targets
def get_ground_truth(self, anchors, batched_gt_boxes, batched_valid_gt_box_number):
    """Assign a class label and box target to every anchor of every image.

    Iterates over ``self.batch_size`` images. Per anchor, the final label is
    the class of its best-overlapping ground truth, 0 when the best overlap is
    below ``cfg.negative_thresh``, and -1 when it lies between
    ``cfg.negative_thresh`` and ``cfg.positive_thresh``.

    Args:
        anchors: anchor tensor; ``shape[0]`` is the anchor count.
        batched_gt_boxes: ground truth indexed by image first; columns 0:4
            are the box and column 4 is read as the class.
        batched_valid_gt_box_number: per-image number of leading valid rows.

    Returns:
        ``(labels, bbox_targets)``: per-image results with a new leading
        axis, concatenated along it and wrapped in ``F.zero_grad``.
    """
    num_anchors = anchors.shape[0]
    per_image_labels = []
    per_image_targets = []

    for image_idx in range(self.batch_size):
        image_gt = batched_gt_boxes[
            image_idx, : batched_valid_gt_box_number[image_idx]
        ]

        iou = layers.get_iou(anchors, image_gt[:, :4])
        best_gt = F.argmax(iou, axis=1)

        # Pick each anchor's best IoU by pairing row i with best_gt[i].
        row_index = F.linspace(0, num_anchors - 1, num_anchors).astype(np.int32)
        best_iou = iou.ai[row_index, best_gt]

        # Tri-state anchor status built arithmetically from an all -1 start:
        # below negative_thresh -> 0, at/above positive_thresh -> 1, else -1.
        status = mge.tensor([-1]).broadcast(num_anchors)
        status = status * (best_iou >= self.cfg.negative_thresh)
        is_positive = best_iou >= self.cfg.positive_thresh
        status = status * (best_iou < self.cfg.positive_thresh) + is_positive

        targets = self.box_coder.encode(anchors, image_gt.ai[best_gt, :4])

        # Start from the matched class, zero it where status == 0, then force
        # -1 where status == -1.
        class_labels = image_gt.ai[best_gt, 4]
        is_background = F.less_equal(F.abs(status), 1e-5)
        class_labels = class_labels * (1.0 - is_background)
        is_ignored = F.less_equal(F.abs(status + 1), 1e-5)
        class_labels = class_labels * (1 - is_ignored) - is_ignored

        if self.cfg.allow_low_quality:
            # The anchor with the highest IoU for each ground truth takes
            # that ground truth's class and box target.
            best_anchor_per_gt = F.argmax(iou, axis=0)
            class_labels = class_labels.set_ai(image_gt[:, 4])[best_anchor_per_gt]
            low_quality_targets = self.box_coder.encode(
                anchors.ai[best_anchor_per_gt, :], image_gt[:, :4]
            )
            targets = targets.set_ai(low_quality_targets)[best_anchor_per_gt, :]

        per_image_labels.append(F.add_axis(class_labels, 0))
        per_image_targets.append(F.add_axis(targets, 0))

    labels = F.zero_grad(F.concat(per_image_labels, axis=0))
    bbox_targets = F.zero_grad(F.concat(per_image_targets, axis=0))
    return labels, bbox_targets
def get_ground_truth(self, anchors, batched_gt_boxes, batched_num_gts):
    """Build per-anchor class labels and regression offsets for a batch.

    Each image's ground truth is matched to the anchors with ``self.matcher``.
    Anchors whose matcher label equals 1 have it replaced by the class of
    their matched ground truth (column 4); other labels stay as the matcher
    returned them.

    Args:
        anchors: anchor tensor shared by all images.
        batched_gt_boxes: ground truth indexed by image first; columns 0:4
            are the box and column 4 is read as the class.
        batched_num_gts: per-image number of leading ground-truth rows to use.

    Returns:
        ``(labels, offsets)``: per-image results stacked along a new axis 0,
        both detached.
    """
    per_image_labels = []
    per_image_offsets = []

    num_images = batched_gt_boxes.shape[0]
    for image_idx in range(num_images):
        image_gt = batched_gt_boxes[image_idx, :batched_num_gts[image_idx]]

        iou_matrix = layers.get_iou(image_gt[:, :4], anchors)
        gt_index_per_anchor, anchor_labels = self.matcher(iou_matrix)
        matched_gt = image_gt[gt_index_per_anchor]

        reg_targets = self.box_coder.encode(anchors, matched_gt[:, :4])

        # Replace the matcher's label 1 by the matched ground truth's class.
        is_foreground = anchor_labels == 1
        anchor_labels[is_foreground] = matched_gt[is_foreground, 4].astype(np.int32)

        per_image_labels.append(anchor_labels)
        per_image_offsets.append(reg_targets)

    labels = F.stack(per_image_labels, axis=0).detach()
    offsets = F.stack(per_image_offsets, axis=0).detach()
    return labels, offsets
def per_level_gt(self, gt_boxes, im_info, anchors, allow_low_quality_matches=True):
    """Label one image's anchors and compute their box targets.

    Labels start as ``cfg.ignore_label`` for every anchor, become 0 where the
    best overlap is below ``cfg.rpn_negative_overlap``, and become 1 where the
    best overlap reaches ``cfg.rpn_positive_overlap``.

    Args:
        gt_boxes: ground-truth rows for one image; columns 0:4 are the box.
        im_info: image info; entry 4 is used as the number of valid rows.
        anchors: anchor tensor; ``shape[0]`` is the anchor count.
        allow_low_quality_matches: when true, the best anchor for each ground
            truth is matched to it and treated as overlap 1.0.

    Returns:
        ``(labels, bbox_targets)`` for the given anchors.
    """
    unmatched_value = self.cfg.ignore_label
    valid_gt = gt_boxes[:im_info[4], :]

    iou = layers.get_iou(anchors, valid_gt[:, :4])
    num_anchors = anchors.shape[0]
    best_iou = F.max(iou, axis=1)
    best_gt = F.argmax(iou, axis=1)

    # Everything starts as "ignore"; multiplying by the comparison zeroes the
    # anchors whose best overlap is below the negative threshold.
    labels = mge.ones(num_anchors).astype("int32") * unmatched_value
    labels = labels * (best_iou >= self.cfg.rpn_negative_overlap)

    one = mge.tensor(1.0)
    if allow_low_quality_matches:
        # Force a match for every ground truth: its best anchor is assigned
        # to it and given overlap 1.0 so it passes the positive threshold.
        # NOTE(review): set_ai(...)[idx] is the legacy MegEngine indexed
        # setter; presumably returns the updated tensor -- confirm.
        best_anchor_per_gt = F.argmax(iou, axis=0)
        gt_count = valid_gt.shapeof(0)
        gt_ids = F.linspace(0, gt_count - 1, gt_count).astype("int32")
        best_gt = best_gt.set_ai(gt_ids)[best_anchor_per_gt]
        best_iou = best_iou.set_ai(one.broadcast(gt_count))[best_anchor_per_gt]

    # Evaluated after the optional override so forced matches are included.
    is_positive = best_iou >= self.cfg.rpn_positive_overlap
    _, positive_inds = F.cond_take(is_positive == 1, is_positive)
    labels = labels.set_ai(one.broadcast(positive_inds.shapeof(0)))[positive_inds]

    bbox_targets = self.box_coder.encode(anchors, valid_gt.ai[best_gt, :4])
    return labels, bbox_targets
def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
    """Assign labels, box offsets and centerness targets to all anchors.

    For each image, every pyramid level contributes ``cfg.anchor_topk``
    candidate anchors per ground truth, chosen by ``F.topk`` on the distance
    between anchor and ground-truth centre. A candidate is foreground for a
    ground truth when its IoU reaches that ground truth's mean + std of
    candidate IoUs and ``self.point_coder.encode`` gives strictly positive
    values on its last axis. Each anchor takes the ground truth with the
    highest remaining IoU; anchors with none get label 0.

    Args:
        anchors_list: one anchor tensor per pyramid level, paired in order
            with ``cfg.stride``.
            NOTE(review): the code treats each as (N, 2) points -- confirm.
        batched_gt_boxes: ground truth indexed by image first; columns 0:4
            are the box and column 4 is read as the class.
        batched_num_gts: per-image number of leading ground-truth rows to use.

    Returns:
        ``(labels, offsets, ctrness)``: per-image results stacked along a new
        axis 0, all detached.
    """
    labels_list = []
    offsets_list = []
    ctrness_list = []

    all_level_anchors = F.concat(anchors_list, axis=0)
    for bid in range(batched_gt_boxes.shape[0]):
        gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

        # Ground-truth centres depend only on the image, so compute them once
        # here instead of once per pyramid level.
        gt_centers = F.expand_dims(
            (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2, axis=1
        )

        ious = []
        candidate_idxs = []
        base = 0  # offset of the current level inside all_level_anchors
        for stride, anchors_i in zip(self.cfg.stride, anchors_list):
            # Expand each anchor into a square box of side stride * anchor_scale.
            half_extent = stride * self.cfg.anchor_scale / 2
            level_boxes = F.concat(
                [anchors_i - half_extent, anchors_i + half_extent], axis=1
            )
            ious.append(layers.get_iou(gt_boxes[:, :4], level_boxes))

            distances = F.sqrt(F.sum((gt_centers - anchors_i) ** 2, axis=2))
            # NOTE(review): relies on F.topk's default ordering to pick the
            # closest anchors -- confirm against the MegEngine docs.
            _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
            candidate_idxs.append(base + topk_idxs)
            base += anchors_i.shape[0]
        ious = F.concat(ious, axis=1)
        candidate_idxs = F.concat(candidate_idxs, axis=1)

        # Per-ground-truth adaptive threshold: mean + std of candidate IoUs.
        candidate_ious = F.gather(ious, 1, candidate_idxs)
        ious_thr = (
            F.mean(candidate_ious, axis=1, keepdims=True)
            + F.std(candidate_ious, axis=1, keepdims=True)
        )
        is_candidate = F.scatter(
            F.zeros(ious.shape), 1, candidate_idxs, F.ones(candidate_idxs.shape)
        ).astype(bool)
        is_foreground = is_candidate & (ious >= ious_thr)

        # Keep only anchors whose smallest encoded value is positive.
        is_in_boxes = F.min(
            self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)
            ),
            axis=2,
        ) > 0

        # -1 marks (gt, anchor) pairs that cannot be matched.
        ious[~is_foreground] = -1
        ious[~is_in_boxes] = -1

        match_indices = F.argmax(ious, axis=0)
        gt_boxes_matched = gt_boxes[match_indices]
        anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

        labels = gt_boxes_matched[:, 4].astype(np.int32)
        labels[anchor_max_iou == -1] = 0  # no valid match -> label 0
        offsets = self.point_coder.encode(
            all_level_anchors, gt_boxes_matched[:, :4]
        )

        # Centerness: sqrt of the min/max ratio of offset columns 0 and 2
        # times that of columns 1 and 3, each ratio clipped at 0.
        left_right = offsets[:, [0, 2]]
        top_bottom = offsets[:, [1, 3]]
        ctrness = F.sqrt(
            F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1), lower=0)
            * F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1), lower=0)
        )

        labels_list.append(labels)
        offsets_list.append(offsets)
        ctrness_list.append(ctrness)

    return (
        F.stack(labels_list, axis=0).detach(),
        F.stack(offsets_list, axis=0).detach(),
        F.stack(ctrness_list, axis=0).detach(),
    )
def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes, im_info):  # pylint: disable=too-many-statements
    """Compute the positive-bag and negative-bag losses for a batch.

    For every image, each ground-truth box gets a bag of ``cfg.bucket_size``
    anchors chosen by IoU. The positive loss combines each bag's
    classification score with ``exp(-regression loss)``. The negative loss
    penalises scores weighted by one minus the probability that the decoded
    prediction matches a ground truth of that class.

    Args:
        anchors: anchor tensor shared by all images.
        pred_logits: classification logits, indexed by image first.
        pred_offsets: predicted box offsets, indexed by image first.
        gt_boxes: ground truth indexed by image first; columns 0:4 are the
            box and column 4 is the class id (0 is background, so ids are
            shifted by -1 before indexing classes).
        im_info: per-image info; column 4 is used as the number of valid
            ground-truth rows.

    Returns:
        Dict with keys ``"total_loss"``, ``"pos_loss"`` and ``"neg_loss"``.
    """

    def positive_bag_loss(logits, axis=1):
        # Clamp the denominator so a probability of exactly 1.0 gives a large
        # finite weight instead of inf (which normalises to NaN). The bound
        # is far below float32 resolution near 1, so other inputs are
        # unaffected.
        weight = 1.0 / F.maximum(1.0 - logits, 1e-12)
        weight /= weight.sum(axis=axis, keepdims=True)
        # Reduce along the same axis used for normalisation (was fixed at 1).
        bag_prob = (weight * logits).sum(axis=axis)
        return -layers.safelog(bag_prob)

    def negative_bag_loss(logits, gamma):
        return (logits ** gamma) * (-layers.safelog(1.0 - logits))

    pred_scores = F.sigmoid(pred_logits)
    box_prob_list = []
    positive_losses = []
    clamp_eps = 1e-7
    bucket_size = self.cfg.bucket_size

    for bid in range(im_info.shape[0]):
        boxes_info = gt_boxes[bid, : im_info[bid, 4].astype("int32")]
        # id 0 is used for background classes, so -1 first
        labels = boxes_info[:, 4].astype("int32") - 1

        pred_box = self.box_coder.decode(anchors, pred_offsets[bid]).detach()
        overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()

        # Rescale each ground truth's IoUs linearly from
        # [box_iou_threshold, its best IoU] to [0, 1]; clamp_eps keeps the
        # denominator strictly positive.
        thresh1 = self.cfg.box_iou_threshold
        thresh2 = F.clip(
            overlaps.max(axis=1, keepdims=True),
            lower=thresh1 + clamp_eps,
            upper=1.0,
        )
        gt_pred_prob = F.clip(
            (overlaps - thresh1) / (thresh2 - thresh1), lower=0, upper=1.0
        )

        image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
        # guarantee that nonzero_idx is not empty
        if gt_pred_prob.max() > clamp_eps:
            _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
            # cond_take yields flat indices; recover (gt, anchor) coordinates
            # from the row length.
            num_anchors = gt_pred_prob.shape[1]
            anchors_idx = nonzero_idx % num_anchors
            gt_idx = nonzero_idx // num_anchors
            image_boxes_prob[anchors_idx, labels[gt_idx]] = gt_pred_prob[
                gt_idx, anchors_idx
            ]

        box_prob_list.append(image_boxes_prob)

        # construct bags for objects
        match_quality_matrix = layers.get_iou(boxes_info[:, :4], anchors).detach()
        num_gt = match_quality_matrix.shape[0]
        _, matched_idx = F.topk(
            match_quality_matrix,
            k=bucket_size,
            descending=True,
            no_sort=True,
        )

        matched_idx = matched_idx.detach()
        matched_idx_flatten = matched_idx.reshape(-1)

        # For every bag member, pick the score of its ground truth's class.
        gather_idx = labels.reshape(-1, 1)
        gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))
        gather_src = pred_scores[bid, matched_idx_flatten]
        gather_src = gather_src.reshape(num_gt, bucket_size, -1)
        matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

        # Regression targets of each bag member against its ground truth.
        topk_anchors = anchors[matched_idx_flatten]
        boxes_broad_cast = F.broadcast_to(
            F.expand_dims(boxes_info[:, :4], axis=1), (num_gt, bucket_size, 4)
        ).reshape(-1, 4)
        matched_offsets = self.box_coder.encode(topk_anchors, boxes_broad_cast)

        reg_loss = layers.smooth_l1_loss(
            pred_offsets[bid, matched_idx_flatten],
            matched_offsets,
            beta=self.cfg.smooth_l1_beta,
        ).sum(axis=-1) * self.cfg.reg_loss_weight
        # Turn the regression loss into a (0, 1] localisation score.
        matched_reg_scores = F.exp(-reg_loss)

        positive_losses.append(
            positive_bag_loss(
                matched_score * matched_reg_scores.reshape(-1, bucket_size),
                axis=1,
            )
        )

    # Normalise by the number of ground-truth boxes, but never by less than 1.
    num_foreground = im_info[:, 4].sum()
    pos_loss = F.concat(positive_losses).sum() / F.maximum(1.0, num_foreground)
    box_probs = F.stack(box_prob_list, axis=0)

    neg_loss = negative_bag_loss(
        pred_scores * (1 - box_probs), self.cfg.focal_loss_gamma
    ).sum() / F.maximum(1.0, num_foreground * bucket_size)

    alpha = self.cfg.focal_loss_alpha
    pos_loss = pos_loss * alpha
    neg_loss = neg_loss * (1 - alpha)
    loss_dict = {
        "total_loss": pos_loss + neg_loss,
        "pos_loss": pos_loss,
        "neg_loss": neg_loss,
    }
    return loss_dict
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample RoIs per image, honouring ignore regions, and build targets.

    Outside training mode the proposals are passed through untouched.
    ``layers.get_iou(..., return_ignore=True)`` supplies two overlap
    matrices; an RoI takes its match from the ignore matrix when its normal
    overlap is below ``cfg.fg_threshold`` and its ignore overlap is larger.

    Args:
        rpn_rois: proposals; column 0 is compared with the image index and
            columns 1:5 are used as the box.
        im_info: per-image info; entry ``[bid, 4]`` is the number of valid
            ground-truth rows.
        gt_boxes: ground truth indexed by image first; columns 0:4 are the
            box and column 4 is read as the label.

    Returns:
        ``(rois, labels, bbox_targets)`` concatenated over images along
        axis 0 and wrapped in ``F.zero_grad``, or ``(rpn_rois, None, None)``
        when ``self.training`` is false.
    """
    if not self.training:
        return rpn_rois, None, None

    collected_rois = []
    collected_labels = []
    collected_targets = []

    for image_idx in range(self.cfg.batch_per_gpu):
        valid_count = im_info[image_idx, 4]
        image_gt = gt_boxes[image_idx, :valid_count, :]

        # Ground-truth boxes are always appended to the proposals, laid out
        # as [batch_id, x1, y1, x2, y2].
        gt_batch_col = mge.ones((image_gt.shapeof(0), 1)) * image_idx
        gt_as_rois = F.concat([gt_batch_col, image_gt[:, :4]], axis=1)
        in_image = rpn_rois[:, 0] == image_idx
        _, in_image_inds = F.cond_take(in_image == 1, in_image)
        candidates = F.concat([rpn_rois.ai[in_image_inds], gt_as_rois])

        iou_normal, iou_ignore = layers.get_iou(
            candidates[:, 1:5], image_gt, return_ignore=True,
        )
        best_iou_normal = iou_normal.max(axis=1)
        best_gt_normal = F.argmax(iou_normal, axis=1)
        best_iou_ignore = iou_ignore.max(axis=1)
        best_gt_ignore = F.argmax(iou_ignore, axis=1)

        # Blend the two matchings arithmetically with a 0/1 selector.
        use_ignore = (best_iou_normal < self.cfg.fg_threshold) * (
            best_iou_ignore > best_iou_normal
        )
        use_normal = 1 - use_ignore
        best_iou = best_iou_normal * use_normal + best_iou_ignore * use_ignore
        best_gt = best_gt_normal * use_normal + best_gt_ignore * use_ignore
        best_gt = best_gt.astype("int32")
        roi_labels = image_gt.ai[best_gt, 4]

        # Foreground needs a high overlap and a non-ignore matched label;
        # background is the overlap band [bg_threshold_low, bg_threshold_high).
        is_fg = (best_iou >= self.cfg.fg_threshold) * (
            roi_labels != self.cfg.ignore_label
        )
        is_bg = (best_iou < self.cfg.bg_threshold_high) * (
            best_iou >= self.cfg.bg_threshold_low
        )

        # NOTE(review): _bernoulli_sample_masks presumably returns a 0/1 mask
        # limited to the requested count -- confirm against its definition.
        fg_quota = self.cfg.num_rois * self.cfg.fg_ratio
        fg_sampled = self._bernoulli_sample_masks(is_fg, fg_quota, 1)
        bg_quota = self.cfg.num_rois - fg_sampled.sum()
        bg_sampled = self._bernoulli_sample_masks(is_bg, bg_quota, 1)

        # Everything not sampled as foreground gets label 0.
        roi_labels = roi_labels * fg_sampled

        selected = fg_sampled + bg_sampled
        _, keep_inds = F.cond_take(selected == 1, selected)
        # Cap the kept indices at num_rois to avoid exceeding memory.
        keep_limit = F.minimum(self.cfg.num_rois, keep_inds.shapeof(0))
        keep_inds = keep_inds[:keep_limit]

        kept_labels = roi_labels.ai[keep_inds].astype("int32")
        kept_rois = candidates.ai[keep_inds]
        matched_boxes = image_gt.ai[best_gt.ai[keep_inds], :4]
        targets = self.box_coder.encode(kept_rois[:, 1:5], matched_boxes)
        targets = targets.reshape(-1, 4)

        collected_rois.append(kept_rois)
        collected_labels.append(kept_labels)
        collected_targets.append(targets)

    rois = F.zero_grad(F.concat(collected_rois, axis=0))
    labels = F.zero_grad(F.concat(collected_labels, axis=0))
    bbox_targets = F.zero_grad(F.concat(collected_targets, axis=0))
    return rois, labels, bbox_targets