def __call__(self, matched_idxs):
    """
    Arguments:
        matched_idxs: list of tensors containing -1, 0 or positive values.
            Each tensor corresponds to a specific image.
            -1 values are ignored, 0 are considered as negatives and > 0 as positives.

    Returns:
        pos_idx (list[tensor])
        neg_idx (list[tensor])

    Returns two lists of binary masks for each image.
    The first list contains the positive elements that were selected,
    and the second list the negative examples.
    """
    pos_idx = []
    neg_idx = []
    for matched_idxs_per_image in matched_idxs:
        positive = jt.nonzero(matched_idxs_per_image >= 1).squeeze(1)
        negative = jt.nonzero(matched_idxs_per_image == 0).squeeze(1)

        num_pos = int(self.batch_size_per_image * self.positive_fraction)
        # protect against not enough positive examples
        num_pos = min(positive.numel(), num_pos)
        num_neg = self.batch_size_per_image - num_pos
        # protect against not enough negative examples
        num_neg = min(negative.numel(), num_neg)

        # randomly select positive and negative examples
        perm1 = jt.randperm(positive.numel())[:num_pos]
        perm2 = jt.randperm(negative.numel())[:num_neg]

        pos_idx_per_image = positive[perm1]
        neg_idx_per_image = negative[perm2]

        # create binary mask from indices
        pos_idx_per_image_mask = jt.zeros_like(matched_idxs_per_image).bool()
        neg_idx_per_image_mask = jt.zeros_like(matched_idxs_per_image).bool()
        pos_idx_per_image_mask[pos_idx_per_image] = 1
        neg_idx_per_image_mask[neg_idx_per_image] = 1

        pos_idx.append(pos_idx_per_image_mask)
        neg_idx.append(neg_idx_per_image_mask)

    return pos_idx, neg_idx
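# A minimal usage sketch for the sampler above (the enclosing class is the
# usual BalancedPositiveNegativeSampler; the batch size, positive fraction
# and matched_idxs values below are assumptions, not from this repo):
#
#   sampler = BalancedPositiveNegativeSampler(
#       batch_size_per_image=4, positive_fraction=0.5)
#   matched_idxs = [jt.array([-1, 0, 2, 0, 1])]   # one image, 5 anchors
#   pos_masks, neg_masks = sampler(matched_idxs)
#   # pos_masks[0] / neg_masks[0] are bool masks over those 5 anchors, with
#   # at most 2 positives and the rest of the batch filled by negatives.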
def __call__(self, proposals, keypoint_logits):
    heatmaps = []
    valid = []
    for proposals_per_image in proposals:
        kp = proposals_per_image.get_field("keypoints")
        heatmaps_per_image, valid_per_image = project_keypoints_to_heatmap(
            kp, proposals_per_image, self.discretization_size)
        heatmaps.append(heatmaps_per_image.reshape(-1))
        valid.append(valid_per_image.reshape(-1))

    keypoint_targets = cat(heatmaps, dim=0)
    valid = cat(valid, dim=0).bool()
    valid = jt.nonzero(valid).squeeze(1)

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if keypoint_targets.numel() == 0 or len(valid) == 0:
        return keypoint_logits.sum() * 0

    N, K, H, W = keypoint_logits.shape
    keypoint_logits = keypoint_logits.reshape(N * K, H * W)

    keypoint_loss = nn.cross_entropy_loss(keypoint_logits[valid],
                                          keypoint_targets[valid])
    return keypoint_loss
def select_top_predictions(self, predictions):
    """
    Select only predictions which have a `score` > self.confidence_threshold,
    and return the predictions in descending order of score.

    Arguments:
        predictions (BoxList): the result of the computation by the model.
            It should contain the field `scores`.

    Returns:
        prediction (BoxList): the detected objects. Additional information
            of the detection properties can be found in the fields of
            the BoxList via `prediction.fields()`
    """
    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")
    if scores.shape[0] == 0:
        return None
    keep = jt.nonzero(scores > self.confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    return predictions[idx]
def __call__(self, proposals, mask_logits, targets):
    """
    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    labels, mask_targets, mask_ratios = self.prepare_targets(
        proposals, targets)

    labels = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)

    positive_inds = jt.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[positive_inds]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        if not self.maskiou_on:
            return mask_logits.sum() * 0
        else:
            selected_index = jt.arange(mask_logits.shape[0])
            selected_mask = mask_logits[selected_index, labels]
            mask_num, mask_h, mask_w = selected_mask.shape
            selected_mask = selected_mask.reshape(mask_num, 1, mask_h, mask_w)
            return mask_logits.sum() * 0, selected_mask, labels, None

    if self.maskiou_on:
        mask_ratios = cat(mask_ratios, dim=0)
        value_eps = 1e-10 * jt.ones((mask_targets.shape[0], ))
        mask_ratios = jt.maximum(mask_ratios, value_eps)
        pred_masks = mask_logits[positive_inds, labels_pos]
        pred_masks[:] = pred_masks > 0
        mask_targets_full_area = mask_targets.sum(dims=[1, 2]) / mask_ratios
        mask_ovr = pred_masks * mask_targets
        mask_ovr_area = mask_ovr.sum(dims=[1, 2])
        mask_union_area = pred_masks.sum(dims=[1, 2]) + \
            mask_targets_full_area - mask_ovr_area
        value_1 = jt.ones((pred_masks.shape[0], ))
        value_0 = jt.zeros((pred_masks.shape[0], ))
        mask_union_area = jt.maximum(mask_union_area, value_1)
        mask_ovr_area = jt.maximum(mask_ovr_area, value_0)
        maskiou_targets = mask_ovr_area / mask_union_area

    binary_cross_entropy_with_logits = nn.BCEWithLogitsLoss()
    mask_loss = binary_cross_entropy_with_logits(
        mask_logits[positive_inds, labels_pos], mask_targets)

    if not self.maskiou_on:
        return mask_loss
    else:
        selected_index = jt.index((mask_logits.shape[0], ), dim=0)
        selected_mask = mask_logits[selected_index, labels]
        mask_num, mask_h, mask_w = selected_mask.shape
        selected_mask = selected_mask.reshape(mask_num, 1, mask_h, mask_w)
        selected_mask = selected_mask.sigmoid()
        return mask_loss, selected_mask, labels, maskiou_targets
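# A small numeric sketch of the maskiou target computed above (all values
# assumed): take one binarized 2x2 prediction [[1, 0], [1, 0]], its 2x2
# target [[1, 1], [0, 0]], and mask_ratio = 1 (target fully inside its box):
#   pred area = 2, target full area = 2 / 1 = 2, overlap area = 1
#   union = 2 + 2 - 1 = 3  ->  maskiou_target = 1 / 3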
def __call__(self, proposals, mask_logits, targets):
    """
    Arguments:
        proposals (list[BoxList])
        mask_logits (Tensor)
        targets (list[BoxList])

    Return:
        mask_loss (Tensor): scalar tensor containing the loss
    """
    labels, mask_targets = self.prepare_targets(proposals, targets)

    labels = cat(labels, dim=0)
    mask_targets = cat(mask_targets, dim=0)

    positive_inds = jt.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[positive_inds]

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        return mask_logits.sum() * 0

    binary_cross_entropy_with_logits = nn.BCEWithLogitsLoss()
    mask_loss = binary_cross_entropy_with_logits(
        mask_logits[positive_inds, labels_pos], mask_targets)
    return mask_loss
def subsample(self, proposals, targets):
    """
    This method performs the positive/negative sampling, and returns
    the sampled proposals.
    Note: this function keeps a state.

    Arguments:
        proposals (list[BoxList])
        targets (list[BoxList])
    """
    labels, keypoints = self.prepare_targets(proposals, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

    proposals = list(proposals)
    # add corresponding label and regression_targets information to the bounding boxes
    for labels_per_image, keypoints_per_image, proposals_per_image in zip(
            labels, keypoints, proposals):
        proposals_per_image.add_field("labels", labels_per_image)
        proposals_per_image.add_field("keypoints", keypoints_per_image)

    # distribute sampled proposals, that were obtained on all feature maps
    # concatenated via the fg_bg_sampler, into individual feature map levels
    for img_idx, (pos_inds_img, neg_inds_img) in enumerate(
            zip(sampled_pos_inds, sampled_neg_inds)):
        img_sampled_inds = jt.nonzero(pos_inds_img).squeeze(1)
        proposals_per_image = proposals[img_idx][img_sampled_inds]
        proposals[img_idx] = proposals_per_image

    self._proposals = proposals
    return proposals
def prepare_targets(self, proposals, targets):
    labels = []
    masks = []
    mask_ratios = []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        matched_targets = self.match_targets_to_proposals(
            proposals_per_image, targets_per_image)
        matched_idxs = matched_targets.get_field("matched_idxs")

        labels_per_image = matched_targets.get_field("labels")
        labels_per_image = labels_per_image.int32()

        # this can probably be removed, but is left here for clarity
        # and completeness
        neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
        labels_per_image[neg_inds] = 0

        # mask scores are only computed on positive samples
        positive_inds = jt.nonzero(labels_per_image > 0).squeeze(1)

        segmentation_masks = matched_targets.get_field("masks")
        segmentation_masks = segmentation_masks[positive_inds]

        positive_proposals = proposals_per_image[positive_inds]

        masks_per_image, mask_ratios_per_image = project_masks_on_boxes(
            segmentation_masks, positive_proposals,
            self.discretization_size, self.maskiou_on)

        labels.append(labels_per_image)
        masks.append(masks_per_image)
        mask_ratios.append(mask_ratios_per_image)

    return labels, masks, mask_ratios
def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix):
    """
    Produce additional matches for predictions that have only low-quality matches.
    Specifically, for each ground-truth find the set of predictions that have
    maximum overlap with it (including ties); for each prediction in that set,
    if it is unmatched, then match it to the ground-truth with which it has
    the highest quality value.
    """
    # For each gt, find the prediction with which it has highest quality
    highest_quality_foreach_gt = match_quality_matrix.max(dim=1)
    # Find highest quality match available, even if it is low, including ties
    gt_pred_pairs_of_highest_quality = jt.nonzero(
        match_quality_matrix == highest_quality_foreach_gt.unsqueeze(1))
    # Example gt_pred_pairs_of_highest_quality:
    #   tensor([[    0, 39796],
    #           [    1, 32055],
    #           [    1, 32070],
    #           [    2, 39190],
    #           [    2, 40255],
    #           [    3, 40390],
    #           [    3, 41455],
    #           [    4, 45470],
    #           [    5, 45325],
    #           [    5, 46390]])
    # Each row is a (gt index, prediction index)
    # Note how gt items 1, 2, 3, and 5 each have two ties
    pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
    matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
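# A tiny numeric sketch of the broadcast-equality trick above (values assumed):
#   m = jt.array([[0.1, 0.9, 0.9],
#                 [0.7, 0.2, 0.7]])        # 2 gt x 3 predictions
#   best = m.max(dim=1)                    # [0.9, 0.7]
#   pairs = jt.nonzero(m == best.unsqueeze(1))
#   # pairs == [[0, 1], [0, 2], [1, 0], [1, 2]]: each gt keeps all of its
#   # tied best predictions, exactly like the example in the docstring.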
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        # multiclass nms
        result = boxlist_ml_nms(boxlists[i], self.nms_thresh)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = jt.kthvalue(
                cls_scores,
                number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh.item()
            keep = jt.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
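# How the kthvalue thresholding above keeps the top n scores (numbers are
# assumptions): with 5 detections and fpn_post_nms_top_n = 2, the threshold
# is the (5 - 2 + 1) = 4th smallest score, i.e. the 2nd largest.
#   scores = jt.array([0.9, 0.1, 0.5, 0.7, 0.3])
#   thresh, _ = jt.kthvalue(scores, 5 - 2 + 1)             # -> 0.7
#   keep = jt.nonzero(scores >= thresh.item()).squeeze(1)  # indices 0 and 3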
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[list[BoxList]])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = jt.nonzero(
        jt.contrib.concat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = jt.nonzero(
        jt.contrib.concat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = jt.contrib.concat([sampled_pos_inds, sampled_neg_inds],
                                     dim=0)

    objectness, box_regression = concat_box_prediction_layers(
        objectness, box_regression)

    objectness = objectness.squeeze(1)

    labels = jt.contrib.concat(labels, dim=0)
    regression_targets = jt.contrib.concat(regression_targets, dim=0)

    box_loss = _smooth_l1_loss(box_regression[sampled_pos_inds],
                               regression_targets[sampled_pos_inds],
                               sigma=3.) / (sampled_inds.numel())

    # equivalent to nn.BCEWithLogitsLoss on the raw objectness logits:
    # bce_loss_with_logits = nn.BCEWithLogitsLoss()
    # objectness_loss = bce_loss_with_logits(
    #     objectness[sampled_inds], labels[sampled_inds])
    objectness_loss = nn.bce_loss(objectness[sampled_inds].sigmoid(),
                                  labels[sampled_inds])

    return objectness_loss, box_loss
def __call__(self, labels, pred_maskiou, gt_maskiou):
    positive_inds = jt.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[positive_inds]
    if labels_pos.numel() == 0:
        return pred_maskiou.sum() * 0
    gt_maskiou = gt_maskiou.detach()
    maskiou_loss = l2_loss(pred_maskiou[positive_inds, labels_pos],
                           gt_maskiou)
    maskiou_loss = self.loss_weight * maskiou_loss
    return maskiou_loss
def l2_loss(input, target):
    """
    L2 loss, computed only over the entries whose target is positive
    and normalized by their count.
    """
    pos_inds = jt.nonzero(target > 0.0).squeeze(1)
    if pos_inds.shape[0] > 0:
        cond = jt.abs(input[pos_inds] - target[pos_inds])
        loss = 0.5 * cond**2 / pos_inds.shape[0]
    else:
        loss = input * 0.0
    return loss.sum()
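# A quick numeric check of l2_loss (values assumed):
#   input  = jt.array([0.2, 0.8, 0.5])
#   target = jt.array([0.0, 1.0, 0.5])
# only indices 1 and 2 have target > 0, so
#   loss = 0.5 * ((0.8 - 1.0)**2 + (0.5 - 0.5)**2) / 2 = 0.01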
def __call__(self, class_logits, box_regression):
    """
    Computes the loss for Faster R-CNN.
    This requires that the subsample method has been called beforehand.

    Arguments:
        class_logits (list[Tensor])
        box_regression (list[Tensor])

    Returns:
        classification_loss (Tensor)
        box_loss (Tensor)
    """
    class_logits = cat(class_logits, dim=0)
    box_regression = cat(box_regression, dim=0)

    if not hasattr(self, "_proposals"):
        raise RuntimeError("subsample needs to be called before")

    proposals = self._proposals

    labels = cat([proposal.get_field("labels") for proposal in proposals],
                 dim=0)
    regression_targets = cat([
        proposal.get_field("regression_targets") for proposal in proposals
    ], dim=0)

    classification_loss = nn.cross_entropy_loss(class_logits, labels)

    # get indices that correspond to the regression targets for
    # the corresponding ground truth labels, to be used with
    # advanced indexing
    sampled_pos_inds_subset = jt.nonzero(labels > 0).squeeze(1)
    labels_pos = labels[sampled_pos_inds_subset]
    if self.cls_agnostic_bbox_reg:
        map_inds = jt.array([4, 5, 6, 7])
    else:
        map_inds = 4 * labels_pos[:, None] + jt.array([0, 1, 2, 3])

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds_subset[:, None], map_inds],
        regression_targets[sampled_pos_inds_subset],
        size_average=False,
        beta=1,
    )
    box_loss = box_loss / labels.numel()

    return classification_loss, box_loss
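# A short sketch of the map_inds advanced indexing above (labels assumed):
# box_regression stores 4 columns per class, so a positive sample with
# label 3 reads columns 12..15.
#   labels_pos = jt.array([3, 1])
#   map_inds = 4 * labels_pos[:, None] + jt.array([0, 1, 2, 3])
#   # map_inds == [[12, 13, 14, 15],
#   #              [ 4,  5,  6,  7]]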
def execute(self, x, boxes):
    """
    Arguments:
        x (list[Tensor]): feature maps for each level
        boxes (list[BoxList]): boxes to be used to perform the pooling operation.

    Returns:
        result (Tensor)
    """
    num_levels = len(self.poolers)
    rois = self.convert_to_roi_format(boxes)
    if num_levels == 1:
        return self.poolers[0](x[0], rois)

    levels = self.map_levels(boxes)

    num_rois = rois.shape[0]
    num_channels = x[0].shape[1]
    output_size = self.output_size[0]

    dtype = str(x[0].dtype)
    result = jt.zeros(
        (num_rois, num_channels, output_size, output_size),
        dtype=dtype,
    )
    for level, (per_level_feature, pooler) in enumerate(
            zip(x, self.poolers.layers.values())):
        idx_in_level = jt.nonzero(levels == level).squeeze(1)
        rois_per_level = rois[idx_in_level]
        result[idx_in_level] = pooler(per_level_feature,
                                      rois_per_level).cast(dtype)

    return result
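# For reference, map_levels typically implements the canonical FPN heuristic
# (a hedged sketch; this repo's LevelMapper may differ in its constants):
#   import math
#   def fpn_level(w, h, k_min=2, k_max=5, canonical_scale=224, canonical_level=4):
#       k = math.floor(canonical_level + math.log2(math.sqrt(w * h) / canonical_scale))
#       return min(max(k, k_min), k_max) - k_min   # index into self.poolers
#   # a 224x224 RoI lands on index 2, i.e. the P4 pooler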
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)

    N = len(labels)
    box_cls, box_regression = \
        concat_box_prediction_layers(box_cls, box_regression)

    labels = jt.contrib.concat(labels, dim=0)
    regression_targets = jt.contrib.concat(regression_targets, dim=0)
    pos_inds = jt.nonzero(labels > 0).squeeze(1)

    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, pos_inds.numel() * self.regress_norm))

    labels = labels.int()

    retinanet_cls_loss = self.box_cls_loss_func(box_cls, labels) / \
        (pos_inds.numel() + N)

    return retinanet_cls_loss, retinanet_regression_loss
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background
        for j in range(1, self.num_classes):
            inds = (labels == j).nonzero().view(-1)
            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            self.nms_thresh,
                                            score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field("labels",
                                        jt.full((num_labels, ), j).int32())
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = jt.kthvalue(
                cls_scores,
                number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh
            keep = jt.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
def run_model(config_file, img_f=None):
    original_image = load(img_f)

    from detectron.config import cfg
    from detectron.modeling.detector import build_detection_model
    from detectron.utils.checkpoint import DetectronCheckpointer
    from detectron.structures.image_list import to_image_list
    from detectron.modeling.roi_heads.mask_head.inference import Masker
    from jittor import transform as T
    from jittor import nn
    import jittor as jt
    from jittor_utils import auto_diff
    jt.flags.use_cuda = 1

    confidence_threshold = 0.0
    cfg.merge_from_file(config_file)
    model = build_detection_model(cfg)
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)
    name = config_file.split('/')[-1].split('.')[0]
    # hook = auto_diff.Hook(name)
    # hook.hook_module(model)
    model.eval()

    class Resize(object):
        def __init__(self, min_size, max_size):
            self.min_size = min_size
            self.max_size = max_size

        # modified from torchvision to add support for max size
        def get_size(self, image_size):
            w, h = image_size
            size = self.min_size
            max_size = self.max_size
            if max_size is not None:
                min_original_size = float(min((w, h)))
                max_original_size = float(max((w, h)))
                if max_original_size / min_original_size * size > max_size:
                    size = int(
                        round(max_size * min_original_size /
                              max_original_size))

            if (w <= h and w == size) or (h <= w and h == size):
                return (h, w)

            if w < h:
                ow = size
                oh = int(size * h / w)
            else:
                oh = size
                ow = int(size * w / h)

            return (oh, ow)

        def __call__(self, image):
            size = self.get_size(image.size)
            image = T.resize(image, size)
            return image

    def build_transform():
        if cfg.INPUT.TO_BGR255:
            to_bgr_transform = T.Lambda(lambda x: x * 255)
        else:
            to_bgr_transform = T.Lambda(lambda x: x[[2, 1, 0]])

        normalize_transform = T.ImageNormalize(mean=cfg.INPUT.PIXEL_MEAN,
                                               std=cfg.INPUT.PIXEL_STD)

        min_size = cfg.INPUT.MIN_SIZE_TEST
        max_size = cfg.INPUT.MAX_SIZE_TEST
        transform = T.Compose([
            T.ToPILImage(),
            Resize(min_size, max_size),
            T.ToTensor(),
            to_bgr_transform,
            normalize_transform,
        ])
        return transform

    transforms = build_transform()
    image = transforms(original_image)
    image_list = to_image_list(image, cfg.DATALOADER.SIZE_DIVISIBILITY)
    predictions = model(image_list)
    predictions = predictions[0]

    if predictions.has_field("mask_scores"):
        scores = predictions.get_field("mask_scores")
    else:
        scores = predictions.get_field("scores")
    keep = jt.nonzero(scores > confidence_threshold).squeeze(1)
    predictions = predictions[keep]
    scores = predictions.get_field("scores")
    idx, _ = jt.argsort(scores, 0, descending=True)
    predictions = predictions[idx]
    result_diff(predictions)
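# A quick check of Resize.get_size (sizes assumed): with min_size=800,
# max_size=1333 and a 640x480 (w, h) input:
#   640 / 480 * 800 = 1066.7 <= 1333, so the short side stays 800
#   w > h, so oh = 800, ow = int(800 * 640 / 480) = 1066
#   -> returns (oh, ow) = (800, 1066)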
def __call__(self, locations, box_cls, box_regression, centerness, targets):
    """
    Arguments:
        locations (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        centerness (list[Tensor])
        targets (list[BoxList])

    Returns:
        cls_loss (Tensor)
        reg_loss (Tensor)
        centerness_loss (Tensor)
    """
    N = box_cls[0].size(0)
    num_classes = box_cls[0].size(1) // self.dense_points
    labels, reg_targets = self.prepare_targets(locations, targets)

    box_cls_flatten = []
    box_regression_flatten = []
    centerness_flatten = []
    labels_flatten = []
    reg_targets_flatten = []
    for l in range(len(labels)):
        box_cls_flatten.append(
            box_cls[l].permute(0, 2, 3, 1).reshape(-1, num_classes))
        box_regression_flatten.append(
            box_regression[l].permute(0, 2, 3, 1).reshape(-1, 4))
        labels_flatten.append(labels[l].reshape(-1))
        reg_targets_flatten.append(reg_targets[l].reshape(-1, 4))
        centerness_flatten.append(
            centerness[l].permute(0, 2, 3, 1).reshape(-1))

    box_cls_flatten = jt.contrib.concat(box_cls_flatten, dim=0)
    box_regression_flatten = jt.contrib.concat(box_regression_flatten, dim=0)
    centerness_flatten = jt.contrib.concat(centerness_flatten, dim=0)
    labels_flatten = jt.contrib.concat(labels_flatten, dim=0)
    reg_targets_flatten = jt.contrib.concat(reg_targets_flatten, dim=0)

    pos_inds = jt.nonzero(labels_flatten > 0).squeeze(1)
    cls_loss = self.cls_loss_func(
        box_cls_flatten,
        labels_flatten.int()) / (pos_inds.numel() + N)  # add N to avoid dividing by zero

    box_regression_flatten = box_regression_flatten[pos_inds]
    reg_targets_flatten = reg_targets_flatten[pos_inds]
    centerness_flatten = centerness_flatten[pos_inds]

    if pos_inds.numel() > 0:
        centerness_targets = self.compute_centerness_targets(
            reg_targets_flatten)
        reg_loss = self.box_reg_loss_func(
            box_regression_flatten,
            reg_targets_flatten,
            centerness_targets,
        )
        centerness_loss = self.centerness_loss_func(centerness_flatten,
                                                    centerness_targets)
    else:
        reg_loss = box_regression_flatten.sum()
        centerness_loss = centerness_flatten.sum()

    return cls_loss, reg_loss, centerness_loss
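# For reference, compute_centerness_targets (not shown here) is, in FCOS,
#   centerness = sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b)))
# over the positive (l, t, r, b) regression targets; a hedged sketch:
#   lr = reg_targets[:, [0, 2]]
#   tb = reg_targets[:, [1, 3]]
#   centerness = jt.sqrt((lr.min(dim=1) / lr.max(dim=1)) *
#                        (tb.min(dim=1) / tb.max(dim=1)))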
def __call__(self, locations, box_cls, box_regression, centerness,
             proposal_embed, proposal_margin, pixel_embed, targets):
    """
    Arguments:
        locations (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        centerness (list[Tensor])
        targets (list[BoxList])

    Returns:
        cls_loss (Tensor)
        reg_loss (Tensor)
        centerness_loss (Tensor)
        mask_loss (Tensor)
    """
    num_classes = box_cls[0].size(1)
    im_h = box_cls[4].shape[2] * self.fpn_strides[4]
    im_w = box_cls[4].shape[3] * self.fpn_strides[4]
    labels_per_level, reg_targets_per_level, labels, reg_targets, matched_idxes = \
        self.prepare_targets(locations, targets, im_w, im_h)

    box_cls_flatten = []
    box_regression_flatten = []
    centerness_flatten = []
    labels_flatten = []
    reg_targets_flatten = []
    for l in range(len(labels_per_level)):
        box_cls_flatten.append(
            box_cls[l].transpose(0, 2, 3, 1).reshape(-1, num_classes))
        box_regression_flatten.append(
            box_regression[l].transpose(0, 2, 3, 1).reshape(-1, 4))
        labels_flatten.append(labels_per_level[l].reshape(-1))
        reg_targets_flatten.append(reg_targets_per_level[l].reshape(-1, 4))
        centerness_flatten.append(centerness[l].reshape(-1))

    box_cls_flatten = jt.contrib.concat(box_cls_flatten, dim=0)
    box_regression_flatten = jt.contrib.concat(box_regression_flatten, dim=0)
    centerness_flatten = jt.contrib.concat(centerness_flatten, dim=0)
    labels_flatten = jt.contrib.concat(labels_flatten, dim=0)
    reg_targets_flatten = jt.contrib.concat(reg_targets_flatten, dim=0)

    pos_inds = jt.nonzero(labels_flatten > 0).squeeze(1)

    box_regression_flatten = box_regression_flatten[pos_inds]
    reg_targets_flatten = reg_targets_flatten[pos_inds]
    centerness_flatten = centerness_flatten[pos_inds]

    num_gpus = get_num_gpus()
    # sync num_pos from all gpus
    total_num_pos = reduce_sum(pos_inds.new_tensor([pos_inds.numel()])).item()
    num_pos_avg_per_gpu = max(total_num_pos / float(num_gpus), 1.0)

    cls_loss = self.cls_loss_func(
        box_cls_flatten, labels_flatten.int()) / num_pos_avg_per_gpu

    if pos_inds.numel() > 0:
        centerness_targets = self.compute_centerness_targets(
            reg_targets_flatten)

        # average sum_centerness_targets from all gpus,
        # which is used to normalize centerness-weighed reg loss
        sum_centerness_targets_avg_per_gpu = \
            reduce_sum(centerness_targets.sum()).item() / float(num_gpus)

        reg_loss = self.box_reg_loss_func(
            box_regression_flatten, reg_targets_flatten,
            centerness_targets) / sum_centerness_targets_avg_per_gpu
        centerness_loss = self.centerness_loss_func(
            centerness_flatten, centerness_targets) / num_pos_avg_per_gpu
    else:
        reg_loss = box_regression_flatten.sum()
        reduce_sum(centerness_flatten.new_tensor([0.0]))
        centerness_loss = centerness_flatten.sum()

    #################################### Mask Related Losses ######################################
    # get positive proposal labels for each gt instance
    pos_proposal_labels_for_targets = self.get_pos_proposal_indexes(
        locations, box_regression, matched_idxes, targets)

    # get positive samples of embeddings & margins for each gt instance
    proposal_embed_for_targets, valids_for_targets = self.get_proposal_element(
        proposal_embed, pos_proposal_labels_for_targets)
    proposal_margin_for_targets, _ = self.get_proposal_element(
        proposal_margin, pos_proposal_labels_for_targets)

    ######## MEANINGLESS_LOSS #######
    # zero-weighted terms that keep every mask-branch output in the graph,
    # so all parameters receive a gradient even without valid targets
    mask_loss = box_cls[0].new_tensor(0.0)
    for i in range(len(proposal_embed)):
        mask_loss += 0 * proposal_embed[i].sum()
        mask_loss += 0 * proposal_margin[i].sum()
    mask_loss += 0 * pixel_embed.sum()

    ############ Mask Losses ##############
    # get target masks in prefer size
    N, _, m_h, m_w = pixel_embed.shape
    o_h = m_h * self.mask_scale_factor
    o_w = m_w * self.mask_scale_factor
    r_h = int(m_h * self.fpn_strides[0])
    r_w = int(m_w * self.fpn_strides[0])
    stride = self.fpn_strides[0] / self.mask_scale_factor

    targets_masks = [
        target_im.get_field('masks').convert('mask').instances.masks.to(
            device=pixel_embed.device) for target_im in targets
    ]
    masks_t = self.prepare_masks(o_h, o_w, r_h, r_w, targets_masks)
    pixel_embed = interpolate(input=pixel_embed,
                              size=(o_h, o_w),
                              mode="bilinear",
                              align_corners=False)

    if self.loss_mask_alpha > 0:
        for im in range(N):
            valid = valids_for_targets[im]
            if valid.sum() == 0:
                continue
            proposal_embed_im = proposal_embed_for_targets[im][valid]
            proposal_margin_im = proposal_margin_for_targets[im][valid]

            masks_t_im = masks_t[im][valid]
            boxes_t_im = targets[im].bbox[valid] / stride

            masks_prob = self.compute_mask_prob(proposal_embed_im,
                                                proposal_margin_im,
                                                pixel_embed[im])
            masks_prob_crop, crop_mask = crop_by_box(masks_prob, boxes_t_im,
                                                     self.box_padding)
            mask_loss_per_target = self.mask_loss_func(masks_prob_crop,
                                                       masks_t_im,
                                                       mask=crop_mask,
                                                       act=True)
            mask_loss += mask_loss_per_target.mean()

    mask_loss = mask_loss / N * self.loss_mask_alpha

    return cls_loss, reg_loss, centerness_loss, mask_loss
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)

    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    # materialize the per-class nonzero indices in one batch
    inds_nonzeros = [inds_all[:, j].nonzero() for j in range(1, num_classes)]
    jt.sync(inds_nonzeros)

    for j in range(1, num_classes):
        inds = inds_nonzeros[j - 1]
        if inds.shape[0] == 0:
            continue
        inds = inds.squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class = boxlist_nms(boxlist_for_class, self.nms)
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field("labels",
                                    jt.full((num_labels, ), j).int32())
        result.append(boxlist_for_class)

    result = cat_boxlist(result)
    if not result.has_field('labels'):
        result.add_field('labels', jt.empty((0, )))
    if not result.has_field('scores'):
        result.add_field('scores', jt.empty((0, )))
    number_of_detections = len(result)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = jt.kthvalue(
            cls_scores, number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh
        keep = jt.nonzero(keep).squeeze(1)
        result = result[keep]
    return result