def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level in zip(
        objectness, box_regression
    ):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(N, -1)
        box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds]
    )

    return objectness_loss, box_loss
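# A toy check (hypothetical shapes, torch only) of the layout transform used in
# the loss above: objectness arrives as [N, A, H, W] but the labels are stored
# per image in spatial-major, anchor-minor order, hence permute(0, 2, 3, 1)
# before flattening. A sketch, not part of the original code.
import torch

def _check_objectness_layout():
    N, A, H, W = 2, 3, 4, 5
    objectness = torch.arange(N * A * H * W, dtype=torch.float32).view(N, A, H, W)
    flat = objectness.permute(0, 2, 3, 1).reshape(N, -1)
    # element (n, a, y, x) of the original tensor lands at index y*W*A + x*A + a
    n, a, y, x = 1, 2, 3, 1
    assert flat[n, y * W * A + x * A + a] == objectness[n, a, y, x]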
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[list[BoxList]]): first dim is the image batch, second dim
            is the feature level; the anchors of each level form one BoxList
        objectness (list[Tensor]): first dim is the feature level
        box_regression (list[Tensor]): first dim is the feature level
        targets (list[BoxList]): first dim is the image batch

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    # anchors: [num_imgs, num_levels (BoxLists)] --> [num_imgs (BoxLists)]
    # i.e. merge the per-level BoxLists of each image in the batch into one
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    # labels: fg/bg/discard, [img_batch, num_anchors]
    # regression_targets: t_x, t_y, t_w, t_h, [img_batch, num_anchors, 4]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    # randomly sample a batch of positive/negative samples from all
    # predictions, [img_batch, num_anchors]
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    # Before this step, sampled_pos_inds and sampled_neg_inds have shape
    # [img_batch, num_anchors] and can be viewed as 2-D matrices of 0s and 1s,
    # where a 1 marks a sampled anchor; per row, the number of 1s in the two
    # matrices sums to batch_size_per_image, i.e. batch_size_per_image
    # positive and negative samples are drawn from each image.
    # After this step they become flat index tensors [all_sampled_inds]: the
    # image batch is flattened to img_batch * num_anchors entries and the
    # indices of the nonzero elements (range 0 .. img_batch*num_anchors - 1)
    # are kept. labels and regression_targets are flattened the same way
    # below, so these index tensors can be applied to them directly.
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
    # [img_batch * batch_size_per_image]
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    # objectness: [[num_img, num_anchors, H, W], ...] --> [img_batch*num_anchors, 1]
    # box_regression: [[num_img, 4*num_anchors, H, W], ...] --> [img_batch*num_anchors, 4]
    objectness, box_regression = \
        concat_box_prediction_layers(objectness, box_regression)
    # [img_batch*num_anchors]
    objectness = objectness.squeeze()

    # [img_batch, num_anchors] --> [img_batch*num_anchors]
    labels = torch.cat(labels, dim=0)
    # [img_batch, num_anchors, 4] --> [img_batch*num_anchors, 4]
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    # fg/bg binary classification loss: sigmoid combined with cross-entropy
    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds])

    return objectness_loss, box_loss
def __call__(self, anchors, box_cls, box_regression, coeffs, prototypes, targets):
    coeffs = concat_coeffs_prediction_layers(coeffs, self.num_prototypes)
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets, mask_targets, mask_pred, gt_boxes_area = \
        self.prepare_targets_and_assemble(anchors, targets, coeffs, prototypes)

    N = len(labels)
    box_cls, box_regression = \
        concat_box_prediction_layers(box_cls, box_regression)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(labels > 0).squeeze(1)

    mask_pred = torch.cat(mask_pred, dim=0)
    device = mask_pred.device
    mask_targets = torch.cat(mask_targets, dim=0).to(device, dtype=torch.float32)
    gt_boxes_area = torch.cat(gt_boxes_area, dim=0)
    if mask_pred.size(0) > self.mask_to_train:
        perm = torch.randperm(mask_pred.size(0))
        select = perm[:self.mask_to_train]
        mask_pred = mask_pred[select]
        mask_targets = mask_targets[select]
        gt_boxes_area = gt_boxes_area[select]

    # only positive boxes contribute to regression loss and mask loss
    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, pos_inds.numel() * self.regress_norm))

    # torch.mean (in binary_cross_entropy_with_logits) doesn't
    # accept empty tensors, so handle it separately
    if mask_targets.numel() == 0:
        yolact_mask_loss = mask_pred.sum() * 0
    else:
        if self.mask_with_logits:
            yolact_mask_loss = F.binary_cross_entropy_with_logits(
                mask_pred, mask_targets, reduction='none')
        else:
            yolact_mask_loss = F.binary_cross_entropy(
                mask_pred, mask_targets, reduction='none')
        # reweight mask loss by dividing the area of ground-truth boxes
        yolact_mask_loss = yolact_mask_loss.sum(dim=(1, 2)) / gt_boxes_area
        yolact_mask_loss = yolact_mask_loss.sum() / (
            max(1, pos_inds.numel() * self.mask_norm))

    if DEBUG:
        print('pos_inds.numel():', pos_inds.numel())
        print('gt_boxes_area.shape:', gt_boxes_area.shape)

    labels = labels.int()
    retinanet_cls_loss = self.box_cls_loss_func(
        box_cls, labels
    ) / (pos_inds.numel() + N)

    return retinanet_cls_loss, retinanet_regression_loss, yolact_mask_loss
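# A minimal sketch (dummy tensors, assumed 28x28 mask size) of the area
# reweighting used in the YOLACT-style mask loss above: per-mask BCE is summed
# over pixels, then divided by each ground-truth box area so large instances
# do not dominate the loss.
import torch
import torch.nn.functional as F

def _area_weighted_mask_loss_demo():
    mask_pred = torch.randn(3, 28, 28)                       # logits for 3 masks
    mask_targets = torch.randint(0, 2, (3, 28, 28)).float()  # binary GT masks
    gt_boxes_area = torch.tensor([400.0, 90.0, 2500.0])      # per-instance areas
    loss = F.binary_cross_entropy_with_logits(mask_pred, mask_targets, reduction='none')
    loss = loss.sum(dim=(1, 2)) / gt_boxes_area              # area-normalized, per mask
    return loss.sum()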
def forward(self, images, iteration=None, targets=None):
    """
    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] with additional fields
            like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    images = to_image_list(images)
    features = self.backbone(images.tensors)

    # Retina RPN Output
    rpn_features = features

    mask_all = []
    for rpn_feat in features:
        num_batch = rpn_feat.shape[0]
        num_channel = rpn_feat.shape[1]
        num_height = rpn_feat.shape[2]
        num_width = rpn_feat.shape[3]
        # compute cam with conv feat
        feat_channel_mean = torch.mean(
            rpn_feat.view(num_batch, num_channel, -1), dim=2)
        feat_channel_mean = feat_channel_mean.view(num_batch, num_channel, 1, 1)
        cam = torch.sum(rpn_feat * feat_channel_mean, 1)  # [B 1 H W]
        mask_all.append(cam)

    # Inverted Attention
    if self.cfg.FREEANCHOR.IA_ON and self.training and iteration is not None:
        rpn_features_tmp = []
        for feat_idx, rpn_feat in enumerate(rpn_features):
            rpn_features_tmp.append(rpn_feat.clone().detach())
        rpn_features_tmp = tuple(rpn_features_tmp)
        # the ratio of IA
        max_iteration = self.cfg.SOLVER.MAX_ITER
        ratio = self.ratio_function(self.cfg.FREEANCHOR.IA_TYPE, max_iteration, iteration)
        if self.cfg.FREEANCHOR.IA_FEAT:
            if self.cfg.FREEANCHOR.IA_FEAT_TYPE == 0:
                mask = self.IA_feat(rpn_features_tmp, ratio)
            else:
                mask = self.IA_feat2(rpn_features_tmp, ratio)
        else:
            mask = self.IA_grad(images, rpn_features_tmp, targets, ratio)

    if self.cfg.RETINANET.BACKBONE == "p2p7":
        rpn_features = features[1:]
    if self.cfg.FREEANCHOR.IA_ON and self.training:
        (anchors, detections), detector_losses = self.rpn(
            images, rpn_features, mask, targets=targets)
    else:
        (anchors, detections), detector_losses = self.rpn(
            images, rpn_features, targets=targets)

    if self.training:
        losses = {}
        losses.update(detector_losses)
        if self.mask:
            if self.cfg.MODEL.MASK_ON:
                # Padding the GT
                proposals = []
                for (image_detections, image_targets) in zip(detections, targets):
                    merge_list = []
                    if not isinstance(image_detections, list):
                        merge_list.append(image_detections.copy_with_fields('labels'))
                    if not isinstance(image_targets, list):
                        merge_list.append(image_targets.copy_with_fields('labels'))
                    if len(merge_list) == 1:
                        proposals.append(merge_list[0])
                    else:
                        proposals.append(cat_boxlist(merge_list))
                x, result, mask_losses = self.mask(features, proposals, targets)
            elif self.cfg.MODEL.SPARSE_MASK_ON:
                x, result, mask_losses = self.mask(features, anchors, targets)
            losses.update(mask_losses)
        return losses
    else:
        if self.mask:
            proposals = []
            for image_detections in detections:
                num_of_detections = image_detections.bbox.shape[0]
                if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0:
                    cls_scores = image_detections.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        num_of_detections - self.cfg.RETINANET.NUM_MASKS_TEST + 1
                    )
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    image_detections = image_detections[keep]
                proposals.append(image_detections)
            if self.cfg.MODEL.SPARSE_MASK_ON:
                x, detections, mask_losses = self.mask(features, proposals, targets)
            else:
                x, detections, mask_losses = self.mask(features, proposals, targets)
        return detections
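# A standalone sketch of the channel-attention CAM computed in the forward
# pass above, on a dummy feature map: each channel is weighted by its global
# average activation, then the channels are summed. Shapes are hypothetical.
import torch

def _cam_demo():
    feat = torch.randn(2, 256, 32, 32)                   # [B, C, H, W]
    channel_mean = feat.mean(dim=(2, 3), keepdim=True)   # [B, C, 1, 1]
    cam = (feat * channel_mean).sum(dim=1)               # [B, H, W]
    return cam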
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    has_offsets = boxlists[0].has_field("offsets")
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        if has_offsets:
            offsets = boxlists[i].get_field("offsets")
            locations = boxlists[i].get_field("locations")
            rec_masks = boxlists[i].get_field("rec_masks")
        beziers = boxlists[i].get_field("beziers")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # skip the background
        for j in range(1, self.num_classes):
            inds = (labels == j).nonzero().view(-1)

            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            beziers_j = beziers[inds, :].view(-1, 16)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class.add_field("beziers", beziers_j)
            if has_offsets:
                boxlist_for_class.add_field("offsets", offsets[inds])
                boxlist_for_class.add_field("locations", locations[inds])
                boxlist_for_class.add_field("rec_masks", rec_masks[inds])
            boxlist_for_class = boxlist_nms(
                boxlist_for_class, self.nms_thresh,
                score_field="scores"
            )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels,), j, dtype=torch.int64, device=scores.device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
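# A minimal illustration of the torch.kthvalue trick used above to cap the
# number of detections: keeping scores >= the (M - top_n + 1)-th smallest
# score keeps the top_n largest (ties can keep a few more). Values made up.
import torch

def _kthvalue_topn_demo():
    scores = torch.tensor([0.9, 0.1, 0.7, 0.4, 0.8])
    top_n = 3
    thresh, _ = torch.kthvalue(scores, scores.numel() - top_n + 1)
    keep = torch.nonzero(scores >= thresh).squeeze(1)
    return keep  # indices of the 3 highest-scoring detections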
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(N, -1)
        box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)

    # keep bbox_regression
    box_regression_reploss = cat(box_regression_flattened, dim=1)
    batches = box_regression_reploss.shape[0]
    num_anchors = box_regression_reploss.shape[1]

    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    box_loss_tmp = smooth_l1(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
    )

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds])

    ######################################################
    anchor_flattened = []
    for anchor_per in anchors:
        anchor_flattened.append(anchor_per.bbox)
    anchors_bbox = anchor_flattened

    targets_bbox_flattened = []
    for targets_bbox_per in targets:
        targets_bbox_flattened.append(targets_bbox_per.bbox)
    targets_box = targets_bbox_flattened

    RepGT_losses = 0
    RepBox_losses = 0
    tmp_index = 0
    for batch in range(batches):
        box_regression_dx = box_regression_reploss[batch, :, 0]
        box_regression_dy = box_regression_reploss[batch, :, 1]
        box_regression_dw = box_regression_reploss[batch, :, 2]
        box_regression_dh = box_regression_reploss[batch, :, 3]

        targets_box_batch = targets_box[batch]
        anchors_bbox_batch = anchors_bbox[batch]

        # select the positive samples that belong to this image of the batch
        inds_ge = sampled_pos_inds.ge(batch * num_anchors)
        inds_le = sampled_pos_inds.le(batch * num_anchors + num_anchors - 1)
        inds_bet = inds_ge * inds_le
        sampled_pos_inds_batch = sampled_pos_inds[inds_bet] % num_anchors

        if len(sampled_pos_inds_batch) != 0:
            # decode the predicted boxes from the anchors and the deltas
            anchors_bbox_cx = (anchors_bbox_batch[:, 0] + anchors_bbox_batch[:, 2]) / 2.0
            anchors_bbox_cy = (anchors_bbox_batch[:, 1] + anchors_bbox_batch[:, 3]) / 2.0
            anchors_bbox_w = anchors_bbox_batch[:, 2] - anchors_bbox_batch[:, 0] + 1
            anchors_bbox_h = anchors_bbox_batch[:, 3] - anchors_bbox_batch[:, 1] + 1

            predict_w = torch.exp(box_regression_dw) * anchors_bbox_w
            predict_h = torch.exp(box_regression_dh) * anchors_bbox_h
            predict_x = box_regression_dx * anchors_bbox_w + anchors_bbox_cx
            predict_y = box_regression_dy * anchors_bbox_h + anchors_bbox_cy
            predict_x1 = predict_x - 0.5 * predict_w
            predict_y1 = predict_y - 0.5 * predict_h
            predict_x2 = predict_x + 0.5 * predict_w
            predict_y2 = predict_y + 0.5 * predict_h
            predict_boxes = torch.stack(
                (predict_x1, predict_y1, predict_x2, predict_y2)).t()
            predict_boxes_pos = predict_boxes[sampled_pos_inds_batch, :]

            IoU = calc_iou(anchors_bbox_batch, targets_box_batch[:, :4])  # num_anchors x num_annotations
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            # add RepGT losses: repel each prediction from its second-best
            # matching ground-truth box
            IoU_pos = IoU[sampled_pos_inds_batch, :]
            IoU_max_keep, IoU_argmax_keep = torch.max(IoU_pos, dim=1, keepdim=True)  # num_anchors x 1
            for idx in range(IoU_argmax_keep.shape[0]):
                IoU_pos[idx, IoU_argmax_keep[idx]] = -1
            IoU_sec, IoU_argsec = torch.max(IoU_pos, dim=1)
            assigned_annotations_sec = targets_box_batch[IoU_argsec, :]

            box_loss_tmp_batch = box_loss_tmp[tmp_index:tmp_index + sampled_pos_inds_batch.shape[0]]
            box_loss_tmp_batch = torch.sum(box_loss_tmp_batch, dim=1)

            IoG_to_minimize = IoG(assigned_annotations_sec, predict_boxes_pos)
            RepGT_loss = smooth_ln(IoG_to_minimize, 0.5)
            RepGT_loss = RepGT_loss * torch.lt(0.1 * RepGT_loss, box_loss_tmp_batch).float()
            RepGT_loss = RepGT_loss.mean() / sampled_pos_inds.numel()
            RepGT_losses += RepGT_loss

            # add RepBox losses: repel predictions assigned to different
            # ground-truth boxes from each other
            IoU_argmax_pos = IoU_argmax[sampled_pos_inds_batch].float()
            IoU_argmax_pos = IoU_argmax_pos.unsqueeze(0).t()
            predict_boxes_pos = torch.cat([predict_boxes_pos, IoU_argmax_pos], dim=1)
            predict_boxes_pos_np = predict_boxes_pos.detach().cpu().numpy()
            num_gt = targets_box_batch.shape[0]
            predict_boxes_pos_sampled = []
            box_loss_tmp_batch_sampled = []
            for id in range(num_gt):
                index = np.where(predict_boxes_pos_np[:, 4] == id)[0]
                if index.shape[0]:
                    idx = random.choice(range(index.shape[0]))
                    predict_boxes_pos_sampled.append(predict_boxes_pos[index[idx], :4])
                    box_loss_tmp_batch_sampled.append(box_loss_tmp_batch[index[idx]])
            predict_boxes_pos_sampled = torch.stack(predict_boxes_pos_sampled)
            box_loss_tmp_batch_sampled = torch.stack(box_loss_tmp_batch_sampled)

            iou_repbox = calc_iou(predict_boxes_pos_sampled, predict_boxes_pos_sampled)
            mask = torch.lt(iou_repbox, 1.).float()
            iou_repbox = iou_repbox * mask
            RepBox_loss = smooth_ln(iou_repbox, 0.5)
            RepBox_loss = RepBox_loss * torch.lt(
                0.85 * RepBox_loss, box_loss_tmp_batch_sampled).float()
            RepBox_loss = RepBox_loss.sum() / sampled_pos_inds.numel()
            RepBox_losses += RepBox_loss

            tmp_index += sampled_pos_inds_batch.shape[0]

    # drop into the debugger if any loss went NaN
    if RepBox_losses != RepBox_losses or RepGT_losses != RepGT_losses or box_loss != box_loss:
        import pdb
        pdb.set_trace()

    RepGT_losses /= batches
    RepBox_losses /= batches
    reg_loss = box_loss + 0.1 * RepGT_losses + 0.7 * RepBox_losses

    return objectness_loss, reg_loss
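# The smooth_ln helper called above is not shown in this snippet; a plausible
# implementation, following the definition in the Repulsion Loss paper
# (Wang et al., CVPR 2018), is sketched below. The actual helper may differ.
import math
import torch

def smooth_ln_sketch(x, sigma=0.5):
    # x is an IoU/IoG value in [0, 1); callers mask out x == 1 beforehand
    return torch.where(
        x <= sigma,
        -torch.log(1 - x),
        (x - sigma) / (1 - sigma) - math.log(1 - sigma),
    )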
def mine_boxes(p_trainval, ov_th, score_th, mined_class_label=1, visualize=False):
    mined_images = 0
    mined_boxes = 0
    annos = []
    id = 0
    for img_id, (t, p) in tqdm(p_trainval.items(), mininterval=20):
        p = p.resize(t.size)
        p.add_field('labels',
                    (p.get_field('labels') > 0).to(torch.long) * mined_class_label)
        p = boxlist_nms(p, 0.4)
        s = p.get_field('scores')
        # Strategy 1: keep at least one box per image even if the score is low
        # p = p[s >= min(score_th, s.max())]
        # Strategy 2: keep only the high-score ones
        p = p[s >= score_th]
        if len(p) and len(t):
            ious = boxlist_overlap1(p, t)
            ious = ious.max(1)[0]
            # keep only predictions that do not overlap existing annotations
            p = p[ious < ov_th]
            if len(p):
                mined_images += 1
                mined_boxes += len(p)
                del p.extra_fields['scores']
                t = cat_boxlist((t, p))
                if visualize:
                    path = datasets[0].coco.loadImgs(img_id)[0]['file_name']
                    img = Image.open(os.path.join(datasets[0].root, path)).convert('RGB')
                    plt.imshow(img)
                    for i in range(len(t)):
                        x0, y0, x1, y1 = t.bbox[i]
                        w, h = x1 - x0 + 1, y1 - y0 + 1
                        plt.gca().add_patch(Rectangle(
                            (x0, y0), w, h, alpha=0.9, facecolor='none',
                            edgecolor='green', linewidth=1.5))
                    for i in range(len(p)):
                        x0, y0, x1, y1 = p.bbox[i]
                        w, h = x1 - x0 + 1, y1 - y0 + 1
                        plt.gca().add_patch(Rectangle(
                            (x0, y0), w, h, alpha=0.9, facecolor='none',
                            edgecolor='red', linewidth=1))
                    print(img_id, t, p, mined_images)
                    plt.show()
        boxes = t.bbox.cpu().numpy().copy()
        labels = t.get_field('labels').tolist()
        for i in range(len(boxes)):
            bbox = boxes[i]
            # convert inclusive xyxy corners to COCO-style xywh
            bbox[2:] -= bbox[:2] - 1
            bbox = bbox.tolist()
            id += 1
            anno = {'area': bbox[2] * bbox[3], 'iscrowd': 0,
                    'image_id': int(img_id), 'bbox': bbox,
                    'category_id': labels[i], 'id': id, 'ignore': 0}
            annos.append(anno)
    print('mined_images', mined_images, 'mined_boxes', mined_boxes)
    return annos
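# The in-place conversion above (bbox[2:] -= bbox[:2] - 1) turns inclusive
# [x0, y0, x1, y1] corners into COCO-style [x, y, w, h] with w = x1 - x0 + 1.
# A toy check with made-up coordinates:
import numpy as np

def _xyxy_to_xywh_demo():
    box = np.array([10.0, 20.0, 19.0, 39.0])
    box[2:] -= box[:2] - 1
    assert box.tolist() == [10.0, 20.0, 10.0, 20.0]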
def filter_results(self, objectlist, num_classes):
    boxlist_left = objectlist.get_field("left_box")
    boxlist_right = objectlist.get_field("right_box")
    boxes_left = boxlist_left.bbox.reshape(-1, num_classes * 4)
    boxes_right = boxlist_right.bbox.reshape(-1, num_classes * 4)
    centers_left = objectlist.get_field("left_centers").reshape(-1, num_classes * 2)
    centers_right = objectlist.get_field("right_centers").reshape(-1, num_classes * 2)
    dimensions = objectlist.get_field("dimensions").reshape(-1, num_classes * 3)
    rotations = objectlist.get_field("rotations").reshape(-1, num_classes)
    scores = objectlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device

    result_box_left = []
    result_box_right = []
    result_center_left = []
    result_center_right = []
    result_dimensions = []
    result_rotations = []

    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_left_j = boxes_left[inds, j * 4:(j + 1) * 4]
        boxes_right_j = boxes_right[inds, j * 4:(j + 1) * 4]
        centers_left_j = centers_left[inds, j * 2:(j + 1) * 2]
        centers_right_j = centers_right[inds, j * 2:(j + 1) * 2]
        dimensions_j = dimensions[inds, j * 3:(j + 1) * 3]
        rotations_j = rotations[inds, j]

        boxlist_left_for_class = BoxList(boxes_left_j, boxlist_left.size, mode="xyxy")
        boxlist_right_for_class = BoxList(boxes_right_j, boxlist_right.size, mode="xyxy")
        boxlist_left_for_class.add_field("scores", scores_j)
        boxlist_right_for_class.add_field("scores", scores_j)

        keep, mode = boxlist_nms_stereo_td(
            boxlist_left_for_class, boxlist_right_for_class, self.nms)
        boxlist_left_for_class = boxlist_left_for_class[keep].convert(mode)
        boxlist_right_for_class = boxlist_right_for_class[keep].convert(mode)
        centers_left_for_class = centers_left_j[keep]
        centers_right_for_class = centers_right_j[keep]
        dimensions_for_class = dimensions_j[keep]
        rotations_for_class = rotations_j[keep]

        num_labels = len(boxlist_left_for_class)
        labels = torch.full((num_labels,), j, dtype=torch.int64, device=device)
        boxlist_left_for_class.add_field("labels", labels)
        boxlist_right_for_class.add_field("labels", labels)

        result_box_left.append(boxlist_left_for_class)
        result_box_right.append(boxlist_right_for_class)
        result_center_left.append(centers_left_for_class)
        result_center_right.append(centers_right_for_class)
        result_dimensions.append(dimensions_for_class)
        result_rotations.append(rotations_for_class)

    result_box_left = cat_boxlist(result_box_left)
    result_box_right = cat_boxlist(result_box_right)
    result_center_left = torch.cat(result_center_left)
    result_center_right = torch.cat(result_center_right)
    result_dimensions = torch.cat(result_dimensions)
    result_rotations = torch.cat(result_rotations)
    number_of_detections = len(result_box_left)

    result = ObjectList()
    result.add_field("left_box", result_box_left)
    result.add_field("right_box", result_box_right)
    result.add_field("left_centers", result_center_left)
    result.add_field("right_centers", result_center_right)
    result.add_field("dimensions", result_dimensions)
    result.add_field("rotations", result_rotations)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result_box_left.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def __init__(self, cfg, bias, arch="RetinaNet"):
    device = torch.device(cfg.MODEL.DEVICE)
    if arch == "RetinaNet":
        anchor_generator = make_anchor_generator_retinanet(cfg)
        fg_iou, bg_iou = cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, cfg.MODEL.RETINANET.BG_IOU_THRESHOLD
        num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - 1
        num_anchors = len(cfg.MODEL.RETINANET.ASPECT_RATIOS) \
            * cfg.MODEL.RETINANET.SCALES_PER_OCTAVE
    else:
        assert arch == "RPN"
        anchor_generator = make_anchor_generator(cfg)
        fg_iou, bg_iou = cfg.MODEL.RPN.FG_IOU_THRESHOLD, cfg.MODEL.RPN.BG_IOU_THRESHOLD
        num_classes = 1
        num_anchors = anchor_generator.num_anchors_per_location()[0]

    prior = load_prior(cfg, arch)
    if prior is not None:
        nn.init.constant_(bias, -log((1 - prior) / prior))
        return

    data_loader = make_init_data_loader(
        cfg, is_distributed=True, images_per_batch=cfg.SOLVER.IMS_PER_BATCH)

    proposal_matcher = Matcher(
        fg_iou,
        bg_iou,
        allow_low_quality_matches=True,
    )

    backbone = build_backbone(cfg).to(device)

    num_fg, num_all = 0, 0
    num_gpus = get_num_gpus()
    for images, targets, _ in tqdm(data_loader):
        images = images.to(device)
        targets = [target.to(device) for target in targets]
        h, w = images.tensors.shape[-2:]
        # run the backbone once to discover the feature shapes, then only
        # build zero tensors of the right sizes on later iterations
        if num_all == 0:
            features = backbone(images.tensors)
            n, c = features[0].shape[:2]
            levels = len(features)
            stride = int(h / features[0].shape[2])
        features = [
            torch.zeros(n, c,
                        int(ceil(h / (stride * 2 ** i))),
                        int(ceil(w / (stride * 2 ** i))),
                        device=device)
            for i in range(levels)
        ]
        anchors = anchor_generator(images, features)
        anchors = [
            cat_boxlist(anchors_per_image).to(device)
            for anchors_per_image in anchors
        ]
        for anchor, target in zip(anchors, targets):
            match_quality_matrix = boxlist_iou(target, anchor)
            matched_idxs = proposal_matcher(match_quality_matrix)
            num_fg_per_image = (matched_idxs >= 0).sum()
            num_bg_per_image = (matched_idxs == Matcher.BELOW_LOW_THRESHOLD).sum()
            num_fg += num_fg_per_image
            num_all += num_fg_per_image + num_bg_per_image

    fg_all_ratio = reduce_div(num_fg.float(), num_all.float(), num_gpus).item()
    prior = fg_all_ratio / num_classes
    nn.init.constant_(bias, -log((1 - prior) / prior))
    if torch.cuda.current_device() == 0:
        save_prior(cfg, prior, arch)
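# The bias init above follows the focal-loss prior trick (Lin et al., 2017):
# setting b = -log((1 - pi) / pi) makes sigmoid(b) = pi, so every anchor
# starts with foreground probability pi. A quick check with a toy prior:
import math

def _prior_bias_demo(pi=0.01):
    b = -math.log((1 - pi) / pi)
    assert abs(1 / (1 + math.exp(-b)) - pi) < 1e-9
    return b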
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets, matched_gt_ids, \
        matched_gt_ious = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    labels = torch.cat(labels, dim=0)

    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    total_pos = sampled_pos_inds.numel()
    total_neg = sampled_neg_inds.numel()
    total_samples = total_pos + total_neg

    objectness, box_regression = concat_box_prediction_layers(
        objectness, box_regression)
    objectness = objectness.squeeze()

    if total_pos == 0:
        return objectness.sum() * 0, objectness.sum() * 0

    regression_targets = torch.cat(regression_targets, dim=0)

    with torch.no_grad():
        # offset the per-image gt indices so they are unique across the batch
        start_gt_idx = 0
        for ix, t in enumerate(targets):
            matched_gt_ids[ix] += start_gt_idx
            start_gt_idx += len(t)
        matched_gt_ids = torch.cat(matched_gt_ids)
        pos_matched_gt_ids = matched_gt_ids[sampled_pos_inds]
        pos_label_weights = torch.zeros_like(pos_matched_gt_ids, dtype=torch.float32)
        label_idxs = [
            torch.nonzero(pos_matched_gt_ids == x).squeeze()
            for x in range(start_gt_idx)
        ]
        label_cnts = [li.numel() for li in label_idxs]
        for x in range(start_gt_idx):
            if label_cnts[x] > 0:
                # equal class weighting
                pos_label_weights[label_idxs[x]] = total_pos / label_cnts[x] / start_gt_idx

    # balance negative and positive weights
    sampled_labels = labels[sampled_inds]
    objectness_weights = torch.ones_like(sampled_labels, dtype=torch.float32)
    objectness_weights[sampled_labels == 1] = pos_label_weights
    objectness_weights[sampled_labels != 1] = min(pos_label_weights.min(), 0.5)

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], sampled_labels,
        weight=objectness_weights)

    box_reg = box_regression[sampled_pos_inds]
    box_reg_targets = regression_targets[sampled_pos_inds]
    # the last regression channel is the angle; it gets its own loss term
    box_loss = smooth_l1_loss(
        box_reg[:, :-1],
        box_reg_targets[:, :-1],
        beta=1.0 / 9,
    ).sum() / total_samples
    angle_loss = smooth_angle_loss(
        box_reg[:, -1], box_reg_targets[:, -1]).sum() / total_samples
    box_loss = box_loss + angle_loss

    return objectness_loss, box_loss
def __call__(self, batch):
    transposed_batch = list(zip(*batch))
    if self.mode == 0:
        images = to_image_list(transposed_batch[0], self.size_divisible)
        targets = transposed_batch[1]
        img_ids = transposed_batch[2]
        if self.special_deal:
            if self.post_branch == "retina":
                grid_sizes = [(math.ceil(self.crop_size / r), math.ceil(self.crop_size / r))
                              for r in (8, 16, 32, 64, 128)]
                mini_batch_size = len(targets)
                anchors = self.anchor_generator.get_anchors(
                    mini_batch_size, self.crop_size, grid_sizes)
                anchors = [cat_boxlist(anchors_per_image)
                           for anchors_per_image in anchors]
                labels, regression_targets = self.loss_evaluator.prepare_targets(
                    anchors, targets)
                # cat labels (list) and regression_targets (list) into one
                # single tensor separately
                labels = torch.cat(labels, dim=0)
                regression_targets = torch.cat(regression_targets, dim=0)
                targets = {
                    'labels': labels,
                    'regression_targets': regression_targets
                }
            else:
                strides = [8, 16, 32, 64, 128]
                feature_sizes = [(math.ceil(self.crop_size / r), math.ceil(self.crop_size / r))
                                 for r in (8, 16, 32, 64, 128)]
                points = []
                for level, size in enumerate(feature_sizes):
                    h, w = size
                    points_per_level = self.generate_points_per_level(
                        h, w, strides[level], torch.device("cpu")
                    )
                    points.append(points_per_level)
                cls_targets, reg_targets = self.loss_evaluator.prepare_targets(
                    points, targets)
                cls_targets_flatten = []
                reg_targets_flatten = []
                for l in range(len(cls_targets)):
                    cls_targets_flatten.append(cls_targets[l].reshape(-1))
                    reg_targets_flatten.append(reg_targets[l].reshape(-1, 4))
                cls_targets_flatten = torch.cat(cls_targets_flatten, dim=0)
                reg_targets_flatten = torch.cat(reg_targets_flatten, dim=0)
                targets = {
                    'cls_targets_flatten': cls_targets_flatten,
                    'reg_targets_flatten': reg_targets_flatten
                }
    elif self.mode == 1:
        feature_list = transposed_batch[0]
        feature_list_zip = zip(*feature_list)
        feature_list_flatten = []
        for feature_per_level in feature_list_zip:
            feature_per_level = [torch.unsqueeze(xaf, dim=0) for xaf in feature_per_level]
            feature_per_level_batch = torch.cat(feature_per_level, dim=0)
            feature_list_flatten.append(feature_per_level_batch)
        images = feature_list_flatten
        if self.special_deal:
            if self.post_branch == "retina":
                labels = transposed_batch[1]
                regression_targets = transposed_batch[2]
                # cat labels (list) and regression_targets (list) into one
                # single tensor separately
                labels = torch.cat(labels, dim=0)
                regression_targets = torch.cat(regression_targets, dim=0)
                targets = {
                    'labels': labels,
                    'regression_targets': regression_targets
                }
            else:
                densebox_labels = list(transposed_batch[1])
                densebox_regs = list(transposed_batch[2])
                # TODO: automatically change num_points_per_level according to crop_size
                # num_points_per_level = [4096, 1024, 256, 64, 16]
                num_points_per_level = [2304, 576, 144, 36, 9]
                for xi in range(len(densebox_labels)):
                    densebox_labels[xi] = torch.split(
                        densebox_labels[xi], num_points_per_level, dim=0)
                    densebox_regs[xi] = torch.split(
                        densebox_regs[xi], num_points_per_level, dim=0)
                densebox_labels_level_first = []
                densebox_regs_level_first = []
                for level in range(len(num_points_per_level)):
                    densebox_labels_level_first.append(
                        torch.cat([densebox_labels_per_im[level]
                                   for densebox_labels_per_im in densebox_labels], dim=0))
                    densebox_regs_level_first.append(
                        torch.cat([densebox_regs_per_im[level]
                                   for densebox_regs_per_im in densebox_regs], dim=0))
                cls_targets_flatten = []
                reg_targets_flatten = []
                for xl in range(len(densebox_labels_level_first)):
                    cls_targets_flatten.append(densebox_labels_level_first[xl].reshape(-1))
                    reg_targets_flatten.append(densebox_regs_level_first[xl].reshape(-1, 4))
                cls_targets_flatten = torch.cat(cls_targets_flatten, dim=0)
                reg_targets_flatten = torch.cat(reg_targets_flatten, dim=0)
                targets = {
                    "cls_targets_flatten": cls_targets_flatten,
                    "reg_targets_flatten": reg_targets_flatten
                }
            img_ids = transposed_batch[3]
        else:
            targets = transposed_batch[1]
            img_ids = transposed_batch[2]
    else:
        raise ValueError("No mode {} for data batch collect_fn".format(self.mode))
    return images, targets, img_ids
def forward(self, anchors, objectness, box_regression, targets=None,
            centerness=None, rpn_center_box_regression=None, centerness_pack=None):
    """
    Arguments:
        anchors: list[list[BoxList]]
        objectness: list[tensor]
        box_regression: list[tensor]

    Returns:
        boxlists (list[BoxList]): the post-processed anchors, after
            applying box decoding and NMS
    """
    sampled_boxes = []
    num_levels = len(objectness)
    anchors = list(zip(*anchors))
    for a, o, b in zip(anchors, objectness, box_regression):
        sampled_boxes.append(self.forward_for_single_feature_map(a, o, b))

    boxlists = list(zip(*sampled_boxes))
    boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]

    if num_levels > 1:
        boxlists = self.select_over_all_levels(boxlists)

    # append ground-truth bboxes to proposals
    if self.training and targets is not None:
        boxlists = self.add_gt_proposals(boxlists, targets)

    if self.pred_targets:
        pred_targets = []
        if True:
            for img_centerness, center_box_reg in zip(
                    centerness, rpn_center_box_regression):
                img_centerness = img_centerness[0, :, :]
                center_box_reg = center_box_reg[:, :, :].permute(1, 2, 0)
                # build a dense grid of 32x32 anchors with stride 4,
                # clipped to the image boundaries
                anchor_bbox = np.zeros(shape=(center_box_reg.shape[0], center_box_reg.shape[1], 4))
                for xx in range(anchor_bbox.shape[1]):
                    for yy in range(anchor_bbox.shape[0]):
                        anchor_bbox[yy, xx, :] = [
                            max(0.0, xx * 4 - 16),
                            max(0.0, yy * 4 - 16),
                            min(xx * 4 + 16, boxlists[0].size[0]),
                            min(yy * 4 + 16, boxlists[0].size[1])
                        ]
                anchor_bbox = torch.as_tensor(anchor_bbox, device=center_box_reg.device)
                boxes = self.box_coder.decode(
                    center_box_reg.reshape(-1, 4), anchor_bbox.view(-1, 4))
                pred_target = None
                pred_score = torch.sigmoid(img_centerness.detach()).cpu().numpy()
                pred_mask = pred_score > 0.95
                imllabel, numlabel = scipy.ndimage.label(pred_mask)
                if numlabel > 0:
                    valid = np.zeros(shape=(numlabel,), dtype=bool)
                    box_inds = []
                    for ano in range(1, numlabel + 1):
                        mask = imllabel == ano
                        valid[ano - 1] = True
                        box_inds.append(np.argmax(pred_score * mask))
                    if np.any(valid):
                        boxes = boxes[box_inds, :]
                        pred_target = BoxList(torch.as_tensor(boxes),
                                              boxlists[0].size, mode="xyxy")
                        pred_target.clip_to_image()
                        pred_target = pred_target.to(img_centerness.device)
                pred_targets.append(pred_target)
        else:
            for img_centerness in centerness:
                pred_target = None
                pred_mask = torch.sigmoid(
                    img_centerness[0, :, :].detach()).cpu().numpy() > 0.95
                imllabel, numlabel = scipy.ndimage.label(pred_mask)
                if numlabel > 0:
                    masks = np.zeros(shape=(pred_mask.shape[0], pred_mask.shape[1], numlabel),
                                     dtype=np.uint8)
                    valid = np.zeros(shape=(numlabel,), dtype=bool)
                    for ano in range(1, numlabel + 1):
                        mask = imllabel == ano
                        valid[ano - 1] = True
                        masks[:, :, ano - 1] = mask
                    if np.any(valid):
                        masks = masks[:, :, valid]
                        boxes = extract_bboxes(masks)
                        pred_target = BoxList(torch.as_tensor(boxes),
                                              boxlists[0].size, mode="xyxy")
                        pred_target.clip_to_image()
                        pred_target = pred_target.to(img_centerness.device)
                pred_targets.append(pred_target)
        if True:
            if not self.training:
                print('add', [len(pred_target) for pred_target in pred_targets
                              if pred_target], 'proposals')
            boxlists = self.add_pred_proposals(boxlists, pred_targets)
    else:
        pred_targets = None

    return boxlists, pred_targets
def __call__(self, anchors, box_cls, box_regression, objectness_cls, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
        retinanet_objectness_loss (Tensor)
    """
    device = box_cls[0].device
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    labels, regression_targets = self.prepare_targets(anchors, targets)

    if self.classify_objectness_image:
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    N = len(labels)
    box_cls, box_regression, objectness_cls = \
        concat_box_prediction_layers(box_cls, box_regression, objectness_cls)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(labels > 0).squeeze(1)

    if pos_inds.numel() > 0:
        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_inds.numel() * self.regress_norm))
    else:
        retinanet_regression_loss = torch.tensor(0.0, device=device)
        self.logger.info("This batch has no positive anchors for bbox regression")

    if self.use_ignored_bbox:
        labels = labels.int()
        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls, labels) / (pos_inds.numel() + N)
    else:
        valid_inds1 = torch.nonzero(labels >= 0).squeeze(1)
        valid_inds2 = torch.nonzero(labels < -1).squeeze(1)
        valid_inds = torch.cat([valid_inds1, valid_inds2], dim=0)
        labels = labels.int()
        if valid_inds.numel() > 0:
            retinanet_cls_loss = self.box_cls_loss_func(
                box_cls[valid_inds], labels[valid_inds]) * 1000 / (max(1, valid_inds.numel()))
        else:
            retinanet_cls_loss = torch.tensor(0.0, device=device)
            self.logger.info("This batch has no valid anchors for bbox classification")

    if self.classify_objectness_image:
        objectness_labels = labels >= 1
        objectness_labels = objectness_labels.view(-1, 1)
        objectness_labels = objectness_labels.float()
        retinanet_objectness_loss = F.binary_cross_entropy_with_logits(
            objectness_cls[sampled_inds],
            objectness_labels[sampled_inds],
            reduction='sum') / (sampled_inds.numel() * self.objectness_norm)
    else:
        retinanet_objectness_loss = torch.tensor(0.0, device=device)

    return retinanet_cls_loss, retinanet_regression_loss, retinanet_objectness_loss
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    # HxWxSxA
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
    objectness, box_regression = \
        concat_box_prediction_layers(objectness, box_regression)
    objectness = objectness.squeeze()

    labels, regression_targets = self.prepare_targets(anchors, targets)

    # sample
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    )

    # ################################# add by hui ###################################
    if self.ohem_loss is None:
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])
        box_loss = box_loss / (sampled_inds.numel())
    else:
        objectness_loss = self.ohem_loss(objectness[sampled_inds],
                                         labels[sampled_inds])
        box_loss = box_loss / self.ohem_loss.sample_count
    # ################################################################################

    return objectness_loss, box_loss
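# The ohem_loss used above is external to this snippet; a plausible online
# hard example mining objectness loss, consistent with how it is called here
# (per-element BCE, plus a sample_count attribute that the caller uses as the
# box-loss normalizer), is sketched below under those assumptions.
import torch
import torch.nn.functional as F

class OHEMLossSketch(torch.nn.Module):
    def __init__(self, sample_count=256):
        super().__init__()
        self.sample_count = sample_count

    def forward(self, logits, labels):
        # keep only the hardest sample_count examples
        loss = F.binary_cross_entropy_with_logits(logits, labels, reduction='none')
        k = min(self.sample_count, loss.numel())
        topk, _ = loss.topk(k)
        return topk.mean()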
def filter_results_parallel(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    # the cpu version is faster than the gpu one; revert to gpu only after verifying
    boxlist = boxlist.to('cpu')
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    all_cls_boxlist_for_class = []
    for j in range(self.cls_start_idx, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        if len(inds) == 0:
            continue
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        all_cls_boxlist_for_class.append((j, boxlist_for_class))

    all_boxlist_for_class = [
        boxlist_for_class
        for _, boxlist_for_class in all_cls_boxlist_for_class
    ]
    from qd.qd_common import parallel_map
    # run the per-class NMS calls in parallel
    all_boxlist_for_class = parallel_map(self.nms_func, all_boxlist_for_class)

    for i, boxlist_for_class in enumerate(all_boxlist_for_class):
        j = all_cls_boxlist_for_class[i][0]
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)

    if len(result) > 0:
        result = cat_boxlist(result)
    else:
        return self.prepare_empty_boxlist(boxlist)
    number_of_detections = len(result)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
    return result
def filter_results(self, boxlist, num_classes, return_idx=False):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    scores = boxlist.get_field("scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    # save the kept indexes
    keep_inds = []
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("scores", scores_j)
        boxlist_for_class, keep_from_nms = boxlist_nms(
            boxlist_for_class, self.nms, return_idx=True)
        # find which boxes are kept after nms
        keep_from_nms = inds[keep_from_nms]
        if len(keep_inds) == 0:
            keep_inds = keep_from_nms
        else:
            keep_inds = torch.cat((keep_inds, keep_from_nms))
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)

    result = cat_boxlist(result)
    # NOTE: Nov 20, add a cross-class nms to further get rid of bad detections.
    result, keep = boxlist_nms(result, 0.8, return_idx=True)
    # map the cross-class nms survivors back to indices in the input boxlist
    keep_inds = keep_inds[keep]
    number_of_detections = len(result)

    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
        keep_inds = keep_inds[keep]

    if len(result) != len(keep_inds):
        print(result)
        print(keep_inds)
        raise ValueError(
            "The number of kept indexes differs from the length of the kept boxlist")
    if return_idx:
        return result, keep_inds
    return result, None
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        losses (dict[Tensor])
    """
    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]

    box_cls_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for box_cls_per_level, box_regression_per_level in zip(box_cls, box_regression):
        N, A, H, W = box_cls_per_level.shape
        C = self.num_classes
        box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
        box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
        box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
        box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_cls_flattened.append(box_cls_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1)
    box_regression = cat(box_regression_flattened, dim=1)

    cls_prob = torch.sigmoid(box_cls)
    box_prob = []
    positive_numels = 0
    positive_losses = []
    for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
            zip(anchors, targets, cls_prob, box_regression)):
        labels_ = targets_.get_field("labels") - 1

        with torch.set_grad_enabled(False):
            # box_localization: a_{j}^{loc}, shape: [j, 4]
            box_localization = self.box_coder.decode(box_regression_, anchors_.bbox)

            # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
            object_box_iou = boxlist_iou(
                targets_, BoxList(box_localization, anchors_.size, mode='xyxy'))

            t1 = self.bbox_threshold
            t2 = object_box_iou.max(dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)

            # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
            object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(min=0, max=1)

            indices = torch.stack(
                [torch.arange(len(labels_)).type_as(labels_), labels_], dim=0)

            # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
            object_cls_box_prob = torch.sparse_coo_tensor(indices, object_box_prob)

            # image_box_prob: P{a_{j} \in A_{+}}, shape: [j, c]
            """
            from "start" to "end" implement:
                image_box_prob = torch.sparse.max(object_cls_box_prob, dim=0).t()
            """
            # start
            indices = torch.nonzero(
                torch.sparse.sum(object_cls_box_prob, dim=0).to_dense()).t_()

            if indices.numel() == 0:
                image_box_prob = torch.zeros(
                    anchors_.bbox.size(0), self.num_classes).type_as(object_box_prob)
            else:
                nonzero_box_prob = torch.where(
                    (labels_.unsqueeze(dim=-1) == indices[0]),
                    object_box_prob[:, indices[1]],
                    torch.tensor([0]).type_as(object_box_prob)).max(dim=0).values
                image_box_prob = torch.sparse_coo_tensor(
                    indices.flip([0]), nonzero_box_prob,
                    size=(anchors_.bbox.size(0), self.num_classes)).to_dense()
            # end
            box_prob.append(image_box_prob)

        # construct bags for objects
        match_quality_matrix = boxlist_iou(targets_, anchors_)
        _, matched = torch.topk(match_quality_matrix, self.pre_anchor_topk,
                                dim=1, sorted=False)
        del match_quality_matrix

        # matched_cls_prob: P_{ij}^{cls}
        matched_cls_prob = torch.gather(
            cls_prob_[matched], 2,
            labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk, 1)).squeeze(2)

        # matched_box_prob: P_{ij}^{loc}
        matched_object_targets = self.box_coder.encode(
            targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched])
        retinanet_regression_loss = smooth_l1_loss(
            box_regression_[matched], matched_object_targets,
            *self.smooth_l1_loss_param)
        matched_box_prob = torch.exp(-retinanet_regression_loss)

        # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
        positive_numels += len(targets_)
        positive_losses.append(
            self.positive_bag_loss_func(matched_cls_prob * matched_box_prob, dim=1))

    # positive_loss: \sum_{i}{ -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) } / ||B||
    positive_loss = torch.cat(positive_losses).sum() / max(1, positive_numels)

    # box_prob: P{a_{j} \in A_{+}}
    box_prob = torch.stack(box_prob, dim=0)

    # negative_loss:
    # \sum_{j}{ FL( (1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}) ) } / n||B||
    negative_loss = self.negative_bag_loss_func(
        cls_prob * (1 - box_prob), self.focal_loss_gamma) / max(
            1, positive_numels * self.pre_anchor_topk)

    losses = {
        "loss_retina_positive": positive_loss * self.focal_loss_alpha,
        "loss_retina_negative": negative_loss * (1 - self.focal_loss_alpha),
    }
    return losses
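# positive_bag_loss_func above is configurable; a sketch of the Mean-max
# positive bag loss from the FreeAnchor paper (Zhang et al., NeurIPS 2019)
# is given below. The repository's actual function may differ in its details.
import torch

def positive_bag_loss_sketch(probs, dim):
    # Mean-max(X) = sum(x / (1 - x)) / sum(1 / (1 - x)): close to the max when
    # one probability dominates, close to the mean when they are all similar.
    weight = 1.0 / torch.clamp(1.0 - probs, min=1e-12)
    weight = weight / weight.sum(dim=dim, keepdim=True)
    bag_prob = (weight * probs).sum(dim=dim)
    return -torch.clamp(bag_prob, min=1e-12).log()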
def filter_results(self, boxlist, num_classes):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).
    """
    # unwrap the boxlist to avoid additional overhead.
    # if we had multi-class NMS, we could perform this directly on the boxlist
    boxes = boxlist.bbox.reshape(-1, num_classes * 4)
    boxes_per_cls = boxlist.bbox.reshape(-1, num_classes, 4)
    scores = boxlist.get_field("pred_scores").reshape(-1, num_classes)
    device = scores.device
    result = []
    orig_inds = []
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    inds_all = scores > self.score_thresh
    for j in range(1, num_classes):
        inds = inds_all[:, j].nonzero().squeeze(1)
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
        boxlist_for_class.add_field("pred_scores", scores_j)
        boxlist_for_class, keep = boxlist_nms(
            boxlist_for_class, self.nms,
            max_proposals=self.post_nms_per_cls_topn,
            score_field='pred_scores')
        inds = inds[keep]
        num_labels = len(boxlist_for_class)
        boxlist_for_class.add_field(
            "pred_labels",
            torch.full((num_labels,), j, dtype=torch.int64, device=device))
        result.append(boxlist_for_class)
        orig_inds.append(inds)

    # NOTE: kaihua, according to Neural-MOTIFS (and my experiments), we need
    # to remove duplicate bboxes
    if self.nms_filter_duplicates or self.save_proposals:
        assert len(orig_inds) == (num_classes - 1)
        # set all bg to zero
        inds_all[:, 0] = 0
        for j in range(1, num_classes):
            inds_all[:, j] = 0
            orig_idx = orig_inds[j - 1]
            inds_all[orig_idx, j] = 1
        dist_scores = scores * inds_all.float()
        scores_pre, labels_pre = dist_scores.max(1)
        final_inds = scores_pre.nonzero()
        assert final_inds.dim() != 0
        final_inds = final_inds.squeeze(1)

        scores_pre = scores_pre[final_inds]
        labels_pre = labels_pre[final_inds]

        result = BoxList(boxes_per_cls[final_inds, labels_pre], boxlist.size, mode="xyxy")
        result.add_field("pred_scores", scores_pre)
        result.add_field("pred_labels", labels_pre)
        orig_inds = final_inds
    else:
        result = cat_boxlist(result)
        orig_inds = torch.cat(orig_inds, dim=0)

    number_of_detections = len(result)
    # Limit to max_per_image detections **over all classes**
    if number_of_detections > self.detections_per_img > 0:
        cls_scores = result.get_field("pred_scores")
        image_thresh, _ = torch.kthvalue(
            cls_scores.cpu(),
            number_of_detections - self.detections_per_img + 1)
        keep = cls_scores >= image_thresh.item()
        keep = torch.nonzero(keep).squeeze(1)
        result = result[keep]
        orig_inds = orig_inds[keep]
    return result, orig_inds, boxes_per_cls[orig_inds]
def __call__(self, anchors, objectness, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                               dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                               dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(
            0, 2, 3, 1).reshape(N, -1)
        box_regression_per_level = box_regression_per_level.view(
            N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(
            0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    # cat anchors to match the flattened regression dim
    anchor_flattened = []
    for anchor_per in anchors:
        anchor_flattened.append(anchor_per.bbox)
    anchors_bbox = torch.cat(anchor_flattened, dim=0)

    # decode the regression outputs into absolute boxes
    box_regression_dx = box_regression[:, 0]
    box_regression_dy = box_regression[:, 1]
    box_regression_dw = box_regression[:, 2]
    box_regression_dh = box_regression[:, 3]

    anchors_bbox_cx = (anchors_bbox[:, 0] + anchors_bbox[:, 2]) / 2.0
    anchors_bbox_cy = (anchors_bbox[:, 1] + anchors_bbox[:, 3]) / 2.0
    anchors_bbox_w = anchors_bbox[:, 2] - anchors_bbox[:, 0] + 1
    anchors_bbox_h = anchors_bbox[:, 3] - anchors_bbox[:, 1] + 1

    predict_w = torch.exp(box_regression_dw) * anchors_bbox_w
    predict_h = torch.exp(box_regression_dh) * anchors_bbox_h
    predict_x = box_regression_dx * anchors_bbox_w + anchors_bbox_cx
    predict_y = box_regression_dy * anchors_bbox_h + anchors_bbox_cy

    predict_x1 = predict_x - 0.5 * predict_w
    predict_y1 = predict_y - 0.5 * predict_h
    predict_x2 = predict_x + 0.5 * predict_w
    predict_y2 = predict_y + 0.5 * predict_h
    predict_boxes = torch.stack(
        (predict_x1, predict_y1, predict_x2, predict_y2)).t()

    # weight the binary labels by the IoU between each anchor and its
    # decoded prediction
    predict_iou = onehot_iou(anchors_bbox, predict_boxes)

    labels = torch.cat(labels, dim=0) * predict_iou
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds])

    return objectness_loss, box_loss
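import torch

# onehot_iou is not defined in this excerpt; from its usage it appears to
# return, per anchor, the IoU between the anchor and the box decoded from its
# own regression output. A plausible sketch under that assumption (not the
# project's implementation), using the same "+1" width convention as the
# decoding above:
def elementwise_iou_sketch(boxes_a, boxes_b):
    # IoU between corresponding rows of two [N, 4] xyxy tensors.
    lt = torch.max(boxes_a[:, :2], boxes_b[:, :2])
    rb = torch.min(boxes_a[:, 2:], boxes_b[:, 2:])
    wh = (rb - lt + 1).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_a = (boxes_a[:, 2] - boxes_a[:, 0] + 1) * \
             (boxes_a[:, 3] - boxes_a[:, 1] + 1)
    area_b = (boxes_b[:, 2] - boxes_b[:, 0] + 1) * \
             (boxes_b[:, 3] - boxes_b[:, 1] + 1)
    return inter / (area_a + area_b - inter)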
def forward(self, images, targets=None, adapt=False):
    """
    Arguments:
        images (list[Tensor] or ImageList): images to be processed
        targets (list[BoxList]): ground-truth boxes present in the image (optional)

    Returns:
        result (list[BoxList] or dict[Tensor]): the output from the model.
            During training, it returns a dict[Tensor] which contains the losses.
            During testing, it returns a list[BoxList] with additional fields
            like `scores`, `labels` and `mask` (for Mask R-CNN models).
    """
    if self.training and targets is None:
        raise ValueError("In training mode, targets should be passed")
    images = to_image_list(images)
    features = self.backbone(images.tensors)

    # Retina RPN Output
    rpn_features = features
    if self.cfg.RETINANET.BACKBONE == "p2p7":
        rpn_features = features[1:]
    if adapt:
        return rpn_features
    (anchors, detections), detector_losses = self.rpn(images, rpn_features,
                                                      targets)
    if self.training:
        losses = {}
        losses.update(detector_losses)
        if self.mask:
            if self.cfg.MODEL.MASK_ON:
                # Padding the GT
                proposals = []
                for (image_detections,
                     image_targets) in zip(detections, targets):
                    merge_list = []
                    if not isinstance(image_detections, list):
                        merge_list.append(
                            image_detections.copy_with_fields('labels'))
                    if not isinstance(image_targets, list):
                        merge_list.append(
                            image_targets.copy_with_fields('labels'))
                    if len(merge_list) == 1:
                        proposals.append(merge_list[0])
                    else:
                        proposals.append(cat_boxlist(merge_list))
                x, result, mask_losses = self.mask(features, proposals,
                                                   targets)
            elif self.cfg.MODEL.SPARSE_MASK_ON:
                x, result, mask_losses = self.mask(features, anchors,
                                                   targets)
            losses.update(mask_losses)
        return losses
    else:
        if self.mask:
            proposals = []
            for image_detections in detections:
                num_of_detections = image_detections.bbox.shape[0]
                if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0:
                    cls_scores = image_detections.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        num_of_detections -
                        self.cfg.RETINANET.NUM_MASKS_TEST + 1)
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    image_detections = image_detections[keep]
                proposals.append(image_detections)
            # both the SPARSE_MASK_ON and the default config branch currently
            # invoke the mask head the same way
            x, detections, mask_losses = self.mask(features, proposals,
                                                   targets)
        return detections
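# A hedged usage sketch of the three call modes above (model construction and
# data loading omitted; `model` and `images` are illustrative names):
# rpn_feats = model(images, adapt=True)     # feature-level output for adaptation
# losses = model(images, targets=targets)   # training: dict of losses
# detections = model(images)                # inference: list[BoxList] per image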
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        scores = boxlists[i].get_field("scores")
        labels = boxlists[i].get_field("labels")
        boxes = boxlists[i].bbox
        boxlist = boxlists[i]
        result = []
        # labels are int64, scores/boxes float32; self.num_classes includes
        # the background class (e.g. 81 for COCO), a typical nms_thresh is 0.6
        # skip the background
        for j in range(1, self.num_classes):
            inds = (labels == j).nonzero().view(-1)

            scores_j = scores[inds]
            boxes_j = boxes[inds, :].view(-1, 4)
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            ############################## softNMS ##############################
            if self.nms_method == "nms":
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms_thresh,
                                                score_field="scores")
            elif self.nms_method == "soft_nms":
                boxlist_for_class = boxlist_soft_nms(boxlist_for_class,
                                                     self.nms_thresh,
                                                     score_field="scores")
            else:
                raise ValueError(
                    "unsupported nms method: {}".format(self.nms_method))
            ############################## softNMS ##############################
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ),
                           j,
                           dtype=torch.int64,
                           device=scores.device))
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        results.append(result)
    return results
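import torch

# boxlist_soft_nms is not shown in this excerpt. Soft-NMS (Bodla et al., 2017)
# decays the scores of overlapping boxes instead of discarding them. A sketch
# of the linear variant over raw tensors (`iou_fn` is an assumed pairwise-IoU
# helper; this is not the project's implementation):
def soft_nms_linear_sketch(boxes, scores, iou_fn, iou_thresh=0.6,
                           score_thresh=0.001):
    scores = scores.clone()
    keep = []
    idxs = torch.arange(scores.numel())
    while idxs.numel() > 0:
        cur = idxs[scores[idxs].argmax()]
        keep.append(int(cur))
        idxs = idxs[idxs != cur]
        if idxs.numel() == 0:
            break
        ious = iou_fn(boxes[cur].unsqueeze(0), boxes[idxs]).squeeze(0)
        # linear decay for boxes overlapping the selected one
        decay = torch.where(ious > iou_thresh, 1.0 - ious,
                            torch.ones_like(ious))
        scores[idxs] = scores[idxs] * decay
        idxs = idxs[scores[idxs] > score_thresh]
    return keep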
def __call__(self, anchors, objectness, box_regression, box_orien, targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    # TODO square anchor box: expand targets in xyxy. An earlier per-box loop
    # built a square box of side 1.2 * max(l, w) around each target via
    # bBox_2D; the vectorized version is kept below for reference:
    # square_targets = []
    # for j, target in enumerate(targets):
    #     # wh of target box by br - tl
    #     wh1 = target.bbox[:, 2:] - target.bbox[:, :2]
    #     maxedge1 = torch.max(wh1[:, 0], wh1[:, 1])
    #     maxedge11 = torch.cat((maxedge1[:, None], maxedge1[:, None]), -1)
    #     xcyc1 = (target.bbox[:, 2:] + target.bbox[:, :2]) * 0.5
    #     # square box3 corresponds to targets
    #     box3 = torch.cat((xcyc1 - maxedge11 * 0.5, xcyc1 + maxedge11 * 0.5), -1)
    #     targets[j].bbox = box3

    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets, orien_targets = self.prepare_targets(
        anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                               dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                               dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness_flattened = []
    box_regression_flattened = []
    box_orien_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the objectness and the box_regression
    for objectness_per_level, box_regression_per_level, box_orien_per_level in zip(
            objectness, box_regression, box_orien):
        N, A, H, W = objectness_per_level.shape
        objectness_per_level = objectness_per_level.permute(
            0, 2, 3, 1).reshape(N, -1)
        box_regression_per_level = box_regression_per_level.view(
            N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(
            0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_orien_per_level = box_orien_per_level.view(N, -1, 2, H, W)
        box_orien_per_level = box_orien_per_level.permute(0, 3, 4, 1, 2)
        box_orien_per_level = box_orien_per_level.reshape(N, -1, 2)
        objectness_flattened.append(objectness_per_level)
        box_regression_flattened.append(box_regression_per_level)
        box_orien_flattened.append(box_orien_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    objectness = cat(objectness_flattened, dim=1).reshape(-1)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)
    orien_regression = cat(box_orien_flattened, dim=1).reshape(-1, 2)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    orien_targets = torch.cat(orien_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / sampled_inds.numel()

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds])

    # MSE on the 2-channel orientation regression; a smooth-L1 variant was
    # also tried and is kept below for reference
    orien_loss = F.mse_loss(
        orien_regression[sampled_pos_inds],
        orien_targets[sampled_pos_inds],
        reduction='sum',
    ) / sampled_inds.numel()
    # orien_loss = smooth_l1_loss(
    #     orien_regression[sampled_pos_inds],
    #     orien_targets[sampled_pos_inds].type(torch.cuda.FloatTensor),
    #     size_average=False,
    #     beta=1.0 / 9,
    # ) / sampled_inds.numel()
    # NO Orientation Loss During RPN Stage

    return objectness_loss, box_loss, orien_loss
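import torch

# prepare_targets, which produces the 2-channel orien_targets, is not shown.
# A common choice for such a target (assumed here, not confirmed by this
# excerpt) is to regress (cos, sin) of the angle, so the MSE loss stays
# smooth across the angular wrap-around:
def encode_orientation_sketch(theta):
    # Map an angle (radians) to a 2-vector continuous at the wrap-around.
    return torch.stack((torch.cos(theta), torch.sin(theta)), dim=-1)

def decode_orientation_sketch(vec):
    # Recover the angle from the (cos, sin) pair.
    return torch.atan2(vec[..., 1], vec[..., 0])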
def forward(self, x):
    appearance_feature, proposals, cls_score, box_reg, targets = x
    self.device = appearance_feature.device
    with torch.no_grad():
        sorted_boxlists = self.prepare_ranking(cls_score,
                                               box_reg,
                                               proposals,
                                               targets,
                                               reg_iou=self.reg_iou)
    # concatenate values from different images
    boxes_per_image = [len(f) for f in proposals]
    idxs = [f.get_field('sorted_idx') for f in sorted_boxlists]
    scores = torch.cat([f.get_field('scores') for f in sorted_boxlists])
    bboxes = torch.cat(
        [f.bbox.reshape(-1, self.fg_class, 4) for f in sorted_boxlists])
    objectness = torch.cat([
        f.get_field('objectness').reshape(-1, self.fg_class)
        for f in sorted_boxlists
    ])
    all_scores = torch.cat(
        [f.get_field('all_scores') for f in sorted_boxlists])
    # add iou information
    image_sizes = [f.size for f in sorted_boxlists]
    sorted_boxes_per_image = [f.shape[0] for f in idxs]
    appearance_feature = self.roi_feat_embedding_fc(appearance_feature)
    appearance_feature = appearance_feature.split(boxes_per_image, dim=0)
    sorted_features = []
    nms_rank_embedding = []
    for idx, feature, box_per_image in zip(idxs, appearance_feature,
                                           boxes_per_image):
        feature = feature[idx]
        size = feature.size()
        if size[0] <= self.first_n:
            first_n = size[0]
        else:
            first_n = self.first_n
        sorted_features.append(feature)
        # [rank_dim * batch, feat_dim]
        nms_rank_embedding.append(
            extract_rank_embedding(first_n,
                                   self.cfg.MODEL.RELATION_NMS.ROI_FEAT_DIM,
                                   device=feature.device))
    # [first_n * batchsize, num_fg_classes, 128]
    sorted_features = torch.cat(sorted_features, dim=0)
    nms_rank_embedding = torch.cat(nms_rank_embedding, dim=0)
    nms_rank_embedding = self.nms_rank_fc(nms_rank_embedding)
    sorted_features = sorted_features + nms_rank_embedding[:, None, :]

    boxes_cls_1 = BoxList(bboxes[:, 0, :], image_sizes[0])
    boxes_cls_2 = BoxList(bboxes[:, 1, :], image_sizes[0])
    iou_1 = boxlist_iou(boxes_cls_1, boxes_cls_1)
    iou_2 = boxlist_iou(boxes_cls_2, boxes_cls_2)

    if self.cfg.MODEL.RELATION_NMS.USE_IOU:
        iou = [iou_1, iou_2]
    else:
        iou = None
    nms_position_matrix = extract_multi_position_matrix(
        bboxes,
        None,
        self.geo_feature_dim,
        1000,
        clswise=self.cfg.MODEL.RELATION_NMS.CLS_WISE_RELATION,
    )
    nms_attention_1 = self.relation_module(sorted_features,
                                           nms_position_matrix, iou)
    sorted_features = sorted_features + nms_attention_1
    sorted_features = self.relu1(sorted_features)
    # [first_n * num_fg_classes, 128]
    sorted_features = sorted_features.view(
        -1, self.cfg.MODEL.RELATION_NMS.APPEARANCE_FEAT_DIM)
    sorted_features = self.classifier(sorted_features)
    # logit reshape, [first_n, num_fg_classes, num_thread]
    sorted_features = sorted_features.view(-1, self.fg_class,
                                           len(self.target_thresh))
    if not self.reg_iou:
        sorted_features = torch.sigmoid(sorted_features)
    scores = torch.cat([scores[:, :, None]] * len(self.target_thresh),
                       dim=-1)
    loss_dict = {}
    if self.training:
        if self.reg_iou:
            # when regressing IoU directly, do not rescale by the scores
            reg_label = torch.cat(
                [f.get_field('labels_iou_reg') for f in sorted_boxlists])
            reg_label = reg_label.to(scores.device).float()
            sorted_features = sorted_features.to(scores.device).float()
            if reg_label.numel() > 0:
                reg_iou_loss = F.mse_loss(reg_label, sorted_features)
            else:
                reg_iou_loss = torch.tensor(0.).to(scores.device)
            loss_dict['nms_loss'] = reg_iou_loss
        else:
            sorted_features = scores * sorted_features
            labels = torch.cat(
                [f.get_field('labels') for f in sorted_boxlists])
            labels = labels.to(scores.device).float()
            # WEIGHTED NMS: sorted_features already carries the score factor
            nms_loss = F.binary_cross_entropy(sorted_features, labels)
            loss_dict['nms_loss'] = nms_loss
        return None, loss_dict
    else:
        input_scores = scores
        if self.reg_iou:
            scores = sorted_features * (scores > self.fg_thread).float()
        else:
            scores = sorted_features * scores
        scores = self.merge_multi_thread_score_test(scores)
        scores = scores.split(sorted_boxes_per_image, dim=0)
        bboxes = bboxes.split(sorted_boxes_per_image, dim=0)
        input_scores = input_scores.split(sorted_boxes_per_image, dim=0)
        objectness = objectness.split(sorted_boxes_per_image, dim=0)
        all_scores = all_scores.split(sorted_boxes_per_image, dim=0)
        result = []
        for i_score, score, bbox, obj, image_size, prob_boxhead in zip(
                input_scores, scores, bboxes, objectness, image_sizes,
                all_scores):
            result_per_image = []
            # class index 1: nuclei
            index = (score[:, 1] >= self.fg_thread).nonzero()[:, 0]
            cls_scores = score[index, 1]
            cls_scores_all = prob_boxhead[index, 1]
            cls_boxes = bbox[index, 1, :]
            cls_obj = obj[index, 1]
            boxlist_for_class = BoxList(cls_boxes, image_size, mode='xyxy')
            boxlist_for_class.add_field('scores', cls_scores)
            boxlist_for_class.add_field('objectness', cls_obj)
            boxlist_for_class.add_field('all_scores', cls_scores_all)
            boxlist_for_class = boxlist_nms(boxlist_for_class,
                                            0.5,
                                            score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), 2,
                           dtype=torch.int64).to(self.device))
            result_per_image.append(boxlist_for_class)

            # class index 0
            index = (score[:, 0] >= self.fg_thread).nonzero()[:, 0]
            cls_scores = score[index, 0]
            cls_scores_all = prob_boxhead[index, 0]
            cls_boxes = bbox[index, 0, :]
            cls_obj = obj[index, 0]
            boxlist_for_class = BoxList(cls_boxes, image_size, mode='xyxy')
            # Pos greedy NMS if POS_NMS != -1
            boxlist_for_class.add_field('scores', cls_scores)
            boxlist_for_class.add_field('objectness', cls_obj)
            boxlist_for_class.add_field('all_scores', cls_scores_all)
            if self.nms:
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                self.nms,
                                                score_field="scores")
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), 1,
                           dtype=torch.int64).to(self.device))
            result_per_image.append(boxlist_for_class)
            result_per_image = cat_boxlist(result_per_image)
            number_of_detections = len(result_per_image)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.detections_per_img > 0:
                cls_scores = result_per_image.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.detections_per_img + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result_per_image = result_per_image[keep]
            result.append(result_per_image)
        return result, {}
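import torch

# extract_rank_embedding is not shown; in "Relation Networks for Object
# Detection" the rank of each box is embedded with the same sinusoid scheme
# as Transformer positional encodings. A hedged sketch of that idea (assumed
# semantics, not the project's code):
def rank_embedding_sketch(num_ranks, feat_dim, wave_length=1000.0,
                          device="cpu"):
    ranks = torch.arange(num_ranks, dtype=torch.float32, device=device)
    dims = torch.arange(feat_dim // 2, dtype=torch.float32, device=device)
    freqs = torch.pow(wave_length, 2.0 * dims / feat_dim)
    angles = ranks[:, None] / freqs[None, :]
    # [num_ranks, feat_dim]: sin on the first half, cos on the second
    return torch.cat((torch.sin(angles), torch.cos(angles)), dim=1)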
def forward(self,
            images,
            features,
            gt_bbox=None,
            img_size=None,
            compute_average_recall_RPN=False,
            is_train=None,
            result_dir=None):
    if self.negatives_to_pick is None:
        self.negatives_to_pick = math.ceil(
            (self.batch_size * self.iterations) / self.cfg.NUM_IMAGES)

    features = self.head(features)
    if self.anchors is None:
        features = features[0][0]
        features_map_size = features.size()
        # Extract feature map info
        self.feat_size = features_map_size[0]
        self.height = features_map_size[1]
        self.width = features_map_size[2]
        # Generate anchors
        self.anchors = self.anchor_generator(images, features)[0][0]
        self.feature_ids = torch.empty((0, 2),
                                       dtype=torch.long,
                                       device='cuda')
        self.classifiers = torch.empty(0, dtype=torch.uint8, device='cuda')
        # Associate to each feature tensor an id corresponding to its position,
        # and a classifier id corresponding to an anchor value
        for ind in range(0, int(self.anchors.bbox.size()[0])):
            feat_ij = [[
                int(int(ind / self.num_classes) / self.width),
                int(int(ind / self.num_classes) % self.width)
            ]]
            self.feature_ids = torch.cat(
                (self.feature_ids,
                 torch.tensor(feat_ij, dtype=torch.long, device='cuda')))
            cls = [ind % self.num_classes]
            self.classifiers = torch.cat(
                (self.classifiers,
                 torch.tensor(cls, dtype=torch.uint8, device='cuda')))
        self.anchors.add_field('feature_id', self.feature_ids)
        self.anchors.add_field('classifier', self.classifiers)
        # Remove features with borders external to the image
        self.visible_anchors = self.anchors.get_field('visibility')
        self.anchors = self.anchors[self.visible_anchors]
        # Avoid computing unuseful regions. Iterate over a copy, since the
        # list is modified inside the loop.
        self.still_to_complete = list(range(self.num_classes))
        for i in list(self.still_to_complete):
            if self.anchors[self.anchors.get_field('classifier') ==
                            i].bbox.size()[0] == 0:
                self.still_to_complete.remove(i)
                print('Anchor %i does not have visible regions.' % i,
                      'Removed from the list.')
                if self.save_features:
                    # Saving empty tensors
                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'negatives_cl_{}_batch_{}'.format(i, 0))
                    torch.save(
                        torch.empty((0, self.feat_size),
                                    device=self.training_device),
                        path_to_save)
                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'positives_cl_{}_batch_{}'.format(i, 0))
                    torch.save(
                        torch.empty((0, self.feat_size),
                                    device=self.training_device),
                        path_to_save)
        self.anchors_ids = copy.deepcopy(self.still_to_complete)
        # Initialize batches for minibootstrap
        for i in range(self.num_classes):
            self.negatives.append([])
            self.current_batch.append(0)
            self.current_batch_size.append(0)
            self.positives.append([
                torch.empty((0, self.feat_size),
                            device=self.training_device)
            ])
            for j in range(self.iterations):
                self.negatives[i].append(
                    torch.empty((0, self.feat_size),
                                device=self.training_device))
        # Initialize tensors for box regression
        # Regressor features
        self.X = [
            torch.empty((0, self.feat_size),
                        dtype=torch.float32,
                        device=self.training_device)
        ]
        # Regressor target values
        self.Y = [
            torch.empty((0, 4),
                        dtype=torch.float32,
                        device=self.training_device)
        ]
        # Regressor overlap amounts
        self.O = None
        # Regressor classes
        self.C = [
            torch.empty((0),
                        dtype=torch.float32,
                        device=self.training_device)
        ]
    else:
        features = features[0][0]

    anchors_to_return = self.anchors.copy_with_fields(self.anchors.fields())
    # Resize ground truth boxes to anchors dimensions
    gt_bbox = gt_bbox.resize(anchors_to_return.size)
    # Compute anchors-gts ious
    ious = torch.squeeze(boxlist_iou(gt_bbox, anchors_to_return))
    # Associate each anchor with the gt with max iou
    if gt_bbox.bbox.size()[0] > 1:
        ious, ious_index = torch.max(ious, dim=0)
        anchors_to_return.add_field('gt_bbox', gt_bbox.bbox[ious_index])
    else:
        gts = torch.ones(
            (ious.size()[0], 4), device='cuda') * gt_bbox.bbox[0]
        anchors_to_return.add_field('gt_bbox', gts)
    anchors_to_return.add_field('overlap', ious)
    # Filter all the negatives, i.e. those with iou with the gts below
    # self.neg_iou_thresh
    negative_anchors_total = anchors_to_return[ious < self.neg_iou_thresh]
    indices_to_remove = []
    for i in self.still_to_complete:
        # Filter negatives for the i-th anchor
        anchors_i = negative_anchors_total[
            negative_anchors_total.get_field('classifier') == i]
        # Sample negatives, according to minibootstrap parameters
        if anchors_i.bbox.size()[0] > self.negatives_to_pick:
            anchors_i = anchors_i[torch.randint(
                anchors_i.bbox.size()[0], (self.negatives_to_pick, ))]
        # Compute their id, i.e. position in the features map
        ids = anchors_i.get_field('feature_id')
        ids_size = ids.size()[0]
        # Compute at most how many negatives to add to each batch
        reg_to_add = math.ceil(self.negatives_to_pick / self.iterations)
        # Initialize index of chosen negatives among all the negatives to pick
        ind_to_add = 0
        for b in range(self.current_batch[i], self.iterations):
            # If the batch is full, start from the subsequent one
            if self.negatives[i][b].size()[0] >= self.batch_size:
                # If features must be saved, save full batches and replace
                # the batch in gpu with an empty tensor
                if self.save_features:
                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'negatives_cl_{}_batch_{}'.format(i, b))
                    torch.save(self.negatives[i][b], path_to_save)
                    self.negatives[i][b] = torch.empty(
                        (0, self.feat_size), device=self.training_device)
                self.current_batch[i] += 1
                if self.current_batch[i] >= self.iterations:
                    indices_to_remove.append(i)
                continue
            else:
                # Compute the end index of negatives to add to the batch
                end_interval = int(ind_to_add + min(
                    reg_to_add,
                    self.batch_size - self.negatives[i][b].size()[0],
                    self.negatives_to_pick - ind_to_add,
                    ids_size - ind_to_add))
                # Extract features corresponding to the ids.
                # The diagonal choice is done for computational efficiency.
                feat = torch.index_select(features, 1,
                                          ids[ind_to_add:end_interval, 0])
                feat = torch.index_select(
                    feat, 2, ids[ind_to_add:end_interval, 1]).permute(
                        1, 2, 0).view((end_interval - ind_to_add)**2,
                                      self.feat_size)
                try:
                    feat = feat[self.diag_list[end_interval - ind_to_add]]
                except (IndexError, KeyError):
                    feat = feat[list(
                        range(0, (end_interval - ind_to_add)**2 +
                              (end_interval - ind_to_add) - 1,
                              (end_interval - ind_to_add) + 1))]
                if self.training_device == 'cpu':
                    self.negatives[i][b] = torch.cat(
                        (self.negatives[i][b], feat.cpu()))
                else:
                    self.negatives[i][b] = torch.cat(
                        (self.negatives[i][b], feat))
                # Update indices
                ind_to_add = end_interval
                if ind_to_add == self.negatives_to_pick:
                    break
    # Check to avoid unuseful computations
    for index in indices_to_remove:
        self.still_to_complete.remove(index)

    # Select all the positives, i.e. those with iou with the gts above
    # self.pos_iou_thresh
    positive_anchors = anchors_to_return[
        anchors_to_return.get_field('overlap') > self.pos_iou_thresh]
    # Add to the positives the anchors with max iou with a gt, if the gt
    # does not have associated anchors above the threshold
    for elem in gt_bbox.bbox:
        if elem in positive_anchors.get_field('gt_bbox'):
            continue
        else:
            elem = elem.unsqueeze(0)
            # Find indices where there are anchors associated to this gt_bbox
            indices, _ = torch.min(torch.eq(
                anchors_to_return.get_field('gt_bbox'),
                elem.repeat(anchors_to_return.bbox.size()[0], 1)),
                                   dim=1,
                                   keepdim=True)
            # Additional check to avoid max on an empty tensor
            if indices.any():
                # Find max overlap with this gt_bbox
                values, _ = torch.max(
                    anchors_to_return[indices.squeeze()].get_field(
                        'overlap'), 0)
                positives_i = anchors_to_return[indices.squeeze()]
                positives_i = positives_i[positives_i.get_field('overlap')
                                          == values.item()]
                positive_anchors = cat_boxlist(
                    [positive_anchors, positives_i])

    # Find anchors associated to the positives, to avoid unuseful computation
    pos_inds = torch.unique(positive_anchors.get_field('classifier'))
    for i in pos_inds:
        anchors_i = positive_anchors[positive_anchors.get_field(
            'classifier') == i]
        ids = anchors_i.get_field('feature_id')
        ids_size = ids.size()[0]
        feat = torch.index_select(features, 1, ids[:, 0])
        feat = torch.index_select(feat, 2,
                                  ids[:, 1]).permute(1, 2, 0).view(
                                      ids_size**2, self.feat_size)
        try:
            feat = feat[self.diag_list[ids_size]]
        except (IndexError, KeyError):
            feat = feat[list(
                range(0, ids_size**2 + ids_size - 1, ids_size + 1))]
        # Add positive features for the i-th anchor to the i-th positives list
        if self.training_device == 'cpu':
            self.positives[i][len(self.positives[i]) - 1] = torch.cat(
                (self.positives[i][len(self.positives[i]) - 1],
                 feat.cpu()))
        else:
            self.positives[i][len(self.positives[i]) - 1] = torch.cat(
                (self.positives[i][len(self.positives[i]) - 1], feat))
        if self.positives[i][len(self.positives[i]) -
                             1].size()[0] >= self.batch_size:
            if self.save_features:
                path_to_save = os.path.join(
                    result_dir, 'features_RPN',
                    'positives_cl_{}_batch_{}'.format(
                        i,
                        len(self.positives[i]) - 1))
                torch.save(self.positives[i][len(self.positives[i]) - 1],
                           path_to_save)
                self.positives[i][len(self.positives[i]) -
                                  1] = torch.empty(
                                      (0, self.feat_size),
                                      device=self.training_device)
            self.positives[i].append(
                torch.empty((0, self.feat_size),
                            device=self.training_device))

        # COXY computation for regressors
        ex_boxes = anchors_i.bbox
        gt_boxes = anchors_i.get_field('gt_bbox')

        src_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
        src_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
        src_ctr_x = ex_boxes[:, 0] + 0.5 * src_w
        src_ctr_y = ex_boxes[:, 1] + 0.5 * src_h

        gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
        gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
        gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
        gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h

        dst_ctr_x = (gt_ctr_x - src_ctr_x) / src_w
        dst_ctr_y = (gt_ctr_y - src_ctr_y) / src_h
        dst_scl_w = torch.log(gt_w / src_w)
        dst_scl_h = torch.log(gt_h / src_h)

        target = torch.stack(
            (dst_ctr_x, dst_ctr_y, dst_scl_w, dst_scl_h), dim=1)

        if self.training_device == 'cpu':
            self.Y[len(self.Y) - 1] = torch.cat(
                (self.Y[len(self.Y) - 1], target.cpu()), dim=0)
            # Add class and features to C and X
            self.C[len(self.C) - 1] = torch.cat(
                (self.C[len(self.C) - 1],
                 torch.full((ids_size, 1), i, dtype=torch.float32)))
            self.X[len(self.X) - 1] = torch.cat(
                (self.X[len(self.X) - 1], feat.cpu()))
        else:
            self.Y[len(self.Y) - 1] = torch.cat(
                (self.Y[len(self.Y) - 1], target), dim=0)
            # Add class and features to C and X
            self.C[len(self.C) - 1] = torch.cat(
                (self.C[len(self.C) - 1],
                 torch.full((ids_size, 1),
                            i,
                            dtype=torch.float32,
                            device='cuda')))
            self.X[len(self.X) - 1] = torch.cat(
                (self.X[len(self.X) - 1], feat))
        if self.X[len(self.X) - 1].size()[0] >= self.batch_size:
            if self.save_features:
                path_to_save = os.path.join(
                    result_dir, 'features_RPN',
                    'reg_x_batch_{}'.format(len(self.X) - 1))
                torch.save(self.X[len(self.X) - 1], path_to_save)
                self.X[len(self.X) - 1] = torch.empty(
                    (0, self.feat_size),
                    dtype=torch.float32,
                    device=self.training_device)
                path_to_save = os.path.join(
                    result_dir, 'features_RPN',
                    'reg_c_batch_{}'.format(len(self.C) - 1))
                torch.save(self.C[len(self.C) - 1], path_to_save)
                self.C[len(self.C) - 1] = torch.empty(
                    (0), dtype=torch.float32, device=self.training_device)
                path_to_save = os.path.join(
                    result_dir, 'features_RPN',
                    'reg_y_batch_{}'.format(len(self.Y) - 1))
                torch.save(self.Y[len(self.Y) - 1], path_to_save)
                self.Y[len(self.Y) - 1] = torch.empty(
                    (0, 4),
                    dtype=torch.float32,
                    device=self.training_device)
            self.X.append(
                torch.empty((0, self.feat_size),
                            dtype=torch.float32,
                            device=self.training_device))
            self.C.append(
                torch.empty((0),
                            dtype=torch.float32,
                            device=self.training_device))
            self.Y.append(
                torch.empty((0, 4),
                            dtype=torch.float32,
                            device=self.training_device))

    return {}, {}, 0
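import torch

# The double index_select + diagonal trick above materializes a k x k grid of
# feature vectors and then keeps only its diagonal, i.e. the features at
# positions (ids[m, 0], ids[m, 1]). A direct equivalent for a [C, H, W]
# feature map (a sketch, not the project's code):
def gather_features_sketch(features, ids):
    c, h, w = features.shape
    flat = features.view(c, h * w)     # [C, H*W]
    lin = ids[:, 0] * w + ids[:, 1]    # linear index of each (row, col) pair
    return flat[:, lin].t()            # [k, C]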
def __call__(self, square_anchors, guided_anchors, loc_masks,
             approx_anchors, objectness, box_regression, shapes, locs,
             targets):
    """
    Arguments:
        anchors (list[BoxList])
        objectness (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        objectness_loss (Tensor)
        box_loss (Tensor)
    """
    featmap_sizes = [feat.shape[2:] for feat in objectness]
    loc_targets, loc_weights, loc_avg_factors = self.ga_loc_target(
        targets, featmap_sizes)
    locs, loc_targets, loc_weights = concat_locs(locs, loc_targets,
                                                 loc_weights)
    loc_loss = self.loss_loc_fn.forward_weights(
        locs, loc_targets, loc_weights) / loc_avg_factors

    square_anchors = [
        cat_boxlist(anchors_per_image)
        for anchors_per_image in square_anchors
    ]
    approx_anchors = [
        cat_boxlist(anchors_per_image)
        for anchors_per_image in approx_anchors
    ]
    shape_targets, shape_weights = self.ga_shape_target(
        square_anchors, approx_anchors, targets)
    shapes = concat_shapes(shapes)
    shape_pos_inds, shape_neg_inds = self.fg_bg_sampler(shape_weights)
    shape_pos_inds = torch.nonzero(torch.cat(shape_pos_inds,
                                             dim=0)).squeeze(1)
    shape_neg_inds = torch.nonzero(torch.cat(shape_neg_inds,
                                             dim=0)).squeeze(1)
    anchor_total_num = shape_pos_inds.shape[0] + shape_neg_inds.shape[0]
    shape_targets = torch.cat(shape_targets, dim=0)
    square_anchors = cat_boxlist_broad(square_anchors)
    shapes = shapes[shape_pos_inds]
    shape_targets = shape_targets[shape_pos_inds]
    square_anchors = square_anchors[shape_pos_inds]
    shapes = self.anchor_box_coder.decode(shapes, square_anchors.bbox)
    shape_loss = bounded_iou_loss(
        shapes, shape_targets, beta=0.2,
        size_average=False) / anchor_total_num

    anchors = [
        cat_boxlist(anchors_per_image)
        for anchors_per_image in guided_anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                               dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                               dim=0)).squeeze(1)

    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    objectness, box_regression = \
        concat_box_prediction_layers(objectness, box_regression)

    objectness = objectness.squeeze()

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)

    box_loss = smooth_l1_loss(
        box_regression[sampled_pos_inds],
        regression_targets[sampled_pos_inds],
        beta=1.0 / 9,
        size_average=False,
    ) / (sampled_inds.numel())

    objectness_loss = F.binary_cross_entropy_with_logits(
        objectness[sampled_inds], labels[sampled_inds])

    return objectness_loss, box_loss, shape_loss, loc_loss
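import torch

# concat_locs / concat_shapes are not shown; judging by how their outputs are
# indexed, they presumably flatten per-level [N, C, H, W] prediction maps into
# one [total, C] tensor with a consistent (image, location) ordering, in the
# same spirit as concat_box_prediction_layers. A hedged sketch:
def concat_levels_sketch(per_level_maps):
    c = per_level_maps[0].shape[1]
    flattened = []
    for t in per_level_maps:
        n = t.shape[0]
        # [N, C, H, W] -> [N, H*W, C]
        flattened.append(t.permute(0, 2, 3, 1).reshape(n, -1, c))
    # concatenate levels, then merge images and locations
    return torch.cat(flattened, dim=1).reshape(-1, c)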
def add_gt_proposals(self, proposals, targets):
    """
    Arguments:
        proposals: list[BoxList]
        targets: list[BoxList]
    """
    # Get the device we're operating on
    device = proposals[0].bbox.device

    def rect_to_rotated(gt_box):
        # Expand an axis-aligned xyxy box into the 8-point polygon
        # (TL, TR, BR, BL) plus the matching xywht representation with a
        # fixed angle of -pi/2
        xyxy = gt_box.get_field("xyxy")
        xmin, ymin, xmax, ymax = xyxy.split(1, dim=-1)
        gt_box.bbox = torch.cat(
            (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax), dim=1)
        xywht = torch.cat(
            ((xmin + xmax) / 2., (ymin + ymax) / 2., xmax - xmin + 1,
             ymax - ymin + 1, torch.ones_like(xmin) * (-3.14 / 2)),
            dim=1)
        gt_box.add_field("xywht", xywht)

    if cfg.ROTATE:
        if cfg.RECT_POLY_BALANCE == "Rot":
            gt_boxes = [
                target.copy_with_fields(["xywht", "xyxy"])
                for target in targets
            ]
        elif cfg.RECT_POLY_BALANCE == "Rect":
            gt_boxes = [
                target.copy_with_fields(["xyxy"]) for target in targets
            ]
            for gt_box in gt_boxes:
                rect_to_rotated(gt_box)
        elif cfg.RECT_POLY_BALANCE == "Rect+Rot":
            gt_boxes_rects = [
                target.copy_with_fields(["xyxy"]) for target in targets
            ]
            gt_boxes_rots = [
                target.copy_with_fields(["xywht", "xyxy"])
                for target in targets
            ]
            for gt_box in gt_boxes_rects:
                rect_to_rotated(gt_box)
            gt_boxes = [
                cat_boxlist((gt_boxes_rect, gt_boxes_rot))
                for gt_boxes_rect, gt_boxes_rot in zip(
                    gt_boxes_rects, gt_boxes_rots)
            ]
    else:
        gt_boxes = [target.copy_with_fields([]) for target in targets]

    # later cat of bbox requires all fields to be present for all bbox
    # so we need to add a dummy for objectness that's missing
    for gt_box in gt_boxes:
        gt_box.add_field("objectness",
                         torch.ones(len(gt_box), device=device))

    proposals = [
        cat_boxlist((proposal, gt_box))
        for proposal, gt_box in zip(proposals, gt_boxes)
    ]

    return proposals
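import torch

# A worked instance of the xyxy -> polygon + xywht conversion above: the box
# (10, 20, 50, 60) becomes the 4-corner polygon (10,20, 50,20, 50,60, 10,60)
# and the rotated form (cx=30, cy=40, w=41, h=41, t=-pi/2):
xmin, ymin, xmax, ymax = (torch.tensor([v]) for v in (10., 20., 50., 60.))
poly = torch.cat((xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax))
xywht = torch.cat(((xmin + xmax) / 2., (ymin + ymax) / 2.,
                   xmax - xmin + 1, ymax - ymin + 1,
                   torch.ones_like(xmin) * (-3.14 / 2)))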
def __call__(self, anchors, box_cls, box_regression, targets,
             embeddings=None):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                               dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                               dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

    N = len(labels)
    if embeddings is not None:
        box_cls, box_regression, embeddings = \
            concat_box_prediction_embeddings_layers(box_cls, box_regression,
                                                    embeddings)
    else:
        box_cls, box_regression = \
            concat_box_prediction_layers(box_cls, box_regression)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = torch.nonzero(labels > 0).squeeze(1)

    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (max(1, pos_inds.numel() * self.regress_norm))

    labels = labels.int()
    retinanet_cls_loss = self.box_cls_loss_func(
        box_cls, labels) / (pos_inds.numel() + N)

    # triplet loss
    if embeddings is not None and self.embedding_loss == 2:
        margin = self.embed_margin
        T_Loss = TripletLoss(margin)
        # hard negative mining version
        anchor_embeddings, positive_embeddings, negative_embeddings = \
            triplet_embeddings(embeddings[sampled_inds],
                               labels[sampled_inds])
        triplet_loss = T_Loss(anchor_embeddings,
                              positive_embeddings,
                              negative_embeddings,
                              size_average=True)
        # dynamic incremental margin
        # if triplet_loss == 0 and np.random.random() > 0.5:
        #     RetinaNetLossComputation.TRIPLET_MARGIN += 1
        return retinanet_cls_loss, retinanet_regression_loss, triplet_loss
    # pair loss
    elif embeddings is not None and self.embedding_loss == 1:
        margin = self.embed_margin
        C_loss = ContrastiveLoss(margin)
        embeddings1, embeddings2, targets = pair_embeddings(
            embeddings[sampled_inds], labels[sampled_inds])
        pair_loss = C_loss(embeddings1, embeddings2, targets)
        return retinanet_cls_loss, retinanet_regression_loss, pair_loss
    else:
        return retinanet_cls_loss, retinanet_regression_loss
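import torch
import torch.nn as nn
import torch.nn.functional as F

# TripletLoss / ContrastiveLoss are not shown in this excerpt. A minimal
# margin-based triplet loss consistent with the call signature above
# (an assumed sketch, not the project's implementation):
class TripletLossSketch(nn.Module):
    def __init__(self, margin):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative, size_average=True):
        # Hinge on the gap between positive-pair and negative-pair distances.
        d_pos = F.pairwise_distance(anchor, positive)
        d_neg = F.pairwise_distance(anchor, negative)
        losses = F.relu(d_pos - d_neg + self.margin)
        return losses.mean() if size_average else losses.sum()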
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        losses (dict[str, Tensor])
    """
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    labels, regression_targets = self.prepare_targets(anchors, targets)
    # no fg/bg sampling here: the focal loss consumes all anchors

    num_layers = len(box_cls)
    box_cls_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the box_cls and the box_regression
    for box_cls_per_level, box_regression_per_level in zip(
            box_cls, box_regression):
        N, A, H, W = box_cls_per_level.shape
        C = self.num_classes
        box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
        box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
        box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
        box_regression_per_level = box_regression_per_level.view(
            N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(
            0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_cls_flattened.append(box_cls_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    labels = torch.cat(labels, dim=0)
    regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = labels > 0

    retinanet_regression_loss = self.regression_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        size_average=False,
    ) / (pos_inds.sum() * 4)

    labels = labels.int()
    # N (the image batch size) intentionally carries over from the
    # per-level loop above
    retinanet_cls_loss = self.box_cls_loss_func(box_cls, labels) / (
        (labels > 0).sum() + N)

    losses = {
        "loss_retina_cls": retinanet_cls_loss,
        "loss_retina_reg": retinanet_regression_loss,
    }
    return losses
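import torch

# box_cls_loss_func is a sigmoid focal loss whose implementation is not part
# of this excerpt. A plain-PyTorch sketch of the usual computation over int
# class labels (0 = background, 1..C = foreground), reduced by sum so the
# caller can normalize as above (assumed semantics):
def sigmoid_focal_loss_sketch(logits, targets, gamma=2.0, alpha=0.25):
    num_classes = logits.shape[1]
    p = torch.sigmoid(logits)
    class_range = torch.arange(1, num_classes + 1,
                               device=logits.device)[None, :]
    t = targets[:, None]
    # focal term for the target class, and for all other (negative) classes
    pos = -alpha * ((1 - p) ** gamma) * \
        torch.log(p.clamp(min=1e-12)) * (t == class_range).float()
    neg = -(1 - alpha) * (p ** gamma) * \
        torch.log((1 - p).clamp(min=1e-12)) * \
        ((t != class_range) & (t >= 0)).float()
    return (pos + neg).sum()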
def __call__(self, anchors, box_cls, box_regression, targets):
    """
    Arguments:
        anchors (list[BoxList])
        box_cls (list[Tensor])
        box_regression (list[Tensor])
        targets (list[BoxList])

    Returns:
        retinanet_cls_loss (Tensor)
        retinanet_regression_loss (Tensor)
    """
    if isinstance(targets, dict):
        labels = targets['labels']
        regression_targets = targets['regression_targets']
    else:
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)

    num_layers = len(box_cls)
    box_cls_flattened = []
    box_regression_flattened = []
    # for each feature level, permute the outputs to make them be in the
    # same format as the labels. Note that the labels are computed for
    # all feature levels concatenated, so we keep the same representation
    # for the box_cls and the box_regression
    for box_cls_per_level, box_regression_per_level in zip(
            box_cls, box_regression):
        N, A, H, W = box_cls_per_level.shape
        C = self.num_classes
        box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
        box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
        box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
        box_regression_per_level = box_regression_per_level.view(
            N, -1, 4, H, W)
        box_regression_per_level = box_regression_per_level.permute(
            0, 3, 4, 1, 2)
        box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
        box_cls_flattened.append(box_cls_per_level)
        box_regression_flattened.append(box_regression_per_level)
    # concatenate on the first dimension (representing the feature levels), to
    # take into account the way the labels were generated (with all feature maps
    # being concatenated as well)
    box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C)
    box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

    if not isinstance(targets, dict):
        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
    pos_inds = labels > 0

    retinanet_regression_loss = smooth_l1_loss(
        box_regression[pos_inds],
        regression_targets[pos_inds],
        beta=self.bbox_reg_beta,
        size_average=False,
    ) / (pos_inds.sum() * 4)
    retinanet_regression_loss *= self.bbox_reg_weight

    labels = labels.int()
    retinanet_cls_loss = self.box_cls_loss_func(box_cls, labels) / (
        (labels > 0).sum() + N)

    return retinanet_cls_loss * self.weight, \
        retinanet_regression_loss * self.weight
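# The dict branch above lets a caller precompute anchor targets once and reuse
# them across iterations when anchors and ground truth are fixed. A hedged
# usage sketch (`loss_evaluator` is an instance of the class this method
# belongs to; the dict keys match the branch above):
# cached = {'labels': labels, 'regression_targets': regression_targets}
# cls_loss, reg_loss = loss_evaluator(anchors, box_cls, box_regression, cached)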
box_coder = BoxCoder(weights=None)  # cfg.MODEL.RPN.BBOX_REG_WEIGHTS
fg_bg_sampler = BalancedPositiveNegativeSampler(
    cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, cfg.MODEL.RPN.POSITIVE_FRACTION)
loss_evaluator = make_rpn_loss_evaluator(cfg, box_coder)

start_iter = 0
for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
    images = images.to(device)
    targets = [target.to(device) for target in targets]

    feature_maps = get_feature_maps(images.tensors,
                                    cfg.MODEL.RPN.ANCHOR_STRIDE)
    anchors = anchor_generator.forward(images, feature_maps)
    anchors = [
        cat_boxlist(anchors_per_image) for anchors_per_image in anchors
    ]
    anchors_cnt = [len(a) for a in anchors]

    labels, regression_targets, matched_gt_ids, _ \
        = loss_evaluator.prepare_targets(anchors, targets)

    sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels)
    sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                               dim=0)).squeeze(1)
    sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                               dim=0)).squeeze(1)
    sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
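    # The fragment stops after sampling. A hedged continuation that only
    # reports per-iteration sampling statistics (illustrative; the original
    # loop body past this point is not shown):
    num_pos = sampled_pos_inds.numel()
    num_neg = sampled_neg_inds.numel()
    print("iter {}: anchors per image {}, {} sampled ({} pos / {} neg)".format(
        iteration, anchors_cnt, sampled_inds.numel(), num_pos, num_neg))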