def inference(self, inputs, box_cls, box_regression, center_ness, nms=None, pad=True):
    """
    Arguments:
        inputs: same as FCOS.forward's batched_inputs
        box_cls: list of Tensor, each with shape [B,H,W,A*num_classes]
        box_regression: list of Tensor, each with shape [B,H,W,A*4]
        center_ness: list of Tensor, one centerness prediction map per level
    Returns:
        results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
    """
    assert len(box_cls[0].get_shape()) == 4, "error box cls dims"
    assert len(box_regression[0].get_shape()) == 4, "error box delta dims"

    B, _, _, _ = wmlt.combined_static_and_dynamic_shape(box_regression[0])
    fm_sizes = [tf.shape(x)[1:3] for x in box_regression]
    box_cls = [reshape_to_N_HWA_K(x, self.num_classes) for x in box_cls]
    box_regression = [reshape_to_N_HWA_K(x, 4) for x in box_regression]
    center_ness = [tf.reshape(x, [B, -1]) for x in center_ness]
    box_cls = tf.concat(box_cls, axis=1)
    box_regression = tf.concat(box_regression, axis=1)
    center_ness = tf.concat(center_ness, axis=1)

    results = wmlt.static_or_dynamic_map_fn(
        lambda x: self.inference_single_image(x[0], x[1], x[2], fm_sizes, nms=nms, pad=pad),
        elems=[box_cls, box_regression, center_ness],
        dtype=[tf.float32, tf.int32, tf.float32, tf.int32],
        back_prop=False)
    outdata = {
        RD_BOXES: results[0],
        RD_LABELS: results[1],
        RD_PROBABILITY: results[2],
        RD_LENGTH: results[3],
    }
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            lengths=outdata[RD_LENGTH],
            scores=outdata[RD_PROBABILITY],
            name="FCOSGIou_result",
            category_index=DataLoader.category_index)
    return outdata
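# Illustrative sketch (not the repository's implementation): reshape_to_N_HWA_K is
# assumed to flatten a per-level prediction map [B,H,W,A*K] into [B,H*W*A,K] so that
# all levels can be concatenated along axis=1 before the per-image decode step.
import tensorflow as tf

def reshape_to_N_HWA_K_sketch(tensor, K):
    # [B,H,W,A*K] -> [B,H*W*A,K]; each anchor's block of K values stays contiguous.
    B = tf.shape(tensor)[0]
    return tf.reshape(tensor, [B, -1, K])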
def inference(self, inputs, box_cls, box_delta, anchors, output_fix_nr=0):
    """
    Arguments:
        inputs: same as RetinaNet.forward's batched_inputs
        box_cls: list of Tensor, each with shape [B,H,W,A*num_classes]
        box_delta: list of Tensor, each with shape [B,H,W,A*4]
        anchors: list of Tensor, each with shape [X,4] (X=H*W*A)
    Returns:
        results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
    """
    assert len(anchors[0].get_shape()) == 2, "error anchors dims"
    assert len(box_cls[0].get_shape()) == 4, "error box cls dims"
    assert len(box_delta[0].get_shape()) == 4, "error box delta dims"

    anchors_size = [tf.shape(x)[0] for x in anchors]
    anchors = tf.concat(anchors, axis=0)
    box_cls = [reshape_to_N_HWA_K(x, self.num_classes) for x in box_cls]
    box_delta = [reshape_to_N_HWA_K(x, 4) for x in box_delta]
    box_cls = tf.concat(box_cls, axis=1)
    box_delta = tf.concat(box_delta, axis=1)

    results = wmlt.static_or_dynamic_map_fn(
        lambda x: self.inference_single_image(x[0], x[1], anchors, anchors_size, output_fix_nr),
        elems=[box_cls, box_delta],
        dtype=[tf.float32, tf.int32, tf.float32, tf.int32, tf.int32],
        back_prop=False)
    outdata = {
        RD_BOXES: results[0],
        RD_LABELS: results[1],
        RD_PROBABILITY: results[2],
        RD_INDICES: results[3],
        RD_LENGTH: results[4],
    }
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            lengths=outdata[RD_LENGTH],
            scores=outdata[RD_PROBABILITY],
            name="RetinaNet_result",
            category_index=DataLoader.category_index)
    return outdata
def forward(self, inputs):
    """
    Args:
        Same as in :class:`GeneralizedRCNN.forward`
    Returns:
        outdata: dict produced by the proposal generator; PD_BOXES holds the proposal boxes.
        proposal_losses: dict of proposal losses (used during training only).
    """
    inputs = self.preprocess_image(inputs)
    features = self.backbone(inputs)
    if isinstance(features, (list, tuple)):
        features = features[0]
    outdata, proposal_losses = self.proposal_generator(inputs, features)
    wsummary.detection_image_summary(images=inputs['image'],
                                     boxes=outdata[PD_BOXES],
                                     name="proposal_boxes")
    return outdata, proposal_losses
def _forward_box(self, inputs, features, proposals):
    head_outputs = []
    img_size = get_img_size_from_batched_inputs(inputs)
    for k in range(self.num_cascade_stages):
        if k > 0:
            # The output boxes of the previous stage are the input proposals of the next stage.
            proposals_boxes = head_outputs[-1].predict_boxes_for_gt_classes()
            if self.is_training:
                proposals = self._match_and_label_boxes(inputs, proposals_boxes, stage=k)
            else:
                proposals = {PD_BOXES: proposals_boxes}
        head_outputs.append(self._run_stage(features, proposals, k, img_size=img_size))
        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
            results = head_outputs[-1].inference(self.test_score_thresh,
                                                 self.test_nms_thresh,
                                                 self.test_detections_per_img)
            wsummary.detection_image_summary(images=inputs[IMAGE],
                                             boxes=results[RD_BOXES],
                                             classes=results[RD_LABELS],
                                             lengths=results[RD_LENGTH],
                                             name=f"RCNN_result{k}")

    if self.is_training:
        losses = {}
        for stage, output in enumerate(head_outputs):
            stage_losses = output.losses()
            losses.update({key + "_stage{}".format(stage): v for key, v in stage_losses.items()})
        return losses
    else:
        # One probability tensor per stage.
        scores_per_stage = [h.predict_probs() for h in head_outputs]
        # Average the scores across stages.
        scores = tf.stack(scores_per_stage, axis=-1)
        scores = tf.reduce_mean(scores, axis=-1, keepdims=False)
        # Use the boxes of the last stage.
        pred_instances = head_outputs[-1].inference(self.test_score_thresh,
                                                    self.test_nms_thresh,
                                                    self.test_detections_per_img,
                                                    scores=scores)
        return pred_instances
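# Illustrative sketch of the test-time score averaging above: every cascade stage
# produces per-box class probabilities, they are stacked on a new trailing axis and
# averaged, while the boxes of the last stage are kept. `stage_probs` is a
# hypothetical stand-in for the list built from h.predict_probs().
import tensorflow as tf

def average_cascade_scores(stage_probs):
    # stage_probs: list (one entry per stage) of probability tensors with identical shapes
    stacked = tf.stack(stage_probs, axis=-1)   # [..., num_stages]
    return tf.reduce_mean(stacked, axis=-1)    # same shape as a single stage's probabilities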
def inference(self, inputs, box_cls, box_delta, anchors):
    """
    Arguments:
        box_cls, box_delta: Same as the output of :meth:`RetinaNetHead.forward`
        anchors: list of Tensor, one per feature level, each with shape [X,4] (X=H*W*A)
    Returns:
        results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
    """
    assert len(anchors[0].get_shape()) == 2, "error anchors dims"

    anchors_size = [tf.shape(x)[0] for x in anchors]
    anchors = tf.concat(anchors, axis=0)
    box_cls = [reshape_to_N_HWA_K(x, self.num_classes + 1) for x in box_cls]
    box_delta = [reshape_to_N_HWA_K(x, 4) for x in box_delta]
    box_cls = tf.concat(box_cls, axis=1)
    box_delta = tf.concat(box_delta, axis=1)

    results = wmlt.static_or_dynamic_map_fn(
        lambda x: self.inference_single_image(x[0], x[1], anchors, anchors_size),
        elems=[box_cls, box_delta],
        dtype=(tf.float32, tf.int32, tf.float32, tf.int32),
        back_prop=False)
    outdata = {
        RD_BOXES: results[0],
        RD_LABELS: results[1],
        RD_PROBABILITY: results[2],
        RD_LENGTH: results[3],
    }
    if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            scores=outdata[RD_PROBABILITY],
            lengths=outdata[RD_LENGTH],
            name="SSD_result",
            category_index=DataLoader.category_index)
    return outdata
def show_anchors(self, anchors, features, img_size=[512, 512]):
    with tf.device("/cpu:0"):
        with tf.name_scope("show_anchors"):
            # Draw the anchors of the center cell of each feature level on a blank image.
            image = tf.ones(img_size)
            image = tf.expand_dims(image, axis=0)
            image = tf.expand_dims(image, axis=-1)
            image = tf.tile(image, [1, 1, 1, 3])
            for i in range(len(anchors)):
                if not isinstance(self.aspect_ratios[i][0], Iterable):
                    num_cell_anchors = len(self.aspect_ratios[i]) * len(self.sizes[i])
                else:
                    num_cell_anchors = len(self.aspect_ratios[i][0]) * len(self.sizes[i])
                shape = wmlt.combined_static_and_dynamic_shape(features[i])
                offset = ((shape[1] // 2) * shape[2] + shape[2] // 2) * num_cell_anchors
                boxes = anchors[i][offset:offset + num_cell_anchors]
                boxes = tf.expand_dims(boxes, axis=0)
                wsummary.detection_image_summary(images=image, boxes=boxes, name=f"level_{i}")
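# Small worked sketch of the offset arithmetic above, assuming the anchors of one
# level are stored row-major as [H*W*num_cell_anchors, 4]: the anchors of the
# feature map's center cell (H//2, W//2) start at ((H//2)*W + W//2) * num_cell_anchors.
def center_cell_anchor_range(H, W, num_cell_anchors):
    offset = ((H // 2) * W + W // 2) * num_cell_anchors
    return offset, offset + num_cell_anchors

# e.g. H=W=8 with 3 anchors per cell -> slice [108, 111)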
def inference(self, inputs, head_outputs):
    """
    Arguments:
        inputs: same as CenterNet.forward's batched_inputs
    Returns:
        results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
    """
    self.inputs = inputs
    assert len(head_outputs) == 1, f"Error head outputs len {len(head_outputs)}"

    nms = partial(odl.boxes_nms, threshold=self.nms_threshold)
    bboxes, clses, scores, length = self.get_box_in_a_single_layer(head_outputs[0],
                                                                   self.cfg.SCORE_THRESH_TEST)
    bboxes, labels, nms_indexs, lens = odl.batch_nms_wrapper(bboxes, clses, length,
                                                             confidence=None,
                                                             nms=nms,
                                                             k=self.max_detections_per_image,
                                                             sort=True)
    scores = wmlt.batch_gather(scores, nms_indexs)

    outdata = {RD_BOXES: bboxes, RD_LABELS: labels, RD_PROBABILITY: scores, RD_LENGTH: lens}
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(images=inputs[IMAGE],
                                         boxes=outdata[RD_BOXES],
                                         classes=outdata[RD_LABELS],
                                         lengths=outdata[RD_LENGTH],
                                         scores=outdata[RD_PROBABILITY],
                                         name="CenterNetOutput",
                                         category_index=DataLoader.category_index)
    return outdata
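# A minimal per-image NMS sketch, assuming relative boxes in [ymin,xmin,ymax,xmax]
# order. It only illustrates what odl.batch_nms_wrapper is used for here (keep at
# most `k` boxes, return the kept indices so scores can be gathered afterwards); it
# does not reproduce that helper's exact batched behaviour.
import tensorflow as tf

def nms_single_image_sketch(boxes, scores, k, iou_threshold=0.5):
    # boxes: [N,4], scores: [N]
    keep = tf.image.non_max_suppression(boxes, scores,
                                        max_output_size=k,
                                        iou_threshold=iou_threshold)
    kept_boxes = tf.gather(boxes, keep)
    kept_scores = tf.gather(scores, keep)
    length = tf.shape(keep)[0]
    return kept_boxes, kept_scores, keep, length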
def trans_boxes(self, bboxes, levels, img_size):
    B, box_nr = wmlt.combined_static_and_dynamic_shape(levels)
    anchor_boxes_size = tf.tile(self.rcnn_anchor_boxes, [B, 1])
    boxes_size = wmlt.batch_gather(anchor_boxes_size, levels)
    # Replace each box's size with the anchor size of its level, keeping the box center.
    w = boxes_size / tf.to_float(img_size[1])
    h = boxes_size / tf.to_float(img_size[0])
    ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=-1)
    cy = (ymin + ymax) / 2
    cx = (xmin + xmax) / 2
    ymin = cy - h / 2
    ymax = cy + h / 2
    xmin = cx - w / 2
    xmax = cx + w / 2
    new_boxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)

    # Debug summary: show a few original boxes next to their resized versions.
    log_bboxes = tf.concat([bboxes[:, :3], new_boxes[:, :3]], axis=1)
    log_labels = tf.convert_to_tensor([[1, 2, 3, 11, 12, 13]], dtype=tf.int32)
    log_labels = tf.tile(log_labels, [B, 1])
    wsummary.detection_image_summary(self.batched_inputs[IMAGE],
                                     boxes=log_bboxes,
                                     classes=log_labels,
                                     name="to_anchor_bboxes")
    return new_boxes
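# Illustrative sketch of the box rewrite above: keep each box's center but replace
# its height/width with a level-dependent size given in pixels, normalised by the
# image size because the boxes are relative coordinates. Names are hypothetical.
import tensorflow as tf

def resize_boxes_keep_center(bboxes, box_size_pixels, img_size):
    # bboxes: [...,4] relative [ymin,xmin,ymax,xmax]; img_size: (H,W) in pixels
    h = box_size_pixels / tf.to_float(img_size[0])
    w = box_size_pixels / tf.to_float(img_size[1])
    ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=-1)
    cy, cx = (ymin + ymax) / 2, (xmin + xmax) / 2
    return tf.stack([cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2], axis=-1)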
def detection_image_summary(inputs, max_boxes_to_draw=20, min_score_thresh=0.2,
                            name="detection_image_summary", max_outputs=3, show_mask=True):
    image = inputs.get('image', None)
    if 'gt_boxes' not in inputs:
        if image is not None:
            wsummary.image_summaries(image, name=name + "_onlyimg")
        return

    boxes = inputs.get('gt_boxes', None)
    classes = inputs.get('gt_labels', None)
    instance_masks = inputs.get('gt_masks', None)
    lengths = inputs.get('gt_length', None)
    if instance_masks is not None and show_mask:
        wsummary.detection_image_summary(image, boxes, classes,
                                         instance_masks=instance_masks,
                                         lengths=lengths,
                                         category_index=DataLoader.category_index,
                                         max_boxes_to_draw=max_boxes_to_draw,
                                         min_score_thresh=min_score_thresh,
                                         max_outputs=max_outputs,
                                         name=name)
    else:
        wsummary.detection_image_summary(image, boxes, classes,
                                         lengths=lengths,
                                         category_index=DataLoader.category_index,
                                         max_boxes_to_draw=max_boxes_to_draw,
                                         min_score_thresh=min_score_thresh,
                                         max_outputs=max_outputs,
                                         name=name)
    if GT_KEYPOINTS in inputs:
        wsummary.keypoints_image_summary(image,
                                         keypoints=inputs[GT_KEYPOINTS],
                                         lengths=lengths,
                                         keypoints_pair=global_cfg.MODEL.KEYPOINTS.POINTS_PAIRS,
                                         name="keypoints")
def inference(self, inputs, head_outputs):
    """
    Arguments:
        inputs: same as CenterNet.forward's batched_inputs
    Returns:
        results:
            RD_BOXES: [B,N,4]
            RD_LABELS: [B,N]
            RD_PROBABILITY: [B,N]
            RD_LENGTH: [B]
    """
    self.inputs = inputs
    all_bboxes = []
    all_scores = []
    all_clses = []
    all_length = []
    img_size = tf.shape(inputs[IMAGE])[1:3]
    for i, datas in enumerate(head_outputs):
        # Coarser levels get a smaller candidate budget.
        num_dets = max(self.topk_candidates // (4 ** i), 4)
        K = max(self.k // (4 ** i), 4)
        bboxes, scores, clses, length = self.get_box_in_a_single_layer(datas, num_dets, img_size, K)
        all_bboxes.append(bboxes)
        all_scores.append(scores)
        all_clses.append(clses)
        all_length.append(length)

    with tf.name_scope("merge_all_boxes"):
        bboxes, _ = wmlt.batch_concat_with_length(all_bboxes, all_length)
        scores, _ = wmlt.batch_concat_with_length(all_scores, all_length)
        clses, length = wmlt.batch_concat_with_length(all_clses, all_length)

        nms = functools.partial(tfop.boxes_nms,
                                threshold=self.nms_threshold,
                                classes_wise=True,
                                k=self.max_detections_per_image)
        # There is no background class at prediction time; add 1 so that background becomes 0.
        clses = clses + 1
        bboxes, labels, nms_indexs, lens = odl.batch_nms_wrapper(bboxes, clses, length,
                                                                 confidence=None,
                                                                 nms=nms,
                                                                 k=self.max_detections_per_image,
                                                                 sort=True)
        scores = wmlt.batch_gather(scores, nms_indexs)

    outdata = {
        RD_BOXES: bboxes,
        RD_LABELS: labels,
        RD_PROBABILITY: scores,
        RD_LENGTH: lens
    }
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            lengths=outdata[RD_LENGTH],
            scores=outdata[RD_PROBABILITY],
            name="CenterNetOutput",
            category_index=DataLoader.category_index)
    return outdata
def losses(self):
    """
    Args:
        For `gt_classes` and `gt_anchors_deltas` parameters, see
            :meth:`FCOSGIou.get_ground_truth`.
        Their shapes are (N, R) and (N, R, 4), respectively, where R is
        the total number of anchors across levels, i.e. sum(Hi x Wi x A).
        For `pred_class_logits` and `pred_anchor_deltas`, see
            :meth:`FCOSGIouHead.forward`.
    Returns:
        dict[str: Tensor]:
            mapping from a named loss to a scalar tensor storing the loss.
            Used during training only. The dict keys are:
            "fcos_loss_cls", "fcos_loss_center_ness" and "fcos_loss_box_reg"
    """
    assert len(self.pred_logits[0].get_shape()) == 4, "error logits dim"

    gt_results = self._get_ground_truth()
    loss_cls_list = []
    loss_regression_list = []
    loss_center_ness_list = []
    total_num_foreground = []
    img_size = tf.shape(self.batched_inputs[IMAGE])[1:3]

    for i, gt_results_item in enumerate(gt_results):
        gt_classes = gt_results_item['g_classes']
        gt_boxes = gt_results_item['g_boxes']
        g_center_ness = gt_results_item['g_center_ness']
        pred_class_logits = self.pred_logits[i]
        pred_regression = self.pred_regression[i]
        pred_center_ness = self.pred_center_ness[i]

        foreground_idxs = (gt_classes > 0)
        num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))
        total_num_foreground.append(num_foreground)

        # Drop the background column of the one-hot targets.
        gt_classes_target = tf.one_hot(gt_classes, depth=self.num_classes + 1)
        gt_classes_target = gt_classes_target[..., 1:]

        wsummary.histogram_or_scalar(pred_center_ness, "center_ness")

        # classification loss (focal loss on sigmoid logits)
        loss_cls = tf.reduce_sum(
            wnn.sigmoid_cross_entropy_with_logits_FL(labels=gt_classes_target,
                                                     logits=pred_class_logits,
                                                     alpha=self.focal_loss_alpha,
                                                     gamma=self.focal_loss_gamma))

        # regression loss
        pred_boxes = self.box2box_transform.apply_deltas(regression=pred_regression,
                                                         img_size=img_size)
        if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG and \
                gt_classes.get_shape().as_list()[0] > 1:
            log_boxes = self.box2box_transform.apply_deltas(
                regression=gt_results_item['g_regression'], img_size=img_size)
            log_boxes = odbox.tfabsolutely_boxes_to_relative_boxes(log_boxes,
                                                                   width=img_size[1],
                                                                   height=img_size[0])
            boxes1 = tf.reshape(log_boxes[1:2], [1, -1, 4])
            wsummary.detection_image_summary(images=self.batched_inputs[IMAGE][1:2],
                                             boxes=boxes1,
                                             name="FCOSGIou_decode_test")

        pred_center_ness = tf.boolean_mask(pred_center_ness, foreground_idxs)
        g_center_ness = tf.boolean_mask(g_center_ness, foreground_idxs)
        pred_boxes = tf.boolean_mask(pred_boxes, foreground_idxs)
        gt_boxes = tf.boolean_mask(gt_boxes, foreground_idxs)
        wsummary.histogram_or_scalar(pred_center_ness, "center_ness_pos")

        # GIoU regression loss, weighted by the center-ness targets
        reg_loss_sum = (1.0 - odl.giou(pred_boxes, gt_boxes))
        wmlt.variable_summaries_v2(reg_loss_sum, f"giou_loss{i}")
        pred_center_ness = tf.squeeze(pred_center_ness, axis=-1)
        reg_norm = tf.reduce_sum(g_center_ness) + 1e-5
        reg_loss_sum = reg_loss_sum * g_center_ness
        wmlt.variable_summaries_v2(reg_loss_sum, f"loss_sum{i}")
        loss_box_reg = tf.reduce_sum(reg_loss_sum) * 300 / reg_norm  # 300: fixed loss weight
        wmlt.variable_summaries_v2(loss_box_reg, f"box_reg_loss_{i}")

        # center-ness loss
        loss_center_ness = 0.5 * tf.nn.sigmoid_cross_entropy_with_logits(labels=g_center_ness,
                                                                         logits=pred_center_ness)
        loss_center_ness = tf.reduce_sum(loss_center_ness) * 0.1
        wmlt.variable_summaries_v2(loss_center_ness, f"center_ness_loss{i}")

        loss_cls_list.append(loss_cls)
        loss_regression_list.append(loss_box_reg)
        loss_center_ness_list.append(loss_center_ness)

    total_num_foreground = tf.to_float(tf.maximum(tf.add_n(total_num_foreground), 1))
    return {
        "fcos_loss_cls": tf.add_n(loss_cls_list) / total_num_foreground,
        "fcos_loss_center_ness": tf.add_n(loss_center_ness_list) / total_num_foreground,
        "fcos_loss_box_reg": tf.add_n(loss_regression_list) / total_num_foreground,
    }
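# Sketch of the GIoU term used in the regression loss above. odl.giou is assumed to
# return a per-box GIoU in [-1, 1]; boxes are [N,4] in [ymin,xmin,ymax,xmax] order.
import tensorflow as tf

def giou_sketch(boxes0, boxes1):
    ymin0, xmin0, ymax0, xmax0 = tf.unstack(boxes0, axis=-1)
    ymin1, xmin1, ymax1, xmax1 = tf.unstack(boxes1, axis=-1)
    area0 = (ymax0 - ymin0) * (xmax0 - xmin0)
    area1 = (ymax1 - ymin1) * (xmax1 - xmin1)
    # intersection
    ih = tf.maximum(tf.minimum(ymax0, ymax1) - tf.maximum(ymin0, ymin1), 0.0)
    iw = tf.maximum(tf.minimum(xmax0, xmax1) - tf.maximum(xmin0, xmin1), 0.0)
    inter = ih * iw
    union = area0 + area1 - inter
    iou = inter / tf.maximum(union, 1e-8)
    # smallest enclosing box
    ch = tf.maximum(ymax0, ymax1) - tf.minimum(ymin0, ymin1)
    cw = tf.maximum(xmax0, xmax1) - tf.minimum(xmin0, xmin1)
    enclose = ch * cw
    return iou - (enclose - union) / tf.maximum(enclose, 1e-8)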
def mask_rcnn_loss(inputs, pred_mask_logits, proposals: EncodedData, fg_selection_mask, log=True):
    '''
    :param inputs: inputs[GT_MASKS] [batch_size,N,H,W]
    :param pred_mask_logits: [Y,H,W,C], C==1 if cls_agnostic_mask else num_classes;
        H,W is the size of the mask, not the position in the original image
    :param proposals: proposals.indices: [batch_size,M], proposals.boxes: [batch_size,M],
        proposals.gt_object_logits: [batch_size,M]
    :param fg_selection_mask: [X], X = batch_size*M, Y = tf.reduce_sum(fg_selection_mask)
    :return: scalar mask loss
    '''
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    total_num_masks, mask_H, mask_W, C = wmlt.combined_static_and_dynamic_shape(pred_mask_logits)
    assert mask_H == mask_W, "Mask prediction must be square!"

    gt_masks = inputs[GT_MASKS]  # [batch_size,N,H,W]
    with tf.device("/cpu:0"):
        # When the input image resolution is high this step can consume too much GPU
        # memory, so it runs on the CPU instead.
        batch_size, X, H, W = wmlt.combined_static_and_dynamic_shape(gt_masks)
        # Background is included in the proposals, with index -1.
        gt_masks = tf.reshape(gt_masks, [batch_size * X, H, W])
        indices = btf.twod_indexs_to_oned_indexs(tf.nn.relu(proposals.indices), depth=X)
        indices = tf.boolean_mask(indices, fg_selection_mask)
        gt_masks = tf.gather(gt_masks, indices)

    boxes = proposals.boxes
    batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(boxes)
    boxes = tf.reshape(boxes, [batch_size * box_nr, box_dim])
    boxes = tf.boolean_mask(boxes, fg_selection_mask)

    with tf.device("/cpu:0"):
        # Same reason as above: keep the crop-and-resize of the ground-truth masks on the CPU.
        gt_masks = tf.expand_dims(gt_masks, axis=-1)
        croped_masks_gt_masks = wmlt.tf_crop_and_resize(gt_masks, boxes, [mask_H, mask_W])

    if not cls_agnostic_mask:
        gt_classes = proposals.gt_object_logits
        gt_classes = tf.reshape(gt_classes, [-1])
        gt_classes = tf.boolean_mask(gt_classes, fg_selection_mask)
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # The predictions do not include a background channel, hence gt_classes - 1.
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits, gt_classes - 1)
        pred_mask_logits = tf.expand_dims(pred_mask_logits, axis=-1)

    if log and config.global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        with tf.device("/cpu:0"):
            with tf.name_scope("mask_loss_summary"):
                pmasks_2d = tf.reshape(fg_selection_mask, [batch_size, box_nr])
                boxes_3d = tf.expand_dims(boxes, axis=1)
                wsummary.positive_box_on_images_summary(inputs[IMAGE],
                                                        proposals.boxes,
                                                        pmasks=pmasks_2d)
                image = wmlt.select_image_by_mask(inputs[IMAGE], pmasks_2d)
                t_gt_masks = tf.expand_dims(tf.squeeze(gt_masks, axis=-1), axis=1)
                wsummary.detection_image_summary(images=image,
                                                 boxes=boxes_3d,
                                                 instance_masks=t_gt_masks,
                                                 name="mask_and_boxes_in_mask_loss")
                log_mask = gt_masks
                log_mask = ivis.draw_detection_image_summary(log_mask,
                                                             boxes=tf.expand_dims(boxes, axis=1))
                log_mask = wmli.concat_images([log_mask, croped_masks_gt_masks])
                wmlt.image_summaries(log_mask, "mask", max_outputs=3)

                log_mask = wmli.concat_images([gt_masks,
                                               tf.cast(pred_mask_logits > 0.5, tf.float32)])
                wmlt.image_summaries(log_mask, "gt_vs_pred", max_outputs=3)

    mask_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=croped_masks_gt_masks,
                                                        logits=pred_mask_logits)
    mask_loss = btf.safe_reduce_mean(mask_loss)
    return mask_loss
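# Sketch of the ground-truth mask cropping done above, assuming relative boxes in
# [ymin,xmin,ymax,xmax] order; wmlt.tf_crop_and_resize is assumed to behave like
# tf.image.crop_and_resize with one box per mask.
import tensorflow as tf

def crop_gt_masks_sketch(gt_masks, boxes, mask_size):
    # gt_masks: [N,H,W,1] float, boxes: [N,4] relative coords, mask_size: (mh, mw)
    box_ind = tf.range(tf.shape(gt_masks)[0])
    return tf.image.crop_and_resize(gt_masks, boxes, box_ind, mask_size)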
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (B,H,W,C) format.
            * instances (optional): groundtruth :class:`Instances`
            * proposals (optional): :class:`Instances`, precomputed proposals.

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model, used in inference.
              See :meth:`postprocess` for details.
    Returns:
        list[dict]:
            Each dict is the output for one input image.
            The dict contains one key "instances" whose value is a :class:`Instances`.
            The :class:`Instances` object has the following keys:
            "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
    """
    if not self.is_training:
        return self.inference(batched_inputs)

    batched_inputs = self.preprocess_image(batched_inputs)

    # Use the backbone network to produce a feature map, e.g. ResNet's Res4 (stride=16).
    features = self.backbone(batched_inputs)
    if self.roi_heads_backbone is not None:
        roi_features = self.roi_heads_backbone(batched_inputs)
        pg_features = features
    else:
        if isinstance(features, (list, tuple)):
            pg_features = features[0]
            roi_features = features[1]
        else:
            pg_features = features
            roi_features = features

    if self.proposal_generator:
        proposals, proposal_losses = self.proposal_generator(batched_inputs, pg_features)
    else:
        assert "proposals" in batched_inputs[0]
        proposals = {"proposal_boxes": batched_inputs["proposals"]}
        proposal_losses = {}

    results, detector_losses = self.roi_heads(batched_inputs, roi_features, proposals)
    if len(results) > 0:
        wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                         boxes=results[RD_BOXES],
                                         classes=results[RD_LABELS],
                                         lengths=results[RD_LENGTH],
                                         scores=results[RD_PROBABILITY],
                                         name="RCNN_result",
                                         category_index=DataLoader.category_index)

    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return results, losses
def inference(self, batched_inputs, detected_instances=None, do_postprocess=True):
    """
    Run inference on the given inputs.

    Args:
        batched_inputs (list[dict]): same as in :meth:`forward`
        detected_instances (None or list[Instances]): if not None, it
            contains an `Instances` object per image. The `Instances`
            object contains "pred_boxes" and "pred_classes" which are
            known boxes in the image.
            The inference will then skip the detection of bounding boxes,
            and only predict other per-ROI outputs.
        do_postprocess (bool): whether to apply post-processing on the outputs.

    Returns:
        same as in :meth:`forward`.
    """
    assert not self.is_training

    batched_inputs = self.preprocess_image(batched_inputs)
    features = self.backbone(batched_inputs)
    if self.roi_heads_backbone is not None:
        roi_features = self.roi_heads_backbone(batched_inputs)
        pg_features = features
    else:
        if isinstance(features, (list, tuple)):
            pg_features = features[0]
            roi_features = features[1]
        else:
            pg_features = features
            roi_features = features

    if detected_instances is None:
        if self.proposal_generator:
            proposals, _ = self.proposal_generator(batched_inputs, pg_features)
        else:
            assert "proposals" in batched_inputs[0]
            proposals = [x["proposals"].to(self.device) for x in batched_inputs]
        results, _ = self.roi_heads(batched_inputs, roi_features, proposals)
    else:
        detected_instances = [x.to(self.device) for x in detected_instances]
        results = self.roi_heads.forward_with_given_boxes(roi_features, detected_instances)

    instance_masks = None if not self.cfg.MODEL.MASK_ON else results.get(RD_MASKS, None)
    if instance_masks is not None:
        shape = btf.combined_static_and_dynamic_shape(batched_inputs[IMAGE])
        instance_masks = tf.cast(instance_masks > 0.5, tf.float32)
        instance_masks = ivs.batch_tf_get_fullsize_mask(boxes=results[RD_BOXES],
                                                        masks=instance_masks,
                                                        size=shape[1:3])
    wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                     boxes=results[RD_BOXES], classes=results[RD_LABELS],
                                     lengths=results[RD_LENGTH],
                                     scores=results[RD_PROBABILITY],
                                     instance_masks=instance_masks, name="RCNN_result",
                                     category_index=DataLoader.category_index)
    if instance_masks is not None:
        wsummary.detection_image_summary(images=tf.zeros_like(batched_inputs[IMAGE]),
                                         boxes=results[RD_BOXES], classes=results[RD_LABELS],
                                         lengths=results[RD_LENGTH],
                                         instance_masks=instance_masks,
                                         name="RCNN_Mask_result",
                                         category_index=DataLoader.category_index)

    if do_postprocess:
        return self._postprocess(results, batched_inputs), None
    else:
        return results, None
def forward(self, inputs, features):
    features = [features[f] for f in self.in_features]
    gt_boxes = inputs.get(GT_BOXES, None)
    gt_length = inputs.get(GT_LENGTH, None)

    pred_objectness_logits, pred_anchor_deltas = self.rpn_head(inputs, features)
    anchors = self.rpn_head.anchor_generator(inputs, features)
    self.anchors_num_per_level = [
        wmlt.combined_static_and_dynamic_shape(x)[0] for x in anchors
    ]
    outputs = build_outputs(self.cfg.MODEL.RPN.OUTPUTS,
                            self.box2box_transform, self.anchor_matcher,
                            self.batch_size_per_image, self.positive_fraction,
                            pred_objectness_logits, pred_anchor_deltas,
                            anchors, gt_boxes,
                            gt_length=gt_length)
    if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        outputs.inputs = inputs

    if self.is_training:
        losses = {k: v * self.loss_weight for k, v in outputs.losses().items()}
        rpn_threshold = 0.0
    else:
        rpn_threshold = self.cfg.MODEL.PROPOSAL_GENERATOR.SCORE_THRESH_TEST
        losses = {}

    # Find the top proposals by applying NMS and removing boxes that
    # are too small. The proposals are treated as fixed for approximate
    # joint training with roi heads. This approach ignores the derivative
    # w.r.t. the proposal boxes' coordinates that are also network
    # responses, so is approximate.
    pre_nms_topk_max_per_layer = self.cfg.MODEL.RPN.PRE_NMS_TOPK_MAX_PER_LAYER
    proposals, logits = find_top_rpn_proposals(
        outputs.predict_proposals(),
        outputs.predict_objectness_logits(),
        self.nms_thresh,
        self.pre_nms_topk[self.is_training],
        self.post_nms_topk[self.is_training],
        self.anchors_num_per_level,
        score_threshold=rpn_threshold,
        is_training=self.is_training,
        pre_nms_topk_max_per_layer=pre_nms_topk_max_per_layer)

    if self.cfg.MODEL.RPN.SORT_RESULTS:
        with tf.name_scope("sort_rpn_results"):
            def fn(bboxes, keys):
                # Sort one image's proposals by descending objectness.
                N = wmlt.combined_static_and_dynamic_shape(keys)
                new_keys, indices = tf.nn.top_k(keys, k=N[0])
                bboxes = tf.gather(bboxes, indices)
                return [bboxes, new_keys]

            proposals, logits = tf.map_fn(lambda x: fn(x[0], x[1]),
                                          elems=[proposals, logits],
                                          back_prop=False)

    outdata = {PD_BOXES: proposals, PD_PROBABILITY: tf.nn.sigmoid(logits)}
    wsummary.detection_image_summary(images=inputs[IMAGE],
                                     boxes=outdata[PD_BOXES],
                                     name="rpn/proposals")
    return outdata, losses
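# Illustrative sketch of the per-image sort used when cfg.MODEL.RPN.SORT_RESULTS is
# set: order the proposals of one image by descending objectness with tf.nn.top_k
# and gather the boxes accordingly.
import tensorflow as tf

def sort_proposals_by_score(boxes, scores):
    # boxes: [N,4], scores: [N]
    sorted_scores, order = tf.nn.top_k(scores, k=tf.shape(scores)[0])
    return tf.gather(boxes, order), sorted_scores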
def forward(self, batched_inputs, features):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (H, W, C) format.
            * instances: Instances

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model, used in inference.
              See :meth:`postprocess` for details.
    Returns:
        outdata: dict with PD_BOXES and PD_PROBABILITY for the generated proposals.
        losses: dict mapping a named loss (prefixed with "pg_") to a tensor storing
            the loss. Used during training only.
    """
    if len(self.in_features) == 0:
        print(f"Error: no input features specified for the proposal generator, "
              f"using all features {features.keys()}")
        features = list(features.values())
    else:
        features = [features[f] for f in self.in_features]

    pred_logits, pred_regression, pred_center_ness = self.head(features)
    gt_boxes = batched_inputs[GT_BOXES]
    gt_length = batched_inputs[GT_LENGTH]
    gt_labels = batched_inputs[GT_LABELS]

    outputs = build_outputs(
        name=self.cfg.MODEL.FCOSPG.OUTPUTS,
        cfg=self.cfg.MODEL.FCOSPG,
        parent=self,
        box2box_transform=self.box2box_transform,
        pred_logits=pred_logits,
        pred_regression=pred_regression,
        pred_center_ness=pred_center_ness,
        gt_boxes=gt_boxes,
        gt_labels=gt_labels,
        gt_length=gt_length,
        batched_inputs=batched_inputs,
        max_detections_per_image=self.cfg.TEST.DETECTIONS_PER_IMAGE,
    )

    results = outputs.inference(inputs=batched_inputs,
                                box_cls=pred_logits,
                                box_regression=pred_regression,
                                center_ness=pred_center_ness)

    losses = {}
    if self.is_training:
        _losses = outputs.losses()
        for k, v in _losses.items():
            losses["pg_" + k] = v

    outdata = {
        PD_BOXES: results[RD_BOXES],
        PD_PROBABILITY: results[RD_PROBABILITY]
    }
    wsummary.detection_image_summary(images=batched_inputs[IMAGE],
                                     boxes=outdata[PD_BOXES],
                                     name="fcospg/proposals")
    return outdata, losses