def pyapply_deltas(self, datas, img_size=None):
    """Decode the center-point head outputs into boxes.

    The center heatmap is passed through a sigmoid and ``self.pixel_nms``,
    the top ``self.k`` peaks are taken with ``self._topk``, each peak is
    shifted by its gathered offset and a box of the gathered height/width is
    built around it. The boxes are finally passed through
    ``odb.tfabsolutely_boxes_to_relative_boxes`` using the heatmap's
    width/height.

    :param datas: dict providing 'heatmaps_ct', 'offset' and 'hw'
    :param img_size: not used by this function
    :return: (bboxes, ct_clses, ct_scores, hw_inds), boxes are laid out as
        [ymin, xmin, ymax, xmax]
    """
    heatmap = tf.nn.sigmoid(datas['heatmaps_ct'])
    batch, height, width, _ = wmlt.combined_static_and_dynamic_shape(heatmap)

    # Flatten the per-position regression maps so they can be gathered by
    # a single index per peak.
    flat_offset = tf.reshape(datas['offset'], [batch, -1, 2])
    flat_hw = tf.reshape(datas['hw'], [batch, -1, 2])

    heatmap = self.pixel_nms(heatmap, threshold=self.score_threshold)
    ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(heatmap, k=self.k)

    # ct_inds is divided by the channel count to index the flattened maps;
    # presumably it indexes the flattened (position, class) grid - TODO confirm.
    hw_inds = ct_inds // btf.channel(heatmap)

    top_k = self.k
    ct_ys = tf.reshape(ct_ys, [batch, top_k])
    ct_xs = tf.reshape(ct_xs, [batch, top_k])

    peak_offset = tf.reshape(wmlt.batch_gather(flat_offset, hw_inds),
                             [batch, top_k, 2])
    offset_y, offset_x = tf.unstack(peak_offset, axis=-1)
    ct_xs = ct_xs + offset_x
    ct_ys = ct_ys + offset_y

    peak_hw = tf.reshape(wmlt.batch_gather(flat_hw, hw_inds),
                         [batch, top_k, 2])
    box_h, box_w = tf.unstack(peak_hw, axis=-1)
    half_h = box_h / 2
    half_w = box_w / 2

    bboxes = tf.stack(
        [ct_ys - half_h, ct_xs - half_w, ct_ys + half_h, ct_xs + half_w],
        axis=-1)
    bboxes = odb.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                      width=width,
                                                      height=height)
    return bboxes, ct_clses, ct_scores, hw_inds
def sparse_softmax_cross_entropy_with_logits_alpha_balanced(
        _sentinel=None,  # pylint: disable=invalid-name
        labels=None,
        logits=None,
        alpha="auto",
        max_alpha_scale=10.0,
        name=None):
    """Sparse softmax cross entropy with a per-class weight.

    With ``alpha=None`` this is plain
    ``tf.nn.sparse_softmax_cross_entropy_with_logits``. With ``alpha="auto"``
    the loss of every sample is ``-alpha_c * log(p_c)`` where ``c`` is the
    sample's label and
    ``alpha_c = min(max_alpha_scale, count_c * num_classes / N)``
    (``count_c``: number of labels equal to ``c`` in this call, at least 1;
    ``N``: total number of labels).

    NOTE(review): as written the weight grows with the class frequency;
    confirm this is the intended direction of the balancing.

    :param labels: integer class indices
    :param logits: unnormalised scores, the last axis is the class axis
    :param alpha: None or "auto"
    :param max_alpha_scale: upper bound applied to the per-class weight
    :param name: optional variable-scope name
    :return: per-sample loss
    :raises ValueError: if alpha is neither None nor "auto"
    """
    if alpha is None:
        return tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                              logits=logits,
                                                              name=name)
    if alpha != "auto":
        raise ValueError(f"alphpa shold be None or auto, not {alpha}.")

    with tf.variable_scope(
            name,
            default_name="sparse_softmax_cross_entropy_with_logits_alpha_balanced"):
        label_index = tf.expand_dims(labels, axis=-1)

        # Probability assigned to the true class, floored so log() stays finite.
        true_prob = tf.squeeze(
            wmlt.batch_gather(tf.nn.softmax(logits), label_index), axis=-1)
        true_prob = tf.maximum(1e-10 * (1 + true_prob), true_prob)

        num_classes = logits.get_shape().as_list()[-1]
        one_hot_labels = tf.one_hot(indices=labels, depth=num_classes)

        # Per-class label count, reduced over every axis except the class axis.
        leading_axes = list(range(one_hot_labels.get_shape().ndims - 1))
        class_count = tf.reduce_sum(one_hot_labels,
                                    axis=leading_axes,
                                    keepdims=True)
        class_count = tf.cast(tf.maximum(class_count, 1), tf.float32)

        total = tf.cast(tf.reduce_prod(tf.shape(labels)), tf.float32)
        ratio = total / (class_count * float(num_classes))
        class_weight = tf.minimum(max_alpha_scale, 1.0 / ratio)

        # Broadcast the per-class weight to every sample, then pick each
        # sample's own class weight.
        class_weight = class_weight * tf.ones_like(one_hot_labels,
                                                   dtype=tf.float32)
        sample_weight = tf.squeeze(
            wmlt.batch_gather(params=class_weight, indices=label_index),
            axis=-1)

        return -sample_weight * tf.math.log(true_prob)
def forward(self, boxes, gboxes, glabels, glength, *args, **kwargs):
    """Assign every proposal box to at most one ground-truth box.

    A box is matched to the gt box it overlaps most, considering only gt
    boxes that contain the box center; the match is kept when the overlap
    exceeds ``self.MIN_IOU_THRESHOLD`` and reaches the threshold that
    ``self.get_threshold`` computed for that gt box (capped at
    ``self.thresholds[-1]``).

    :param boxes: [1,X,4] or [batch_size,X,4] proposal boxes
    :param gboxes: [batch_size,Y,4] ground-truth boxes
    :param glabels: [batch_size,Y] ground-truth labels
    :param glength: [batch_size] number of valid gt boxes per image
    :return:
        labels: [batch_size,X] label of each box, 0 for unmatched boxes
        scores: [batch_size,X] overlap with the best gt box
        indices: [batch_size,X] index of the matched gt box, -1 if unmatched
        (all three are wrapped in tf.stop_gradient)

    NOTE(review): the source was whitespace-collapsed; the extent of the
    "/cpu:0" device scope below was reconstructed - confirm.
    """
    with tf.name_scope("ATTSMatcher4"):
        iou_matrix = odb.batch_bboxes_pair_wrapv2(gboxes,
                                                  boxes,
                                                  fn=odb.get_iou_matrix,
                                                  len0=glength,
                                                  scope="get_iou_matrix")
        center_inside = odb.batch_bboxes_pair_wrapv2(
            gboxes,
            boxes,
            fn=odb.is_center_in_boxes,
            len0=glength,
            dtype=tf.bool,
            scope="get_is_center_in_gtbboxes")
        wsummary.variable_summaries_v2(iou_matrix, "iou_matrix")

        with tf.device("/cpu:0"):
            per_gt_threshold = self.get_threshold(iou_matrix)
        per_gt_threshold = tf.minimum(per_gt_threshold, self.thresholds[-1])

        # Overlaps with gt boxes that do not contain the box center are ignored.
        iou_matrix = tf.where(center_inside, iou_matrix,
                              tf.zeros_like(iou_matrix))

        # Best gt box for every proposal box.
        best_scores, best_index = tf.nn.top_k(
            tf.transpose(iou_matrix, perm=[0, 2, 1]), k=1)
        batch, _, _ = btf.combined_static_and_dynamic_shape(gboxes)
        index = tf.squeeze(best_index, axis=-1)
        scores = tf.squeeze(best_scores, axis=-1)

        threshold = wmlt.batch_gather(per_gt_threshold, index)
        labels = wmlt.batch_gather(glabels,
                                   index,
                                   name="gather_labels",
                                   parallel_iterations=batch,
                                   back_prop=False)

        accepted = tf.logical_and(
            tf.greater(scores, self.MIN_IOU_THRESHOLD),
            tf.greater_equal(scores, threshold))
        labels = tf.where(accepted, labels, tf.zeros_like(labels))
        index = tf.where(accepted, index, tf.ones_like(index) * -1)

        if self.same_pos_label:
            # Collapse every positive label onto one configured value.
            labels = tf.where(tf.greater(labels, 0),
                              tf.ones_like(labels) * self.same_pos_label,
                              labels)

        return (tf.stop_gradient(labels), tf.stop_gradient(scores),
                tf.stop_gradient(index))
def mask_rcnn_loss_old(inputs,
                       pred_mask_logits,
                       proposals: EncodedData,
                       fg_selection_mask,
                       log=True):
    """Mean sigmoid cross-entropy between predicted and ground-truth masks.

    For every selected (foreground) proposal the matched gt mask is cropped
    to the proposal box and resized to the prediction size, then compared
    with the predicted logits of the proposal's gt class.

    :param inputs: inputs[GT_MASKS] is [batch_size,N,H,W]
    :param pred_mask_logits: [Y,H,W,C]; C==1 for class-agnostic masks,
        otherwise the number of classes. H,W is the mask size, not a position
        in the original image
    :param proposals: proposals.indices [batch_size,M], proposals.boxes
        [batch_size,M,box_dim], proposals.gt_object_logits [batch_size,M]
    :param fg_selection_mask: [X], X = batch_size*M and
        Y = tf.reduce_sum(fg_selection_mask)
    :param log: not used by this function
    :return: scalar mask loss

    NOTE(review): the source was whitespace-collapsed; the extents of the two
    "/cpu:0" device scopes were reconstructed - confirm.
    """
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    _, mask_H, mask_W, _ = wmlt.combined_static_and_dynamic_shape(
        pred_mask_logits)
    assert mask_H == mask_W, "Mask prediction must be square!"

    gt_masks = inputs[GT_MASKS]  # [batch_size,N,H,W]
    # With high-resolution input images this step may use too much GPU
    # resource, so it is executed on the CPU.
    with tf.device("/cpu:0"):
        _, _, full_h, full_w = wmlt.combined_static_and_dynamic_shape(gt_masks)
        # Background proposals have index -1; relu maps them to 0 so the
        # gather stays valid.
        gt_masks = wmlt.batch_gather(gt_masks, tf.nn.relu(proposals.indices))
        gt_masks = tf.reshape(gt_masks, [-1, full_h, full_w])
        gt_masks = tf.boolean_mask(gt_masks, fg_selection_mask)

    batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
        proposals.boxes)
    fg_boxes = tf.boolean_mask(
        tf.reshape(proposals.boxes, [batch_size * box_nr, box_dim]),
        fg_selection_mask)

    # Same reason as above: run on the CPU to spare GPU resources.
    with tf.device("/cpu:0"):
        gt_masks = tf.expand_dims(gt_masks, axis=-1)
        croped_masks_gt_masks = wmlt.tf_crop_and_resize(
            gt_masks, fg_boxes, [mask_H, mask_W])

    if not cls_agnostic_mask:
        gt_classes = tf.boolean_mask(
            tf.reshape(proposals.gt_object_logits, [-1]), fg_selection_mask)
        class_first = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # The predictions do not contain the background class, hence the -1.
        pred_mask_logits = wmlt.batch_gather(class_first, gt_classes - 1)
        pred_mask_logits = tf.expand_dims(pred_mask_logits, axis=-1)

    per_pixel_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=croped_masks_gt_masks, logits=pred_mask_logits)
    return tf.reduce_mean(per_pixel_loss)
def predict_boxes_for_gt_classes(self):
    """Return, for every proposal, the predicted box of its gt class.

    When later branches such as mask or keypoint heads exist, they can work
    either on the same input as the RCNN (i.e. the RPN output) or on the
    RCNN output. This function supports the second option. The input
    proposal boxes are already [batch_size,N,4] and keep that shape.

    None of the Detectron2 configurations use this feature, although in
    theory it is better (at the cost of training efficiency). To guard
    against poor results early in training, this implementation, unlike
    Detectron2, also includes the gt boxes.

    NOTE(review): the class axis is gathered with the raw gt class, whereas
    other losses in this code base index foreground classes with
    ``gt_class - 1`` - confirm which layout ``predict_boxes`` produces.

    :return: [batch_size,box_nr,box_dim]
    """
    with tf.name_scope("predict_boxes_for_gt_classes"):
        predicted_boxes = self.predict_boxes()
        proposal_box_dim = self.proposals[PD_BOXES].get_shape().as_list()[-1]
        predicted_last_dim = predicted_boxes.get_shape().as_list()[-1]

        # A class-agnostic box head already yields one box per proposal, so
        # the result of `predict_boxes` is returned unchanged in that case.
        if predicted_last_dim > proposal_box_dim:
            gt_classes = tf.reshape(self.proposals.gt_object_logits, [-1])
            batch_size, box_nr, box_dim = (
                wmlt.combined_static_and_dynamic_shape(
                    self.proposals[PD_BOXES]))
            per_class_boxes = tf.reshape(predicted_boxes,
                                         [batch_size * box_nr, -1, box_dim])
            selected = wmlt.batch_gather(per_class_boxes, gt_classes)
            predicted_boxes = tf.reshape(selected,
                                         [batch_size, box_nr, box_dim])
        return predicted_boxes
def get_pred_iou_lossv1(self):
    """Loss for the IoU-prediction branch.

    The target of every foreground proposal is ``odl.giou`` between its
    decoded predicted box and its matched gt box (gradient stopped); the
    target of every background proposal is 0. Both parts use
    ``wnn.sigmoid_cross_entropy_with_logits_FL``; the background part is
    weighted by 0.5.

    An empty foreground or background selection contributes 0 instead of the
    NaN that ``tf.reduce_mean`` yields on an empty tensor.

    :return: scalar loss (positive part + weighted negative part)
    """

    def _mean_or_zero(values):
        # Mean that stays finite (0) when `values` has no elements.
        count = tf.cast(tf.maximum(tf.size(values), 1), values.dtype)
        return tf.reduce_sum(values) / count

    with tf.name_scope("get_pred_iouv1_loss"):
        # Background proposals have index -1; relu maps them to 0 so the
        # gather stays valid (they are masked out below).
        matched_gt_boxes = wmlt.batch_gather(
            self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            matched_gt_boxes)
        matched_gt_boxes = tf.reshape(matched_gt_boxes,
                                      [batch_size * box_nr, box_dim])
        proposal_bboxes = tf.reshape(self.proposals.boxes,
                                     [batch_size * box_nr, box_dim])

        cls_agnostic_bbox_reg = (
            self.pred_proposal_deltas.get_shape().as_list()[-1] == box_dim)
        num_classes = self.pred_class_logits.get_shape().as_list()[-1]
        fg_num_classes = num_classes - 1
        pred_iou_logits = self.pred_iou_logits

        fg_inds = tf.greater(self.gt_classes, 0)
        matched_gt_boxes = tf.boolean_mask(matched_gt_boxes, fg_inds)
        pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                               fg_inds)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
        fg_classes = tf.boolean_mask(self.gt_classes, fg_inds)
        pred_iou_logits_pos = tf.reshape(
            tf.boolean_mask(pred_iou_logits, fg_inds), [-1])
        pred_iou_logits_neg = tf.reshape(
            tf.boolean_mask(pred_iou_logits, tf.logical_not(fg_inds)), [-1])

        if not cls_agnostic_bbox_reg:
            # Keep only the deltas predicted for each proposal's gt class
            # (the foreground classes start at 1, hence the -1).
            pred_proposal_deltas = tf.reshape(
                pred_proposal_deltas, [-1, fg_num_classes, box_dim])
            pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                pred_proposal_deltas, fg_classes - 1)

        pred_bboxes = self.box2box_transform.apply_deltas(
            pred_proposal_deltas, boxes=proposal_bboxes)

        # The overlap is a regression target only, no gradient flows into it.
        iou_target = tf.stop_gradient(odl.giou(pred_bboxes, matched_gt_boxes))

        loss_pos = _mean_or_zero(
            wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=iou_target, logits=pred_iou_logits_pos))
        loss_neg = _mean_or_zero(
            wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=tf.zeros_like(pred_iou_logits_neg),
                logits=pred_iou_logits_neg)) * 0.5

        tf.summary.scalar("iou_pos_loss", loss_pos)
        tf.summary.scalar("iou_neg_loss", loss_neg)

        return loss_pos + loss_neg
def smooth_l1_loss(self):
    """
    Compute the box regression loss.

    Despite the name, the loss is obtained from ``odl.giou_loss`` between the
    decoded predicted boxes and the matched gt boxes of the foreground
    proposals.

    Returns:
        scalar Tensor, already multiplied by
        cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
    """
    with tf.name_scope("box_regression_loss"):
        # Background proposals have index -1; relu keeps the gather valid.
        matched_gt = wmlt.batch_gather(self.proposals.gt_boxes,
                                       tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            matched_gt)
        flat_shape = [batch_size * box_nr, box_dim]
        matched_gt = tf.reshape(matched_gt, flat_shape)
        proposal_bboxes = tf.reshape(self.proposals.boxes, flat_shape)

        delta_dim = self.pred_proposal_deltas.get_shape().as_list()[-1]
        cls_agnostic_bbox_reg = delta_dim == box_dim
        fg_num_classes = self.pred_class_logits.get_shape().as_list()[-1] - 1

        # The box loss is only computed between the prediction for the gt
        # class and the target; predictions for non-gt classes and for the
        # background carry no loss. Because the per-box losses are summed
        # (not averaged), an empty foreground set yields a valid zero loss.
        fg_inds = tf.greater(self.gt_classes, 0)
        matched_gt = tf.boolean_mask(matched_gt, fg_inds)
        fg_deltas = tf.boolean_mask(self.pred_proposal_deltas, fg_inds)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
        fg_classes = tf.boolean_mask(self.gt_classes, fg_inds)

        if not cls_agnostic_bbox_reg:
            fg_deltas = tf.reshape(fg_deltas, [-1, fg_num_classes, box_dim])
            fg_deltas = wmlt.select_2thdata_by_index_v2(fg_deltas,
                                                        fg_classes - 1)

        pred_bboxes = self.box2box_transform.apply_deltas(
            fg_deltas, boxes=proposal_bboxes)
        summed_loss = tf.reduce_sum(odl.giou_loss(pred_bboxes, matched_gt))

        # Normalise by the total number of regions R, not by the number of
        # foreground regions. This gives every foreground example the same
        # influence: with a minibatch holding 1 foreground region and another
        # holding 100, dividing by the foreground count would weight the
        # single example 100 times more than each of the others; dividing by
        # R weights them all equally.
        num_samples = wmlt.num_elements(self.gt_classes)
        loss_box_reg = summed_loss / num_samples

        wsummary.histogram_or_scalar(loss_box_reg, "fast_rcnn/box_reg_loss")

        return loss_box_reg * self.cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
def get_pred_centerness_loss(self):
    """Loss for the centerness-prediction branch (foreground proposals only).

    The target is ``1 - max(2 * |gt_center - proposal_center| / gt_size)``
    computed from the ``odb.to_cxyhw`` form of the proposal box and of its
    matched gt box; the loss is
    ``wnn.sigmoid_cross_entropy_with_logits_FL`` against
    ``self.pred_iou_logits``.

    The original code also decoded the predicted boxes but overwrote the
    result immediately with the proposal boxes, so that unused decoding has
    been removed; the target is unchanged.
    NOTE(review): confirm the target is meant to use the proposal boxes
    rather than the decoded predictions, and that the first two / last two
    channels of ``odb.to_cxyhw`` pair up axis-wise.

    An empty foreground selection yields 0 instead of the NaN that
    ``tf.reduce_mean`` returns on an empty tensor.

    :return: scalar loss
    """
    with tf.name_scope("get_pred_centerness_loss"):
        # Background proposals have index -1; relu keeps the gather valid
        # (they are masked out below).
        matched_gt_boxes = wmlt.batch_gather(
            self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            matched_gt_boxes)
        matched_gt_boxes = tf.reshape(matched_gt_boxes,
                                      [batch_size * box_nr, box_dim])
        proposal_bboxes = tf.reshape(self.proposals.boxes,
                                     [batch_size * box_nr, box_dim])

        fg_inds = tf.greater(self.gt_classes, 0)
        matched_gt_boxes = tf.boolean_mask(matched_gt_boxes, fg_inds)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
        pred_iou_logits_pos = tf.reshape(
            tf.boolean_mask(self.pred_iou_logits, fg_inds), [-1])

        proposal_cxyhw = odb.to_cxyhw(proposal_bboxes)
        gt_cxyhw = odb.to_cxyhw(matched_gt_boxes)

        # Center distance, doubled so it is relative to the full gt size.
        deltas = tf.abs(gt_cxyhw[..., :2] - proposal_cxyhw[..., :2]) * 2
        wsummary.histogram_or_scalar(deltas, "centerness_deltas")
        centerness = 1 - tf.reduce_max(
            deltas / (gt_cxyhw[..., 2:] + 1e-8), axis=-1, keepdims=False)
        wsummary.histogram_or_scalar(centerness, "centerness")

        per_box_loss = wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=centerness, logits=pred_iou_logits_pos)
        wsummary.histogram_or_scalar(tf.nn.sigmoid(pred_iou_logits_pos),
                                     "pred_centerness")

        # Mean over the foreground boxes that stays 0 when there are none.
        fg_count = tf.cast(tf.maximum(tf.size(per_box_loss), 1),
                           per_box_loss.dtype)
        loss = tf.reduce_sum(per_box_loss) / fg_count
        tf.summary.scalar("centerness_loss", loss)

        return loss
def ae_loss(tag0, tag1, index, mask):
    '''
    Pull/push loss on the corner tags.

    Pull: squared distance of each gathered tag to the mean of its pair,
    summed over the masked pairs and divided by the number of masked pairs.
    Push: ``relu(1 - |mean_i - mean_j|)`` summed over the pairs produced by
    ``tfop.make_neg_pair_index(mask)`` and divided by the number of valid
    entries of that index.

    :param tag0: [B,N,C], top left tag
    :param tag1: [B,N,C], bottom right tag
    :param index: [B,M,>=2], channel 0 indexes tag0, channel 1 indexes tag1
    :param mask: [B,M]
    :return: pull loss + push loss
    '''
    with tf.name_scope("pull_loss"):
        pair_nr = tf.reduce_sum(tf.cast(mask, tf.float32)) + 1e-4
        tl_tag = wmlt.batch_gather(tag0, index[:, :, 0])
        br_tag = wmlt.batch_gather(tag1, index[:, :, 1])
        tag_mean = (tl_tag + br_tag) / 2

        tl_pull = tf.reduce_sum(
            tf.boolean_mask(tf.pow(tl_tag - tag_mean, 2) / pair_nr, mask))
        br_pull = tf.reduce_sum(
            tf.boolean_mask(tf.pow(br_tag - tag_mean, 2) / pair_nr, mask))
        pull = tl_pull + br_pull

    with tf.name_scope("push_loss"):
        neg_index = tfop.make_neg_pair_index(mask)
        # Entries greater than -1 are valid; the others are clamped to 0 for
        # the gather and dropped again by the mask.
        push_mask = tf.greater(neg_index, -1)
        neg_index = tf.nn.relu(neg_index)
        neg_nr = tf.reduce_sum(tf.cast(push_mask, tf.float32)) + 1e-4

        first_mean = tf.boolean_mask(
            wmlt.batch_gather(tag_mean, neg_index[:, :, 0]),
            push_mask[..., 0])
        second_mean = tf.boolean_mask(
            wmlt.batch_gather(tag_mean, neg_index[:, :, 1]),
            push_mask[..., 1])
        margin_violation = tf.nn.relu(1 - tf.abs(first_mean - second_mean))
        push = tf.reduce_sum(margin_violation) / neg_nr

    return pull + push
def _get_regression_ground_truth(self):
    """
    Match the anchors against the gt boxes and gather the matched gt box of
    every anchor.

    The matcher may report -1 for anchors without a match (other call sites
    in this code base clamp the index for that reason), so the index is
    clamped to 0 before gathering; callers are expected to ignore those
    anchors through the returned labels.

    Returns:
        gt_objectness_logits_i: first output of ``self.boxes_anchor_matcher``
            (the per-anchor labels).
        gt_anchor_deltas: the gt box gathered for every anchor with the
            matcher's (clamped) indices.
    """
    res = self.boxes_anchor_matcher(self.anchors,
                                    self.gt_boxes,
                                    self.gt_labels,
                                    self.gt_length,
                                    boxes_len=self.anchors_lens)
    gt_objectness_logits_i, _scores, indices = res
    gt_anchor_deltas = wmlt.batch_gather(self.gt_boxes,
                                         tf.maximum(indices, 0))
    return gt_objectness_logits_i, gt_anchor_deltas
def _get_ground_truth(self):
    """
    Match the anchors against the gt boxes, using an all-ones label for
    every gt box, and gather the matched gt box of every anchor.

    The raw matcher output is also stored in
    ``self.mid_results['anchor_matcher']``.

    Returns:
        gt_objectness_logits_i: first output of ``self.anchor_matcher``,
            i.e. the label of the corresponding anchor box.
        gt_anchor_deltas: the gt box gathered for every anchor; indices are
            clamped to 0 before the gather.
    """
    unit_labels = tf.ones(tf.shape(self.gt_boxes)[:2])
    match_result = self.anchor_matcher(self.anchors,
                                       self.gt_boxes,
                                       unit_labels,
                                       self.gt_length,
                                       boxes_len=self.anchors_lens)
    anchor_labels, _, matched_indices = match_result
    self.mid_results['anchor_matcher'] = match_result

    matched_gt_boxes = wmlt.batch_gather(self.gt_boxes,
                                         tf.maximum(matched_indices, 0))
    return anchor_labels, matched_gt_boxes
def _match_and_label_boxes(self, inputs, proposals, stage):
    """Match the proposals of one stage with the gt boxes and label them.

    :param inputs: dict providing GT_BOXES, GT_LABELS, GT_LENGTH (and IMAGE
        when debug summaries are enabled)
    :param proposals: proposal boxes handed to the stage's matcher
    :param stage: index into ``self.proposal_matchers``; also used in the
        name scope
    :return: EncodedData(labels, scores, indices, proposals, gt_boxes,
        gt_labels)
    """
    with tf.name_scope(f"match_and_label_boxes{stage}"):
        gt_boxes = inputs[GT_BOXES]
        gt_labels = inputs[GT_LABELS]
        gt_length = inputs[GT_LENGTH]

        # Augment proposals with ground-truth boxes.
        # With learned proposals (e.g. RPN) the proposals are of low quality
        # when training starts because of the random initialization, and it
        # is possible that none of them overlaps a gt object enough to serve
        # as a positive example for the second-stage components (box head,
        # cls head, mask head). Adding the gt boxes guarantees some positive
        # examples from the start. For RPN this improves convergence and
        # empirically improves box AP on COCO by about 0.5 points (under one
        # tested configuration).
        if self.proposal_append_gt:
            proposals = self.add_ground_truth_to_proposals(proposals,
                                                           gt_boxes,
                                                           gt_length,
                                                           nr=8,
                                                           limits=None)

        res = self.proposal_matchers[stage](proposals, gt_boxes, gt_labels,
                                            gt_length)
        gt_logits_i, scores, indices = res

        if self.cfg.GLOBAL.DEBUG:
            # Device string fixed from the malformed ":/cpu:0".
            with tf.device("/cpu:0"):
                with tf.name_scope("match_and_label_boxes"):
                    logmask = tf.greater(gt_logits_i, 0)
                    wsummary.detection_image_summary_by_logmask(
                        images=inputs[IMAGE],
                        boxes=proposals,
                        classes=gt_logits_i,
                        scores=scores,
                        logmask=logmask,
                        name="label_and_sample_proposals_summary")
                    # Background proposals have index -1; relu keeps the
                    # gather valid.
                    pgt_boxes = wmlt.batch_gather(inputs[GT_BOXES],
                                                  tf.nn.relu(indices))
                    wsummary.detection_image_summary_by_logmask(
                        images=inputs[IMAGE],
                        boxes=pgt_boxes,
                        classes=gt_logits_i,
                        scores=scores,
                        logmask=logmask,
                        name="label_and_sample_proposals_summary_by_gtboxes")

        res = EncodedData(gt_logits_i, scores, indices, proposals, gt_boxes,
                          gt_labels)
        return res
def get_deltas(self, boxes, gboxes, labels, indices, img_size=None):
    """
    Encode the matched gt boxes as corner offsets relative to `boxes`.

    The result is the offset of the top-left and bottom-right corners,
    divided by ``self.const_scale`` and, when ``self.scale`` is set, also by
    the square root of each box's area.

    labels, indices are the outputs of the matcher (labels and img_size are
    not used here)
    boxes:[batch_size,N,4]
    gboxes:[batch_size,M,4]
    labels:[batch_size,N]
    indices:[batch_size,N]
    output:
    [batch_size,N,4]
    """
    with tf.name_scope("get_deltas"):
        # Negative (unmatched) indices are clamped to 0 for the gather.
        matched_gboxes = wmlt.batch_gather(gboxes, tf.nn.relu(indices))
        deltas = (matched_gboxes - boxes) / self.const_scale
        if not self.scale:
            return deltas
        box_scale = tf.sqrt(odb.box_area(boxes))
        return deltas / tf.expand_dims(box_scale, axis=-1)
def test_batch_indices_to_mask(self):
    """batch_indices_to_mask + batch_boolean_mask + batch_gather round trip.

    The mask/indices produced for `indices`/`lens` must, after masking and
    re-gathering `data`, return the elements addressed by the original
    indices.
    """
    data = [[1, 4, 5, 6, 7, 9, 0], [11, 22, 44, 66, 77, 99, 0]]
    size = 7
    expected_mask = [
        [False, True, True, False, False, True, False],
        [False, False, False, True, True, False, False],
    ]
    expected_indices = [[0, 1, 2, 0], [1, 0, 0, 0]]
    expected_data = [[4, 5, 9, 4], [77, 66, 66, 66]]

    with self.test_session() as sess:
        indices = tf.convert_to_tensor(np.array([[1, 2, 5, 0], [4, 3, 0, 0]]))
        lens = tf.convert_to_tensor([3, 2])

        mask_op, indices_op = wmlt.batch_indices_to_mask(indices, lens, size)
        data_op = wmlt.batch_boolean_mask(data, mask_op, 4)
        data_op = wmlt.batch_gather(data_op, indices_op)

        got_mask, got_indices, got_data = sess.run(
            [mask_op, indices_op, data_op])

        self.assertAllEqual(got_mask, expected_mask)
        self.assertAllEqual(got_indices, expected_indices)
        self.assertAllEqual(got_data, expected_data)
def mask_rcnn_inference(pred_mask_logits, pred_instances):
    """
    Convert pred_mask_logits to estimated foreground probability masks while
    also extracting only the masks for the predicted classes in
    pred_instances. For each predicted box, the mask of the same class is
    attached to the instance by adding a new RD_MASKS field to
    pred_instances.

    Args:
        pred_mask_logits (Tensor): A tensor of shape (B, Hmask, Wmask, C) or
            (B, Hmask, Wmask, 1) for class-specific or class-agnostic, where
            B is the total number of predicted masks in all images, C is the
            number of foreground classes, and Hmask, Wmask are the height
            and width of the mask predictions. The values are logits.
        pred_instances (dict): A dict of prediction results,
            pred_instances[RD_LABELS]: [batch_size, Y],
            pred_instances[RD_LENGTH]: [batch_size].
            B must equal batch_size * Y, because the result is reshaped to
            [batch_size, Y, Hmask, Wmask].

    Returns:
        None. pred_instances will contain an extra RD_MASKS field storing a
        mask of size [batch_size, Y, Hmask, Wmask] for the predicted class.
        The masks are soft (non-quantized) and have the resolution predicted
        by the network; post-processing steps, such as resizing them to the
        original image resolution and/or binarizing them, are left to the
        caller.
    """
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    labels = pred_instances[RD_LABELS]
    batch_size, box_nr = wmlt.combined_static_and_dynamic_shape(labels)

    if cls_agnostic_mask:
        # Drop the singleton class axis so both paths continue with a
        # (B, Hmask, Wmask) tensor; without this the 3-way shape unpack and
        # the 4-D reshape below cannot work for class-agnostic masks.
        pred_mask_logits = tf.squeeze(pred_mask_logits, axis=-1)
    else:
        # Select the masks corresponding to the predicted classes.
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # Remove the background class from the label ids.
        fg_labels = tf.reshape(labels, [-1]) - 1
        # When several images are predicted at once, labels may be padded
        # with zeros at the end, so the subtraction above can yield negative
        # values; relu clamps them to a valid index.
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits,
                                             tf.nn.relu(fg_labels))

    _, H, W = wmlt.combined_static_and_dynamic_shape(pred_mask_logits)
    pred_mask_logits = tf.reshape(pred_mask_logits,
                                  [batch_size, box_nr, H, W])
    pred_instances[RD_MASKS] = tf.nn.sigmoid(pred_mask_logits)
def trans_boxes(self, bboxes, levels, img_size):
    """Replace every box by a square anchor box sharing its center.

    The side length is looked up in ``self.rcnn_anchor_boxes`` with the
    box's level and divided by the image width/height, so the result is in
    the same [ymin, xmin, ymax, xmax] layout as `bboxes`. A summary image
    with the first three original and new boxes is also emitted.

    :param bboxes: boxes, last axis is [ymin, xmin, ymax, xmax]
    :param levels: [B, box_nr] level index of every box
    :param img_size: (height, width) used to normalise the anchor size
    :return: the re-sized boxes
    """
    batch, _ = wmlt.combined_static_and_dynamic_shape(levels)
    per_image_anchor_size = tf.tile(self.rcnn_anchor_boxes, [batch, 1])
    boxes_size = wmlt.batch_gather(per_image_anchor_size, levels)
    w = boxes_size / tf.to_float(img_size[1])
    h = boxes_size / tf.to_float(img_size[0])

    ymin, xmin, ymax, xmax = tf.unstack(bboxes, axis=-1)
    cy = (ymin + ymax) / 2
    cx = (xmin + xmax) / 2
    half_h = h / 2
    half_w = w / 2
    new_boxes = tf.stack(
        [cy - half_h, cx - half_w, cy + half_h, cx + half_w], axis=-1)

    # Debug visualisation: first three original boxes (labels 1-3) next to
    # the first three transformed boxes (labels 11-13).
    log_bboxes = tf.concat([bboxes[:, :3], new_boxes[:, :3]], axis=1)
    log_labels = tf.tile(
        tf.convert_to_tensor([[1, 2, 3, 11, 12, 13]], dtype=tf.int32),
        [batch, 1])
    wsummary.detection_image_summary(self.batched_inputs[IMAGE],
                                     boxes=log_bboxes,
                                     classes=log_labels,
                                     name="to_anchor_bboxes")
    return new_boxes
def inference(self, inputs, head_outputs):
    """
    Decode the single head output into boxes and run NMS on them.

    Arguments:
        inputs: same as CenterNet.forward's batched_inputs
        head_outputs: list holding exactly one head output
    Returns:
        results:
        RD_BOXES: [B,N,4]
        RD_LABELS: [B,N]
        RD_PROBABILITY:[ B,N]
        RD_LENGTH:[B]
    """
    self.inputs = inputs
    # Checked before any graph op is built so a wrong head count fails fast.
    assert len(head_outputs) == 1, f"Error head outputs len {len(head_outputs)}"

    nms = partial(odl.boxes_nms, threshold=self.nms_threshold)
    bboxes, clses, scores, length = self.get_box_in_a_single_layer(
        head_outputs[0], self.cfg.SCORE_THRESH_TEST)
    bboxes, labels, nms_indexs, lens = odl.batch_nms_wrapper(
        bboxes,
        clses,
        length,
        confidence=None,
        nms=nms,
        k=self.max_detections_per_image,
        sort=True)
    # Keep the scores of the boxes that survived NMS, in the same order.
    scores = wmlt.batch_gather(scores, nms_indexs)

    outdata = {
        RD_BOXES: bboxes,
        RD_LABELS: labels,
        RD_PROBABILITY: scores,
        RD_LENGTH: lens
    }
    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=outdata[RD_BOXES],
            classes=outdata[RD_LABELS],
            lengths=outdata[RD_LENGTH],
            scores=outdata[RD_PROBABILITY],
            name="CenterNetOutput",
            category_index=DataLoader.category_index)
    return outdata
def get_box_in_a_single_layer(self, datas, num_dets, img_size, K):
    '''
    Decode corner/center heatmaps of one layer into boxes.

    The top K top-left and bottom-right corners are paired (K*K candidates);
    pairs with different classes, an embedding distance above
    ``self.dis_threshold`` or a negative width/height get score 0. The
    best `num_dets` pairs are then checked against the top K center points
    with ``tfop.center_boxes_filter`` and re-scored as
    ``(2 * corner_score + center_score) / 3``.

    :param datas: dict with 'heatmaps_tl', 'heatmaps_br', 'heatmaps_ct',
        'tag_tl', 'tag_br' and optionally 'offset_tl', 'offset_br',
        'offset_ct'
    :param num_dets: number of corner pairs kept by the top-k selection
    :param img_size: (height, width) used to scale ``self.size_threshold``
    :param K: number of peaks taken from every heatmap
    :return: (bboxes, scores, clses, length); rows beyond `length` are zero
        padding
    '''
    h_tl = tf.nn.sigmoid(datas['heatmaps_tl'])
    h_br = tf.nn.sigmoid(datas['heatmaps_br'])
    h_ct = tf.nn.sigmoid(datas['heatmaps_ct'])

    B, H, W, _ = wmlt.combined_static_and_dynamic_shape(h_tl)

    h_tl = self.pixel_nms(h_tl)
    h_br = self.pixel_nms(h_br)
    h_ct = self.pixel_nms(h_ct)

    tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = self._topk(h_tl, K=K)
    br_scores, br_inds, br_clses, br_ys, br_xs = self._topk(h_br, K=K)
    ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(h_ct, K=K)

    # Lay the corners out as a K x K grid: row = top-left, column =
    # bottom-right.
    tl_ys = tf.tile(tf.reshape(tl_ys, [B, K, 1]), [1, 1, K])
    tl_xs = tf.tile(tf.reshape(tl_xs, [B, K, 1]), [1, 1, K])
    br_ys = tf.tile(tf.reshape(br_ys, [B, 1, K]), [1, K, 1])
    br_xs = tf.tile(tf.reshape(br_xs, [B, 1, K]), [1, K, 1])
    ct_ys = tf.reshape(ct_ys, [B, K])
    ct_xs = tf.reshape(ct_xs, [B, K])
    ct_scores = tf.reshape(ct_scores, [B, K])

    if 'offset_tl' in datas:
        tl_regr = wmlt.batch_gather(datas['offset_tl'], tl_inds)
        br_regr = wmlt.batch_gather(datas['offset_br'], br_inds)
        # Fixed: the center offsets must be gathered with the center
        # indices (the original used br_inds here).
        ct_regr = wmlt.batch_gather(datas['offset_ct'], ct_inds)
        tl_regr = tf.reshape(tl_regr, [B, K, 1, 2])
        br_regr = tf.reshape(br_regr, [B, 1, K, 2])
        ct_regr = tf.reshape(ct_regr, [B, K, 2])

        tl_xs = tl_xs + tl_regr[..., 0]
        tl_ys = tl_ys + tl_regr[..., 1]
        br_xs = br_xs + br_regr[..., 0]
        br_ys = br_ys + br_regr[..., 1]
        ct_xs = ct_xs + ct_regr[..., 0]
        ct_ys = ct_ys + ct_regr[..., 1]

    bboxes = tf.stack([tl_ys, tl_xs, br_ys, br_xs], axis=-1)

    # Embedding distance between every top-left / bottom-right pair.
    tl_tag = wmlt.batch_gather(datas['tag_tl'], tl_inds)
    br_tag = wmlt.batch_gather(datas['tag_br'], br_inds)
    tl_tag = tf.tile(tf.expand_dims(tl_tag, axis=2), [1, 1, K, 1])
    br_tag = tf.tile(tf.expand_dims(br_tag, axis=1), [1, K, 1, 1])
    dists = tf.squeeze(tf.abs(tl_tag - br_tag), axis=-1)
    dis_inds = (dists > self.dis_threshold)

    tl_scores = tf.tile(tf.reshape(tl_scores, [B, K, 1]), [1, 1, K])
    br_scores = tf.tile(tf.reshape(br_scores, [B, 1, K]), [1, K, 1])
    scores = (tl_scores + br_scores) / 2

    tl_clses = tf.tile(tf.reshape(tl_clses, [B, K, 1]), [1, 1, K])
    br_clses = tf.tile(tf.reshape(br_clses, [B, 1, K]), [1, K, 1])
    cls_inds = tf.not_equal(tl_clses, br_clses)

    width_inds = (br_xs < tl_xs)
    height_inds = (br_ys < tl_ys)

    # A pair is rejected if any of the four checks fails.
    all_inds = tf.logical_or(cls_inds, dis_inds)
    all_inds = tf.logical_or(all_inds, width_inds)
    all_inds = tf.logical_or(all_inds, height_inds)
    scores = tf.where(all_inds, tf.zeros_like(scores), scores)
    scores, inds = tf.nn.top_k(tf.reshape(scores, [B, -1]), num_dets)

    wsummary.variable_summaries_v2(scores, "scores")
    wsummary.variable_summaries_v2(tl_scores, "tl_scores")
    wsummary.variable_summaries_v2(br_scores, "br_scores")

    bboxes = tf.reshape(bboxes, [B, -1, 4])
    bboxes = wmlt.batch_gather(bboxes, inds)

    clses = tf.reshape(tl_clses, [B, -1])
    clses = wmlt.batch_gather(clses, inds)

    ct = tf.stack([ct_ys / tf.to_float(H), ct_xs / tf.to_float(W)], axis=-1)
    bboxes = odbox.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                        width=W,
                                                        height=H)
    sizes = tf.convert_to_tensor(self.size_threshold, dtype=tf.float32)
    relative_size = sizes * tf.rsqrt(
        tf.cast(img_size[0] * img_size[1], tf.float32))
    _, box_nr, _ = wmlt.combined_static_and_dynamic_shape(bboxes)
    length = tf.ones([B], tf.int32) * box_nr

    center_index = tfop.center_boxes_filter(bboxes=bboxes,
                                            bboxes_clses=clses,
                                            center_points=ct,
                                            center_clses=ct_clses,
                                            size_threshold=relative_size,
                                            bboxes_length=length,
                                            nrs=[3, 5])

    def fn(bboxes, scores, clses, ct_score, c_index):
        # Per-image filtering; outputs are padded back to box_nr rows so
        # tf.map_fn can stack them.
        ct_score = tf.gather(ct_score, tf.nn.relu(c_index))
        # Average over the three key points (two corners and the center).
        scores = (scores * 2 + ct_score) / 3
        mask = tf.logical_and(tf.greater_equal(c_index, 0),
                              tf.greater(scores, self.score_threshold))
        mask = tf.logical_and(tf.greater_equal(ct_score, 0.001), mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        scores = tf.boolean_mask(scores, mask)
        clses = tf.boolean_mask(clses, mask)
        keep_nr = tf.reduce_sum(tf.cast(mask, tf.int32))
        pad_nr = box_nr - keep_nr
        bboxes = tf.pad(bboxes, [[0, pad_nr], [0, 0]])
        scores = tf.pad(scores, [[0, pad_nr]])
        clses = tf.pad(clses, [[0, pad_nr]])
        return bboxes, scores, clses, keep_nr

    bboxes, scores, clses, length = tf.map_fn(
        lambda x: fn(x[0], x[1], x[2], x[3], x[4]),
        elems=(bboxes, scores, clses, ct_scores, center_index),
        dtype=(tf.float32, tf.float32, tf.int32, tf.int32))

    return bboxes, scores, clses, length
def losses(self):
    """
    Build the training losses, summed over all head outputs.

    Per head output: a focal heatmap loss for each of the top-left,
    bottom-right and center heatmaps, a Huber loss on the gathered offsets
    (weighted by 'g_index_mask') and the tag embedding loss from
    ``self.ae_loss``.

    Returns:
        dict with keys "heatmaps_tl_loss", "heatmaps_br_loss",
        "heatmaps_ct_loss", "offset_loss" and 'embeading_loss'
    """
    all_encoded_datas = self._get_ground_truth()

    # (result key, ground-truth key, prediction key, scope)
    heatmap_specs = (
        ("heatmaps_tl_loss", "g_heatmaps_tl", "heatmaps_tl", "tl_loss"),
        ("heatmaps_br_loss", "g_heatmaps_br", "heatmaps_br", "br_loss"),
        ("heatmaps_ct_loss", "g_heatmaps_ct", "heatmaps_ct", "ct_loss"),
    )
    collected = {
        "heatmaps_tl_loss": [],
        "heatmaps_br_loss": [],
        "heatmaps_ct_loss": [],
        "offset_loss": [],
        'embeading_loss': [],
    }

    for i, head_outputs in enumerate(self.head_outputs):
        encoded_datas = all_encoded_datas[i]
        g_index = encoded_datas['g_index']

        heatmap_losses = [
            tf.reduce_mean(
                wnn.focal_loss_for_heat_map(labels=encoded_datas[gt_key],
                                            logits=head_outputs[pred_key],
                                            scope=scope))
            for _, gt_key, pred_key, scope in heatmap_specs
        ]

        # Offsets predicted at the ground-truth key-point positions.
        offset = tf.concat([
            wmlt.batch_gather(head_outputs['offset_tl'], g_index[:, :, 0]),
            wmlt.batch_gather(head_outputs['offset_br'], g_index[:, :, 1]),
            wmlt.batch_gather(head_outputs['offset_ct'], g_index[:, :, 2]),
        ],
                           axis=2)
        offset_weights = tf.cast(
            tf.expand_dims(encoded_datas['g_index_mask'], -1), tf.float32)
        offset_loss = tf.losses.huber_loss(labels=encoded_datas['g_offset'],
                                           predictions=offset,
                                           loss_collection=None,
                                           weights=offset_weights)

        embeading_loss = self.ae_loss(head_outputs['tag_tl'],
                                      head_outputs['tag_br'], g_index,
                                      encoded_datas['g_index_mask'])

        for (result_key, _, _, _), value in zip(heatmap_specs,
                                                heatmap_losses):
            collected[result_key].append(value)
        collected["offset_loss"].append(offset_loss)
        collected['embeading_loss'].append(embeading_loss)

    return {key: tf.add_n(values) for key, values in collected.items()}
def find_top_rpn_proposals_for_single_level(
        proposals,
        pred_objectness_logits,
        nms_thresh,
        pre_nms_topk,
        post_nms_topk,
        score_threshold=-1.0,
        is_training=True,
        pre_nms_topk_max_per_layer=-1,
):
    """Select the best RPN proposals of one feature level (top-k, then NMS).

    The candidates are ordered by objectness with ``tf.nn.top_k`` + gather,
    then every image of the batch goes through NMS on its own. (Original
    note: in Detectron2, they chose the top candiate_nr*6 boxes.)

    Args:
        proposals: candidate boxes, gathered with the top-k indices
            (presumably [batch_size, N, 4] -- TODO confirm).
        pred_objectness_logits: objectness logits; axis 1 is used as the
            number of candidates and top-k is taken over the last axis.
        nms_thresh: threshold forwarded to the NMS op.
        pre_nms_topk: number of best candidates kept before NMS.
        post_nms_topk: ``k`` forwarded to the NMS op.
        score_threshold: when > 1e-10, single-image inference additionally
            drops boxes whose score is not above it (the first box is
            always kept).
        is_training: selects ``tfop.boxes_nms_nr2`` (training, or a static
            batch size > 1) versus ``tfop.boxes_nms``.
        pre_nms_topk_max_per_layer: when > 10, an extra upper bound on the
            number of pre-NMS candidates.

    Returns:
        (boxes, scores), both wrapped in ``tf.stop_gradient``; the scores
        are the objectness logits of the kept boxes.
    """
    with tf.name_scope("find_top_rpn_proposals_for_single_level"):
        # Sort through top_k + gather.
        candidate_nr = tf.minimum(pre_nms_topk,
                                  tf.shape(pred_objectness_logits)[1])
        if pre_nms_topk_max_per_layer > 10:
            print(
                f"pre_nms_topk_max_per_layer = {pre_nms_topk_max_per_layer}.")
            candidate_nr = tf.minimum(candidate_nr,
                                      pre_nms_topk_max_per_layer)

        top_scores, top_indices = tf.nn.top_k(pred_objectness_logits,
                                              k=candidate_nr)
        top_proposals = wmlt.batch_gather(proposals, top_indices)

        batch_size = pred_objectness_logits.get_shape().as_list()[0]
        if not is_training and batch_size > 1:
            print("RPN: Inference on multi images.")

        def nms_one_image(image_boxes, image_scores):
            # NMS is class-agnostic here: every box gets the same label.
            dummy_labels = tf.ones(tf.shape(image_boxes)[0], dtype=tf.int32)
            fixed_size_output = is_training or batch_size > 1
            nms_op = tfop.boxes_nms_nr2 if fixed_size_output else tfop.boxes_nms
            kept_boxes, _, kept_indices = nms_op(image_boxes,
                                                 dummy_labels,
                                                 k=post_nms_topk,
                                                 threshold=nms_thresh,
                                                 confidence=image_scores)
            kept_scores = tf.gather(image_scores, kept_indices)

            # NOTE(review): the source indentation was lost; the score
            # filter is taken to belong to the single-image inference branch
            # only -- confirm against the original file.
            if not fixed_size_output and score_threshold > 1e-10:
                above_threshold = tf.greater(kept_scores, score_threshold)
                # Force-keep element 0 so the result is never empty.
                first_only = tf.cast(
                    tf.scatter_nd(tf.constant([[0]], dtype=tf.int32),
                                  tf.constant([1], dtype=tf.int32),
                                  tf.shape(above_threshold)), tf.bool)
                keep_mask = tf.logical_or(above_threshold, first_only)
                kept_scores = tf.boolean_mask(kept_scores, keep_mask)
                kept_boxes = tf.boolean_mask(kept_boxes, keep_mask)
            return [kept_boxes, kept_scores]

        out_boxes, out_scores = btf.try_static_or_dynamic_map_fn(
            lambda x: nms_one_image(x[0], x[1]),
            elems=[top_proposals, top_scores],
            dtype=[tf.float32, tf.float32],
            back_prop=False)

        return tf.stop_gradient(out_boxes), tf.stop_gradient(out_scores)
def inference(self, inputs, head_outputs):
    """Decode the boxes of every head output, merge them and run NMS.

    Arguments:
        inputs: same as CenterNet.forward's batched_inputs
        head_outputs: iterable with one prediction dict per output level;
            level ``i`` keeps ``max(topk_candidates // 4**i, 4)`` detections
            and uses ``max(k // 4**i, 4)`` as its K.

    Returns:
        dict with
        RD_BOXES: [B,N,4]
        RD_LABELS: [B,N]
        RD_PROBABILITY: [B,N]
        RD_LENGTH: [B]
    """
    self.inputs = inputs
    img_size = tf.shape(inputs[IMAGE])[1:3]

    level_boxes, level_scores, level_clses, level_lengths = [], [], [], []
    for level, datas in enumerate(head_outputs):
        shrink = 4**level
        num_dets = max(self.topk_candidates // shrink, 4)
        level_k = max(self.k // shrink, 4)
        bboxes, scores, clses, length = self.get_box_in_a_single_layer(
            datas, num_dets, img_size, level_k)
        level_boxes.append(bboxes)
        level_scores.append(scores)
        level_clses.append(clses)
        level_lengths.append(length)

    # NOTE(review): the source indentation was lost; the name scope is
    # assumed to span merge + NMS (this only affects op names).
    with tf.name_scope("merge_all_boxes"):
        merged_boxes, _ = wmlt.batch_concat_with_length(
            level_boxes, level_lengths)
        merged_scores, _ = wmlt.batch_concat_with_length(
            level_scores, level_lengths)
        merged_clses, merged_length = wmlt.batch_concat_with_length(
            level_clses, level_lengths)

        per_image_nms = functools.partial(tfop.boxes_nms,
                                          threshold=self.nms_threshold,
                                          classes_wise=True,
                                          k=self.max_detections_per_image)
        # Predictions carry no background class; shift by one so that
        # background is 0.
        merged_clses = merged_clses + 1
        # NOTE(review): no confidence is handed to the NMS wrapper
        # (confidence=None together with sort=True) -- confirm intended.
        final_boxes, final_labels, nms_indexs, final_lens = odl.batch_nms_wrapper(
            merged_boxes,
            merged_clses,
            merged_length,
            confidence=None,
            nms=per_image_nms,
            k=self.max_detections_per_image,
            sort=True)
        final_scores = wmlt.batch_gather(merged_scores, nms_indexs)

    outdata = {
        RD_BOXES: final_boxes,
        RD_LABELS: final_labels,
        RD_PROBABILITY: final_scores,
        RD_LENGTH: final_lens
    }

    if global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        wsummary.detection_image_summary(
            images=inputs[IMAGE],
            boxes=final_boxes,
            classes=final_labels,
            lengths=final_lens,
            scores=final_scores,
            name="CenterNetOutput",
            category_index=DataLoader.category_index)
    return outdata
def prediction_on_single_image(self,
                               class_prediction,
                               bboxes_regs,
                               proposal_bboxes,
                               threshold=0.5,
                               classes_wise=False,
                               topk_per_image=-1,
                               ious=None,
                               nms=None):
    """Decode, filter and NMS the detections of a single image.

    Steps: drop the background column, take the best foreground class of
    every proposal, optionally pick the regression deltas of that class,
    sort by score (or by ``ious`` when given), discard entries whose
    probability is not greater than ``threshold``, decode the boxes with
    ``self.box2box_transform.apply_deltas``, run ``nms``, cap the result at
    ``topk_per_image`` and zero-pad it to a fixed length.

    Args:
        class_prediction: per-proposal class probabilities; column 0 is the
            background class.
        bboxes_regs: box regression deltas; when ``classes_wise`` is true
            they are reshaped to [box_nr, num_classes, box_dim].
        proposal_bboxes: proposal boxes, [box_nr, box_dim].
        threshold: minimum probability (exclusive) to keep a detection.
        classes_wise: whether ``bboxes_regs`` holds one delta set per class.
        topk_per_image: maximum number of detections returned; a negative
            value means "no limit".
        ious: optional per-proposal predicted IoU. When given, it replaces
            the probability both for sorting and as NMS confidence.
        nms: callable ``nms(boxes, labels, confidence=...)`` returning
            ``(boxes, labels, indices)``.

    Returns:
        (boxes, labels, probability, res_indices, valid_nr): the first four
        are padded to ``candiate_nr`` entries (boxes is [candiate_nr, 4]);
        ``res_indices`` are indices into the input proposals and
        ``valid_nr`` is the number of non-padded entries.
    """
    with tf.name_scope("prediction_on_single_image"):
        # Remove the background column.
        class_prediction = class_prediction[:, 1:]
        num_classes = class_prediction.get_shape().as_list()[-1]
        probability, nb_labels = tf.nn.top_k(class_prediction, k=1)
        # Background is class 0 and was removed above, so shift back by one.
        labels = nb_labels + 1

        last_axis = class_prediction.get_shape().ndims - 1
        probability = tf.squeeze(probability, axis=last_axis)
        labels = tf.squeeze(labels, axis=last_axis)
        res_indices = tf.range(tf.shape(labels)[0])

        # Pick the regression parameters of the predicted class.
        if classes_wise:
            nb_labels = tf.reshape(nb_labels, [-1])
            box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
                proposal_bboxes)
            bboxes_regs = tf.reshape(bboxes_regs,
                                     [box_nr, num_classes, box_dim])
            bboxes_regs = wmlt.batch_gather(bboxes_regs, nb_labels)
        proposal_bboxes.get_shape().assert_is_compatible_with(
            bboxes_regs.get_shape())

        # The data must already be sorted before NMS; sort via top_k +
        # gather.
        if ious is None:
            probability, order = tf.nn.top_k(probability,
                                             k=tf.shape(probability)[0])
        else:
            _, order = tf.nn.top_k(ious, k=tf.shape(ious)[0])
            probability = tf.gather(probability, order)
        labels = tf.gather(labels, order)
        bboxes_regs = tf.gather(bboxes_regs, order)
        proposal_bboxes = tf.gather(proposal_bboxes, order)
        res_indices = tf.gather(res_indices, order)

        pmask = tf.greater(probability, threshold)
        probability = tf.boolean_mask(probability, pmask)
        labels = tf.boolean_mask(labels, pmask)
        proposal_bboxes = tf.boolean_mask(proposal_bboxes, pmask)
        boxes_regs = tf.boolean_mask(bboxes_regs, pmask)
        res_indices = tf.boolean_mask(res_indices, pmask)

        boxes = self.box2box_transform.apply_deltas(deltas=boxes_regs,
                                                    boxes=proposal_bboxes)

        # At most candiate_nr boxes can be returned.
        candiate_nr = tf.shape(
            probability)[0] if topk_per_image < 0 else topk_per_image

        if ious is None:
            boxes, labels, kept = nms(boxes, labels, confidence=probability)
        else:
            # res_indices still refer to the original proposal order, so
            # this aligns ious with the sorted + filtered boxes.
            ious = tf.gather(ious, res_indices)
            boxes, labels, kept = nms(boxes, labels, confidence=ious)
        probability = tf.gather(probability, kept)
        res_indices = tf.gather(res_indices, kept)

        # Only cap when a real limit was requested: slicing with a negative
        # topk_per_image (e.g. [:-1]) would silently drop the last
        # detection.
        if topk_per_image >= 0:
            boxes = boxes[:topk_per_image]
            labels = labels[:topk_per_image]
            probability = probability[:topk_per_image]
            res_indices = res_indices[:topk_per_image]

        valid_nr = tf.shape(probability)[0]
        pad_nr = candiate_nr - valid_nr
        boxes = tf.pad(boxes, paddings=[[0, pad_nr], [0, 0]])
        labels = tf.pad(labels, paddings=[[0, pad_nr]])
        probability = tf.pad(probability, paddings=[[0, pad_nr]])
        res_indices = tf.pad(res_indices, paddings=[[0, pad_nr]])

        boxes = tf.reshape(boxes, [candiate_nr, 4])
        labels = tf.reshape(labels, [candiate_nr])
        probability = tf.reshape(probability, [candiate_nr])
        res_indices = tf.reshape(res_indices, [candiate_nr])
        return boxes, labels, probability, res_indices, valid_nr
def inference(self,
              score_thresh,
              nms_thresh,
              topk_per_image,
              pred_iou_logits=None,
              proposal_boxes=None,
              scores=None):
    """Turn the ROI-head predictions into per-image detections.

    Builds the NMS callable from the config, reshapes the class
    probabilities / box deltas (and predicted IoUs, when present) to a
    per-image layout and maps ``self.prediction_on_single_image`` over the
    batch. When the summary level is RESEARCH (or lower) and the model is
    not training, additional statistics are registered through
    ``add_to_research_datas``.

    Args:
        score_thresh (float): same as fast_rcnn_inference.
        nms_thresh (float): same as fast_rcnn_inference.
        topk_per_image (int): same as fast_rcnn_inference.
        pred_iou_logits: optional predicted IoU logits; how they are used
            depends on ``cfg.MODEL.ROI_HEADS.PRED_IOU_VERSION``.
        proposal_boxes: optional proposals, defaults to
            ``self.proposals[PD_BOXES]``; its static shape must be fully
            known ([batch_size, box_nr, box_dim]).
        scores: [batch_size,box_nr,num_classes+1] per the original note;
            NOTE(review): the code asserts a rank-2 tensor before reshaping
            it to [batch_size, -1, K] -- confirm the expected layout.
            Defaults to ``self.predict_probs(self.pred_class_logits)``.

    Returns:
        dict with keys RD_BOXES, RD_LABELS, RD_PROBABILITY, RD_INDICES and
        RD_LENGTH; the first four are cropped along axis 1 to the longest
        valid length in the batch (at least 1).
        NOTE(review): the previous docstring described Detectron2-style
        ``list[Instances]`` / ``list[Tensor]`` returns, which does not match
        the dict returned here.
    """
    # NOTE(review): the indentation of this function was reconstructed from
    # a whitespace-mangled source; scope extents marked below should be
    # confirmed against the original file.
    with tf.name_scope("fast_rcnn_outputs_inference"):
        output_fix_nr = self.cfg.MODEL.ROI_HEADS.OUTPUTS_FIX_NR_BOXES
        # Choose between the plain NMS op and the fixed-output-count one.
        if output_fix_nr < 1:
            nms = functools.partial(
                tfop.boxes_nms,
                threshold=nms_thresh,
                classes_wise=self.cfg.MODEL.ROI_HEADS.CLASSES_WISE_NMS)
        else:
            nms = functools.partial(
                tfop.boxes_nms_nr2,
                threshold=nms_thresh,
                classes_wise=self.cfg.MODEL.ROI_HEADS.CLASSES_WISE_NMS,
                k=output_fix_nr,
                allow_less_output=True)

        if proposal_boxes is None:
            proposal_boxes = self.proposals[PD_BOXES]
        batch_size, bor_nr, box_dim = proposal_boxes.get_shape().as_list()
        # L is the size of the last axis of the predicted deltas.
        _, L = wmlt.combined_static_and_dynamic_shape(
            self.pred_proposal_deltas)
        if scores is None:
            probability = self.predict_probs(self.pred_class_logits)
        else:
            probability = scores

        if pred_iou_logits is not None:
            # Version 5 clips the raw prediction to [0, 1]; every other
            # version squashes it with a sigmoid.
            if self.cfg.MODEL.ROI_HEADS.PRED_IOU_VERSION == 5:
                ious = tf.clip_by_value(pred_iou_logits,
                                        clip_value_min=0,
                                        clip_value_max=1)
            else:
                ious = tf.nn.sigmoid(pred_iou_logits, "pred_iou")
            if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.RESEARCH and not self.is_training:
                add_to_research_datas("pb_ious", ious, [-1])  # predicted IoU
            # NOTE(review): these two statements may originally have been
            # nested in the research-only branch above; raw_probability is
            # only read by the research block at the end.
            assert len(probability.get_shape()
                       ) == 2, "error probability shape"
            raw_probability = tf.expand_dims(
                probability, axis=0)  #only support batch_size=1

        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.RESEARCH and not self.is_training:
            matcher = Matcher([1e-3],
                              allow_low_quality_matches=False,
                              cfg=self.cfg,
                              parent=self)
            mh_res0 = matcher(proposal_boxes,
                              self.parent.batched_inputs[GT_BOXES],
                              self.parent.batched_inputs[GT_LABELS],
                              self.parent.batched_inputs[GT_LENGTH])
            # IoU between proposal_boxes and gt boxes.
            add_to_research_datas("pb_scores", mh_res0[1], [-1])
            box_size = tf.sqrt(odb.box_area(proposal_boxes))
            # Square root of the proposal box area (the original comment
            # here repeated the IoU note of the line above).
            add_to_research_datas("pb_size", box_size, [-1])

        assert len(
            probability.get_shape()) == 2, "error probability shape"
        total_box_nr, K = wmlt.combined_static_and_dynamic_shape(
            probability)
        # From a flat per-box layout to a per-image layout.
        probability = tf.reshape(probability, [batch_size, -1, K])
        if pred_iou_logits is not None:
            if self.cfg.MODEL.ROI_HEADS.PRED_IOU_VERSION == 3:
                # Version 3 folds the prediction into the class
                # probabilities instead of keeping it as a separate score.
                ious = tf.reshape(ious, [batch_size, -1, 1])
                probability = probability * ious
                ious = None
                print("Pred centerness.")
            else:
                ious = tf.reshape(ious, [batch_size, -1])
        pred_proposal_deltas = tf.reshape(self.pred_proposal_deltas,
                                          [batch_size, -1, L])

        # Deltas wider than one box mean one delta set per class.
        classes_wise = (L != box_dim)

        if pred_iou_logits is None or ious is None or (
                not self.cfg.MODEL.ROI_HEADS.USE_IOU_IN_TEST):
            boxes, labels, probability, res_indices, lens = tf.map_fn(
                lambda x: self.prediction_on_single_image(
                    x[0],
                    x[1],
                    x[2],
                    score_thresh,
                    classes_wise=classes_wise,
                    topk_per_image=topk_per_image,
                    nms=nms),
                elems=(probability, pred_proposal_deltas, proposal_boxes),
                dtype=(tf.float32, tf.int32, tf.float32, tf.int32,
                       tf.int32))
        else:
            # Sort by IoU during NMS.
            boxes, labels, probability, res_indices, lens = tf.map_fn(
                lambda x: self.prediction_on_single_image(
                    x[0],
                    x[1],
                    x[2],
                    score_thresh,
                    classes_wise=classes_wise,
                    topk_per_image=topk_per_image,
                    ious=x[3],
                    nms=nms),
                elems=(probability, pred_proposal_deltas, proposal_boxes,
                       ious),
                dtype=(tf.float32, tf.int32, tf.float32, tf.int32,
                       tf.int32))

        with tf.name_scope("remove_null_boxes"):
            # max_len=0 makes the program exit abnormally (reason unknown),
            # hence the lower bound of 1.
            max_len = tf.maximum(1, tf.reduce_max(lens))
            boxes = boxes[:, :max_len]
            labels = labels[:, :max_len]
            probability = probability[:, :max_len]
            res_indices = res_indices[:, :max_len]

        results = {
            RD_BOXES: boxes,
            RD_LABELS: labels,
            RD_PROBABILITY: probability,
            RD_INDICES: res_indices,
            RD_LENGTH: lens
        }

        if self.cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.RESEARCH and not self.is_training:
            with tf.device("/cpu:0"):
                if pred_iou_logits is not None and ious is not None:
                    ious = wmlt.batch_gather(ious, res_indices)
                else:
                    ious = None
                scores0 = wmlt.batch_gather(mh_res0[1], res_indices)
                # Only the valid length of the first image is used below.
                l = lens[0]
                mh_res1 = matcher(boxes[:, :l],
                                  self.parent.batched_inputs[GT_BOXES],
                                  self.parent.batched_inputs[GT_LABELS],
                                  self.parent.batched_inputs[GT_LENGTH])
                add_to_research_datas("rd_scores", mh_res1[1][:, :l], [-1])
                if ious is not None:
                    add_to_research_datas("rd_ious", ious[:, :l], [-1])
                if pred_iou_logits is not None:
                    probs = wmlt.batch_gather(raw_probability, res_indices)
                    add_to_research_datas("rd_probs", probs[:, :l], [-1])
                else:
                    add_to_research_datas("rd_probs", probability[:, :l],
                                          [-1])
                # NOTE(review): this repeats the "rd_probs" registration of
                # the else branch; whether it belongs inside that branch or
                # after the if/else could not be recovered from the source
                # -- confirm and drop the redundant call.
                add_to_research_datas("rd_probs", probability[:, :l], [-1])
                add_to_research_datas("rd_scores_old", scores0[:, :l], [-1])
                rd_box_size = tf.sqrt(odb.box_area(boxes[:, :l]))
                add_to_research_datas("rd_size", rd_box_size, [-1])

        return results
def forward(self, boxes, gboxes, glabels, glength, boxes_len, *args,
            **kwargs):
    '''
    Match every proposal box to a groundtruth box with an adaptive IoU
    threshold: for each groundtruth box the k closest proposals of every
    layer are collected, and mean + std of their IoUs (capped by their
    maximum) becomes the positive threshold of that groundtruth box.

    :param boxes: [1,X,4] or [batch_size,X,4] proposal boxes
    :param gboxes: [batch_size,Y,4] groundtruth boxes
    :param glabels: [batch_size,Y] groundtruth labels
    :param glength: [batch_size] number of valid groundtruth boxes
    :param boxes_len: list or tuple [len0,len1,len2,...] with
    sum(boxes_len)=X, the number of boxes in each layer
    :return:
    labels: [batch_size,X] the label of each box, 0 (background) when the
    best score is not greater than MIN_IOU_THRESHOLD; positive labels are
    replaced by self.same_pos_label when that attribute is truthy
    scores: [batch_size,X] the overlap score with the matched gt box
    indices: [batch_size,X] the index of the matched gt box, or -1 when the
    best score is not greater than MIN_IOU_THRESHOLD
    All three outputs are wrapped in tf.stop_gradient.
    '''
    with tf.name_scope("ATTSMatcher"):
        assert isinstance(boxes_len,
                          (list, tuple)), "error boxes len type."

        def pairwise(fn, scope, **extra):
            # Every pairwise matrix is computed between gboxes and boxes.
            return odb.batch_bboxes_pair_wrapv2(gboxes,
                                                boxes,
                                                fn=fn,
                                                len0=glength,
                                                scope=scope,
                                                **extra)

        dis_matrix = pairwise(odb.get_bboxes_dis, "get_dis_matrix")
        iou_matrix = pairwise(odb.get_iou_matrix, "get_iou_matrix")
        is_center_in_gtboxes = pairwise(odb.is_center_in_boxes,
                                        "get_is_center_in_gtbboxes",
                                        dtype=tf.bool)

        # Take the k nearest proposal boxes in every layer.
        per_level_dis = tf.split(dis_matrix, boxes_len, axis=2)
        with tf.name_scope("get_offset"):
            # Start index of every layer inside the concatenated box axis.
            offsets = [0]
            for level_len in boxes_len[:-1]:
                offsets.append(offsets[-1] + level_len)

        pos_indices = [
            tf.nn.top_k(-level_dis,
                        k=tf.minimum(self.k, level_len),
                        sorted=False)[1] + level_offset
            for level_offset, level_len, level_dis in zip(
                offsets, boxes_len, per_level_dis)
        ]
        pos_indices = tf.concat(pos_indices, axis=-1)
        pos_ious = btf.batch_gather(iou_matrix,
                                    pos_indices,
                                    name="gather_pos_ious")

        # mean + std over the per-layer top-k entries whose IoU is greater
        # than MIN_IOU_THRESHOLD.
        iou_mean, iou_var = self.moments(pos_ious,
                                         threshold=self.MIN_IOU_THRESHOLD,
                                         axes=[-1])

        # NOTE(review): the source indentation was lost; the CPU device
        # scope is assumed to extend to the end of the function.
        with tf.device("/cpu:0"):
            iou_cap = tf.reduce_max(pos_ious, axis=-1, keepdims=True)
            iou_threshold = tf.minimum(iou_cap, iou_mean + tf.sqrt(iou_var))

            # The original algorithm takes positives only from the top-k
            # candidates above; here positives are taken from all boxes.
            # Positive: IoU >= iou_threshold and center inside the gt box.
            is_pos = tf.logical_and(iou_matrix >= iou_threshold,
                                    is_center_in_gtboxes)
            iou_matrix = tf.where(is_pos, iou_matrix,
                                  tf.zeros_like(iou_matrix))

            # Best groundtruth box for every proposal box.
            scores, index = tf.nn.top_k(tf.transpose(iou_matrix,
                                                     perm=[0, 2, 1]),
                                        k=1)
            B, _, _ = btf.combined_static_and_dynamic_shape(gboxes)
            index = tf.squeeze(index, axis=-1)
            scores = tf.squeeze(scores, axis=-1)

            labels = wmlt.batch_gather(glabels,
                                       index,
                                       name="gather_labels",
                                       parallel_iterations=B,
                                       back_prop=False)
            is_good_score = tf.greater(scores, self.MIN_IOU_THRESHOLD)
            labels = tf.where(is_good_score, labels, tf.zeros_like(labels))
            index = tf.where(is_good_score, index, tf.ones_like(index) * -1)

            if self.same_pos_label:
                labels = tf.where(tf.greater(labels, 0),
                                  tf.ones_like(labels) * self.same_pos_label,
                                  labels)

            return (tf.stop_gradient(labels), tf.stop_gradient(scores),
                    tf.stop_gradient(index))
def mask_rcnn_loss(inputs,
                   pred_mask_logits,
                   proposals: EncodedData,
                   fg_selection_mask,
                   log=True):
    '''
    Sigmoid cross-entropy loss between the predicted masks of the foreground
    proposals and the groundtruth masks cropped/resized to the same boxes.

    :param inputs: inputs[GT_MASKS] [batch_size,N,H,W]; inputs[IMAGE] is
    only read for the debug summaries
    :param pred_mask_logits: [Y,H,W,C] C==1 if cls_agnostic_mask else
    num_classes, H,W is the size of the mask, not a position in the original
    image
    :param proposals: proposals.indices: [batch_size,M],
    proposals.boxes: [batch_size,M,box_dim],
    proposals.gt_object_logits: [batch_size,M]
    :param fg_selection_mask: [X] X = batch_size*M,
    Y = tf.reduce_sum(fg_selection_mask)
    :param log: whether image summaries are emitted (additionally gated by
    the global summary level)
    :return: the mask loss reduced with btf.safe_reduce_mean
    '''
    cls_agnostic_mask = pred_mask_logits.get_shape().as_list()[-1] == 1
    _, mask_H, mask_W, _ = wmlt.combined_static_and_dynamic_shape(
        pred_mask_logits)
    assert mask_H == mask_W, "Mask prediction must be square!"

    gt_masks = inputs[GT_MASKS]  # [batch_size,N,H,W]

    # With high-resolution inputs this part may use too much GPU memory, so
    # it runs on the CPU.
    with tf.device("/cpu:0"):
        batch_size, X, H, W = wmlt.combined_static_and_dynamic_shape(gt_masks)
        # Background is included in proposals, with index -1; relu clamps
        # it to 0 and fg_selection_mask removes those entries afterwards.
        gt_masks = tf.reshape(gt_masks, [batch_size * X, H, W])
        indices = btf.twod_indexs_to_oned_indexs(tf.nn.relu(proposals.indices),
                                                 depth=X)
        indices = tf.boolean_mask(indices, fg_selection_mask)
        gt_masks = tf.gather(gt_masks, indices)

    boxes = proposals.boxes
    batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(boxes)
    boxes = tf.reshape(boxes, [batch_size * box_nr, box_dim])
    boxes = tf.boolean_mask(boxes, fg_selection_mask)

    # Same reason as above: keep the full-resolution crop on the CPU.
    with tf.device("/cpu:0"):
        gt_masks = tf.expand_dims(gt_masks, axis=-1)
        croped_masks_gt_masks = wmlt.tf_crop_and_resize(
            gt_masks, boxes, [mask_H, mask_W])

    if not cls_agnostic_mask:
        # Keep only the mask channel of each proposal's groundtruth class.
        gt_classes = proposals.gt_object_logits
        gt_classes = tf.reshape(gt_classes, [-1])
        gt_classes = tf.boolean_mask(gt_classes, fg_selection_mask)
        pred_mask_logits = tf.transpose(pred_mask_logits, [0, 3, 1, 2])
        # Predictions do not include the background class, hence the -1.
        pred_mask_logits = wmlt.batch_gather(pred_mask_logits, gt_classes - 1)
        pred_mask_logits = tf.expand_dims(pred_mask_logits, axis=-1)

    if log and config.global_cfg.GLOBAL.SUMMARY_LEVEL <= SummaryLevel.DEBUG:
        # Device string fixed from ":/cpu:0" to match the other CPU scopes.
        with tf.device("/cpu:0"):
            with tf.name_scope("mask_loss_summary"):
                pmasks_2d = tf.reshape(fg_selection_mask,
                                       [batch_size, box_nr])
                boxes_3d = tf.expand_dims(boxes, axis=1)
                wsummary.positive_box_on_images_summary(inputs[IMAGE],
                                                        proposals.boxes,
                                                        pmasks=pmasks_2d)
                image = wmlt.select_image_by_mask(inputs[IMAGE], pmasks_2d)
                t_gt_masks = tf.expand_dims(tf.squeeze(gt_masks, axis=-1),
                                            axis=1)
                wsummary.detection_image_summary(
                    images=image,
                    boxes=boxes_3d,
                    instance_masks=t_gt_masks,
                    name="mask_and_boxes_in_mask_loss")

                log_mask = ivis.draw_detection_image_summary(gt_masks,
                                                             boxes=boxes_3d)
                log_mask = wmli.concat_images(
                    [log_mask, croped_masks_gt_masks])
                wmlt.image_summaries(log_mask, "mask", max_outputs=3)

                # Binarise at probability 0.5; comparing the raw logits with
                # 0.5 would correspond to a probability of about 0.62.
                pred_binary = tf.cast(
                    tf.nn.sigmoid(pred_mask_logits) > 0.5, tf.float32)
                log_mask = wmli.concat_images([gt_masks, pred_binary])
                wmlt.image_summaries(log_mask, "gt_vs_pred", max_outputs=3)

    mask_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=croped_masks_gt_masks, logits=pred_mask_logits)
    mask_loss = btf.safe_reduce_mean(mask_loss)
    return mask_loss
def apply_deltas(self, datas, num_dets, img_size=None): ''' ''' h_tl = tf.nn.sigmoid(datas['heatmaps_tl']) h_br = tf.nn.sigmoid(datas['heatmaps_br']) h_ct = tf.nn.sigmoid(datas['heatmaps_ct']) B, H, W, C = wmlt.combined_static_and_dynamic_shape(h_tl) h_tl = self.pixel_nms(h_tl, threshold=self.nms_threshold) h_br = self.pixel_nms(h_br, threshold=self.nms_threshold) h_ct = self.pixel_nms(h_ct, threshold=self.nms_threshold) tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = self._topk(h_tl, K=self.k) br_scores, br_inds, br_clses, br_ys, br_xs = self._topk(h_br, K=self.k) ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(h_ct, K=self.k) K = self.k tl_ys = tf.tile(tf.reshape(tl_ys, [B, K, 1]), [1, 1, K]) tl_xs = tf.tile(tf.reshape(tl_xs, [B, K, 1]), [1, 1, K]) br_ys = tf.tile(tf.reshape(br_ys, [B, 1, K]), [1, K, 1]) br_xs = tf.tile(tf.reshape(br_xs, [B, 1, K]), [1, K, 1]) ct_ys = tf.reshape(ct_ys, [B, K]) ct_xs = tf.reshape(ct_xs, [B, K]) tl_regr = wmlt.batch_gather(datas['offset_tl'], tl_inds) br_regr = wmlt.batch_gather(datas['offset_br'], br_inds) ct_regr = wmlt.batch_gather(datas['offset_ct'], br_inds) tl_regr = tf.reshape(tl_regr, [B, K, 1, 2]) br_regr = tf.reshape(br_regr, [B, 1, K, 2]) ct_regr = tf.reshape(ct_regr, [B, K, 2]) tl_xs = tl_xs + tl_regr[..., 0] tl_ys = tl_ys + tl_regr[..., 1] br_xs = br_xs + br_regr[..., 0] br_ys = br_ys + br_regr[..., 1] ct_xs = ct_xs + ct_regr[..., 0] ct_ys = ct_ys + ct_regr[..., 1] bboxes = tf.stack([tl_ys, tl_xs, br_ys, br_xs], axis=-1) tl_tag = wmlt.batch_gather(datas['tag_tl'], tl_inds) br_tag = wmlt.batch_gather(datas['tag_br'], br_inds) dists = tf.abs(tl_tag - br_tag) dis_inds = (dists > self.dis_threshold) tl_scores = tf.tile(tf.reshape(tl_scores, K, 1), [1, 1, K]) br_scores = tf.tile(tf.reshape(br_scores, K, 1), [1, 1, K]) scores = (tl_scores + br_scores) / 2 tl_clses = tf.tile(tf.reshape(tl_clses, K, 1), [1, 1, K]) br_clses = tf.tile(tf.reshape(br_clses, K, 1), [1, 1, K]) cls_inds = tf.not_equal(tl_clses, br_clses) 
width_inds = (br_xs < tl_xs) height_inds = (br_ys < tl_ys) ct = tf.stack([ct_xs, ct_ys], axis=-1) center_inds = tfop.center_filter(bboxes, ct, sizes=[], nr=[3, 5]) all_inds = tf.logical_or(cls_inds, dis_inds) all_inds = tf.logical_or(all_inds, width_inds) all_inds = tf.logical_or(all_inds, height_inds) scores = tf.where(all_inds, tf.zeros_like(scores), scores) scores, inds = tf.nn.top_k(tf.reshape(scores, [B, -1]), num_dets) bboxes = tf.reshape(bboxes, [B, -1, 4]) bboxes = wmlt.batch_gather(bboxes, inds) clses = tf.reshape(tl_clses, [B, -1, 1]) clses = wmlt.batch_gather(clses, inds) tl_scores = tf.reshape(tl_scores, [B, -1, 1]) tl_scores = wmlt.batch_gather(tl_scores, inds) br_scores = tf.reshape(br_scores, [B, -1, 1]) br_scores = wmlt.batch_gather(br_scores, inds) return bboxes, scores, tl_scores, br_scores, clses,