def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Sample some boxes from all proposals for training.

    Ground-truth boxes are appended to the proposals before sampling, so
    #fg is expected to be > 0 whenever at least one ground-truth box exists.
    The reductions over the ground-truth axis are guarded so that an image
    with no ground truth (m == 0) yields background-only samples instead of
    reducing over an empty axis. `pairwise_iou` and `proposal_metrics` are
    defined elsewhere and are assumed to accept an empty `gt_boxes`.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, integer class labels. They are concatenated with int64
            zeros below, so their dtype has to be int64.

    Returns:
        A BoxProposals instance built from:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t int64 labels, in [0, #class). Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    iou = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(iou)

    # add ground truth as proposals as well
    boxes = tf.concat([boxes, gt_boxes], axis=0)    # (n+m) x 4
    iou = tf.concat([iou, tf.eye(tf.shape(gt_boxes)[0])], axis=0)   # (n+m) x m
    # #proposal=n+m from now on

    iou_shape = tf.shape(iou)
    num_proposals = iou_shape[0]
    has_gt = iou_shape[1] > 0   # False for an image without any ground truth

    def sample_fg_bg(iou):
        # Without ground truth nothing can be foreground.
        fg_mask = tf.cond(
            has_gt,
            lambda: tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH,
            lambda: tf.zeros([num_proposals], dtype=tf.bool))

        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(
            cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
            tf.size(fg_inds), name='num_fg')
        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

        bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        # background fills whatever part of the batch foreground left free
        num_bg = tf.minimum(
            cfg.FRCNN.BATCH_PER_IM - num_fg,
            tf.size(bg_inds), name='num_bg')
        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds

    fg_inds, bg_inds = sample_fg_bg(iou)
    # fg,bg indices w.r.t proposals

    # argmax over an empty axis is an error, so fall back to zeros when m == 0;
    # the fallback is never gathered from because fg_inds is empty in that case.
    best_iou_ind = tf.cond(
        has_gt,
        lambda: tf.argmax(iou, axis=1),   # #proposal, each in 0~m-1
        lambda: tf.zeros([num_proposals], dtype=tf.int64))
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)   # num_fg

    all_indices = tf.concat([fg_inds, bg_inds], axis=0)   # indices w.r.t all n+m proposal boxes
    ret_boxes = tf.gather(boxes, all_indices)

    # foreground rois take the label of their matched GT, background rois get 0
    ret_labels = tf.concat(
        [tf.gather(gt_labels, fg_inds_wrt_gt),
         tf.zeros_like(bg_inds, dtype=tf.int64)], axis=0)
    # stop the gradient -- they are meant to be training targets
    return BoxProposals(
        tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes'),
        tf.stop_gradient(ret_labels, name='sampled_labels'),
        tf.stop_gradient(fg_inds_wrt_gt))
def match_box_with_gt(self, boxes, iou_threshold):
    """
    Assign every box to the ground-truth box it overlaps most.

    Args:
        boxes: Nx4
        iou_threshold: a box is foreground when its best IoU with any
            ground-truth box is >= this value.

    Returns:
        BoxProposals. Outside training it only wraps `boxes`. In training it
        also carries the per-box labels (0 for boxes below the threshold) and,
        for the foreground boxes only, the index of the matched ground truth.
    """
    if not self.is_training:
        return BoxProposals(boxes)

    scope_name = 'match_box_with_gt_{}'.format(iou_threshold)
    with tf.name_scope(scope_name):
        # NxM: IoU between every box and every ground-truth box
        overlaps = pairwise_iou(boxes, self.gt_boxes)
        # N: the largest IoU of each box ...
        best_overlap = tf.reduce_max(overlaps, axis=1)
        # N: ... and which ground-truth box achieved it
        matched_gt = tf.argmax(overlaps, axis=1)
        # label of the best-matching ground truth, for every box
        matched_labels = tf.gather(self.gt_labels, matched_gt)
        # boxes at or above the threshold are foreground
        is_fg = best_overlap >= iou_threshold
        # keep the matched-GT index for foreground boxes only
        fg_inds_wrt_gt = tf.boolean_mask(matched_gt, is_fg)
        # zero out the label of background boxes; labels are targets, so no gradient
        fg_as_int = tf.cast(is_fg, tf.int64)
        labels_per_box = tf.stop_gradient(matched_labels * fg_as_int)
        return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Sample some ROIs from all proposals for training.

    Ground truth boxes are added as RoIs, so #fg is expected to be > 0
    whenever at least one ground truth box exists. The reductions over the
    ground-truth axis are guarded so that an image with no ground truth
    (m == 0) yields background-only samples instead of reducing over an empty
    axis. `pairwise_iou` and `proposal_metrics` are defined elsewhere and are
    assumed to accept an empty `gt_boxes`.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, integer class labels. They are concatenated with int64
            zeros below, so their dtype has to be int64.

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    iou = pairwise_iou(boxes, gt_boxes)     # nxm
    proposal_metrics(iou)

    # add ground truth as proposals as well
    boxes = tf.concat([boxes, gt_boxes], axis=0)    # (n+m) x 4
    iou = tf.concat([iou, tf.eye(tf.shape(gt_boxes)[0])], axis=0)   # (n+m) x m
    # #proposal=n+m from now on

    iou_shape = tf.shape(iou)
    num_proposals = iou_shape[0]
    has_gt = iou_shape[1] > 0   # False for an image without any ground truth

    def sample_fg_bg(iou):
        # Without ground truth nothing can be foreground.
        fg_mask = tf.cond(
            has_gt,
            lambda: tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH,
            lambda: tf.zeros([num_proposals], dtype=tf.bool))

        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(
            cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
            tf.size(fg_inds), name='num_fg')
        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

        bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        # background fills whatever part of the batch foreground left free
        num_bg = tf.minimum(
            cfg.FRCNN.BATCH_PER_IM - num_fg,
            tf.size(bg_inds), name='num_bg')
        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds

    fg_inds, bg_inds = sample_fg_bg(iou)
    # fg,bg indices w.r.t proposals

    # argmax over an empty axis is an error, so fall back to zeros when m == 0;
    # the fallback is never gathered from because fg_inds is empty in that case.
    best_iou_ind = tf.cond(
        has_gt,
        lambda: tf.argmax(iou, axis=1),   # #proposal, each in 0~m-1
        lambda: tf.zeros([num_proposals], dtype=tf.int64))
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)   # num_fg

    all_indices = tf.concat([fg_inds, bg_inds], axis=0)   # indices w.r.t all n+m proposal boxes
    ret_boxes = tf.gather(boxes, all_indices)

    # foreground rois take the label of their matched GT, background rois get 0
    ret_labels = tf.concat(
        [tf.gather(gt_labels, fg_inds_wrt_gt),
         tf.zeros_like(bg_inds, dtype=tf.int64)], axis=0)
    # stop the gradient -- they are meant to be training targets
    return tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes'), \
        tf.stop_gradient(ret_labels, name='sampled_labels'), \
        tf.stop_gradient(fg_inds_wrt_gt)
def match_box_with_gt(self, boxes, gt_boxes, gt_labels, iou_threshold):
    """
    Assign every box to the ground-truth box it overlaps most.

    Args:
        boxes: Nx4
        gt_boxes: Mx4 ground-truth boxes
        gt_labels: M integer labels. They are multiplied by an int64 mask
            below, so their dtype has to be int64.
        iou_threshold: a box is foreground when its best IoU with any
            ground-truth box is >= this value.

    Returns:
        A 5-element list `[boxes, labels_per_box, fg_inds_wrt_gt, gt_boxes,
        gt_labels]`. Outside training only `boxes` is filled in and the other
        four entries are None. In training `labels_per_box` holds the matched
        label for foreground boxes and 0 otherwise, and `fg_inds_wrt_gt`
        holds the matched ground-truth index of the foreground boxes only.
    """
    from utils.box_ops import pairwise_iou
    if self.is_training:
        with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
            iou = pairwise_iou(boxes, gt_boxes)  # NxM
            max_iou_per_box = tf.reduce_max(iou, axis=1)  # N
            best_iou_ind = tf.argmax(iou, axis=1)  # N
            labels_per_box = tf.gather(gt_labels, best_iou_ind)
            fg_mask = max_iou_per_box >= iou_threshold
            fg_inds_wrt_gt = tf.boolean_mask(best_iou_ind, fg_mask)
            # tf.cast replaces the deprecated tf.to_int64; the product zeroes
            # the label of every background box. Labels are targets: no gradient.
            labels_per_box = tf.stop_gradient(
                labels_per_box * tf.cast(fg_mask, tf.int64))
            return [
                boxes, labels_per_box, fg_inds_wrt_gt, gt_boxes, gt_labels
            ]
    else:
        return [boxes, None, None, None, None]
def match_box_with_gt(self, boxes, iou_threshold):
    """
    Pair each box with its highest-IoU ground-truth box.

    Args:
        boxes: Nx4
        iou_threshold: minimum best-IoU for a box to be treated as foreground.

    Returns:
        BoxProposals. In training it is built from the boxes, their labels
        (background boxes get 0) and the matched ground-truth index of each
        foreground box; otherwise from the boxes alone.
    """
    if not self.is_training:
        return BoxProposals(boxes)

    with tf.name_scope("match_box_with_gt_{}".format(iou_threshold)):
        pair_iou = pairwise_iou(boxes, self.gt_boxes)  # NxM
        top_iou = tf.reduce_max(pair_iou, axis=1)  # N
        top_gt_ind = tf.argmax(pair_iou, axis=1)  # N
        top_gt_labels = tf.gather(self.gt_labels, top_gt_ind)
        foreground = top_iou >= iou_threshold
        fg_inds_wrt_gt = tf.boolean_mask(top_gt_ind, foreground)
        # multiplying by the 0/1 mask resets background labels to 0
        masked_labels = top_gt_labels * tf.cast(foreground, tf.int64)
        # labels are training targets, so gradients must not flow through them
        labels_per_box = tf.stop_gradient(masked_labels)
        return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
def match_box_with_gt(self, boxes, iou_threshold):
    """
    Assign every box to the ground-truth box it overlaps most.

    Args:
        boxes: Nx4
        iou_threshold: a box is foreground when its best IoU with any
            ground-truth box is >= this value.

    Returns:
        BoxProposals. Outside training it only wraps `boxes`. In training it
        is built from the boxes, the per-box labels (0 for background), the
        matched ground-truth index of each foreground box, and the
        ground-truth boxes and labels held on `self`.
    """
    if self.is_training:
        with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
            iou = pairwise_iou(boxes, self.gt_boxes)  # NxM
            max_iou_per_box = tf.reduce_max(iou, axis=1)  # N
            best_iou_ind = tf.argmax(iou, axis=1)  # N
            labels_per_box = tf.gather(self.gt_labels, best_iou_ind)
            fg_mask = max_iou_per_box >= iou_threshold
            fg_inds_wrt_gt = tf.boolean_mask(best_iou_ind, fg_mask)
            # tf.cast replaces the deprecated tf.to_int64; the product zeroes
            # the label of every background box. Labels are targets: no gradient.
            labels_per_box = tf.stop_gradient(
                labels_per_box * tf.cast(fg_mask, tf.int64))
            return BoxProposals(
                boxes, labels_per_box, fg_inds_wrt_gt,
                self.gt_boxes, self.gt_labels)
    else:
        return BoxProposals(boxes)
def match_box_with_gt(self, boxes, iou_threshold):
    """
    Pair each box with its highest-IoU ground-truth box.

    Args:
        boxes: Nx4
        iou_threshold: minimum best-IoU for a box to be treated as foreground.

    Returns:
        BoxProposals. In training it is built from the boxes, their labels
        (background boxes get 0) and the matched ground-truth index of each
        foreground box; otherwise from the boxes alone.
    """
    if not self.training:
        return BoxProposals(boxes)

    with tf.name_scope('match_box_with_gt_{}'.format(iou_threshold)):
        pair_iou = pairwise_iou(boxes, self.gt_boxes)  # NxM
        top_iou = tf.reduce_max(pair_iou, axis=1)  # N

        def _argmax_over_gt():
            # #proposal, each in 0~m-1
            return tf.argmax(pair_iou, axis=1)

        def _all_zero_indices():
            # stand-in used when there is no ground-truth column to reduce over
            return tf.zeros([tf.shape(pair_iou)[0]], dtype=tf.int64)

        top_gt_ind = tf.cond(
            tf.shape(pair_iou)[1] > 0, _argmax_over_gt, _all_zero_indices)
        top_gt_labels = tf.gather(self.gt_labels, top_gt_ind)
        foreground = top_iou >= iou_threshold
        fg_inds_wrt_gt = tf.boolean_mask(top_gt_ind, foreground)
        # multiplying by the 0/1 mask resets background labels to 0
        masked_labels = top_gt_labels * tf.cast(foreground, tf.int64)
        # labels are training targets, so gradients must not flow through them
        labels_per_box = tf.stop_gradient(masked_labels)
        return BoxProposals(boxes, labels_per_box, fg_inds_wrt_gt)
def losses(self):
    """
    Gather the inputs of `fastrcnn_losses_iou` and return whatever it returns.

    Besides the usual label/box inputs, an IoU target is computed for every
    decoded output box: its largest IoU with any ground-truth box, or 0 for
    all boxes when no proposal has a non-zero label.
    """
    # regression targets of the foreground proposals, scaled by the weights
    box_targets = encode_bbox_target(
        self.proposals.matched_gt_boxes(), self.proposals.fg_boxes())
    box_targets = box_targets * self.bbox_regression_weights

    pred_boxes = tf.reshape(self.decoded_output_boxes(), [-1, 4])
    all_gt_boxes = tf.reshape(self.proposals.gt_boxes, [-1, 4])
    best_iou = tf.reduce_max(pairwise_iou(pred_boxes, all_gt_boxes), axis=1)

    # if there is no foreground proposal, every IoU target is 0
    is_fg = tf.stop_gradient(tf.not_equal(self.proposals.labels, 0))
    num_fg = tf.identity(tf.count_nonzero(is_fg, dtype=tf.int32))
    no_fg = tf.equal(num_fg, 0)
    best_iou = tf.where(no_fg, tf.zeros_like(best_iou), best_iou)
    # the IoU values are targets, not something to differentiate through
    iou_targets = tf.stop_gradient(tf.reshape(best_iou, [-1]))

    return fastrcnn_losses_iou(
        self.proposals.labels,
        self.label_logits,
        iou_targets,
        tf.reshape(self.iou_logits, [-1]),
        box_targets,
        self.fg_box_logits())
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Pick a batch of foreground/background rois and build their targets.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m integer labels (concatenated with int64 zeros below)

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        target_boxes: tx4 encoded box, the regression target
        labels: t labels
    """
    @under_name_scope()
    def assign_class_to_roi(iou, gt_boxes, gt_labels):
        """
        Match every roi to the ground-truth box it overlaps most.

        Args:
            iou: nxm (nr_proposal x nr_gt)
        Returns:
            fg_mask: n boolean, whether each roibox is fg
            roi_labels: n labels, label of the best-matching gt of each roi
            best_gt_boxes: nx4
        """
        matched_ind = tf.argmax(iou, axis=1)   # n, index of the best gt
        matched_iou = tf.reduce_max(iou, axis=1)   # n
        matched_boxes = tf.gather(gt_boxes, matched_ind)   # nx4
        matched_labels = tf.gather(gt_labels, matched_ind)     # n
        is_fg = matched_iou >= config.FASTRCNN_FG_THRESH
        return is_fg, matched_labels, matched_boxes

    iou = pairwise_iou(boxes, gt_boxes)     # nxm

    with tf.name_scope('proposal_metrics'):
        # best roi of each gt; only used for the summaries below
        per_gt_best_iou = tf.reduce_max(iou, axis=0)
        summaries = [tf.reduce_mean(per_gt_best_iou, name='best_iou_per_gt')]
        with tf.device('/cpu:0'):
            for th in (0.3, 0.5):
                # fraction of gt boxes covered by some roi at this IoU
                num_covered = tf.count_nonzero(per_gt_best_iou >= th)
                num_gt = tf.size(per_gt_best_iou, out_type=tf.int64)
                summaries.append(tf.truediv(
                    num_covered, num_gt, name='recall_iou{}'.format(th)))
        add_moving_summary(*summaries)

    # n, n, nx4
    fg_mask, roi_labels, best_gt_boxes = assign_class_to_roi(iou, gt_boxes, gt_labels)

    # the gt boxes themselves are always foreground candidates; they sit
    # after the n proposals, hence the offset by the number of proposals
    proposal_fg_inds = tf.reshape(tf.where(fg_mask), [-1])
    gt_inds = tf.cast(
        tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0], tf.int64)
    fg_inds = tf.concat([proposal_fg_inds, gt_inds], 0)
    fg_quota = int(config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO)
    num_fg = tf.minimum(fg_quota, tf.size(fg_inds), name='num_fg')
    fg_inds = tf.slice(tf.random_shuffle(fg_inds), [0], [num_fg])

    bg_inds = tf.where(tf.logical_not(fg_mask))[:, 0]
    # background takes the part of the batch that foreground left free
    num_bg = tf.minimum(
        config.FASTRCNN_BATCH_PER_IM - num_fg, tf.size(bg_inds), name='num_bg')
    bg_inds = tf.slice(tf.random_shuffle(bg_inds), [0], [num_bg])

    add_moving_summary(num_fg, num_bg)

    # tables over all n+m candidates (proposals first, then the gt boxes)
    all_boxes = tf.concat([boxes, gt_boxes], axis=0)
    all_matched_gt_boxes = tf.concat([best_gt_boxes, gt_boxes], axis=0)
    all_labels = tf.concat([roi_labels, gt_labels], axis=0)

    sampled_inds = tf.concat([fg_inds, bg_inds], axis=0)   # ind in all n+m boxes
    ret_boxes = tf.gather(all_boxes, sampled_inds, name='sampled_boxes')
    sampled_gt_boxes = tf.gather(all_matched_gt_boxes, sampled_inds)
    ret_encoded_boxes = encode_bbox_target(sampled_gt_boxes, ret_boxes)
    ret_encoded_boxes = ret_encoded_boxes * tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
    # bg boxes will not be trained on

    fg_labels = tf.gather(all_labels, fg_inds)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([fg_labels, bg_labels], axis=0, name='sampled_labels')
    return ret_boxes, tf.stop_gradient(ret_encoded_boxes), tf.stop_gradient(ret_labels)
def sample_sniper_targets(boxes, gt_boxes, gt_labels):
    """
    Sample some ROIs from all proposals for training.

    NOTE: no size-based filtering of the proposals happens here. An earlier
    draft of such a filter (based on cfg.SNIPER.VALID_RANGES) depended on a
    `scale_index` that this function does not receive and was never enabled.

    Ground truth boxes are added as RoIs, so #fg is expected to be > 0
    whenever at least one ground truth box exists. The reductions over the
    ground-truth axis are guarded so that an image with no ground truth
    (m == 0) yields background-only samples instead of reducing over an empty
    axis. `pairwise_iou` and `proposal_metrics` are defined elsewhere and are
    assumed to accept an empty `gt_boxes`.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, integer class labels. They are concatenated with int64
            zeros below, so their dtype has to be int64.

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.
    """
    iou = pairwise_iou(boxes, gt_boxes)  # nxm
    proposal_metrics(iou)

    # add ground truth as proposals as well
    boxes = tf.concat([boxes, gt_boxes], axis=0)  # (n+m) x 4
    iou = tf.concat([iou, tf.eye(tf.shape(gt_boxes)[0])], axis=0)  # (n+m) x m
    # #proposal=n+m from now on

    iou_shape = tf.shape(iou)
    num_proposals = iou_shape[0]
    has_gt = iou_shape[1] > 0   # False for an image without any ground truth

    def sample_fg_bg(iou):
        # Without ground truth nothing can be foreground.
        fg_mask = tf.cond(
            has_gt,
            lambda: tf.reduce_max(iou, axis=1) >= cfg.FRCNN.FG_THRESH,
            lambda: tf.zeros([num_proposals], dtype=tf.bool))

        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
                            tf.size(fg_inds),
                            name='num_fg')
        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

        bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        # background fills whatever part of the batch foreground left free
        num_bg = tf.minimum(cfg.FRCNN.BATCH_PER_IM - num_fg,
                            tf.size(bg_inds),
                            name='num_bg')
        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds

    fg_inds, bg_inds = sample_fg_bg(iou)
    # fg,bg indices w.r.t proposals

    # argmax over an empty axis is an error, so fall back to zeros when m == 0;
    # the fallback is never gathered from because fg_inds is empty in that case.
    best_iou_ind = tf.cond(
        has_gt,
        lambda: tf.argmax(iou, axis=1),  # #proposal, each in 0~m-1
        lambda: tf.zeros([num_proposals], dtype=tf.int64))
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)  # num_fg

    all_indices = tf.concat([fg_inds, bg_inds], axis=0)  # indices w.r.t all n+m proposal boxes
    ret_boxes = tf.gather(boxes, all_indices)

    # foreground rois take the label of their matched GT, background rois get 0
    ret_labels = tf.concat([
        tf.gather(gt_labels, fg_inds_wrt_gt),
        tf.zeros_like(bg_inds, dtype=tf.int64)
    ], axis=0)
    # stop the gradient -- they are meant to be training targets
    return tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes'), \
        tf.stop_gradient(ret_labels, name='sampled_labels'), \
        tf.stop_gradient(fg_inds_wrt_gt)
def build_graph(self, *inputs):
    """
    Build the detection + re-identification graph of the current tower.

    Args:
        inputs: seven tensors, unpacked in this order: image, anchor_labels,
            anchor_boxes, gt_boxes, gt_labels, gt_ids, orig_shape.

    Returns:
        In training: the total cost, i.e. the sum of the RPN label/box
        losses, the Fast R-CNN label/box losses, the normalized re-id loss
        and the weight-decay cost.
        In inference: nothing. The outputs are exposed as the named tensors
        `rescaled_final_boxes`, `feature_vector` and `re_id_probs`.
    """
    is_training = get_current_tower_context().is_training
    image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_ids, orig_shape = inputs
    image = self.preprocess(image)  # 1CHW

    # shared backbone feature map and the RPN predictions on top of it
    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap,
                                                cfg.RPN.HEAD_DIM,
                                                cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]  # h,w
    # decode into actual image coordinates
    pred_boxes_decoded = anchors.decode_logits(
        rpn_box_logits)  # fHxfWxNAx4, floatbox
    # the pre-/post-NMS top-k limits differ between training and testing
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]), image_shape2d,
        cfg.RPN.TRAIN_PRE_NMS_TOPK
        if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
        cfg.RPN.TRAIN_POST_NMS_TOPK
        if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    # image coordinates -> feature-map coordinates
    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    # NOTE(review): the original author left the question of the output size
    # of roi_align open here ("#proposals*h*w*c?") -- confirm against roi_align.
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(
        roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap',
                                   feature_fastrcnn,
                                   data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(),
            rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                        [-1])  # fg inds w.r.t all samples
        # outputs from fg proposals
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                           fg_inds_wrt_sample)

        # rcnn_labels: the labels of the proposals
        # fg_sampled_boxes: fg proposals
        # matched_gt_boxes: just like RPN, the gt boxes
        #                   that match the corresponding fg proposals
        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
            fastrcnn_label_logits, fg_fastrcnn_box_logits)

        # acquire pred for re-id training
        # turning NMS off gives re-id branch more training samples
        if cfg.RE_ID.NMS:
            boxes, final_labels, final_probs = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)
        else:
            boxes, final_labels, final_probs = self.fastrcnn_inference_id(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)
        # The detections are NOT rescaled to the original image size here
        # (the rescaling + clipping step is only done in the inference branch).

        # Re-id training plan: compute the IoU of detections and gt, discard
        # detections below the threshold, assign re-id labels from the matched
        # gt, then crop the detections from the conv4 feature map and run
        # the re-id branch on them.
        # NOTE(review): the original author asks whether gt_boxes are already
        # resized to the network input scale at this point -- confirm.
        iou = pairwise_iou(boxes, gt_boxes)
        tp_mask = tf.reduce_max(iou, axis=1) >= cfg.RE_ID.IOU_THRESH
        # keep only the IoU rows of detections that passed the threshold
        iou = tf.boolean_mask(iou, tp_mask)

        def re_id_loss(pred_boxes, pred_matching_gt_ids, featuremap):
            # Cross-entropy identity loss of the id head on the given boxes.
            # Also returns the updated running count of boxes used so far.
            with tf.variable_scope('id_head'):
                num_of_samples_used = tf.get_variable(
                    'num_of_samples_used', initializer=0, trainable=False)
                num_of_samples_used = num_of_samples_used.assign_add(
                    tf.shape(pred_boxes)[0])

                boxes_on_featuremap = pred_boxes * (1.0 /
                                                    cfg.RPN.ANCHOR_STRIDE)
                # NOTE(review): the original author left "name scope?" and
                # "stop gradient" as open items here.
                roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
                feature_idhead = resnet_conv5(
                    roi_resized,
                    cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
                feature_gap = GlobalAvgPooling(
                    'gap', feature_idhead, data_format='channels_first')

                init = tf.variance_scaling_initializer()
                hidden = FullyConnected('fc6',
                                        feature_gap,
                                        1024,
                                        kernel_initializer=init,
                                        activation=tf.nn.relu)
                hidden = FullyConnected('fc7',
                                        hidden,
                                        1024,
                                        kernel_initializer=init,
                                        activation=tf.nn.relu)
                hidden = FullyConnected('fc8',
                                        hidden,
                                        256,
                                        kernel_initializer=init,
                                        activation=tf.nn.relu)
                id_logits = FullyConnected(
                    'class',
                    hidden,
                    cfg.DATA.NUM_ID,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))

            label_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=pred_matching_gt_ids, logits=id_logits)
            label_loss = tf.reduce_mean(label_loss, name='label_loss')

            return label_loss, num_of_samples_used

        def check_unid_pedes(iou, gt_ids, boxes, tp_mask, featuremap):
            # Look up the id of the best-matching gt of every kept detection,
            # drop the detections whose id is 1, and compute the re-id loss
            # on what is left (or fall back to the constant loss).
            pred_gt_ind = tf.argmax(iou, axis=1)
            pred_matching_gt_ids = tf.gather(gt_ids, pred_gt_ind)
            pred_boxes = tf.boolean_mask(boxes, tp_mask)
            # label 1 corresponds to unid pedes
            unid_ind = tf.not_equal(pred_matching_gt_ids, 1)
            pred_matching_gt_ids = tf.boolean_mask(pred_matching_gt_ids,
                                                   unid_ind)
            pred_boxes = tf.boolean_mask(pred_boxes, unid_ind)

            # `re_id_loss` still refers to the function above at this point:
            # this lambda is called while the outer tf.cond below is being
            # built, i.e. before the name is rebound to the loss tensor.
            ret = tf.cond(
                tf.equal(tf.size(pred_boxes), 0), lambda:
                (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                lambda: re_id_loss(pred_boxes, pred_matching_gt_ids,
                                   featuremap))
            return ret

        with tf.name_scope('id_head'):
            # no detection reaches cfg.RE_ID.IOU_THRESH: use the constant
            # cfg.RE_ID.STABLE_LOSS and a sample count of 0 instead
            re_id_loss, num_of_samples_used = tf.cond(
                tf.equal(tf.size(iou), 0), lambda:
                (tf.constant(cfg.RE_ID.STABLE_LOSS), tf.constant(0)),
                lambda: check_unid_pedes(iou, gt_ids, boxes, tp_mask,
                                         featuremap))
            add_tensor_summary(num_of_samples_used, ['scalar'],
                               name='num_of_samples_used')

        # the un-normalized loss stays reachable by tensor name for debugging
        unnormed_id_loss = tf.identity(re_id_loss, name='unnormed_id_loss')
        re_id_loss = tf.divide(re_id_loss, cfg.RE_ID.LOSS_NORMALIZATION,
                               're_id_loss')
        add_moving_summary(unnormed_id_loss)
        add_moving_summary(re_id_loss)

        wd_cost = regularize_cost('.*/W',
                                  l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                  name='wd_cost')

        # NOTE(review): all losses are summed with equal weight; the original
        # author left "weights on the losses?" as an open question.
        total_cost = tf.add_n([
            rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
            fastrcnn_box_loss, re_id_loss, wd_cost
        ], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        if cfg.RE_ID.QUERY_EVAL:
            # query evaluation uses the gt boxes directly;
            # they are resized in the dataflow
            final_boxes = gt_boxes
        else:
            final_boxes, final_labels, _ = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)

        with tf.variable_scope('id_head'):
            preds_on_featuremap = final_boxes * (1.0 /
                                                 cfg.RPN.ANCHOR_STRIDE)
            # NOTE(review): the original author left "name scope?" and
            # "stop gradient" as open items here.
            roi_resized = roi_align(featuremap, preds_on_featuremap, 14)
            feature_idhead = resnet_conv5(
                roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            feature_gap = GlobalAvgPooling('gap',
                                           feature_idhead,
                                           data_format='channels_first')

            hidden = FullyConnected('fc6',
                                    feature_gap,
                                    1024,
                                    activation=tf.nn.relu)
            hidden = FullyConnected('fc7',
                                    hidden,
                                    1024,
                                    activation=tf.nn.relu)
            # the fc8 output is exported below as the re-id feature vector
            fv = FullyConnected('fc8', hidden, 256, activation=tf.nn.relu)
            id_logits = FullyConnected(
                'class',
                fv,
                cfg.DATA.NUM_ID,
                kernel_initializer=tf.random_normal_initializer(
                    stddev=0.01))

        # geometric mean of the height and width ratios between the network
        # input and the original image
        scale = tf.sqrt(
            tf.cast(image_shape2d[0], tf.float32) /
            tf.cast(orig_shape[0], tf.float32) *
            tf.cast(image_shape2d[1], tf.float32) /
            tf.cast(orig_shape[1], tf.float32))
        rescaled_final_boxes = final_boxes / scale
        # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
        rescaled_final_boxes = tf_clip_boxes(rescaled_final_boxes,
                                             orig_shape)

        # named output tensors of the inference graph
        rescaled_final_boxes = tf.identity(rescaled_final_boxes,
                                           'rescaled_final_boxes')
        fv = tf.identity(fv, name='feature_vector')
        prob = tf.nn.softmax(id_logits, name='re_id_probs')
def sample_cascade_rcnn_targets(boxes, gt_boxes, gt_labels, stage_num):
    """
    Sample some ROIs from all proposals for training one cascade stage.

    Ground truth boxes are added as RoIs, so #fg is expected to be > 0
    whenever at least one ground truth box exists. The reductions over the
    ground-truth axis are guarded so that an image with no ground truth
    (m == 0) yields background-only samples instead of reducing over an empty
    axis. `pairwise_iou` and `proposal_metrics_cascade` are defined elsewhere
    and are assumed to accept an empty `gt_boxes`.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, integer class labels. They are concatenated with int64
            zeros below, so their dtype has to be int64.
        stage_num: cascade stage, one of 1, 2, 3. It selects the foreground
            threshold cfg.CASCADERCNN.FG_THRESH_{1ST,2ND,3RD} and the suffix
            ('_1st', '_2nd', '_3rd') appended to the tensor names.

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        sampled_labels: t labels, in [0, #class-1]. Positive means foreground.
        fg_inds_wrt_gt: #fg indices, each in range [0, m-1].
            It contains the matching GT of each foreground roi.

    Raises:
        ValueError: if `stage_num` is not 1, 2 or 3.
    """
    # Fail early with a clear message; an unknown stage used to leave the
    # threshold unbound and only crashed later inside the sampling helper.
    if stage_num not in (1, 2, 3):
        raise ValueError(
            'stage_num must be 1, 2 or 3, got {!r}'.format(stage_num))

    if stage_num == 1:
        prefix = '_1st'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_1ST
    elif stage_num == 2:
        prefix = '_2nd'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_2ND
    else:
        prefix = '_3rd'
        fg_thresh = cfg.CASCADERCNN.FG_THRESH_3RD

    iou = pairwise_iou(boxes, gt_boxes)  # nxm
    proposal_metrics_cascade(iou, stage_num)

    # add ground truth as proposals as well
    boxes = tf.concat([boxes, gt_boxes], axis=0)  # (n+m) x 4
    iou = tf.concat([iou, tf.eye(tf.shape(gt_boxes)[0])], axis=0)  # (n+m) x m
    # #proposal=n+m from now on

    iou_shape = tf.shape(iou)
    num_proposals = iou_shape[0]
    has_gt = iou_shape[1] > 0   # False for an image without any ground truth

    def sample_fg_bg(iou, prefix):
        # Without ground truth nothing can be foreground.
        fg_mask = tf.cond(
            has_gt,
            lambda: tf.reduce_max(iou, axis=1) >= fg_thresh,
            lambda: tf.zeros([num_proposals], dtype=tf.bool))

        fg_inds = tf.reshape(tf.where(fg_mask), [-1])
        num_fg = tf.minimum(int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO),
                            tf.size(fg_inds),
                            name='num_fg' + prefix)
        fg_inds = tf.random_shuffle(fg_inds)[:num_fg]

        bg_inds = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        # background fills whatever part of the batch foreground left free
        num_bg = tf.minimum(cfg.FRCNN.BATCH_PER_IM - num_fg,
                            tf.size(bg_inds),
                            name='num_bg' + prefix)
        bg_inds = tf.random_shuffle(bg_inds)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds

    fg_inds, bg_inds = sample_fg_bg(iou, prefix)
    # fg,bg indices w.r.t proposals

    # argmax over an empty axis is an error, so fall back to zeros when m == 0;
    # the fallback is never gathered from because fg_inds is empty in that case.
    best_iou_ind = tf.cond(
        has_gt,
        lambda: tf.argmax(iou, axis=1),  # #proposal, each in 0~m-1
        lambda: tf.zeros([num_proposals], dtype=tf.int64))
    fg_inds_wrt_gt = tf.gather(best_iou_ind, fg_inds)  # num_fg

    all_indices = tf.concat([fg_inds, bg_inds], axis=0)  # indices w.r.t all n+m proposal boxes
    ret_boxes = tf.gather(boxes, all_indices)

    # foreground rois take the label of their matched GT, background rois get 0
    ret_labels = tf.concat([
        tf.gather(gt_labels, fg_inds_wrt_gt),
        tf.zeros_like(bg_inds, dtype=tf.int64)
    ], axis=0)
    # stop the gradient -- they are meant to be training targets
    return tf.stop_gradient(ret_boxes, name='sampled_proposal_boxes' + prefix), \
        tf.stop_gradient(ret_labels, name='sampled_labels' + prefix), \
        tf.stop_gradient(fg_inds_wrt_gt)
def _mean_of_hardest_losses(losses, k):
    """
    Average the `k` largest entries of the 1-D tensor `losses`.

    Entries tied with the k-th largest value are all included. `k` is clamped
    to the number of entries, and the result is 0 when nothing is selected.
    """
    k = tf.minimum(tf.cast(k, tf.int32), tf.size(losses))
    top_vals, _ = tf.nn.top_k(losses, k=k)
    # top_k sorts in descending order, so its minimum is the k-th largest
    # value. Unlike indexing with [-1], the reduction is defined for k == 0.
    threshold = tf.reduce_min(top_vals)
    weight = tf.cast(losses >= threshold, tf.float32)
    # A non-empty selection has a count >= 1, so the maximum leaves it
    # untouched; it only prevents 0/0 when nothing is selected.
    return tf.reduce_sum(losses * weight) / tf.maximum(tf.reduce_sum(weight), 1.)


def rpn_losses_iou(anchor_labels, anchor_boxes, gt_boxes, rpn_boxes, label_logits, box_logits, iou_logits):
    """
    RPN losses with hard-example mining and an extra IoU-prediction loss.

    Args:
        anchor_labels: fHxfWxNA. -1 marks anchors to ignore, 1 marks positives.
        anchor_boxes: fHxfWxNAx4, encoded
        gt_boxes: ground-truth boxes; reshaped to [-1, 4] here
        rpn_boxes: decoded RPN boxes; reshaped to [-1, 4] here
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
        iou_logits: fHxfWxNA; a sigmoid is applied before comparing them with
            the IoU targets

    Returns:
        label_loss, box_loss, iou_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32), name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                placeholder = 0.5   # A small value will make summaries appear lower.
                recall = tf.cast(tf.truediv(pos_prediction_corr, nr_pos), tf.float32)
                recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall, name='recall_th{}'.format(th))
                precision = tf.cast(tf.truediv(pos_prediction_corr, nr_pos_prediction), tf.float32)
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     placeholder, precision, name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total RPN loss will be fine.  TODO make the summary op smarter
    placeholder = 0.
    ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.cast(valid_anchor_labels, tf.float32), logits=valid_label_logits)

    # Label loss: the mean over the most wrongly classified valid anchors.
    # NOTE(review): the number of anchors kept comes from cfg.FRCNN.BATCH_PER_IM,
    # not from cfg.RPN.BATCH_PER_IM -- confirm that this is intended.
    label_loss = _mean_of_hardest_losses(ce_loss, cfg.FRCNN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')

    # Box loss: Huber loss on the positive anchors only.
    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits,
        reduction=tf.losses.Reduction.SUM)
    box_loss = box_loss * (50. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')

    # IoU loss: the target of every RPN box is its best IoU with any gt box.
    rpn_boxes = tf.reshape(rpn_boxes, [-1, 4])
    gt_boxes = tf.reshape(gt_boxes, [-1, 4])
    iou = pairwise_iou(rpn_boxes, gt_boxes)  # nxm
    max_iou = tf.reduce_max(iou, axis=1)
    # if there is no positive anchor, every IoU target is 0
    max_iou = tf.where(tf.equal(nr_pos, 0), tf.zeros_like(max_iou), max_iou)
    max_iou = tf.stop_gradient(tf.reshape(max_iou, [-1]), name='rpn_box_gt_iou')
    iou_pred = tf.reshape(tf.nn.sigmoid(iou_logits), [-1])
    iou_loss = tf.losses.huber_loss(max_iou, iou_pred, reduction=tf.losses.Reduction.NONE)

    # Same hard-example mining as for the label loss. The helper clamps the
    # count, so fewer boxes than cfg.FRCNN.BATCH_PER_IM no longer breaks top_k.
    iou_loss = _mean_of_hardest_losses(iou_loss, cfg.FRCNN.BATCH_PER_IM)
    iou_loss = tf.identity(iou_loss, name='iou_loss')

    add_moving_summary(label_loss, box_loss, iou_loss, nr_valid, nr_pos)
    return label_loss, box_loss, iou_loss
def get_mask_single_iou(curr_damage_anchors_batch, house_bboxes, iou_thr):
    """
    Flag the anchors that overlap some house box by more than `iou_thr`.

    The pairwise IoU of the two box sets is reduced with a maximum over
    axis 1, and the result is compared (strictly greater) against `iou_thr`.

    Args:
        curr_damage_anchors_batch: first box set passed to `pairwise_iou`
        house_bboxes: second box set passed to `pairwise_iou`
        iou_thr: threshold, converted to a float32 constant

    Returns:
        Boolean tensor with one entry per row of the IoU matrix.
    """
    overlaps = pairwise_iou(curr_damage_anchors_batch, house_bboxes)
    best_overlap = tf.math.reduce_max(overlaps, axis=1)
    threshold = tf.constant(iou_thr, dtype=tf.float32)
    return tf.greater(best_overlap, threshold)