def fastrcnn_inference(self, image_shape2d, rcnn_boxes, rcnn_label_logits, rcnn_box_logits):
    """
    Decode the per-category box regression outputs against the proposal
    boxes, clip them to the image and select the final detections.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: classification logits; a softmax is applied to them
        rcnn_box_logits (nx #class x 4): box regression logits. Index 0 along
            the class axis is dropped before decoding.

    Returns:
        boxes (mx4):
        labels (m): each >= 1
    """
    # Keep only entries 1.. along the class axis and pin that axis statically.
    category_box_logits = rcnn_box_logits[:, 1:, :]
    category_box_logits.set_shape([None, cfg.DATA.NUM_CATEGORY, None])

    # #proposal x #Class
    all_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')

    # Repeat every proposal once per category: #proposal x #Cat x 4
    proposals_per_category = tf.expand_dims(rcnn_boxes, 1)
    tiled_proposals = tf.tile(proposals_per_category, [1, cfg.DATA.NUM_CATEGORY, 1])

    reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    all_boxes = decode_bbox_target(category_box_logits / reg_weights, tiled_proposals)
    all_boxes = clip_boxes(all_boxes, image_shape2d, name='fastrcnn_all_boxes')

    # selected: Nx2. Each row indexes into (#proposal, #category)
    selected, selected_probs = fastrcnn_predictions(all_boxes, all_probs)
    # Only created so that a tensor named 'final_probs' exists in the graph.
    selected_probs = tf.identity(selected_probs, 'final_probs')
    final_boxes = tf.gather_nd(all_boxes, selected, name='final_boxes')
    # Category indices are 0-based here; +1 makes every label >= 1.
    category_index = selected[:, 1]
    final_labels = tf.add(category_index, 1, name='final_labels')
    return final_boxes, final_labels
def generate_rpn_proposals(boxes, scores, img_shape, pre_nms_topk, post_nms_topk=None):
    """
    Produce the RPN-predicted boxes and scores by the following steps:
    1. Pick top k1 by scores
    2. NMS them
    3. Pick top k2 by scores. Default k2 == k1, i.e. does not filter the NMS output.

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if post_nms_topk is None:
        post_nms_topk = pre_nms_topk

    # Take the k highest-scoring boxes (k is capped by the number of scores)
    # and clip them to the image.
    k = tf.minimum(pre_nms_topk, tf.size(scores))
    cand_scores, cand_indices = tf.nn.top_k(scores, k=k, sorted=False)
    cand_boxes = tf.gather(boxes, cand_indices)
    cand_boxes = clip_boxes(cand_boxes, img_shape)

    # View every box as two corner points: n x 2 x 2.
    cand_corners = tf.reshape(cand_boxes, (-1, 2, 2))
    corner_min, corner_max = tf.split(cand_corners, 2, axis=1)  # nx1x2 each
    extent = tf.squeeze(corner_max - corner_min, axis=1)
    # Keep only boxes whose both side lengths exceed cfg.RPN.MIN_SIZE.
    keep = tf.reduce_all(extent > cfg.RPN.MIN_SIZE, axis=1)  # n,
    kept_corners = tf.boolean_mask(cand_corners, keep)
    kept_scores = tf.boolean_mask(cand_scores, keep)

    # TODO not needed
    # Swap the two coordinates of each corner to get the y1x1y2x2 layout.
    nms_boxes = tf.reshape(
        tf.reverse(kept_corners, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    # NMS removes the redundant boxes.
    picked = tf.image.non_max_suppression(
        nms_boxes,
        kept_scores,
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    kept_boxes = tf.reshape(kept_corners, (-1, 4))
    proposal_boxes = tf.gather(kept_boxes, picked)
    proposal_scores = tf.gather(kept_scores, picked)
    # These are the wanted boxes and scores; the NMS output is not filtered again.
    tf.sigmoid(proposal_scores, name='probs')  # for visualization
    out_boxes = tf.stop_gradient(proposal_boxes, name='boxes')
    out_scores = tf.stop_gradient(proposal_scores, name='scores')
    return out_boxes, out_scores
def roi_heads(self, image, features, proposals, targets):
    """
    Build the Fast R-CNN head on features[0] and, when cfg.MODE_MASK is set,
    the mask branch as well.

    Args:
        image: image tensor; its shape from axis 2 onward is used as (h, w)
        features: sequence of feature maps, only features[0] is used
        proposals: object exposing `.boxes`; in training it is replaced by the
            output of sample_fast_rcnn_targets
        targets: (gt_boxes, gt_labels, ...). targets[2] is read as the gt
            masks when training with cfg.MODE_MASK.

    Returns:
        In training, the list of losses. In inference, an empty list; the
        predictions only exist as named tensors in the graph.
    """
    hw = tf.shape(image)[2:]
    backbone_feature = features[0]
    gt_boxes, gt_labels, *_ = targets

    if self.training:
        # Training uses sampled proposals; inference uses all of them.
        proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels)

    # Boxes used to crop RoIs, rescaled by the anchor stride.
    scaled_boxes = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    cropped = roi_align(backbone_feature, scaled_boxes, 14)
    # nxcx7x7 (original author's note). Also reused by the mask branch in training.
    conv5_feature = resnet_conv5(cropped, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
    pooled = GlobalAvgPooling('gap', conv5_feature, data_format='channels_first')
    label_logits, box_logits = fastrcnn_outputs('fastrcnn', pooled, cfg.DATA.NUM_CLASS)
    reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    head = FastRCNNHead(proposals, box_logits, label_logits, gt_boxes, reg_weights)

    if self.training:
        losses = head.losses()
        if not cfg.MODE_MASK:
            return losses

        # maskrcnn loss
        gt_masks = targets[2]
        # In training, the mask branch shares the same conv5 feature.
        fg_feature = tf.gather(conv5_feature, proposals.fg_inds())
        train_mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)  # #fg x #cat x 14x14
        fg_mask_targets = crop_and_resize(
            tf.expand_dims(gt_masks, 1),
            proposals.fg_boxes(),
            proposals.fg_inds_wrt_gt, 14,
            pad_border=False)  # nfg x 1x14x14
        fg_mask_targets = tf.squeeze(fg_mask_targets, 1, 'sampled_fg_mask_targets')
        fg_labels = proposals.fg_labels()
        losses.append(maskrcnn_loss(train_mask_logits, fg_labels, fg_mask_targets))
        return losses

    all_boxes = head.decoded_output_boxes()
    all_boxes = clip_boxes(all_boxes, hw, name='fastrcnn_all_boxes')
    all_scores = head.output_scores(name='fastrcnn_all_scores')
    final_boxes, final_scores, final_labels = fastrcnn_predictions(
        all_boxes, all_scores, name_scope='output')

    if cfg.MODE_MASK:
        # Re-crop features with the final boxes for the mask head.
        final_scaled_boxes = final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
        final_cropped = roi_align(backbone_feature, final_scaled_boxes, 14)
        mask_feature = resnet_conv5(final_cropped, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
        eval_mask_logits = maskrcnn_upXconv_head(
            'maskrcnn', mask_feature, cfg.DATA.NUM_CATEGORY, 0)  # #result x #cat x 14x14
        # For each result pick the mask of its predicted category (label - 1).
        row_ids = tf.range(tf.size(final_labels))
        category_ids = tf.cast(final_labels, tf.int32) - 1
        pick = tf.stack([row_ids, category_ids], axis=1)
        picked_mask_logits = tf.gather_nd(eval_mask_logits, pick)  # #resultx14x14
        tf.sigmoid(picked_mask_logits, name='output/masks')
    return []
def generate_rpn_proposals(boxes, scores, img_shape, pre_nms_topk, post_nms_topk=None):
    """
    Sample RPN proposals by the following steps:
    1. Pick top k1 by scores
    2. NMS them
    3. Pick top k2 by scores. Default k2 == k1, i.e. does not filter the NMS output.

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if post_nms_topk is None:
        post_nms_topk = pre_nms_topk

    num_pre_nms = tf.minimum(pre_nms_topk, tf.size(scores))
    top_scores, top_indices = tf.nn.top_k(scores, k=num_pre_nms, sorted=False)
    top_boxes = clip_boxes(tf.gather(boxes, top_indices), img_shape)

    # n x 2 x 2: each box as its (x1, y1) and (x2, y2) points.
    top_points = tf.reshape(top_boxes, (-1, 2, 2))
    points_x1y1, points_x2y2 = tf.split(top_points, 2, axis=1)  # nx1x2 each
    sizes = tf.squeeze(points_x2y2 - points_x1y1, axis=1)
    # A box is valid only when both of its side lengths exceed the minimum.
    is_valid = tf.reduce_all(sizes > cfg.RPN.MIN_SIZE, axis=1)  # n,
    valid_points = tf.boolean_mask(top_points, is_valid)
    valid_scores = tf.boolean_mask(top_scores, is_valid)

    # TODO not needed
    flipped_points = tf.reverse(valid_points, axis=[2])
    boxes_y1x1y2x2 = tf.reshape(flipped_points, (-1, 4), name='nms_input_boxes')
    # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
    nms_scores = tf.exp(valid_scores)
    selected = tf.image.non_max_suppression(
        boxes_y1x1y2x2,
        nms_scores,
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    valid_boxes = tf.reshape(valid_points, (-1, 4))
    final_boxes = tf.gather(valid_boxes, selected)
    final_scores = tf.gather(valid_scores, selected)
    tf.sigmoid(final_scores, name='probs')  # for visualization
    # Both outputs are detached from the gradient.
    detached_boxes = tf.stop_gradient(final_boxes, name='boxes')
    detached_scores = tf.stop_gradient(final_scores, name='scores')
    return detached_boxes, detached_scores
def fastrcnn_inference_cascade(self, image_shape2d, rcnn_boxes, rcnn_label_logits, rcnn_box_logits, stage_num):
    """
    Decode and select the detections of one cascade stage.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: classification logits; a softmax is applied to them
        rcnn_box_logits (nx #class x 4): box regression logits. Index 0 along
            the class axis is dropped before decoding.
        stage_num (int): cascade stage, one of 1, 2, 3. It selects
            cfg.CASCADERCNN.BBOX_REG_WEIGHTS_STAGE<n> and the suffix
            ('_1st', '_2nd', '_3rd') appended to the output tensor names.

    Returns:
        boxes (mx4):
        labels (m): each >= 1

    Raises:
        ValueError: if stage_num is not 1, 2 or 3.
    """
    suffix_by_stage = {1: '_1st', 2: '_2nd', 3: '_3rd'}
    # Validate up front: previously an unknown stage only failed later with an
    # UnboundLocalError, after part of the graph had already been built.
    if stage_num not in suffix_by_stage:
        raise ValueError(
            "stage_num must be 1, 2 or 3, got {!r}".format(stage_num))
    prefix = suffix_by_stage[stage_num]
    # Only the attribute of the requested stage is read from the config.
    bbox_reg_weights = getattr(
        cfg.CASCADERCNN, 'BBOX_REG_WEIGHTS_STAGE%d' % stage_num)

    # Keep only entries 1.. along the class axis and pin that axis statically.
    rcnn_box_logits = rcnn_box_logits[:, 1:, :]
    rcnn_box_logits.set_shape([None, cfg.DATA.NUM_CATEGORY, None])
    label_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')  # #proposal x #Class
    anchors = tf.tile(tf.expand_dims(rcnn_boxes, 1), [1, cfg.DATA.NUM_CATEGORY, 1])  # #proposal x #Cat x 4
    decoded_boxes = decode_bbox_target(
        rcnn_box_logits / tf.constant(bbox_reg_weights, dtype=tf.float32),
        anchors)
    decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')

    # pred_indices: Nx2. Each index into (#proposal, #category)
    # TODO add box voting after NMS
    if cfg.TEST.BOX_VOTING.ENABLED:
        # The box-voting variant returns the final boxes directly.
        final_boxes, final_probs, pred_indices = fastrcnn_predictions_box_voting(
            decoded_boxes, label_probs)
        final_probs = tf.identity(final_probs, 'final_probs' + prefix)
        final_boxes = tf.identity(final_boxes, 'final_boxes' + prefix)
    else:
        pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
        final_probs = tf.identity(final_probs, 'final_probs' + prefix)
        final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes' + prefix)
    # Category indices are 0-based here; +1 makes every label >= 1.
    final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels' + prefix)
    return final_boxes, final_labels
def run_head(self, proposals, stage):
    """
    Build the head of one cascade stage and compute its refined boxes.

    Args:
        proposals: BoxProposals
        stage: 0, 1, 2

    Returns:
        FastRCNNHead
        Nx4, updated boxes
    """
    stage_weights = cfg.CASCADE.BBOX_REG_WEIGHTS[stage]
    reg_weights = tf.constant(stage_weights, dtype=tf.float32)

    # N,C,S,S (original author's note); passed through self.scale_gradient
    # before entering the head.
    roi_feature = self.scale_gradient(self.roi_func(proposals.boxes))
    head_feature = self.fastrcnn_head_func('head', roi_feature)
    label_logits, box_logits = fastrcnn_outputs(
        'outputs', head_feature, self.num_classes,
        class_agnostic_regression=True)

    head = FastRCNNHead(proposals, box_logits, label_logits, reg_weights)
    boxes = clip_boxes(
        head.decoded_output_boxes_class_agnostic(), self.image_shape2d)
    # The refined boxes are returned detached from the gradient.
    return head, tf.stop_gradient(boxes, name='output_boxes')
def run_head(self, proposals, stage):
    """
    Build the head of one cascade stage and compute its refined boxes.

    Args:
        proposals: BoxProposals
        stage: 0, 1, 2

    Returns:
        FastRCNNHead
        Nx4, updated boxes
    """
    reg_weights = tf.constant(
        cfg.CASCADE.BBOX_REG_WEIGHTS[stage], dtype=tf.float32)

    cropped = self.roi_func(proposals.boxes)  # N,C,S,S
    cropped = self.scale_gradient(cropped)
    feature = self.fastrcnn_head_func('head', cropped)
    logits = fastrcnn_outputs(
        'outputs', feature, self.num_classes, class_agnostic_regression=True)
    label_logits, box_logits = logits

    # This variant also hands the ground-truth boxes to the head.
    stage_head = FastRCNNHead(
        proposals, box_logits, label_logits, self.gt_boxes, reg_weights)
    decoded = stage_head.decoded_output_boxes_class_agnostic()
    clipped = clip_boxes(decoded, self.image_shape2d)
    detached = tf.stop_gradient(clipped, name='output_boxes')
    return stage_head, detached
def fastrcnn_inference(self, image_shape2d, rcnn_boxes, rcnn_label_logits, rcnn_box_logits):
    """
    Decode the per-category box regression outputs against the proposal
    boxes, clip them to the image and select the final detections.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: classification logits; softmax over the last axis
            is applied to them
        rcnn_box_logits (nx #class x 4): box regression logits

    Returns:
        boxes (mx4):
        labels (m): each >= 1
        probs: the probabilities returned by fastrcnn_predictions for the
            selected detections
    """
    # Throw away the bg logit (index 0 of the class axis), so the background
    # is not included as a class from here on.
    fg_box_logits = rcnn_box_logits[:, 1:, :]
    fg_box_logits.set_shape([None, cfg.DATA.NUM_CATEGORY, None])

    # tf.nn.softmax defaults to the last axis. #proposal x #Class
    label_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')

    # #proposal x #Cat x 4
    expanded_boxes = tf.expand_dims(rcnn_boxes, 1)
    anchors = tf.tile(expanded_boxes, [1, cfg.DATA.NUM_CATEGORY, 1])

    weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    decoded = decode_bbox_target(fg_box_logits / weights, anchors)
    decoded = clip_boxes(decoded, image_shape2d, name='fastrcnn_all_boxes')

    # indices: Nx2. Each index into (#proposal, #category)
    indices, probs = fastrcnn_predictions(decoded, label_probs)
    final_probs = tf.identity(probs, 'final_probs')
    final_boxes = tf.gather_nd(decoded, indices, name='final_boxes')
    # Category indices are 0-based here; +1 makes every label >= 1.
    zero_based_labels = indices[:, 1]
    final_labels = tf.add(zero_based_labels, 1, name='final_labels')
    return final_boxes, final_labels, final_probs
def roi_heads(self, image, features, proposals, targets):
    """
    Build the Fast R-CNN head on features[0] and create the prediction
    tensors under the 'output' name scope.

    Args:
        image: image tensor; its shape from axis 2 onward is used as (h, w)
        features: sequence of feature maps, only features[0] is used
        proposals: object exposing `.boxes`
        targets: (gt_boxes, gt_labels, ...); only gt_boxes is used, it is
            passed to FastRCNNHead

    Returns:
        None. The results only exist as named tensors in the graph.
    """
    hw = tf.shape(image)[2:]
    backbone_feature = features[0]
    gt_boxes, gt_labels, *_ = targets

    # Proposal boxes rescaled by the anchor stride before cropping.
    scaled_boxes = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    cropped = roi_align(backbone_feature, scaled_boxes, 14)
    conv5_feature = resnet_conv5(cropped, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
    pooled = GlobalAvgPooling('gap', conv5_feature, data_format='channels_first')
    label_logits, box_logits = fastrcnn_outputs(
        'fastrcnn', pooled, cfg.DATA.NUM_CLASS)

    reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    head = FastRCNNHead(proposals, box_logits, label_logits, gt_boxes, reg_weights)

    all_boxes = clip_boxes(
        head.decoded_output_boxes(), hw, name='fastrcnn_all_boxes')
    all_scores = head.output_scores(name='fastrcnn_all_scores')
    # Called only for the tensors it creates; its return value is not used.
    fastrcnn_predictions(all_boxes, all_scores, name_scope='output')
def run_head(self, proposals, stage):
    """
    Build the head of one cascade stage and compute its refined boxes.

    If the instance has a non-None `roi_func_extra`, its output for the same
    boxes is concatenated to the pooled features before the head is applied.

    Args:
        proposals: BoxProposals
        stage: 0, 1, 2

    Returns:
        FastRCNNHead
        Nx4, updated boxes
    """
    # Regression weights of this cascade stage, as a float constant.
    reg_weights = tf.constant(cfg.CASCADE.BBOX_REG_WEIGHTS[stage], dtype=tf.float32)
    pooled_feature = self.roi_func(proposals.boxes)  # N,C,S,S

    # The original referenced an undefined bare name `roi_func_extra` and the
    # non-existent `tf.concate`; read the attribute from the instance instead.
    roi_func_extra = getattr(self, 'roi_func_extra', None)
    if roi_func_extra is not None:
        # FIXME (kept from the original): the features are joined along
        # axis 0. NOTE(review): with an N,C,S,S layout that grows N rather
        # than C -- confirm that this is the intended axis.
        pooled_feature = tf.concat(
            [pooled_feature, roi_func_extra(proposals.boxes)], 0)

    pooled_feature = self.scale_gradient(pooled_feature)
    head_feature = self.fastrcnn_head_func('head', pooled_feature)
    # changed by Paul: the output layer is named 'outputs_new'.
    label_logits, box_logits = fastrcnn_outputs(
        'outputs_new', head_feature, self.num_classes,
        class_agnostic_regression=True)
    head = FastRCNNHead(proposals, box_logits, label_logits, self.gt_boxes, reg_weights)

    refined_boxes = head.decoded_output_boxes_class_agnostic()
    refined_boxes = clip_boxes(refined_boxes, self.image_shape2d)
    # tf.stop_gradient takes the tensor and an op name; the refined boxes are
    # returned detached from the gradient.
    return head, tf.stop_gradient(refined_boxes, name='output_boxes')
def roi_heads(self, image, features, proposals, targets):
    """
    Build the box head (plain Fast R-CNN or cascade, chosen by
    cfg.FPN.CASCADE) on the multi-level features and, when cfg.MODE_MASK is
    set, the mask branch as well.

    Args:
        image: image tensor; its shape from axis 2 onward is used as (h, w)
        features: exactly 5 feature maps ("P23456"); the first four are used
            for RoI align
        proposals: object exposing `.boxes`; in training it is replaced by the
            output of sample_fast_rcnn_targets
        targets: (gt_boxes, gt_labels, ...). targets[2] is read as the gt
            masks when training with cfg.MODE_MASK.

    Returns:
        In training, the list of losses. In inference, an empty list; the
        predictions only exist as named tensors in the graph.
    """
    hw = tf.shape(image)[2:]
    assert len(features) == 5, "Features have to be P23456!"
    gt_boxes, gt_labels, *_ = targets

    if self.training:
        proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels)

    # The head function is looked up by its configured name.
    box_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    if cfg.FPN.CASCADE:
        def align_rois(boxes):
            return multilevel_roi_align(features[:4], boxes, 7)

        head = CascadeRCNNHead(
            proposals, align_rois, box_head_func,
            (gt_boxes, gt_labels), hw, cfg.DATA.NUM_CLASS)
    else:
        box_roi_feature = multilevel_roi_align(features[:4], proposals.boxes, 7)
        box_feature = box_head_func('fastrcnn', box_roi_feature)
        label_logits, box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', box_feature, cfg.DATA.NUM_CLASS)
        reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
        head = FastRCNNHead(proposals, box_logits, label_logits, gt_boxes, reg_weights)

    if self.training:
        losses = head.losses()
        if not cfg.MODE_MASK:
            return losses

        # maskrcnn loss
        gt_masks = targets[2]
        fg_roi_feature = multilevel_roi_align(
            features[:4], proposals.fg_boxes(), 14,
            name_scope='multilevel_roi_align_mask')
        train_mask_head = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
        train_mask_logits = train_mask_head(
            'maskrcnn', fg_roi_feature, cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
        fg_mask_targets = crop_and_resize(
            tf.expand_dims(gt_masks, 1),
            proposals.fg_boxes(),
            proposals.fg_inds_wrt_gt, 28,
            pad_border=False)  # fg x 1x28x28
        fg_mask_targets = tf.squeeze(fg_mask_targets, 1, 'sampled_fg_mask_targets')
        fg_labels = proposals.fg_labels()
        losses.append(maskrcnn_loss(train_mask_logits, fg_labels, fg_mask_targets))
        return losses

    all_boxes = head.decoded_output_boxes()
    all_boxes = clip_boxes(all_boxes, hw, name='fastrcnn_all_boxes')
    all_scores = head.output_scores(name='fastrcnn_all_scores')
    final_boxes, final_scores, final_labels = fastrcnn_predictions(
        all_boxes, all_scores, name_scope='output')

    if cfg.MODE_MASK:
        # Cascade inference needs roi transform with refined boxes.
        final_roi_feature = multilevel_roi_align(features[:4], final_boxes, 14)
        eval_mask_head = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
        eval_mask_logits = eval_mask_head(
            'maskrcnn', final_roi_feature, cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
        # For each result pick the mask of its predicted category (label - 1).
        row_ids = tf.range(tf.size(final_labels))
        category_ids = tf.cast(final_labels, tf.int32) - 1
        pick = tf.stack([row_ids, category_ids], axis=1)
        picked_mask_logits = tf.gather_nd(eval_mask_logits, pick)  # #resultx28x28
        tf.sigmoid(picked_mask_logits, name='output/masks')
    return []
def build_graph(self, *inputs):
    """
    Build the FPN detection graph: backbone, FPN, multi-level RPN, the box
    head (plain or cascade, chosen by cfg.FPN.CASCADE) and optionally the
    mask branch (cfg.MODE_MASK).

    Args:
        *inputs: input tensors, given in the same order as self.input_names.

    Returns:
        The total cost tensor in training. Nothing in inference; the
        predictions only exist as named tensors in the graph.
    """
    inputs = dict(zip(self.input_names, inputs))
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    all_anchors_fpn = get_all_anchors_fpn()
    # The per-level inputs are keyed from 'lvl2' upward, hence i + 2.
    multilevel_anchors = [
        RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}'.format(i + 2)],
            inputs['anchor_boxes_lvl{}'.format(i + 2)])
        for i, level_anchors in enumerate(all_anchors_fpn)
    ]

    image = self.preprocess(inputs['image'])  # 1CHW
    image_shape2d = tf.shape(image)[2:]  # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    rpn_outputs = [
        rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for pi in p23456
    ]
    multilevel_label_logits = [k[0] for k in rpn_outputs]
    multilevel_box_logits = [k[1] for k in rpn_outputs]

    proposal_boxes, proposal_scores = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits,
        multilevel_box_logits, image_shape2d)

    gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels']
    if is_training:
        # Training samples proposals against the ground truth ...
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
    else:
        # ... inference wraps all of them.
        proposals = BoxProposals(proposal_boxes)

    # The head function is looked up by its configured name.
    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    if not cfg.FPN.CASCADE:
        roi_feature_fastrcnn = multilevel_roi_align(
            p23456[:4], proposals.boxes, 7)
        head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn)
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS)
        fastrcnn_head = FastRCNNHead(
            proposals, fastrcnn_box_logits, fastrcnn_label_logits,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))
    else:
        def roi_func(boxes):
            return multilevel_roi_align(p23456[:4], boxes, 7)

        fastrcnn_head = CascadeRCNNHead(
            proposals, roi_func, fastrcnn_head_func,
            image_shape2d, cfg.DATA.NUM_CLASS)

    if is_training:
        all_losses = []
        all_losses.extend(
            multilevel_rpn_losses(multilevel_anchors,
                                  multilevel_label_logits,
                                  multilevel_box_logits))
        all_losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], proposals.fg_boxes(), 14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(inputs['gt_masks'], 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(),
                              target_masks_for_fg))

        # Weight decay is applied to variables matching '.*/W'.
        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        all_losses.append(wd_cost)

        total_cost = tf.add_n(all_losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d,
                                   name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(
            name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # For each result pick the mask of its predicted category
            # (label - 1). tf.cast replaces the deprecated tf.to_int32.
            indices = tf.stack([
                tf.range(tf.size(final_labels)),
                tf.cast(final_labels, tf.int32) - 1
            ], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='output/masks')
def build_graph(self, *inputs):
    """
    Build the single-feature-map detection graph: backbone, RPN, the
    Fast R-CNN head and optionally the mask branch (cfg.MODE_MASK).

    Args:
        *inputs: input tensors, given in the same order as self.input_names.

    Returns:
        The total cost tensor in training. Nothing in inference; the
        predictions only exist as named tensors in the graph.
    """
    # TODO need to make tensorpack handles dict better
    inputs = dict(zip(self.input_names, inputs))
    is_training = get_current_tower_context().is_training
    image = self.preprocess(inputs['image'])  # 1CHW

    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'],
                         inputs['anchor_boxes'])
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]  # h,w
    pred_boxes_decoded = anchors.decode_logits(
        rpn_box_logits)  # fHxfWxNAx4, floatbox
    # The pre/post NMS top-k limits differ between training and testing.
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
        cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels']
    if is_training:
        # sample proposal boxes in training
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        proposals = BoxProposals(proposal_boxes)

    boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(
        roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])  # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn,
                                   data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    fastrcnn_head = FastRCNNHead(
        proposals, fastrcnn_box_logits, fastrcnn_label_logits,
        tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if is_training:
        all_losses = []
        # rpn loss
        all_losses.extend(
            rpn_losses(anchors.gt_labels, anchors.encoded_gt_boxes(),
                       rpn_label_logits, rpn_box_logits))

        # fastrcnn loss
        all_losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY,
                num_convs=0)  # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(inputs['gt_masks'], 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(),
                              target_masks_for_fg))

        # Weight decay is applied to variables matching '.*/W'.
        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        all_losses.append(wd_cost)

        total_cost = tf.add_n(all_losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d,
                                   name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(
            name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Re-crop features with the final boxes for the mask head.
            roi_resized = roi_align(
                featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(
                roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY,
                0)  # #result x #cat x 14x14
            # For each result pick the mask of its predicted category
            # (label - 1). tf.cast replaces the deprecated tf.to_int32.
            indices = tf.stack([
                tf.range(tf.size(final_labels)),
                tf.cast(final_labels, tf.int32) - 1
            ], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx14x14
            tf.sigmoid(final_mask_logits, name='output/masks')
def roi_heads(self, image, ref_features, ref_box, features, proposals, targets,
              hard_negative_features=None, hard_positive_features=None,
              hard_positive_ious=None, hard_positive_gt_boxes=None,
              hard_positive_jitter_boxes=None, precomputed_ref_features=None,
              extra_feats=None):
    """
    Build the cascade box head on RoI features that are fused with the
    features of a reference box and, when cfg.MODE_MASK is set, the mask
    branch as well.

    For every set of boxes the RoI-aligned features are concatenated (axis 1)
    with the tiled reference features and reduced by a 1x1 convolution named
    'conv_reduce' with 256 outputs.

    Args:
        image: image tensor; its shape from axis 2 onward is used as (h, w)
        ref_features: reference feature maps; the first four are RoI-aligned
            with ref_box unless precomputed_ref_features is given
        ref_box: the reference box
        features: exactly 5 feature maps ("P23456"); the first four are used
            for RoI align
        proposals: object exposing `.boxes`; in training it is replaced by the
            output of sample_fast_rcnn_targets
        targets: (gt_boxes, gt_labels, ...). targets[2] is read as the gt
            masks when training with cfg.MODE_MASK.
        hard_negative_features, hard_positive_features, hard_positive_ious,
        hard_positive_gt_boxes, hard_positive_jitter_boxes: forwarded to
            CascadeRCNNHeadWithHardExamples when cfg.MODE_HARD_MINING is set
            in training; unused otherwise
        precomputed_ref_features: if given, used instead of RoI-aligning
            ref_features
        extra_feats: extra features fused the same way as the reference
            features; required when the cfg.MODE_EXTRA_FEATURES head is built

    Returns:
        In training, the list of losses. In inference, an empty list; the
        predictions only exist as named tensors in the graph.

    Raises:
        ValueError: if the cfg.MODE_EXTRA_FEATURES head is to be built but
            extra_feats is None.
    """
    image_shape2d = tf.shape(image)[2:]  # h,w
    assert len(features) == 5, "Features have to be P23456!"
    gt_boxes, gt_labels, *_ = targets

    if self.training:
        proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels)

    # The head function is looked up by its configured name.
    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)

    if precomputed_ref_features is None:
        roi_aligned_ref_features = multilevel_roi_align(
            ref_features[:4], ref_box[tf.newaxis], 7)
    else:
        roi_aligned_ref_features = precomputed_ref_features[tf.newaxis]

    # extra_feats defaults to None; the original subscripted it
    # unconditionally, which raised a TypeError for every caller that did not
    # pass extra features.
    if extra_feats is None:
        roi_aligned_extra_features = None
    else:
        roi_aligned_extra_features = extra_feats[tf.newaxis]

    if cfg.MODE_SHARED_CONV_REDUCE:
        scope = tf.get_variable_scope()
    else:
        scope = ""

    assert cfg.FPN.CASCADE

    def make_roi_func(conditioning_features):
        """Return an roi function that fuses RoI features with `conditioning_features`."""
        def fused_roi_func(boxes, already_aligned_features=None):
            if already_aligned_features is None:
                aligned_features = multilevel_roi_align(features[:4], boxes, 7)
            else:
                # for hard example mining
                aligned_features = already_aligned_features
            # Repeat the conditioning features once per RoI, then join on axis 1.
            tiled = tf.tile(conditioning_features,
                            [tf.shape(aligned_features)[0], 1, 1, 1])
            concat_features = tf.concat((tiled, aligned_features), axis=1)

            # The initializer distribution name depends on the TF version.
            with argscope(Conv2D, data_format='channels_first',
                          kernel_initializer=tf.variance_scaling_initializer(
                              scale=2.0, mode='fan_out',
                              distribution='untruncated_normal'
                              if get_tf_version_tuple() >= (1, 12) else 'normal')):
                # AUTO_REUSE: every call goes through the same 'conv_reduce'
                # name in `scope`.
                with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
                    reduced_features = Conv2D(
                        'conv_reduce', concat_features, 256, 1, activation=None)
            return reduced_features
        return fused_roi_func

    roi_func = make_roi_func(roi_aligned_ref_features)

    if cfg.MODE_HARD_MINING and self.training:
        fastrcnn_head = CascadeRCNNHeadWithHardExamples(
            proposals, roi_func, fastrcnn_head_func,
            (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CLASS,
            hard_negative_features, hard_positive_features,
            cfg.HARD_NEGATIVE_LOSS_SCALING_FACTOR,
            cfg.HARD_POSITIVE_LOSS_SCALING_FACTOR,
            hard_positive_ious, hard_positive_gt_boxes,
            hard_positive_jitter_boxes)
    elif cfg.MODE_EXTRA_FEATURES:
        if roi_aligned_extra_features is None:
            raise ValueError(
                "cfg.MODE_EXTRA_FEATURES is enabled but extra_feats was not provided")
        roi_func_extra = make_roi_func(roi_aligned_extra_features)
        fastrcnn_head = CascadeRCNNHead(
            proposals, roi_func, fastrcnn_head_func,
            (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CLASS,
            roi_func_extra)
    else:
        fastrcnn_head = CascadeRCNNHead(
            proposals, roi_func, fastrcnn_head_func,
            (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CLASS)

    if cfg.EXTRACT_GT_FEATURES:
        # get boxes and features for each of the three cascade stages!
        b0 = proposals.boxes
        b1, b2, _ = fastrcnn_head._cascade_boxes
        f0 = multilevel_roi_align(features[:4], b0, 7)
        f1 = multilevel_roi_align(features[:4], b1, 7)
        f2 = multilevel_roi_align(features[:4], b2, 7)
        # Only created so that these named tensors exist in the graph.
        tf.concat([b0, b1, b2], axis=0, name="boxes_for_extraction")
        tf.concat([f0, f1, f2], axis=0, name="features_for_extraction")

    if self.training:
        all_losses = fastrcnn_head.losses()

        if cfg.MODE_MASK:
            gt_masks = targets[2]
            # maskrcnn loss
            roi_feature_maskrcnn = multilevel_roi_align(
                features[:4], proposals.fg_boxes(), 14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(),
                              target_masks_for_fg))

        if cfg.MEASURE_IOU_DURING_TRAINING:
            decoded_boxes = fastrcnn_head.decoded_output_boxes()
            decoded_boxes = clip_boxes(decoded_boxes, image_shape2d,
                                       name='fastrcnn_all_boxes')
            label_scores = fastrcnn_head.output_scores(
                name='fastrcnn_all_scores')
            final_boxes, final_scores, final_labels = fastrcnn_predictions(
                decoded_boxes, label_scores, name_scope='output_train')
            # if predictions are empty, this might break...
            # to prevent, stack dummy box
            boxes_for_iou = tf.concat([
                final_boxes[:1],
                tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
            ], axis=0)
            from examples.FasterRCNN.utils.box_ops import pairwise_iou
            # IoU between the first gt box and the first entry of boxes_for_iou.
            iou_at_1 = tf.identity(
                pairwise_iou(gt_boxes[:1], boxes_for_iou)[0, 0],
                name="train_iou_at_1")
            add_moving_summary(iou_at_1)
        return all_losses
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d,
                                   name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(
            name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(
                features[:4], final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn,
                cfg.DATA.NUM_CATEGORY)  # #fg x #cat x 28 x 28
            # For each result pick the mask of its predicted category (label - 1).
            indices = tf.stack([
                tf.range(tf.size(final_labels)),
                tf.cast(final_labels, tf.int32) - 1
            ], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)  # #resultx28x28
            tf.sigmoid(final_mask_logits, name='output/masks')
        return []
def build_graph(self, *inputs):
    """Build the FPN-based detection graph for the current tower.

    Pipeline, in the order the ops are created:
      1. ResNet-FPN backbone on the preprocessed image.
      2. One RPN head per feature level, then multi-level proposal generation.
      3. A Fast R-CNN head (plain or cascade, chosen by ``cfg.FPN.CASCADE``).
      4. Either the training losses or the inference outputs, depending on
         the tower context.

    Args:
        *inputs: positional input tensors; they are matched by position with
            ``self.input_names`` and accessed by name afterwards. The names
            read here are ``image``, ``gt_boxes``, ``gt_labels``,
            ``anchor_labels_lvl{N}`` / ``anchor_boxes_lvl{N}`` (N starting
            at 2) and, when ``cfg.MODE_MASK`` is set in training, ``gt_masks``.

    Returns:
        In training: the ``total_cost`` tensor, i.e. ``tf.add_n`` over the RPN
        losses, the Fast R-CNN head losses, the optional mask loss and the
        weight-decay cost.
        In inference: nothing (``None``). The results are only exposed as
        named graph ops (``fastrcnn_all_boxes``, ``fastrcnn_all_scores``, the
        ``output`` name scope and, with masks enabled, ``output/masks``).
    """
    inputs = dict(zip(self.input_names, inputs))
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training

    all_anchors_fpn = get_all_anchors_fpn()
    # The per-level input names are numbered from 2 upwards.
    multilevel_anchors = [
        RPNAnchors(
            level_anchors,
            inputs['anchor_labels_lvl{}'.format(level)],
            inputs['anchor_boxes_lvl{}'.format(level)])
        for level, level_anchors in enumerate(all_anchors_fpn, start=2)]

    image = self.preprocess(inputs['image'])     # 1CHW
    image_shape2d = tf.shape(image)[2:]     # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    # NOTE(review): the return value is discarded, so this presumably adjusts
    # p23456 / multilevel_anchors in place -- confirm against its definition.
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    rpn_outputs = [
        rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
        for pi in p23456]
    multilevel_label_logits = [k[0] for k in rpn_outputs]
    multilevel_box_logits = [k[1] for k in rpn_outputs]

    proposal_boxes, proposal_scores = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits,
        multilevel_box_logits, image_shape2d)

    gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels']
    if is_training:
        # Training samples proposals against the ground truth.
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
    else:
        # Inference wraps every generated proposal.
        proposals = BoxProposals(proposal_boxes)

    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    if not cfg.FPN.CASCADE:
        # Only the first four feature levels are used for RoI align.
        roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], proposals.boxes, 7)

        head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn)
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
            'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS)
        fastrcnn_head = FastRCNNHead(
            proposals, fastrcnn_box_logits, fastrcnn_label_logits,
            tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))
    else:
        def roi_func(boxes):
            return multilevel_roi_align(p23456[:4], boxes, 7)

        fastrcnn_head = CascadeRCNNHead(
            proposals, roi_func, fastrcnn_head_func, image_shape2d, cfg.DATA.NUM_CLASS)

    if is_training:
        all_losses = []
        all_losses.extend(multilevel_rpn_losses(
            multilevel_anchors, multilevel_label_logits, multilevel_box_logits))

        all_losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], proposals.fg_boxes(), 14,
                name_scope='multilevel_roi_align_mask')
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY)   # #fg x #cat x 28 x 28

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(inputs['gt_masks'], 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg))

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        all_losses.append(wd_cost)

        total_cost = tf.add_n(all_losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
            maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = maskrcnn_head_func(
                'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY)   # #result x #cat x 28 x 28
            # Pair each result row with its predicted category; labels are
            # shifted by one to index the category axis of mask_logits.
            # tf.cast replaces the deprecated tf.to_int32.
            indices = tf.stack(
                [tf.range(tf.size(final_labels)), tf.cast(final_labels, tf.int32) - 1],
                axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx28x28
            tf.sigmoid(final_mask_logits, name='output/masks')
def build_graph(self, *inputs):
    """Build the C4-backbone detection graph for the current tower.

    Pipeline, in the order the ops are created:
      1. ResNet C4 backbone on the preprocessed image, followed by one RPN head.
      2. RPN proposal generation (top-k, NMS), using the train or test
         top-k settings depending on the tower context.
      3. RoI align on the C4 feature map, ``resnet_conv5`` and the Fast R-CNN
         outputs.
      4. Either the training losses or the inference outputs.

    Args:
        *inputs: positional input tensors; they are matched by position with
            ``self.input_names`` and accessed by name afterwards. The names
            read here are ``image``, ``anchor_labels``, ``anchor_boxes``,
            ``gt_boxes``, ``gt_labels`` and, when ``cfg.MODE_MASK`` is set in
            training, ``gt_masks``.

    Returns:
        In training: the ``total_cost`` tensor, i.e. ``tf.add_n`` over the RPN
        losses, the Fast R-CNN head losses, the optional mask loss and the
        weight-decay cost.
        In inference: nothing (``None``). The results are only exposed as
        named graph ops (``fastrcnn_all_boxes``, ``fastrcnn_all_scores``, the
        ``output`` name scope and, with masks enabled, ``output/masks``).
    """
    inputs = dict(zip(self.input_names, inputs))
    is_training = get_current_tower_context().is_training
    image = self.preprocess(inputs['image'])     # 1CHW

    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head(
        'rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes'])
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox

    # Training and testing use separate pre-/post-NMS proposal budgets.
    if is_training:
        pre_nms_topk = cfg.RPN.TRAIN_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TRAIN_POST_NMS_TOPK
    else:
        pre_nms_topk = cfg.RPN.TEST_PRE_NMS_TOPK
        post_nms_topk = cfg.RPN.TEST_POST_NMS_TOPK
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        pre_nms_topk,
        post_nms_topk)

    gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels']
    if is_training:
        # sample proposal boxes in training
        proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        proposals = BoxProposals(proposal_boxes)

    # Rescale boxes by the anchor stride before cropping from the feature map.
    boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])    # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
        'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    fastrcnn_head = FastRCNNHead(
        proposals, fastrcnn_box_logits, fastrcnn_label_logits,
        tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32))

    if is_training:
        all_losses = []
        # rpn loss
        all_losses.extend(rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits))

        # fastrcnn loss
        all_losses.extend(fastrcnn_head.losses())

        if cfg.MODE_MASK:
            # maskrcnn loss
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)   # #fg x #cat x 14x14

            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(inputs['gt_masks'], 1),
                proposals.fg_boxes(),
                proposals.fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(
                target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            all_losses.append(
                maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg))

        wd_cost = regularize_cost(
            '.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')
        all_losses.append(wd_cost)

        total_cost = tf.add_n(all_losses, 'total_cost')
        add_moving_summary(total_cost, wd_cost)
        return total_cost
    else:
        decoded_boxes = fastrcnn_head.decoded_output_boxes()
        decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes')
        label_scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
        final_boxes, final_scores, final_labels = fastrcnn_predictions(
            decoded_boxes, label_scores, name_scope='output')

        if cfg.MODE_MASK:
            # Inference re-crops features with the final boxes rather than
            # reusing the proposal features.
            roi_resized = roi_align(
                featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
            # Pair each result row with its predicted category; labels are
            # shifted by one to index the category axis of mask_logits.
            # tf.cast replaces the deprecated tf.to_int32.
            indices = tf.stack(
                [tf.range(tf.size(final_labels)), tf.cast(final_labels, tf.int32) - 1],
                axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            tf.sigmoid(final_mask_logits, name='output/masks')