def run_head(self, proposals, stage): """ Args: proposals: BoxProposals stage: 0, 1, 2 Returns: FastRCNNHead Nx4, updated boxes """ reg_weights = tf.constant(cfg.CASCADE.BBOX_REG_WEIGHTS[stage], dtype=tf.float32) pooled_feature = self.roi_func(proposals.boxes) # N,C,S,S pooled_feature = self.scale_gradient(pooled_feature) head_feature = self.fastrcnn_head_func('head', pooled_feature) label_logits, box_logits = fastrcnn_outputs( 'outputs', head_feature, self.num_classes, class_agnostic_regression=True) head = FastRCNNHead(proposals, box_logits, label_logits, reg_weights) refined_boxes = head.decoded_output_boxes_class_agnostic() refined_boxes = clip_boxes(refined_boxes, self.image_shape2d) return head, tf.stop_gradient(refined_boxes, name='output_boxes')
def build_graph(self, *inputs): inputs = dict(zip(self.input_names, inputs)) image = self.preprocess(inputs['image']) # 1CHW # build resnet c4 featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3]) rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR) # HEAD_DIM = 1024, NUM_ANCHOR = 15 # rpn_label_logits: fHxfWxNA # rpn_box_logits: fHxfWxNAx4 anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes']) # anchor_boxes is Groundtruth boxes corresponding to each anchor anchors = anchors.narrow_to(featuremap) # ?? image_shape2d = tf.shape(image)[2:] # h,w pred_boxes_decoded = anchors.decode_logits( rpn_box_logits) # fHxfWxNAx4, floatbox # ProposalCreator (get the topk proposals) proposal_boxes, proposal_scores = generate_rpn_proposals( tf.reshape(pred_boxes_decoded, [-1, 4]), tf.reshape(rpn_label_logits, [-1]), image_shape2d, cfg.RPN.TEST_PRE_NMS_TOPK, # 2000 cfg.RPN.TEST_POST_NMS_TOPK) # 1000 x, y, w, h = tf.split(inputs['gt_boxes'], 4, axis=1) gt_boxes = tf.concat([x, y, x + w, y + h], axis=1) boxes_on_featuremap = gt_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE ) # ANCHOR_STRIDE = 16 roi_resized = roi_align(featuremap, boxes_on_featuremap, 14) feature_fastrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1] ) # nxcx7x7 # RESNET_NUM_BLOCK = [3, 4, 6, 3] # Keep C5 feature to be shared with mask branch feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first') # ?? fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS) # ?? # Returns: # cls_logits: Tensor("fastrcnn/class/output:0", shape=(n, 81), dtype=float32) # reg_logits: Tensor("fastrcnn/output_box:0", shape=(n, 81, 4), dtype=float32) # ------------------Fastrcnn_Head------------------------ fastrcnn_head = FastRCNNHead( proposal_boxes, fastrcnn_box_logits, fastrcnn_label_logits, # tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) # [10., 10., 5., 5.] decoded_boxes = fastrcnn_head.decoded_output_boxes( ) # pre_boxes_on_images decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_scores') # class scores, summed to one for each box. final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output') feature_maskrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1] ) # nxcx7x7 # RESNET_NUM_BLOCK = [3, 4, 6, 3] # Keep C5 feature to be shared with mask branch mask_logits = maskrcnn_upXconv_head('maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0) # #result x #cat x 14x14 # Assume only person here person_labels = tf.ones_like(inputs['male']) indices = tf.stack( [tf.range(tf.size(person_labels)), tf.to_int32(person_labels) - 1], axis=1) final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx14x14 final_mask_logits = tf.sigmoid(final_mask_logits, name='output/masks') mask = False if mask: final_mask_logits_expand = tf.expand_dims(final_mask_logits, axis=1) final_mask_logits_tile = tf.tile(final_mask_logits_expand, multiples=[1, 1024, 1, 1]) fg_roi_resized = tf.where(final_mask_logits_tile >= 0.5, roi_resized, roi_resized * 1.0) feature_attrs = resnet_conv5_attr( fg_roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]) else: feature_attrs = resnet_conv5_attr( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]) feature_attrs_gap = GlobalAvgPooling('gap', feature_attrs, data_format='channels_first') attrs_logits = attrs_head('attrs', feature_attrs_gap) attrs_loss = all_attrs_losses(inputs, attrs_logits, attr_losses_v2) all_losses = [attrs_loss] # male loss wd_cost = regularize_cost('.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost') all_losses.append(wd_cost) total_cost = tf.add_n(all_losses, 'total_cost') add_moving_summary(wd_cost, total_cost) return total_cost
def build_graph(self, *inputs): inputs = dict(zip(self.input_names, inputs)) num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES) assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level is_training = get_current_tower_context().is_training all_anchors_fpn = get_all_anchors_fpn() multilevel_anchors = [ RPNAnchors(all_anchors_fpn[i], inputs['anchor_labels_lvl{}'.format(i + 2)], inputs['anchor_boxes_lvl{}'.format(i + 2)]) for i in range(len(all_anchors_fpn)) ] image = self.preprocess(inputs['image']) # 1CHW image_shape2d = tf.shape(image)[2:] # h,w c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK) p23456 = fpn_model('fpn', c2345) self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors) # Multi-Level RPN Proposals rpn_outputs = [ rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS)) for pi in p23456 ] multilevel_label_logits = [k[0] for k in rpn_outputs] multilevel_box_logits = [k[1] for k in rpn_outputs] proposal_boxes, proposal_scores = generate_fpn_proposals( multilevel_anchors, multilevel_label_logits, multilevel_box_logits, image_shape2d) gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels'] if is_training: proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels) else: proposals = BoxProposals(proposal_boxes) fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC) if not cfg.FPN.CASCADE: roi_feature_fastrcnn = multilevel_roi_align( p23456[:4], proposals.boxes, 7) head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn) fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CLASS) fastrcnn_head = FastRCNNHead( proposals, fastrcnn_box_logits, fastrcnn_label_logits, tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) else: def roi_func(boxes): return multilevel_roi_align(p23456[:4], boxes, 7) fastrcnn_head = CascadeRCNNHead(proposals, roi_func, fastrcnn_head_func, image_shape2d, cfg.DATA.NUM_CLASS) if is_training: all_losses = [] all_losses.extend( multilevel_rpn_losses(multilevel_anchors, multilevel_label_logits, multilevel_box_logits)) all_losses.extend(fastrcnn_head.losses()) if cfg.MODE_MASK: # maskrcnn loss roi_feature_maskrcnn = multilevel_roi_align( p23456[:4], proposals.fg_boxes(), 14, name_scope='multilevel_roi_align_mask') maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC) mask_logits = maskrcnn_head_func( 'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28 target_masks_for_fg = crop_and_resize( tf.expand_dims(inputs['gt_masks'], 1), proposals.fg_boxes(), proposals.fg_inds_wrt_gt, 28, pad_border=False) # fg x 1x28x28 target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets') all_losses.append( maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg)) wd_cost = regularize_cost('.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost') all_losses.append(wd_cost) total_cost = tf.add_n(all_losses, 'total_cost') add_moving_summary(total_cost, wd_cost) return total_cost else: decoded_boxes = fastrcnn_head.decoded_output_boxes() decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = fastrcnn_head.output_scores( name='fastrcnn_all_scores') final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output') if cfg.MODE_MASK: # Cascade inference needs roi transform with refined boxes. roi_feature_maskrcnn = multilevel_roi_align( p23456[:4], final_boxes, 14) maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC) mask_logits = maskrcnn_head_func( 'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28 indices = tf.stack([ tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1 ], axis=1) final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx28x28 tf.sigmoid(final_mask_logits, name='output/masks')
def build_graph(self, *inputs): inputs = dict(zip(self.input_names, inputs)) is_training = get_current_tower_context().is_training image = self.preprocess(inputs['image']) # 1CHW featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3]) rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR) anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes']) anchors = anchors.narrow_to(featuremap) image_shape2d = tf.shape(image)[2:] # h,w pred_boxes_decoded = anchors.decode_logits( rpn_box_logits) # fHxfWxNAx4, floatbox proposal_boxes, proposal_scores = generate_rpn_proposals( tf.reshape(pred_boxes_decoded, [-1, 4]), tf.reshape(rpn_label_logits, [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK) gt_boxes, gt_labels = inputs['gt_boxes'], inputs['gt_labels'] if is_training: # sample proposal boxes in training proposals = sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels) else: # The boxes to be used to crop RoIs. # Use all proposal boxes in inference proposals = BoxProposals(proposal_boxes) boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE) roi_resized = roi_align(featuremap, boxes_on_featuremap, 14) feature_fastrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]) # nxcx7x7 # Keep C5 feature to be shared with mask branch feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first') fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS) fastrcnn_head = FastRCNNHead( proposals, fastrcnn_box_logits, fastrcnn_label_logits, tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) if is_training: all_losses = [] # rpn loss all_losses.extend( rpn_losses(anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits)) # fastrcnn loss all_losses.extend(fastrcnn_head.losses()) if cfg.MODE_MASK: # maskrcnn loss # In training, mask branch shares the same C5 feature. fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds()) mask_logits = maskrcnn_upXconv_head( 'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0) # #fg x #cat x 14x14 target_masks_for_fg = crop_and_resize( tf.expand_dims(inputs['gt_masks'], 1), proposals.fg_boxes(), proposals.fg_inds_wrt_gt, 14, pad_border=False) # nfg x 1x14x14 target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets') all_losses.append( maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg)) wd_cost = regularize_cost('.*/W', l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost') all_losses.append(wd_cost) total_cost = tf.add_n(all_losses, 'total_cost') add_moving_summary(total_cost, wd_cost) return total_cost else: decoded_boxes = fastrcnn_head.decoded_output_boxes() decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = fastrcnn_head.output_scores( name='fastrcnn_all_scores') final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output') if cfg.MODE_MASK: roi_resized = roi_align( featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14) feature_maskrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1]) mask_logits = maskrcnn_upXconv_head( 'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0) # #result x #cat x 14x14 indices = tf.stack([ tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1 ], axis=1) final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx14x14 tf.sigmoid(final_mask_logits, name='output/masks')
def build_graph(self, *inputs): inputs = dict(zip(self.input_names, inputs)) image = self.preprocess(inputs['image']) # 1CHW # build resnet c4 featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3]) # build rpn rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR) # HEAD_DIM = 1024, NUM_ANCHOR = 15 # rpn_label_logits: fHxfWxNA # rpn_box_logits: fHxfWxNAx4 anchors = RPNAnchors(get_all_anchors(), inputs['anchor_labels'], inputs['anchor_boxes']) # anchor_boxes is Groundtruth boxes corresponding to each anchor anchors = anchors.narrow_to(featuremap) image_shape2d = tf.shape(image)[2:] # h,w pred_boxes_decoded = anchors.decode_logits( rpn_box_logits) # fHxfWxNAx4, floatbox # ProposalCreator (get the topk proposals) proposal_boxes, proposal_scores = generate_rpn_proposals( tf.reshape(pred_boxes_decoded, [-1, 4]), tf.reshape(rpn_label_logits, [-1]), image_shape2d, cfg.RPN.TEST_PRE_NMS_TOPK, # 6000 cfg.RPN.TEST_POST_NMS_TOPK) # 1000 boxes_on_featuremap = proposal_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE ) # ANCHOR_STRIDE = 16 # ROI_align roi_resized = roi_align(featuremap, boxes_on_featuremap, 14) # 14x14 for each roi feature_fastrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1] ) # nxcx7x7 # RESNET_NUM_BLOCK = [3, 4, 6, 3] # Keep C5 feature to be shared with mask branch feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first') fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn', feature_gap, cfg.DATA.NUM_CLASS) # Returns: # cls_logits: Tensor("fastrcnn/class/output:0", shape=(n, 81), dtype=float32) # reg_logits: Tensor("fastrcnn/output_box:0", shape=(n, 81, 4), dtype=float32) # ------------------Fastrcnn_Head------------------------ proposals = BoxProposals(proposal_boxes) fastrcnn_head = FastRCNNHead( proposals, fastrcnn_box_logits, fastrcnn_label_logits, # tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) # [10., 10., 5., 5.] decoded_boxes = fastrcnn_head.decoded_output_boxes( ) # pre_boxes_on_images decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = tf.nn.softmax(fastrcnn_label_logits, name='fastrcnn_all_scores') # class scores, summed to one for each box. final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output')