def rpn(self, image, features, inputs): featuremap = features[0] rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR) anchors = RPNAnchors( get_all_anchors(stride=cfg.RPN.ANCHOR_STRIDE, sizes=cfg.RPN.ANCHOR_SIZES, ratios=cfg.RPN.ANCHOR_RATIOS, max_size=cfg.PREPROC.MAX_SIZE), inputs['anchor_labels'], inputs['anchor_boxes']) anchors = anchors.narrow_to(featuremap) image_shape2d = tf.shape(image)[2:] # h,w pred_boxes_decoded = anchors.decode_logits( rpn_box_logits) # fHxfWxNAx4, floatbox proposal_boxes, proposal_scores = generate_rpn_proposals( tf.reshape(pred_boxes_decoded, [-1, 4]), tf.reshape(rpn_label_logits, [-1]), image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK if self.training else cfg.RPN.TEST_PRE_NMS_TOPK, cfg.RPN.TRAIN_POST_NMS_TOPK if self.training else cfg.RPN.TEST_POST_NMS_TOPK) if self.training: losses = rpn_losses(anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits) else: losses = [] return BoxProposals(proposal_boxes), losses
def batch_flatten(x): """ Flatten the tensor except the first dimension. """ shape = x.get_shape().as_list()[1:] if None not in shape: return tf.reshape(x, [-1, int(np.prod(shape))]) return tf.reshape(x, tf.stack([tf.shape(x)[0], -1]))
def rpn(self, image, features, inputs): assert len(cfg.RPN.ANCHOR_SIZES) == len(cfg.FPN.ANCHOR_STRIDES) image_shape2d = tf.shape(image)[2:] # h,w all_anchors_fpn = get_all_anchors_fpn(strides=cfg.FPN.ANCHOR_STRIDES, sizes=cfg.RPN.ANCHOR_SIZES, ratios=cfg.RPN.ANCHOR_RATIOS, max_size=cfg.PREPROC.MAX_SIZE) multilevel_anchors = [ RPNAnchors(all_anchors_fpn[i], inputs['anchor_labels_lvl{}'.format(i + 2)], inputs['anchor_boxes_lvl{}'.format(i + 2)]) for i in range(len(all_anchors_fpn)) ] self.slice_feature_and_anchors(features, multilevel_anchors) # Multi-Level RPN Proposals rpn_outputs = [ rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS)) for pi in features ] multilevel_label_logits = [k[0] for k in rpn_outputs] multilevel_box_logits = [k[1] for k in rpn_outputs] multilevel_pred_boxes = [ anchor.decode_logits(logits) for anchor, logits in zip( multilevel_anchors, multilevel_box_logits) ] proposal_boxes, proposal_scores = generate_fpn_proposals( multilevel_pred_boxes, multilevel_label_logits, image_shape2d) if self.training: losses = multilevel_rpn_losses(multilevel_anchors, multilevel_label_logits, multilevel_box_logits) else: losses = [] return BoxProposals(proposal_boxes), losses
def mod_conv2d(x, y, fmaps, kernel, demodulate=True, gain=1, use_wscale=True, lrmul=1, fused_modconv=True, eps=1e-8, padding='SAME', name="mod_conv2d"): shape = x.get_shape().as_list() # [n, h, w, c] cin = shape[-1] with tf.variable_scope(name, reuse=tf.AUTO_REUSE): # Get weight w = get_weight([kernel, kernel, cin, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, name='W') ww = w[tf.newaxis] # introduce minibatch dimension # Modulate s = get_bias( cin, base_std=0, use_wscale=use_wscale, lrmul=lrmul, name='bs') + 1 vh = VariableHolder(W=w, bs=s) s = tf.tile(s[tf.newaxis], [tf.shape(x)[0], 1]) # introduce minibatch dimension if y is not None: y_style, w_style = dense(y, cin, gain=gain, use_wscale=use_wscale, lrmul=lrmul) s = s + y_style vh.Ws = w_style ww = ww * tf.cast(s[:, tf.newaxis, tf.newaxis, :, tf.newaxis], w.dtype) # scale input feature maps # Demodulate if demodulate: d = tf.rsqrt( tf.reduce_sum(tf.square(ww), axis=[1, 2, 3], keepdims=True) + eps) # scaling factor ww = ww * d # Reshape/scale input if fused_modconv: x = tf.reshape(tf.transpose(x, [0, 3, 1, 2]), [1, -1, shape[1], shape[2]]) # [1, n*cin, h, w] w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [kernel, kernel, cin, -1]) # [k, k, cin, n*cout] x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NCHW', strides=[1, 1, 1, 1], padding=padding) out_shape = x.get_shape().as_list() x = tf.transpose( tf.reshape(x, [-1, fmaps, out_shape[2], out_shape[3]]), [0, 2, 3, 1]) else: x = x * tf.cast(s[:, tf.newaxis, tf.newaxis, :], x.dtype) x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NHWC', strides=[1, 1, 1, 1], padding=padding) if demodulate: x = x * tf.cast(tf.reshape(d, [-1, 1, 1, fmaps]), x.dtype) ret = tf.identity(x) ret.variables = vh return ret
def roi_heads(self, image, features, proposals, targets): image_shape2d = tf.shape(image)[2:] # h,w assert len(features) == 5, "Features have to be P23456!" gt_boxes, gt_labels, *_ = targets if self.training: proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels) fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC) if not cfg.FPN.CASCADE: roi_feature_fastrcnn = multilevel_roi_align( features[:4], proposals.boxes, 7) head_feature = fastrcnn_head_func('fastrcnn', roi_feature_fastrcnn) fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn/outputs', head_feature, cfg.DATA.NUM_CATEGORY) fastrcnn_head = FastRCNNHead( proposals, fastrcnn_box_logits, fastrcnn_label_logits, gt_boxes, tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) else: def roi_func(boxes): return multilevel_roi_align(features[:4], boxes, 7) fastrcnn_head = CascadeRCNNHead(proposals, roi_func, fastrcnn_head_func, (gt_boxes, gt_labels), image_shape2d, cfg.DATA.NUM_CATEGORY) if self.training: all_losses = fastrcnn_head.losses() if cfg.MODE_MASK: gt_masks = targets[2] # maskrcnn loss roi_feature_maskrcnn = multilevel_roi_align( features[:4], proposals.fg_boxes(), 14, name_scope='multilevel_roi_align_mask') maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC) mask_logits = maskrcnn_head_func( 'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28 target_masks_for_fg = crop_and_resize( tf.expand_dims(gt_masks, 1), proposals.fg_boxes(), proposals.fg_inds_wrt_gt, 28, pad_border=False) # fg x 1x28x28 target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets') all_losses.append( maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg)) return all_losses else: decoded_boxes = fastrcnn_head.decoded_output_boxes() decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = fastrcnn_head.output_scores( name='fastrcnn_all_scores') final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output') if cfg.MODE_MASK: # Cascade inference needs roi transform with refined boxes. roi_feature_maskrcnn = multilevel_roi_align( features[:4], final_boxes, 14) maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC) mask_logits = maskrcnn_head_func( 'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28 indices = tf.stack([ tf.range(tf.size(final_labels)), tf.cast(final_labels, tf.int32) - 1 ], axis=1) final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx28x28 tf.sigmoid(final_mask_logits, name='output/masks') return []
def roi_heads(self, image, features, proposals, targets): image_shape2d = tf.shape(image)[2:] # h,w featuremap = features[0] gt_boxes, gt_labels, *_ = targets if self.training: # sample proposal boxes in training proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes, gt_labels) # The boxes to be used to crop RoIs. # Use all proposal boxes in inference boxes_on_featuremap = proposals.boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE) roi_resized = roi_align(featuremap, boxes_on_featuremap, 14) feature_fastrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1]) # nxcx7x7 # Keep C5 feature to be shared with mask branch feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first') fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs( 'fastrcnn', feature_gap, cfg.DATA.NUM_CATEGORY) fastrcnn_head = FastRCNNHead( proposals, fastrcnn_box_logits, fastrcnn_label_logits, gt_boxes, tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)) if self.training: all_losses = fastrcnn_head.losses() if cfg.MODE_MASK: gt_masks = targets[2] # maskrcnn loss # In training, mask branch shares the same C5 feature. fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds()) mask_logits = maskrcnn_upXconv_head( 'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0) # #fg x #cat x 14x14 target_masks_for_fg = crop_and_resize( tf.expand_dims(gt_masks, 1), proposals.fg_boxes(), proposals.fg_inds_wrt_gt, 14, pad_border=False) # nfg x 1x14x14 target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets') all_losses.append( maskrcnn_loss(mask_logits, proposals.fg_labels(), target_masks_for_fg)) return all_losses else: decoded_boxes = fastrcnn_head.decoded_output_boxes() decoded_boxes = clip_boxes(decoded_boxes, image_shape2d, name='fastrcnn_all_boxes') label_scores = fastrcnn_head.output_scores( name='fastrcnn_all_scores') final_boxes, final_scores, final_labels = fastrcnn_predictions( decoded_boxes, label_scores, name_scope='output') if cfg.MODE_MASK: roi_resized = roi_align( featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14) feature_maskrcnn = resnet_conv5( roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1]) mask_logits = maskrcnn_upXconv_head( 'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0) # #result x #cat x 14x14 indices = tf.stack([ tf.range(tf.size(final_labels)), tf.cast(final_labels, tf.int32) - 1 ], axis=1) final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx14x14 tf.sigmoid(final_mask_logits, name='output/masks') return []