def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Attach single-scale (non-FPN) RPN heads to ``model``.

    A 3x3 conv + in-place ReLU ('conv_rpn') is built on top of ``blob_in``,
    followed by two 1x1 conv heads: 'rpn_cls_logits' (one channel per anchor)
    and 'rpn_bbox_pred' (four channels per anchor). Depending on the run mode
    and ``cfg.MODEL.FASTER_RCNN``, proposal generation and proposal labelling
    (training) or a 'rois' alias (inference) are added as well.
    """
    rpn_anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    anchors_per_location = rpn_anchors.shape[0]
    hidden_dim = dim_in

    # Shared hidden representation feeding both heads
    model.Conv(
        blob_in, 'conv_rpn', dim_in, hidden_dim,
        kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')

    # 1x1 heads: objectness logits first, then bbox regression deltas
    rpn_heads = (
        ('rpn_cls_logits', anchors_per_location),
        ('rpn_bbox_pred', 4 * anchors_per_location),
    )
    for head_name, head_channels in rpn_heads:
        model.Conv(
            'conv_rpn', head_name, dim_in, head_channels,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )

    # Proposals are required at inference time (RPN-only and Faster R-CNN)
    # and when training Faster R-CNN; RPN-only training is the one case that
    # does not need them.
    rpn_only_training = model.train and not cfg.MODEL.FASTER_RCNN
    if not rpn_only_training:
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=rpn_anchors,
            spatial_scale=spatial_scale
        )

    if not cfg.MODEL.FASTER_RCNN:
        return

    if model.train:
        # Generate training labels for the in-network RPN proposals
        model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
    else:
        # At inference the RPN proposals are used directly as 'rois'
        model.net.Alias('rpn_rois', 'rois')
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN style position-sensitive classification and box outputs.

    Args:
        model: model helper the ops are added to.
        blob_in: input feature blob.
        dim_in: number of channels of ``blob_in``.
        dim_reduce: if not None, a 1x1 conv + ReLU first reduces the input to
            this many channels.
        spatial_scale: scale passed to the PSRoIPool ops.

    Produces the blobs 'cls_score' (plus 'cls_prob' when not training) and
    'bbox_pred'. Nothing is returned.
    """
    if dim_reduce is not None:
        # Optional dim reduction
        blob_in = model.Conv(
            blob_in, 'conv_dim_reduce', dim_in, dim_reduce,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce

    grid_size = cfg.RFCN.PS_GRID_SIZE
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )

    # Classification conv
    model.Conv(
        blob_in, 'conv_cls', dim_in, model.num_classes * grid_size**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    # Bounding-box regression conv
    model.Conv(
        blob_in, 'conv_bbox_pred', dim_in,
        4 * num_bbox_reg_classes * grid_size**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )

    # Classification PS RoI pooling
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=grid_size,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_cls', 'cls_score_4d', kernel=grid_size)
    # Reshape with the same class count that sized the pooled output
    # (model.num_classes) so the two can never disagree.
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, model.num_classes)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')

    # Bbox regression PS RoI pooling
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=grid_size,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool('psroipooled_bbox', 'bbox_pred', kernel=grid_size)
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
    """Mask head built on a ResNet "conv5" / "stage5" head shared with boxes.

    Weights and computation are shared with the conv5 box head. Computation
    can only be shared during training, since inference is cascaded.

    v0upshare design: conv5, convT 2x2.

    Returns:
        (blob_mask, dim_reduced): the blob returned by the 'conv5_mask'
        ConvTranspose and its channel count (cfg.MRCNN.DIM_REDUCED).
    """
    # The box and mask heads are shared, so their RoI resolutions must agree
    assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION

    if not model.train:
        # Test time: the conv5 features have to be recomputed for the masks
        conv5_blob, conv5_dim = add_ResNet_roi_conv5_head_for_masks(
            model, blob_in, dim_in, spatial_scale
        )
    else:
        # Training: reuse the box head's computation
        conv5_dim = 2048
        conv5_blob = model.net.SampleAs(
            ['res5_2_sum', 'roi_has_mask_int32'],
            ['_[mask]_res5_2_sum_sliced']
        )

    reduced_dim = cfg.MRCNN.DIM_REDUCED
    mask_weight_init = (cfg.MRCNN.CONV_INIT, {'std': 0.001})  # std only for gauss
    mask_blob = model.ConvTranspose(
        conv5_blob, 'conv5_mask', conv5_dim, reduced_dim,
        kernel=2, pad=0, stride=2,
        weight_init=mask_weight_init,
        bias_init=const_fill(0.0)
    )
    model.Relu('conv5_mask', 'conv5_mask')

    return mask_blob, reduced_dim
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add the Mask R-CNN output layer: mask logits (train) or probs (test).

    The logits come from either an FC layer (cfg.MRCNN.USE_FC_OUTPUT) or a
    1x1 conv that may be followed by bilinear upsampling. When the model is
    not in training mode a sigmoid is applied and its blob is returned.
    """
    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        mask_channels = cfg.MODEL.NUM_CLASSES
    else:
        mask_channels = 1

    if not cfg.MRCNN.USE_FC_OUTPUT:
        # Conv-based prediction.
        # Class-agnostic masks use GaussianFill: fills based on fan-in can be
        # too large in that case and cause divergence.
        if cfg.MRCNN.CLS_SPECIFIC_MASK:
            conv_fill = cfg.MRCNN.CONV_INIT
        else:
            conv_fill = 'GaussianFill'
        mask_out = model.Conv(
            blob_in, 'mask_fcn_logits', dim, mask_channels,
            kernel=1, pad=0, stride=1,
            weight_init=(conv_fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            mask_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up',
                mask_channels, mask_channels,
                cfg.MRCNN.UPSAMPLE_RATIO
            )
    else:
        # FC-based prediction (the 'fcn' in the blob name is historical)
        mask_out = model.FC(
            blob_in, 'mask_fcn_logits', dim,
            mask_channels * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )

    if model.train:
        return mask_out
    # Test time: turn logits into probabilities
    return model.net.Sigmoid(mask_out, 'mask_fcn_probs')
def add_topdown_lateral_module(
    model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral
):
    """Add one top-down + lateral merge step.

    A 1x1 conv (with GroupNorm when cfg.FPN.USE_GN) maps ``fpn_lateral`` to
    ``dim_top`` channels, ``fpn_top`` is upsampled 2x with nearest-neighbour
    interpolation, and the two are summed into the blob named ``fpn_bottom``.
    """
    # Lateral weights are either zero-initialised or Xavier-initialised
    if cfg.FPN.ZERO_INIT_LATERAL:
        lateral_weight_init = const_fill(0.0)
    else:
        lateral_weight_init = ('XavierFill', {})

    lateral_name = fpn_bottom + '_lateral'
    conv_kwargs = dict(
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=lateral_weight_init,
        bias_init=const_fill(0.0),
    )

    # Lateral 1x1 conv
    if cfg.FPN.USE_GN:
        lateral = model.ConvGN(
            fpn_lateral, lateral_name,
            group_gn=get_group_gn(dim_top),
            **conv_kwargs
        )
    else:
        lateral = model.Conv(fpn_lateral, lateral_name, **conv_kwargs)

    # Top-down 2x upsampling
    topdown = model.net.UpsampleNearest(
        fpn_top, fpn_bottom + '_topdown', scale=2
    )

    # Merge the two paths
    model.net.Sum([lateral, topdown], fpn_bottom)
def mask_rcnn_fcn_head_v1upXconvs_gn(
    model, blob_in, dim_in, spatial_scale, num_convs
):
    """v1upXconvs design: X * (conv 3x3), convT 2x2, with GroupNorm.

    Returns:
        (blob_mask, dim_inner): the ReLU'd 'conv5_mask' blob and its channel
        count (cfg.MRCNN.DIM_REDUCED).
    """
    feat = model.RoIFeatureTransform(
        blob_in,
        blob_out='_mask_roi_feat',
        blob_rois='mask_rois',
        method=cfg.MRCNN.ROI_XFORM_METHOD,
        resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    dilation = cfg.MRCNN.DILATION
    dim_inner = cfg.MRCNN.DIM_REDUCED

    in_channels = dim_in
    for conv_idx in range(1, num_convs + 1):
        # NOTE(review): dilation only influences the padding here; no
        # `dilation=` argument is forwarded to ConvGN -- confirm intended.
        feat = model.ConvGN(
            feat,
            '_mask_fcn' + str(conv_idx),
            in_channels,
            dim_inner,
            group_gn=get_group_gn(dim_inner),
            kernel=3,
            pad=1 * dilation,
            stride=1,
            weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=('ConstantFill', {'value': 0.})
        )
        feat = model.Relu(feat, feat)
        # Every conv after the first consumes dim_inner channels
        in_channels = dim_inner

    # Upsample layer
    model.ConvTranspose(
        feat, 'conv5_mask', dim_inner, dim_inner,
        kernel=2, pad=0, stride=2,
        weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
        bias_init=const_fill(0.0)
    )
    return model.Relu('conv5_mask', 'conv5_mask'), dim_inner
def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
    """v0up design: conv5, deconv 2x2 (no weight sharing with the box head).

    Returns:
        (blob_mask, dim_reduced): the ReLU'd 'conv5_mask' blob and its channel
        count (cfg.MRCNN.DIM_REDUCED).
    """
    conv5_blob, conv5_dim = add_ResNet_roi_conv5_head_for_masks(
        model, blob_in, dim_in, spatial_scale
    )
    reduced_dim = cfg.MRCNN.DIM_REDUCED

    # 2x2 stride-2 transposed conv doubles the spatial resolution
    deconv_kwargs = dict(
        kernel=2,
        pad=0,
        stride=2,
        weight_init=('GaussianFill', {'std': 0.001}),
        bias_init=const_fill(0.0),
    )
    model.ConvTranspose(
        conv5_blob, 'conv5_mask', conv5_dim, reduced_dim, **deconv_kwargs
    )
    return model.Relu('conv5_mask', 'conv5_mask'), reduced_dim
def add_keypoint_outputs(model, blob_in, dim):
    """Add the Mask R-CNN keypoint output: per-keypoint heatmaps (NxKxHxW).

    Optionally applies a ConvTranspose + ReLU to the features first
    (cfg.KRCNN.USE_DECONV), predicts the heatmaps with either a ConvTranspose
    or a 1x1 Conv (cfg.KRCNN.USE_DECONV_OUTPUT), and finally upsamples them
    bilinearly when cfg.KRCNN.UP_SCALE > 1. Returns the final heatmap blob.
    """
    needs_bilinear_upsample = cfg.KRCNN.UP_SCALE > 1

    if cfg.KRCNN.USE_DECONV:
        # ConvTranspose on the feature representation: 2x upsampling
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM

    # The final blob is always called 'kps_score'; when bilinear upsampling
    # follows, the prediction layer writes to an intermediate name instead.
    score_name = 'kps_score_lowres' if needs_bilinear_upsample else 'kps_score'

    if not cfg.KRCNN.USE_DECONV_OUTPUT:
        # Plain Conv prediction; no upsampling
        heatmaps = model.Conv(
            blob_in,
            score_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # ConvTranspose prediction; 2x upsampling
        heatmaps = model.ConvTranspose(
            blob_in,
            score_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )

    if not needs_bilinear_upsample:
        return heatmaps

    # Increase heatmap output size via bilinear upsampling
    return model.BilinearInterpolation(
        heatmaps,
        'kps_score',
        cfg.KRCNN.NUM_KEYPOINTS,
        cfg.KRCNN.NUM_KEYPOINTS,
        cfg.KRCNN.UP_SCALE
    )
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN heads (and, when needed, proposal generation) per FPN level.

    The finest level (cfg.FPN.RPN_MIN_LEVEL) creates the conv weights and
    biases; every other level reuses them through ConvShared. ``blobs_in`` and
    ``spatial_scales`` are indexed from coarsest to finest level.
    """
    anchors_per_location = len(cfg.FPN.RPN_ASPECT_RATIOS)
    hidden_dim = dim_in
    coarsest_lvl = cfg.FPN.RPN_MAX_LEVEL
    finest_lvl = cfg.FPN.RPN_MIN_LEVEL
    assert len(blobs_in) == coarsest_lvl - finest_lvl + 1

    # Suffix of the level that owns the shared parameters
    owner_tag = str(finest_lvl)

    for level in range(finest_lvl, coarsest_lvl + 1):
        # Inputs are stored in reversed (coarse-to-fine) order
        position = coarsest_lvl - level
        level_blob = blobs_in[position]
        level_scale = spatial_scales[position]
        tag = str(level)

        hidden_name = 'conv_rpn_fpn' + tag
        logits_name = 'rpn_cls_logits_fpn' + tag
        deltas_name = 'rpn_bbox_pred_fpn' + tag

        if level != finest_lvl:
            # Reuse the weights and biases created at the finest level.
            # RPN hidden representation
            hidden = model.ConvShared(
                level_blob, hidden_name, dim_in, hidden_dim,
                kernel=3, pad=1, stride=1,
                weight='conv_rpn_fpn' + owner_tag + '_w',
                bias='conv_rpn_fpn' + owner_tag + '_b'
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            logits = model.ConvShared(
                hidden, logits_name, dim_in, anchors_per_location,
                kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + owner_tag + '_w',
                bias='rpn_cls_logits_fpn' + owner_tag + '_b'
            )
            # Proposal bbox regression deltas
            deltas = model.ConvShared(
                hidden, deltas_name, dim_in, 4 * anchors_per_location,
                kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + owner_tag + '_w',
                bias='rpn_bbox_pred_fpn' + owner_tag + '_b'
            )
        else:
            # First level: create randomly initialised weights and zeroed
            # biases that all other levels will share.
            # RPN hidden representation
            hidden = model.Conv(
                level_blob, hidden_name, dim_in, hidden_dim,
                kernel=3, pad=1, stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            logits = model.Conv(
                hidden, logits_name, dim_in, anchors_per_location,
                kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )
            # Proposal bbox regression deltas
            deltas = model.Conv(
                hidden, deltas_name, dim_in, 4 * anchors_per_location,
                kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0)
            )

        # Proposals are needed at inference (RPN-only and Faster R-CNN) and
        # when training Faster R-CNN; only RPN-only training skips them.
        if model.train and not cfg.MODEL.FASTER_RCNN:
            continue

        level_anchors = generate_anchors(
            stride=2.**level,
            sizes=(
                cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(level - finest_lvl),
            ),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
        )
        probs = model.net.Sigmoid(logits, 'rpn_cls_probs_fpn' + tag)
        model.GenerateProposals(
            [probs, deltas, 'im_info'],
            ['rpn_rois_fpn' + tag, 'rpn_roi_probs_fpn' + tag],
            anchors=level_anchors,
            spatial_scale=level_scale
        )
def add_fpn(model, fpn_level_info):
    """Add FPN connections based on the model described in the FPN paper.

    Construction starts at the highest/coarsest backbone level (usually
    "conv5"). First the pyramid is built downwards, producing lower/finer
    resolution levels through top-down + lateral modules; then it is built
    upwards, producing levels even coarser than the starting one.

    Returns:
        (blobs_fpn, fpn_dim, spatial_scales): output blobs ordered from
        coarsest to finest, their channel count (cfg.FPN.DIM) and the
        matching spatial scales.
    """
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()

    # Number of backbone stages that receive an FPN level, counted from the
    # coarsest stage (usually the "conv5"-like level). E.g. with 4 stages
    # "conv5" ... "conv2" and min_level=2 this is 4 - (2 - 2) = 4.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_inputs = fpn_level_info.blobs[:num_backbone_stages]
    inner_blobs = ['fpn_inner_{}'.format(name) for name in lateral_inputs]
    lateral_dims = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})
    use_gn = cfg.FPN.USE_GN

    # Coarsest backbone level: a lone 1x1 conv seeds the recursion
    if use_gn:
        seed_blob = model.ConvGN(
            lateral_inputs[0],
            inner_blobs[0],  # note: this is a prefix
            dim_in=lateral_dims[0],
            dim_out=fpn_dim,
            group_gn=get_group_gn(fpn_dim),
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
        # Continue with the blob ConvGN actually produced
        inner_blobs[0] = seed_blob
    else:
        model.Conv(
            lateral_inputs[0],
            inner_blobs[0],
            dim_in=lateral_dims[0],
            dim_out=fpn_dim,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )

    #
    # Step 1: build down from the coarsest backbone level
    #

    # Every remaining level gets a top-down + lateral connection
    for upper in range(num_backbone_stages - 1):
        lower = upper + 1
        add_topdown_lateral_module(
            model,
            inner_blobs[upper],     # top-down blob
            lateral_inputs[lower],  # lateral blob
            inner_blobs[lower],     # next output blob
            fpn_dim,                # output dimension
            lateral_dims[lower]     # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs
    blobs_fpn = []
    spatial_scales = []
    for idx in range(num_backbone_stages):
        smoothed_name = 'fpn_{}'.format(fpn_level_info.blobs[idx])
        if use_gn:
            smoothed = model.ConvGN(
                inner_blobs[idx],
                smoothed_name,
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                group_gn=get_group_gn(fpn_dim),
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        else:
            smoothed = model.Conv(
                inner_blobs[idx],
                smoothed_name,
                dim_in=fpn_dim,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
        blobs_fpn.append(smoothed)
        spatial_scales.append(fpn_level_info.spatial_scales[idx])

    #
    # Step 2: build up from the coarsest backbone level
    #

    # P6 as in the original FPN paper: stride-2 subsampling of the coarsest
    # level, implemented as a kernel-1 max pool
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        p6_source = blobs_fpn[0]
        p6_blob = model.MaxPool(
            p6_source, p6_source + '_subsampled_2x',
            kernel=1, pad=0, stride=2
        )
        blobs_fpn.insert(0, p6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser levels introduced for RetinaNet: stride-2 3x3 convs stacked on
    # the coarsest backbone blob
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        extra_blob = fpn_level_info.blobs[0]
        extra_dim_in = fpn_level_info.dims[0]
        first_extra_level = HIGHEST_BACKBONE_LVL + 1
        for level in range(first_extra_level, max_level + 1):
            if level > first_extra_level:
                # A ReLU separates consecutive extra convs
                conv_input = model.Relu(extra_blob, extra_blob + '_relu')
            else:
                conv_input = extra_blob
            extra_blob = model.Conv(
                conv_input,
                'fpn_' + str(level),
                dim_in=extra_dim_in,
                dim_out=fpn_dim,
                kernel=3,
                pad=1,
                stride=2,
                weight_init=xavier_fill,
                bias_init=const_fill(0.0)
            )
            extra_dim_in = fpn_dim
            blobs_fpn.insert(0, extra_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales
def add_cascade_fast_rcnn_outputs(model, blobs_in, dim, stage_num):
    """Add RoI classification and bounding box regression output ops.

    Args:
        model: model helper the ops are added to.
        blobs_in: list of input feature blobs. ``blobs_in[0]`` is the output
            of this stage's own head. During inference for stages 2 and 3,
            ``blobs_in[k]`` (k >= 1) is the output of the k-th stage's head
            weights applied to this stage's RoIs.
        dim: number of input features of each blob in ``blobs_in``.
        stage_num: cascade stage, one of 1, 2 or 3.

    Raises:
        ValueError: if ``stage_num`` is not 1, 2 or 3.

    For stage ``s`` (suffix '1st'/'2nd'/'3rd') the blobs 'cls_score_<s>' and
    'bbox_pred_<s>' are always created. At inference 'cls_prob_<s>' is also
    created: for stage 1 it is the plain softmax, for later stages it is the
    average of the softmax outputs of this and all earlier stage classifiers.
    """
    stage_suffixes = {1: '1st', 2: '2nd', 3: '3rd'}
    if stage_num not in stage_suffixes:
        raise ValueError(
            'Unsupported cascade stage_num: {!r} (expected 1, 2 or 3)'.format(
                stage_num
            )
        )
    stage = stage_suffixes[stage_num]
    earlier_stages = [
        stage_suffixes[n] for n in sorted(stage_suffixes) if n < stage_num
    ]

    # Box regression layer
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )

    cls_score = 'cls_score_' + stage
    model.FC(
        blobs_in[0],
        cls_score,
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )

    if not model.train:  # == if test
        # Only add softmax when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        if not earlier_stages:
            model.Softmax(cls_score, 'cls_prob_' + stage, engine='CUDNN')
        else:
            assert len(blobs_in) == stage_num, (
                'during inference, rcnn stage {} needs {} input blobs: the '
                'stage head output plus one per earlier stage head'.format(
                    stage_num, stage_num
                )
            )
            # Softmax of this stage's own classifier (added exactly once)
            own_prob = model.Softmax(
                cls_score,
                'cls_prob_{0}_{0}'.format(stage),
                engine='CUDNN'
            )
            # Classifiers of the earlier stages, re-applied with shared weights
            ensemble_probs = []
            for blob_idx, prev in enumerate(earlier_stages, 1):
                shared_score = 'cls_score_{}_{}'.format(prev, stage)
                model.FCShared(
                    blobs_in[blob_idx],
                    shared_score,
                    dim,
                    model.num_classes,
                    weight='cls_score_{}_w'.format(prev),
                    bias='cls_score_{}_b'.format(prev)
                )
                ensemble_probs.append(
                    model.Softmax(
                        shared_score,
                        'cls_prob_{}_{}'.format(prev, stage),
                        engine='CUDNN'
                    )
                )
            ensemble_probs.append(own_prob)

            # Average the per-classifier probabilities
            cls_prob = 'cls_prob_' + stage
            model.Sum(ensemble_probs, cls_prob)
            model.Scale(cls_prob, cls_prob, scale=1.0 / len(ensemble_probs))

    model.FC(
        blobs_in[0],
        'bbox_pred_' + stage,
        dim,
        num_bbox_reg_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )
def add_roi_cascade_2mlp_head(model, blob_in, dim_in, spatial_scale, stage_num):
    """Add cascade ReLU MLP with two hidden layers.

    Args:
        model: model helper the ops are added to.
        blob_in: feature blob(s) passed to RoIFeatureTransform.
        dim_in: number of channels of the RoI features.
        spatial_scale: scale(s) passed to RoIFeatureTransform.
        stage_num: cascade stage, one of 1, 2 or 3.

    Returns:
        (blob_names, hidden_dim). ``blob_names[0]`` is 'fc2_<s>' for this
        stage ``s`` ('1st'/'2nd'/'3rd'). For stages 2 and 3 it is followed by
        one 'fc2_<p>_<s>' name per earlier stage ``p``, i.e. the earlier
        stage's MLP (shared weights) applied to this stage's RoI features.
        Those shared-weight blobs are only created when the model is not
        training; their names are returned in either mode.

    Raises:
        ValueError: if ``stage_num`` is not 1, 2 or 3.
    """
    stage_suffixes = {1: '1st', 2: '2nd', 3: '3rd'}
    if stage_num not in stage_suffixes:
        raise ValueError(
            'Unsupported cascade stage_num: {!r} (expected 1, 2 or 3)'.format(
                stage_num
            )
        )
    stage = stage_suffixes[stage_num]
    earlier_stages = [
        stage_suffixes[n] for n in sorted(stage_suffixes) if n < stage_num
    ]

    hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
    roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
    fc1_dim_in = dim_in * roi_size * roi_size

    roi_feat = model.RoIFeatureTransform(
        blob_in,
        'roi_feat_' + stage,
        blob_rois='rois_' + stage,
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )

    # This stage's own two-layer MLP
    fc1 = 'fc1_' + stage
    fc2 = 'fc2_' + stage
    model.FC(
        roi_feat, fc1, fc1_dim_in, hidden_dim,
        weight_init=("MSRAFill", {}),
        bias_init=const_fill(0.0)
    )
    model.Relu(fc1, fc1)
    model.FC(
        fc1, fc2, hidden_dim, hidden_dim,
        weight_init=("MSRAFill", {}),
        bias_init=const_fill(0.0)
    )
    model.Relu(fc2, fc2)

    output_blobs = [fc2]
    for prev in earlier_stages:
        shared_fc1 = 'fc1_{}_{}'.format(prev, stage)
        shared_fc2 = 'fc2_{}_{}'.format(prev, stage)
        if not model.train:
            # Inference only: run the earlier stage's MLP (shared weights) on
            # this stage's RoI features for the classifier ensemble
            model.FCShared(
                roi_feat, shared_fc1, fc1_dim_in, hidden_dim,
                weight='fc1_{}_w'.format(prev),
                bias='fc1_{}_b'.format(prev)
            )
            model.Relu(shared_fc1, shared_fc1)
            model.FCShared(
                shared_fc1, shared_fc2, hidden_dim, hidden_dim,
                weight='fc2_{}_w'.format(prev),
                bias='fc2_{}_b'.format(prev)
            )
            model.Relu(shared_fc2, shared_fc2)
        # Return the blob built for THIS stage (e.g. 'fc2_1st_3rd' at stage 3,
        # not the stage-2 blob 'fc2_1st_2nd')
        output_blobs.append(shared_fc2)

    return output_blobs, hidden_dim