def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN outputs: position-sensitive score maps followed by
    position-sensitive RoI pooling for classification and box regression."""
    if dim_reduce is not None:
        # Optional 1x1 conv to shrink the input channel dimension
        blob_in = model.Conv(
            blob_in, 'conv_dim_reduce', dim_in, dim_reduce,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Position-sensitive score maps for classification
    model.Conv(
        blob_in, 'conv_cls', dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Position-sensitive score maps for bounding-box regression
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in, 'conv_bbox_pred', dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Classification: PS RoI pooling, then average over the PS grid
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    # Flatten the pooled 4D score blob into (num_rois, num_classes)
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box regression: PS RoI pooling, then average over the PS grid
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in
    # 3x3 conv producing the shared RPN hidden representation
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out,
        kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Per-anchor objectness logits
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Per-anchor box regression deltas (4 values each)
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  2) training for Faster R-CNN
        # Otherwise (training RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )
    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Generate training labels for the in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Softmax is added only at test time; during training it is fused
        # with the label cross-entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    model.FC(
        blob_in, 'bbox_pred', dim, model.num_classes * 4,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
    if cfg.MODEL.ATTR and model.train:
        in_dim = dim
        if cfg.MODEL.CLS_EMBED:
            # Slice out the foreground rows of the fc7 feature first
            model.net.SelectFG([blob_in, 'fg_idx'], 'fc7_fg')
            model.create_param(
                param_name='class_embedding',
                initializer=initializers.Initializer("GaussianFill", std=0.01),
                shape=[model.num_classes, 256]
            )
            # Look up the embedding row for each foreground class label
            model.net.Embed(['class_embedding', 'labels_int32_fg'], 'embed_fg')
            # Concatenate the visual feature with the class embedding
            model.net.Concat(
                ['fc7_fg', 'embed_fg'], ['concat_attr', 'concat_split'], axis=1
            )
            in_dim += 256
        else:
            model.net.SelectFG([blob_in, 'fg_idx'], 'concat_attr')
        model.FC(
            'concat_attr', 'fc_attr', in_dim, 512,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        model.Relu('fc_attr', 'fc_attr')
        model.FC(
            'fc_attr', 'attr_score', 512, model.num_attributes,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    # Per-class classification scores
    model.FC(
        blob_in, 'cls_score', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Softmax only at test time; during training it is fused with the
        # label cross-entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Per-class box regression deltas (4 per class)
    model.FC(
        blob_in, 'bbox_pred', dim, model.num_classes * 4,
        weight_init=gauss_fill(0.001), bias_init=const_fill(0.0)
    )
def add_fast_rcnn_outputs(model, blob_in, dim):
    """Add RoI classification and bounding box regression output ops."""
    # Classification head
    model.FC(
        blob_in,
        'cls_score',
        dim,
        model.num_classes,
        weight_init=gauss_fill(0.01),
        bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Add softmax only when testing; during training the softmax is
        # combined with the label cross entropy loss for numerical stability
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Box regression head
    model.FC(
        blob_in,
        'bbox_pred',
        dim,
        model.num_classes * 4,
        weight_init=gauss_fill(0.001),
        bias_init=const_fill(0.0)
    )
def add_prn_outputs(model, blob_in, dim):
    """Add RoI classification output ops."""
    blob_out = model.FC(
        blob_in, 'prn_logits', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Sigmoid only at test time; during training it is fused with the
        # label cross-entropy loss for numerical stability
        blob_out = model.net.Sigmoid('prn_logits', 'prn_probs', engine='CUDNN')
    return blob_out
def add_mlp_outputs(model, blob_in, dim):
    """Add classification ops."""
    # Final linear classifier
    model.FC(
        blob_in, 'logits', dim, model.num_classes,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    if not model.train:  # == if test
        # Softmax only at test time; during training it is fused with the
        # label cross-entropy loss for numerical stability
        model.Softmax('logits', 'cls_prob', engine='CUDNN')
def add_boundary_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either boundary logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 1
    if cfg.BOUNDARY.USE_FC_OUTPUT:
        # Predict boundaries with a fully connected layer (ignore 'fcn' in
        # the blob name)
        blob_out = model.FC(
            blob_in,
            'boundary_fcn_logits',
            dim,
            num_cls * cfg.BOUNDARY.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict boundaries with a 1x1 conv. Use GaussianFill for
        # class-agnostic prediction; fan-in based fills can be too large in
        # that case and cause divergence
        fill = (
            cfg.BOUNDARY.CONV_INIT
            if cfg.BOUNDARY.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in,
            'boundary_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        if cfg.BOUNDARY.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'boundary_fcn_logits', 'boundary_fcn_logits_up', num_cls,
                num_cls, cfg.BOUNDARY.UPSAMPLE_RATIO
            )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'boundary_fcn_probs')
    return blob_out
def add_mask_rcnn_outputs(model, blob_in, dim):
    """Add Mask R-CNN specific outputs: either mask logits or probs."""
    num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
    if cfg.MRCNN.USE_FC_OUTPUT:
        # Predict masks with a fully connected layer (ignore 'fcn' in the
        # blob name)
        blob_out = model.FC(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls * cfg.MRCNN.RESOLUTION**2,
            weight_init=gauss_fill(0.001),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict masks with a 1x1 conv. Use GaussianFill for class-agnostic
        # prediction; fan-in based fills can be too large in that case and
        # cause divergence
        fill = (
            cfg.MRCNN.CONV_INIT
            if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill'
        )
        blob_out = model.Conv(
            blob_in,
            'mask_fcn_logits',
            dim,
            num_cls,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(fill, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
        if cfg.MRCNN.UPSAMPLE_RATIO > 1:
            blob_out = model.BilinearInterpolation(
                'mask_fcn_logits', 'mask_fcn_logits_up', num_cls, num_cls,
                cfg.MRCNN.UPSAMPLE_RATIO
            )
    if not model.train:  # == if test
        blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
    return blob_out
def bottleneck_transformation(
    model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
    dilation=1, group=1
):
    """Add a bottleneck transformation (1x1 -> 3x3 -> 1x1) followed by a
    squeeze-and-excitation style channel gating branch."""
    # In original ResNet, stride=2 is on the 1x1 conv;
    # in fb.torch ResNet, stride=2 is on the 3x3 conv.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(
        blob_in, prefix + '_branch2a', dim_in, dim_inner,
        kernel=1, stride=str1x1, pad=0, inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> BN -> ReLU
    # NOTE: for a MobileNet-style depthwise conv one would pass
    # group=dim_inner; here the caller-supplied group is used as-is.
    cur = model.ConvAffine(
        cur, prefix + '_branch2b', dim_inner, dim_inner,
        kernel=3, stride=str3x3, pad=1 * dilation,
        dilation=dilation, group=group, inplace=True
    )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> BN (no ReLU)
    # NB: this AffineChannel op cannot be in-place due to a bug in C2
    # gradient computation for graphs like this
    cur = model.ConvAffine(
        cur, prefix + '_branch2c', dim_inner, dim_out,
        kernel=1, stride=1, pad=0, inplace=False
    )

    # Squeeze-and-excitation: global pool -> bottleneck FC (as 1x1 convs)
    # -> sigmoid gate, broadcast-multiplied onto the branch output
    se_pool = model.AveragePool(
        cur, prefix + '_branch2c_se_pool', global_pooling=1
    )
    se_reduce = model.Conv(
        se_pool, prefix + '_branch2c_se_con1', dim_out, int(dim_out / 16),
        kernel=1, stride=1, pad=0,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    se_reduce = model.Relu(se_reduce, se_reduce)
    se_expand = model.Conv(
        se_reduce, prefix + '_branch2c_se_con2', int(dim_out / 16), dim_out,
        kernel=1, stride=1, pad=0,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    se_gate = model.net.Sigmoid(se_expand, se_expand)
    cur = model.net.Mul([cur, se_gate], prefix + '_branch2c_se', broadcast=1)
    return cur
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
    """Add R-FCN score maps and position-sensitive RoI pooling heads."""
    if dim_reduce is not None:
        # Optional 1x1 conv reducing the input channel count
        blob_in = model.Conv(
            blob_in, 'conv_dim_reduce', dim_in, dim_reduce,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
        )
        blob_in = model.Relu(blob_in, blob_in)
        dim_in = dim_reduce
    # Classification score maps (one PS grid per class)
    model.Conv(
        blob_in, 'conv_cls', dim_in,
        model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Bounding-box regression score maps
    num_bbox_reg_classes = (
        2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes
    )
    model.Conv(
        blob_in, 'conv_bbox_pred', dim_in,
        4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Classification PS RoI pooling, averaged over the PS grid
    model.net.PSRoIPool(
        ['conv_cls', 'rois'],
        ['psroipooled_cls', '_mapping_channel_cls'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls', 'cls_score_4d', kernel=cfg.RFCN.PS_GRID_SIZE
    )
    model.net.Reshape(
        'cls_score_4d', ['cls_score', '_cls_scores_shape'],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
    # Bbox regression PS RoI pooling, averaged over the PS grid
    model.net.PSRoIPool(
        ['conv_bbox_pred', 'rois'],
        ['psroipooled_bbox', '_mapping_channel_bbox'],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_bbox', 'bbox_pred', kernel=cfg.RFCN.PS_GRID_SIZE
    )
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # Output is NxKxHxW (K = number of keypoints)
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)
    if cfg.KRCNN.USE_DECONV:
        # ConvTranspose on the feature representation: 2x upsampling
        blob_in = model.ConvTranspose(
            blob_in, 'kps_deconv', dim, cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM
    # When a further bilinear upscale follows, predict into a low-res blob
    blob_name = 'kps_score_lowres' if upsample_heatmap else 'kps_score'
    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Predict heatmaps with ConvTranspose: 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict heatmaps with a 1x1 conv: no upsampling
        blob_out = model.Conv(
            blob_in, blob_name, dim, cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1, pad=0, stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out, 'kps_score', cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS, cfg.KRCNN.UP_SCALE
        )
    return blob_out
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # First FPN level: create conv ops with randomly initialized
            # weights and zeroed biases; all other levels share these
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out,
                kernel=3, pad=1, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Per-anchor objectness logits
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
            # Per-anchor box regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
        else:
            # Other levels: share weights and biases with the first level
            sk_min = str(k_min)
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out,
                kernel=3, pad=1, stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  2) training for Faster R-CNN
            # Otherwise (training RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [UnscopeGPUName(rpn_cls_probs_fpn._name),
                 UnscopeGPUName(rpn_bbox_pred_fpn._name), 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
def add_keypoint_outputs(model, blob_in, dim):
    """Add Mask R-CNN keypoint specific outputs: keypoint heatmaps."""
    # Output is NxKxHxW (K = number of keypoints)
    upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)
    if cfg.KRCNN.USE_DECONV:
        # ConvTranspose over the feature representation: 2x upsampling
        blob_in = model.ConvTranspose(
            blob_in,
            'kps_deconv',
            dim,
            cfg.KRCNN.DECONV_DIM,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        model.Relu('kps_deconv', 'kps_deconv')
        dim = cfg.KRCNN.DECONV_DIM
    if upsample_heatmap:
        blob_name = 'kps_score_lowres'
    else:
        blob_name = 'kps_score'
    if cfg.KRCNN.USE_DECONV_OUTPUT:
        # Predict heatmaps with ConvTranspose: 2x upsampling
        blob_out = model.ConvTranspose(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=cfg.KRCNN.DECONV_KERNEL,
            pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
            stride=2,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    else:
        # Predict heatmaps with a 1x1 conv: no upsampling
        blob_out = model.Conv(
            blob_in,
            blob_name,
            dim,
            cfg.KRCNN.NUM_KEYPOINTS,
            kernel=1,
            pad=0,
            stride=1,
            weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
            bias_init=const_fill(0.0)
        )
    if upsample_heatmap:
        # Increase heatmap output size via bilinear upsampling
        blob_out = model.BilinearInterpolation(
            blob_out,
            'kps_score',
            cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.NUM_KEYPOINTS,
            cfg.KRCNN.UP_SCALE
        )
    return blob_out
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN on FPN specific outputs."""
    num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scales[k_max - lvl]  # in reversed order
        slvl = str(lvl)

        if lvl == k_min:
            # First FPN level: create conv ops with randomly initialized
            # weights and zeroed biases; the other levels share these
            conv_rpn_fpn = model.Conv(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out,
                kernel=3, pad=1, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            # Per-anchor objectness logits
            rpn_cls_logits_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
            # Per-anchor box regression deltas
            rpn_bbox_pred_fpn = model.Conv(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
            )
        else:
            # Other levels: share weights and biases with the first level
            sk_min = str(k_min)
            conv_rpn_fpn = model.ConvShared(
                bl_in, 'conv_rpn_fpn' + slvl, dim_in, dim_out,
                kernel=3, pad=1, stride=1,
                weight='conv_rpn_fpn' + sk_min + '_w',
                bias='conv_rpn_fpn' + sk_min + '_b'
            )
            model.Relu(conv_rpn_fpn, conv_rpn_fpn)
            rpn_cls_logits_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_cls_logits_fpn' + slvl, dim_in,
                num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_cls_logits_fpn' + sk_min + '_w',
                bias='rpn_cls_logits_fpn' + sk_min + '_b'
            )
            rpn_bbox_pred_fpn = model.ConvShared(
                conv_rpn_fpn, 'rpn_bbox_pred_fpn' + slvl, dim_in,
                4 * num_anchors, kernel=1, pad=0, stride=1,
                weight='rpn_bbox_pred_fpn' + sk_min + '_w',
                bias='rpn_bbox_pred_fpn' + sk_min + '_b'
            )

        if not model.train or cfg.MODEL.FASTER_RCNN:
            # Proposals are needed during:
            #  1) inference (== not model.train) for RPN only and Faster R-CNN
            #  2) training for Faster R-CNN
            # Otherwise (training RPN only), proposals are not needed
            lvl_anchors = generate_anchors(
                stride=2.**lvl,
                sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
                aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS
            )
            rpn_cls_probs_fpn = model.net.Sigmoid(
                rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl
            )
            model.GenerateProposals(
                [rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
                ['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
                anchors=lvl_anchors,
                spatial_scale=sc
            )
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
    """Add RPN outputs to a single scale model (i.e., no FPN)."""
    anchors = generate_anchors(
        stride=1. / spatial_scale,
        sizes=cfg.RPN.SIZES,
        aspect_ratios=cfg.RPN.ASPECT_RATIOS
    )
    num_anchors = anchors.shape[0]
    dim_out = dim_in

    # Shared RPN hidden representation (3x3 conv + ReLU)
    model.Conv(
        blob_in, 'conv_rpn', dim_in, dim_out,
        kernel=3, pad=1, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    model.Relu('conv_rpn', 'conv_rpn')
    # Objectness logits, one per anchor
    model.Conv(
        'conv_rpn', 'rpn_cls_logits', dim_in, num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )
    # Box regression deltas, four per anchor
    model.Conv(
        'conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors,
        kernel=1, pad=0, stride=1,
        weight_init=gauss_fill(0.01), bias_init=const_fill(0.0)
    )

    if not model.train or cfg.MODEL.FASTER_RCNN:
        # Proposals are needed during:
        #  1) inference (== not model.train) for RPN only and Faster R-CNN
        #  2) training for Faster R-CNN
        # Otherwise (training RPN only), proposals are not needed
        model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
        model.GenerateProposals(
            ['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
            ['rpn_rois', 'rpn_roi_probs'],
            anchors=anchors,
            spatial_scale=spatial_scale
        )

    if cfg.MODEL.FASTER_RCNN:
        if model.train:
            # Generate training labels for the in-network RPN proposals
            model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
        else:
            # Alias rois to rpn_rois for inference
            model.net.Alias('rpn_rois', 'rois')
def add_rfcn_heads(blob_in, rois, spatial_scale, num_bbox_reg_classes, lvl,
                   model=None, dim_in=None):
    """Add per-level R-FCN classification and box-regression heads.

    Score-map convs are created at level 0 and weight-shared (via
    ConvShared) at every other level. Each level gets its own PS RoI
    pooling + average pooling outputs, suffixed with the level index.

    Args:
        blob_in: input feature blob for this level.
        rois: RoI blob pooled against this level.
        spatial_scale: spatial scale of blob_in relative to the input image.
        num_bbox_reg_classes: number of classes for box regression
            (2 if class-agnostic).
        lvl: integer level index; 0 creates the shared weights.
        model: the model helper to add ops to. BUG FIX: the original body
            referenced an undefined name ``self`` (this is a free function,
            not a method) and raised NameError; ``model`` is now an explicit
            keyword parameter.
        dim_in: input channel count of blob_in. BUG FIX: also previously an
            undefined free name.

    Returns:
        (cls_blob_out, bbox_blob_out): the per-level classification score
        blob and box prediction blob.

    Raises:
        ValueError: if ``model`` or ``dim_in`` is not supplied.
    """
    if model is None or dim_in is None:
        # The original code crashed with NameError here; fail loudly with a
        # clear message instead.
        raise ValueError('add_rfcn_heads requires model and dim_in')
    if lvl == 0:
        # Level 0 creates the score-map convs that other levels share
        conv_cls = model.Conv(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
        conv_bbox_pred = model.Conv(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight_init=gauss_fill(0.01),
            bias_init=const_fill(0.0)
        )
    else:
        # Other levels reuse the level-0 weights and biases
        conv_cls = model.ConvShared(
            blob_in,
            'conv_cls_{}'.format(lvl),
            dim_in,
            model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight='conv_cls_0_w',
            bias='conv_cls_0_b'
        )
        conv_bbox_pred = model.ConvShared(
            blob_in,
            'conv_bbox_pred_{}'.format(lvl),
            dim_in,
            4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
            kernel=1, pad=0, stride=1,
            weight='conv_bbox_pred_0_w',
            bias='conv_bbox_pred_0_b'
        )
    # Classification PS RoI pooling, averaged over the PS grid
    model.net.PSRoIPool(
        [conv_cls, rois],
        ['psroipooled_cls' + str(lvl), '_mapping_channel_cls' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=model.num_classes,
        spatial_scale=spatial_scale
    )
    model.AveragePool(
        'psroipooled_cls' + str(lvl), 'cls_score_4d' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    # Flatten to (num_rois, num_classes)
    cls_blob_out, _ = model.net.Reshape(
        'cls_score_4d' + str(lvl),
        ['cls_score' + str(lvl), '_cls_scores_shape' + str(lvl)],
        shape=(-1, cfg.MODEL.NUM_CLASSES)
    )
    if not model.train:
        model.Softmax(
            'cls_score' + str(lvl), 'cls_prob' + str(lvl), engine='CUDNN'
        )
    # Bbox regression PS RoI pooling, averaged over the PS grid
    model.net.PSRoIPool(
        [conv_bbox_pred, rois],
        ['psroipooled_bbox' + str(lvl), '_mapping_channel_bbox' + str(lvl)],
        group_size=cfg.RFCN.PS_GRID_SIZE,
        output_dim=4 * num_bbox_reg_classes,
        spatial_scale=spatial_scale
    )
    bbox_blob_out = model.AveragePool(
        'psroipooled_bbox' + str(lvl), 'bbox_pred' + str(lvl),
        kernel=cfg.RFCN.PS_GRID_SIZE
    )
    return cls_blob_out, bbox_blob_out