def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0,
                          panet=False):
    """Add the specified RoI pooling method.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    RoIFeatureTransform abstracts away:
      - Use of FPN or not
      - Specifics of the transform method

    Args:
        blobs_in: Either a single feature blob, or a list of FPN feature
            blobs ordered from the coarsest level (cfg.FPN.ROI_MAX_LEVEL)
            down to the finest level (cfg.FPN.ROI_MIN_LEVEL).
        rpn_ret: Mapping from blob names to numpy arrays. The single-level
            case reads `blob_rois`. The FPN case reads
            `blob_rois + '_fpn' + str(lvl)` for every level and
            `blob_rois + '_idx_restore_int32'`; with `panet=True` it reads
            only `blob_rois`.
        blob_rois: Base name of the RoI entries in `rpn_ret`.
        method: One of 'RoIPoolF', 'RoICrop' or 'RoIAlign'.
        resolution: Passed as both size arguments of the RoIPoolF / RoIAlign
            op (presumably the pooled height and width). Not used by
            'RoICrop', which reads `self.grid_size` instead.
        spatial_scale: Scale handed to the RoIPoolF / RoIAlign op. A single
            value in the single-level case; in the FPN case a sequence
            indexed in the same (reversed) order as `blobs_in`.
        sampling_ratio: Only forwarded to 'RoIAlign'.
        panet: Only consulted in the FPN case. When true, the same
            `rpn_ret[blob_rois]` RoIs are pooled from every level and the
            per-level outputs are returned as a list, without concatenation
            or unshuffling.

    Returns:
        Single-level case: the pooled features for `rpn_ret[blob_rois]`.
        FPN case, `panet=False`: the per-level pooled features concatenated
        along dim 0 and re-indexed with the restore indices so that they
        match the RoI order from the dataloader.
        FPN case, `panet=True`: a list of the per-level pooled features.

    Raises:
        AssertionError: If `method` is unknown, or if the number of FPN
            blobs does not equal ROI_MAX_LEVEL - ROI_MIN_LEVEL + 1.
    """
    assert method in {'RoIPoolF', 'RoICrop', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)

    def _pool_rois(feats, rois, scale):
        """Apply the selected RoI transform to one feature blob."""
        if method == 'RoIPoolF':
            # Warning!: Not checked whether the implementation matches Detectron
            return RoIPoolFunction(resolution, resolution, scale)(feats, rois)
        if method == 'RoICrop':
            # Warning!: Not checked whether the implementation matches Detectron
            grid_xy = net_utils.affine_grid_gen(
                rois, feats.size()[2:], self.grid_size)
            # Swap the last-axis channels: (x, y) -> (y, x).
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled = RoICropFunction()(feats, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled = F.max_pool2d(pooled, 2, 2)
            return pooled
        # Only 'RoIAlign' is left (guaranteed by the assert above).
        return RoIAlignFunction(
            resolution, resolution, scale, sampling_ratio)(feats, rois)

    if isinstance(blobs_in, list):
        # FPN case: add RoIFeatureTransform to each FPN level
        device_id = blobs_in[0].get_device()
        k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
        k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
        assert len(blobs_in) == k_max - k_min + 1
        bl_out_list = []
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            sc = spatial_scale[k_max - lvl]  # in reversed order
            # With panet every level pools the same, undistributed RoIs.
            bl_rois = blob_rois if panet else blob_rois + '_fpn' + str(lvl)
            # len() rather than truthiness: the entry is a numpy array.
            if len(rpn_ret[bl_rois]):
                rois = Variable(torch.from_numpy(rpn_ret[bl_rois])).cuda(device_id)
                bl_out_list.append(_pool_rois(bl_in, rois, sc))

        if panet:
            return bl_out_list

        # The pooled features from all levels are concatenated along the
        # batch dimension into a single 4D tensor.
        # NOTE(review): if no level had any RoIs, bl_out_list is empty here
        # and torch.cat receives an empty list - confirm callers never hit it.
        xform_shuffled = torch.cat(bl_out_list, dim=0)

        # Unshuffle to match rois from dataloader
        device_id = xform_shuffled.get_device()
        restore_bl = rpn_ret[blob_rois + '_idx_restore_int32']
        restore_bl = Variable(
            torch.from_numpy(restore_bl.astype('int64', copy=False))).cuda(device_id)
        return xform_shuffled[restore_bl]

    # Single feature level
    # rois: holds R regions of interest, each is a 5-tuple
    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
    # rectangle (x1, y1, x2, y2)
    device_id = blobs_in.get_device()
    rois = Variable(torch.from_numpy(rpn_ret[blob_rois])).cuda(device_id)
    return _pool_rois(blobs_in, rois, spatial_scale)
def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
    """Add the specified RoI pooling method.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    RoIFeatureTransform abstracts away:
      - Use of FPN or not
      - Specifics of the transform method

    Args:
        blobs_in: Either a single feature blob, or a list of FPN feature
            blobs ordered from the coarsest level (cfg.FPN.ROI_MAX_LEVEL)
            down to the finest level (cfg.FPN.ROI_MIN_LEVEL).
        rpn_ret: Mapping from blob names to numpy arrays. The single-level
            case reads `blob_rois`; the FPN case reads
            `blob_rois + '_fpn' + str(lvl)` for every level and
            `blob_rois + '_idx_restore_int32'`.
        blob_rois: Base name of the RoI entries in `rpn_ret`.
        method: One of 'RoIPoolF', 'RoICrop' or 'RoIAlign'.
        resolution: Passed as both size arguments of the RoIPoolF / RoIAlign
            op (presumably the pooled height and width). Not used by
            'RoICrop', which reads `self.grid_size` instead.
        spatial_scale: Scale handed to the RoIPoolF / RoIAlign op. A single
            value in the single-level case; in the FPN case a sequence
            indexed in the same (reversed) order as `blobs_in`.
        sampling_ratio: Only forwarded to 'RoIAlign'.

    Returns:
        The pooled features. In the FPN case the per-level outputs are
        concatenated along dim 0 and re-indexed with the restore indices so
        that they match the RoI order from the dataloader.

    Raises:
        AssertionError: If `method` is unknown, or if the number of FPN
            blobs does not equal ROI_MAX_LEVEL - ROI_MIN_LEVEL + 1.
    """
    assert method in {'RoIPoolF', 'RoICrop', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)

    def _pool_rois(feats, rois, scale):
        """Apply the selected RoI transform to one feature blob."""
        if method == 'RoIPoolF':
            # Warning!: Not checked whether the implementation matches Detectron
            return RoIPoolFunction(resolution, resolution, scale)(feats, rois)
        if method == 'RoICrop':
            # Warning!: Not checked whether the implementation matches Detectron
            grid_xy = net_utils.affine_grid_gen(
                rois, feats.size()[2:], self.grid_size)
            # Swap the last-axis channels: (x, y) -> (y, x).
            grid_yx = torch.stack(
                [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
            pooled = RoICropFunction()(feats, Variable(grid_yx).detach())
            if cfg.CROP_RESIZE_WITH_MAX_POOL:
                pooled = F.max_pool2d(pooled, 2, 2)
            return pooled
        # Only 'RoIAlign' is left (guaranteed by the assert above).
        return RoIAlignFunction(
            resolution, resolution, scale, sampling_ratio)(feats, rois)

    if isinstance(blobs_in, list):
        # FPN case: add RoIFeatureTransform to each FPN level
        device_id = blobs_in[0].get_device()
        k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
        k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
        assert len(blobs_in) == k_max - k_min + 1
        bl_out_list = []
        for lvl in range(k_min, k_max + 1):
            bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
            sc = spatial_scale[k_max - lvl]  # in reversed order
            bl_rois = blob_rois + '_fpn' + str(lvl)
            # len() rather than truthiness: the entry is a numpy array.
            if len(rpn_ret[bl_rois]):
                rois = Variable(torch.from_numpy(rpn_ret[bl_rois])).cuda(device_id)
                bl_out_list.append(_pool_rois(bl_in, rois, sc))

        # The pooled features from all levels are concatenated along the
        # batch dimension into a single 4D tensor.
        # NOTE(review): if no level had any RoIs, bl_out_list is empty here
        # and torch.cat receives an empty list - confirm callers never hit it.
        xform_shuffled = torch.cat(bl_out_list, dim=0)

        # Unshuffle to match rois from dataloader
        device_id = xform_shuffled.get_device()
        restore_bl = rpn_ret[blob_rois + '_idx_restore_int32']
        restore_bl = Variable(
            torch.from_numpy(restore_bl.astype('int64', copy=False))).cuda(device_id)
        return xform_shuffled[restore_bl]

    # Single feature level
    # rois: holds R regions of interest, each is a 5-tuple
    # (batch_idx, x1, y1, x2, y2) specifying an image batch index and a
    # rectangle (x1, y1, x2, y2)
    device_id = blobs_in.get_device()
    rois = Variable(torch.from_numpy(rpn_ret[blob_rois])).cuda(device_id)
    return _pool_rois(blobs_in, rois, spatial_scale)
def roi_feature_transform(self, blobs_in, rpn_ret, blob_rois='rois', method='RoIPoolF',
                          resolution=7, spatial_scale=1. / 16., sampling_ratio=0):
    """Add the specified RoI pooling method to every FPN level.

    The sampling_ratio argument is supported for some, but not all, RoI
    transform methods.

    Unlike a single-tensor RoI transform, this variant neither concatenates
    nor unshuffles the per-level results: it returns them as a list. Only
    the FPN (list) input is implemented.

    Args:
        blobs_in: List of FPN feature blobs ordered from the coarsest level
            (cfg.FPN.ROI_MAX_LEVEL) down to the finest level
            (cfg.FPN.ROI_MIN_LEVEL).
        rpn_ret: Mapping from blob names to numpy arrays; reads
            `blob_rois + '_fpn' + str(lvl)` for every level.
        blob_rois: Base name of the RoI entries in `rpn_ret`.
        method: One of 'RoIPoolF', 'RoICrop' or 'RoIAlign'.
        resolution: Passed as both size arguments of the RoIPoolF / RoIAlign
            op (presumably the pooled height and width). Not used by
            'RoICrop', which reads `self.grid_size` instead.
        spatial_scale: Sequence of per-level scales, indexed in the same
            (reversed) order as `blobs_in`.
        sampling_ratio: Only forwarded to 'RoIAlign'.

    Returns:
        A list with the pooled features of each level, from the finest level
        to the coarsest. Levels whose RoI array is empty contribute no
        entry, so the list may be shorter than `blobs_in`.

    Raises:
        AssertionError: If `method` is unknown, or if the number of FPN
            blobs does not equal ROI_MAX_LEVEL - ROI_MIN_LEVEL + 1.
        NotImplementedError: If `blobs_in` is not a list (single feature
            level).
    """
    assert method in {'RoIPoolF', 'RoICrop', 'RoIAlign'}, \
        'Unknown pooling method: {}'.format(method)

    if not isinstance(blobs_in, list):
        # The single-level path is not implemented in this variant. Fail
        # with a clear message instead of hitting an unbound `bl_out_list`
        # at the final return.
        raise NotImplementedError(
            'roi_feature_transform only supports a list of FPN feature blobs, '
            'got {}'.format(type(blobs_in).__name__))

    # FPN case: add RoIFeatureTransform to each FPN level
    device_id = blobs_in[0].get_device()
    k_max = cfg.FPN.ROI_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.ROI_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1
    bl_out_list = []
    for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
        sc = spatial_scale[k_max - lvl]  # in reversed order
        bl_rois = blob_rois + '_fpn' + str(lvl)
        # len() rather than truthiness: the entry is a numpy array.
        if len(rpn_ret[bl_rois]):
            rois = Variable(torch.from_numpy(rpn_ret[bl_rois])).cuda(device_id)
            if method == 'RoIPoolF':
                # Warning!: Not checked whether the implementation matches Detectron
                xform_out = RoIPoolFunction(resolution, resolution, sc)(bl_in, rois)
            elif method == 'RoICrop':
                # Warning!: Not checked whether the implementation matches Detectron
                grid_xy = net_utils.affine_grid_gen(
                    rois, bl_in.size()[2:], self.grid_size)
                # Swap the last-axis channels: (x, y) -> (y, x).
                grid_yx = torch.stack(
                    [grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
                xform_out = RoICropFunction()(bl_in, Variable(grid_yx).detach())
                if cfg.CROP_RESIZE_WITH_MAX_POOL:
                    xform_out = F.max_pool2d(xform_out, 2, 2)
            elif method == 'RoIAlign':
                xform_out = RoIAlignFunction(
                    resolution, resolution, sc, sampling_ratio)(bl_in, rois)
            bl_out_list.append(xform_out)

    # The per-level outputs are returned unfused: no concatenation along the
    # batch dimension and no unshuffling is performed here.
    return bl_out_list