def __init__(self,
             RelDN,
             box_roi_pool,
             box_head,
             box_predictor,
             # Faster R-CNN training
             fg_iou_thresh, bg_iou_thresh,
             batch_size_per_image, positive_fraction,
             bbox_reg_weights,
             # Faster R-CNN inference
             score_thresh,
             nms_thresh,
             detections_per_img,
             # Mask
             mask_roi_pool=None,
             mask_head=None,
             mask_predictor=None,
             keypoint_roi_pool=None,
             keypoint_head=None,
             keypoint_predictor=None,
             ):
    """Set up the RoI heads together with the RelDN component.

    Args:
        RelDN: stored unchanged as ``self.RelDN``.
        box_roi_pool, box_head, box_predictor: box-branch components,
            stored unchanged.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to the
            ``det_utils.BalancedPositiveNegativeSampler`` kept as
            ``self.fg_bg_sampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
        score_thresh, nms_thresh, detections_per_img: inference settings,
            stored unchanged.
        mask_roi_pool, mask_head, mask_predictor: optional, stored unchanged.
        keypoint_roi_pool, keypoint_head, keypoint_predictor: optional,
            stored unchanged.
    """
    super(RoIHeads, self).__init__()

    # Resolve the default regression weights before anything is built.
    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)

    # Plain inference settings.
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img

    self.box_similarity = box_ops.box_iou
    self.RelDN = RelDN

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )

    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    # The "_so" sampler always uses a batch size of 64 and a positive
    # fraction of 0.5, independent of the constructor arguments.
    self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(64, 0.5)

    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    # Box branch.
    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor

    # Optional mask branch.
    self.mask_roi_pool = mask_roi_pool
    self.mask_head = mask_head
    self.mask_predictor = mask_predictor

    # Optional keypoint branch.
    self.keypoint_roi_pool = keypoint_roi_pool
    self.keypoint_head = keypoint_head
    self.keypoint_predictor = keypoint_predictor
def __init__(self,
             feature_extractor: Im2VecEncoder,
             pooler_resolution: int = 7,
             pooler_sampling_ratio: int = 2,
             decoder_thresh: float = 0.1,
             decoder_nms_thresh: float = 0.5,
             decoder_detections_per_image: int = 100,
             matcher_high_thresh: float = 0.5,
             matcher_low_thresh: float = 0.5,
             allow_low_quality_matches: bool = True,
             batch_size_per_image: int = 256,
             balance_sampling_fraction: float = 0.25):
    """Build the Faster R-CNN RoI head.

    Args:
        feature_extractor: stored unchanged as ``self.feature_extractor``.
        pooler_resolution: ``output_size`` of the ``MultiScaleRoIAlign``.
        pooler_sampling_ratio: ``sampling_ratio`` of the same pooler.
        decoder_thresh, decoder_nms_thresh, decoder_detections_per_image:
            decoding settings, stored unchanged.
        matcher_high_thresh, matcher_low_thresh, allow_low_quality_matches:
            forwarded by keyword to ``det_utils.Matcher``.
        batch_size_per_image: first argument of the balanced sampler.
        balance_sampling_fraction: the sampler's ``positive_fraction``.
    """
    super(FasterRCNNROIHead, self).__init__()

    # Decoding settings are plain values; record them first.
    self.decoder_thresh = decoder_thresh
    self.decoder_nms_thresh = decoder_nms_thresh
    self.decoder_detections_per_image = decoder_detections_per_image

    # RoI pooling over the feature maps named 0..3.
    pooler = MultiScaleRoIAlign(
        featmap_names=[0, 1, 2, 3],
        output_size=pooler_resolution,
        sampling_ratio=pooler_sampling_ratio,
    )
    self.roi_pool = pooler
    self.feature_extractor = feature_extractor

    self.box_coder = det_utils.BoxCoder(weights=(10., 10., 5., 5.))

    matcher = det_utils.Matcher(
        high_threshold=matcher_high_thresh,
        low_threshold=matcher_low_thresh,
        allow_low_quality_matches=allow_low_quality_matches,
    )
    self.proposal_matcher = matcher

    self.sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction=balance_sampling_fraction,
    )
def __init__(
        self,
        anchor_generator,
        head,
        # matching / sampling
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        # proposal filtering
        pre_nms_top_n, post_nms_top_n, nms_thresh):
    """Build the region proposal network.

    Args:
        anchor_generator: stored unchanged as ``self.anchor_generator``.
        head: stored unchanged as ``self.head``.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        pre_nms_top_n, post_nms_top_n: stored on the private attributes
            ``_pre_nms_top_n`` / ``_post_nms_top_n``.
        nms_thresh: stored unchanged.
    """
    super(RegionProposalNetwork, self).__init__()

    # used during testing
    self._pre_nms_top_n = pre_nms_top_n
    self._post_nms_top_n = post_nms_top_n
    self.nms_thresh = nms_thresh
    self.min_size = 1e-3

    self.anchor_generator = anchor_generator
    self.head = head

    unit_weights = (1.0, 1.0, 1.0, 1.0)
    self.box_coder = det_utils.BoxCoder(weights=unit_weights)

    # used during training
    self.box_similarity = box_ops.box_iou
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
def __init__(self,
             box_roi_pool,
             box_head,
             box_predictor,
             # Faster R-CNN training
             fg_iou_thresh, bg_iou_thresh,
             batch_size_per_image, positive_fraction,
             bbox_reg_weights,
             # Faster R-CNN inference
             score_thresh,
             nms_thresh,
             detections_per_img,
             # Mask
             mask_coarse_head=None,
             mask_point_head=None,
             ):
    """Build the PointRend RoI heads.

    Args:
        box_roi_pool, box_head, box_predictor: box-branch components,
            stored unchanged.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
        score_thresh, nms_thresh, detections_per_img: inference settings,
            stored unchanged.
        mask_coarse_head, mask_point_head: optional mask components,
            stored unchanged.
    """
    super(PointRendRoIHeads, self).__init__()

    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)

    # Inference settings.
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor

    # --- coarse mask head settings ---
    # "0" corresponds to P2 in the FPN feature maps
    self.mask_coarse_in_features = ("0",)
    self.mask_coarse_side_size = 14
    # Ratio of each FPN stage's feature size to the original image size:
    # '0' -> 0.25, '1' -> 0.125, '2' -> 0.0625, '3' -> 0.03125, '4' -> 0.015625
    self._feature_scales = {
        str(level): 0.25 / 2 ** level for level in range(5)
    }
    self.mask_coarse_head = mask_coarse_head

    # --- point head settings ---
    # "0" corresponds to p2 in the FPN
    self.mask_point_in_features = ["0"]
    self.mask_point_train_num_points = 14 ** 2
    self.mask_point_oversample_ratio = 3
    self.mask_point_importance_sample_ratio = 0.75
    # the next two parameters are used in the adaptive subdivision
    # inference procedure
    self.mask_point_subdivision_steps = 5
    self.mask_point_subdivision_num_points = 28 ** 2
    self.mask_point_head = mask_point_head
def test_balanced_positive_negative_sampler(self):
    """Check the sample counts and locations chosen by the balanced sampler."""
    sampler = _utils.BalancedPositiveNegativeSampler(4, 0.25)

    # Layout: 6 negatives (0), then 3 positives (1), then 2 ignored (-1).
    labels = [0] * 6 + [1] * 3 + [-1] * 2
    matched_idxs = [torch.tensor(labels)]

    pos, neg = sampler(matched_idxs)
    pos_mask = pos[0]
    neg_mask = neg[0]

    # With a batch of 4 and a positive fraction of 0.25 we expect exactly
    # one positive and three negatives, each drawn from its own slice.
    assert pos_mask.sum() == 1
    assert pos_mask[6:9].sum() == 1
    assert neg_mask.sum() == 3
    assert neg_mask[0:6].sum() == 3
def __init__(self):
    """Build the RoI heads entirely from the global ``cfg`` object.

    Creates the box pooler, box head, a deep copy of the box head
    (``rlp_head``), the box predictor and the RelDN head, followed by the
    matcher, three balanced samplers and the box coder.
    """
    super(RoIHeads, self).__init__()

    representation_size = 1024

    # Sub-modules are created in a fixed order: pooler, box head, its
    # copy, predictor, RelDN.
    self.box_roi_pool = MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'],
        output_size=7,
        sampling_ratio=2,
    )
    resolution = self.box_roi_pool.output_size[0]

    self.box_head = TwoMLPHead(256 * resolution**2, representation_size)
    self.rlp_head = copy.deepcopy(self.box_head)

    self.box_predictor = FastRCNNPredictor(
        representation_size, cfg.BOX.NUM_CLASSES)

    # concat of SPO
    reldn_in_features = self.box_head.fc7.out_features * 3
    self.RelDN = reldn_heads.reldn_head(reldn_in_features)

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        cfg.BOX.FG_IOU_THRESH,
        cfg.BOX.BG_IOU_THRESH,
        allow_low_quality_matches=False,
    )

    # One sampler per configuration group: box, "SO" and "REL".
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        cfg.BOX.BATCH_SIZE_PER_IMAGE,
        cfg.BOX.POSITIVE_FRACTION,
    )
    self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(
        cfg.MODEL.BATCH_SIZE_PER_IMAGE_SO,
        cfg.MODEL.POSITIVE_FRACTION_SO,
    )
    self.fg_bg_sampler_rlp = det_utils.BalancedPositiveNegativeSampler(
        cfg.MODEL.BATCH_SIZE_PER_IMAGE_REL,
        cfg.MODEL.POSITIVE_FRACTION_REL,
    )

    self.box_coder = det_utils.BoxCoder((10., 10., 5., 5.))
def __init__(self,
             backbone: ImageEncoder,
             positive_fraction: float = 0.5,
             match_thresh_low: float = 0.3,
             match_thresh_high: float = 0.7,
             anchor_sizes: List[int] = (128, 256, 512),
             anchor_aspect_ratios: List[float] = (0.5, 1.0, 2.0),
             batch_size_per_image: int = 256,
             pre_nms_top_n: int = 6000,
             post_nms_top_n: int = 300,
             nms_thresh: float = 0.7,
             min_size: int = 0,
             fpn_post_nms_top_n: int = 1000,
             fpn_post_nms_per_batch: int = True,
             allow_low_quality_matches: bool = True,
             initializer: InitializerApplicator = InitializerApplicator()) -> None:
    """Build the RPN around ``backbone``.

    Args:
        backbone: stored unchanged as ``self.backbone``.
        positive_fraction, batch_size_per_image: forwarded to the balanced
            positive/negative sampler.
        match_thresh_low, match_thresh_high, allow_low_quality_matches:
            forwarded to ``det_utils.Matcher`` (high threshold first).
        anchor_sizes, anchor_aspect_ratios: forwarded to ``AnchorGenerator``.
        pre_nms_top_n, post_nms_top_n, nms_thresh, min_size: stored
            unchanged.
        fpn_post_nms_top_n, fpn_post_nms_per_batch: accepted but not used
            by this constructor.
        initializer: called with ``self`` as the last construction step.
    """
    # NOTE(review): ``fpn_post_nms_per_batch`` is annotated ``int`` but
    # defaults to ``True``; it looks like a boolean flag - confirm before
    # changing the annotation, since the signature may be introspected.
    super(RPN, self).__init__(None)

    self._rpn_head = RPNHead(256, 3)

    # Proposal filtering settings.
    self.pre_nms_top_n = pre_nms_top_n
    self.post_nms_top_n = post_nms_top_n
    self.nms_thresh = nms_thresh
    self.min_size = min_size

    # The BoxCoder turns relative regression offsets into absolute
    # coordinates.
    self.box_coder = det_utils.BoxCoder(weights=(1., 1., 1., 1.))

    # The sampler picks the subset of anchor boxes used for the loss so
    # that foreground and background labels stay reasonably balanced; it
    # selects `batch_size_per_image` boxes in total.
    self.sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image, positive_fraction)

    # The matcher labels an anchor as foreground or background from its
    # overlap with the nearest target box.
    self.proposal_matcher = det_utils.Matcher(
        match_thresh_high,
        match_thresh_low,
        allow_low_quality_matches=allow_low_quality_matches,
    )

    self.backbone = backbone

    anchors = AnchorGenerator(anchor_sizes, anchor_aspect_ratios)
    self.anchor_generator = anchors
    self.num_anchors = anchors.num_anchors_per_location()[0]

    self._loss_meters = {
        'rpn_cls_loss': Average(),
        'rpn_reg_loss': Average(),
    }

    initializer(self)
def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights,
        # new
        weight_loss=False,
        use_context=False,
        track_embedding=None):
    """Build the tracking heads.

    Args:
        box_roi_pool, box_head, box_predictor: box-branch components,
            stored unchanged.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
        weight_loss, use_context, track_embedding: stored unchanged.
    """
    super(TrackHeads, self).__init__()

    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)

    # Simple flags.
    self.weight_loss = weight_loss
    self.use_context = use_context

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor
    self.track_embedding = track_embedding
def __init__(
        self,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights):
    """Build the SSD head.

    Args:
        box_predictor: stored unchanged as ``self.box_predictor``.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
    """
    super(SSDHead, self).__init__()

    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    self.box_predictor = box_predictor
def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        out_channels,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
        pose_mean=None,
        pose_stddev=None,
        threed_68_points=None,
        threed_5_points=None,
        bbox_x_factor=1.1,
        bbox_y_factor=1.1,
        expand_forehead=0.3,
):
    """Build the RoI heads with an additional two-class branch.

    Args:
        box_roi_pool, box_head, box_predictor: box-branch components,
            stored unchanged.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10.0, 10.0, 5.0, 5.0)``.
        score_thresh, nms_thresh, detections_per_img: inference settings,
            stored unchanged.
        out_channels: channel count used to size ``self.class_head``.
        mask_* / keypoint_*: optional components, stored unchanged.
        pose_mean, pose_stddev, threed_68_points, threed_5_points,
        bbox_x_factor, bbox_y_factor, expand_forehead: stored unchanged.
    """
    super(RoIHeads, self).__init__()

    if bbox_reg_weights is None:
        bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)

    # Inference settings.
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    # Box branch.
    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor

    # Class branch: its own pooler, a two-layer MLP head and a predictor
    # with two output classes.
    self.class_roi_pool = MultiScaleRoIAlign(
        featmap_names=["0", "1", "2", "3"],
        output_size=7,
        sampling_ratio=2,
    )
    # NOTE(review): the head is sized from ``box_roi_pool``'s output size,
    # not from ``class_roi_pool`` (fixed at 7) - confirm this is intended.
    pooled_side = box_roi_pool.output_size[0]
    hidden_size = 1024
    self.class_head = TwoMLPHead(out_channels * pooled_side**2, hidden_size)
    self.class_predictor = FastRCNNClassPredictor(hidden_size, 2)

    # Optional mask branch.
    self.mask_roi_pool = mask_roi_pool
    self.mask_head = mask_head
    self.mask_predictor = mask_predictor

    # Optional keypoint branch.
    self.keypoint_roi_pool = keypoint_roi_pool
    self.keypoint_head = keypoint_head
    self.keypoint_predictor = keypoint_predictor

    # Pose statistics and 3D reference points.
    self.pose_mean = pose_mean
    self.pose_stddev = pose_stddev
    self.threed_68_points = threed_68_points
    self.threed_5_points = threed_5_points

    # Box expansion settings.
    self.bbox_x_factor = bbox_x_factor
    self.bbox_y_factor = bbox_y_factor
    self.expand_forehead = expand_forehead
def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
        # new
        use_soft_nms=False,
        weight_loss=False,
        use_context=False,
        use_track_branch=False,
        track_embedding=None):
    """Build the RoI heads with the extra soft-NMS / context options.

    Args:
        box_roi_pool, box_head, box_predictor: box-branch components,
            stored unchanged.
        fg_iou_thresh, bg_iou_thresh: forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
        score_thresh, nms_thresh, detections_per_img: inference settings,
            stored unchanged.
        mask_* / keypoint_*: optional components, stored unchanged.
        use_soft_nms, weight_loss, use_context: flags, stored unchanged.
        use_track_branch, track_embedding: accepted by the signature.
    """
    # NOTE(review): ``use_track_branch`` and ``track_embedding`` are not
    # stored anywhere in the code visible here - confirm whether the
    # assignments live further down or were dropped.
    super(RoIHeads, self).__init__()

    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)

    # Inference settings.
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img

    # Extra behaviour flags.
    self.use_soft_nms = use_soft_nms
    self.weight_loss = weight_loss
    self.use_context = use_context

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False,
    )
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    # Box branch.
    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor

    # Optional mask branch.
    self.mask_roi_pool = mask_roi_pool
    self.mask_head = mask_head
    self.mask_predictor = mask_predictor

    # Optional keypoint branch.
    self.keypoint_roi_pool = keypoint_roi_pool
    self.keypoint_head = keypoint_head
    self.keypoint_predictor = keypoint_predictor
def __init__(self,
             out_channels,
             num_classes,
             input_mode,
             acf_head,
             fg_iou_thresh=0.5,
             bg_iou_thresh=0.5,
             batch_size_per_image=512,
             positive_fraction=0.25,
             bbox_reg_weights=None,
             box_score_thresh=0.05,
             box_nms_thresh=0.5,
             box_detections_per_img=100):
    """Build the extended RoI heads (detection, segmentation, keypoints).

    Args:
        out_channels: feature channel count; stored as ``self.in_channels``
            and used to size the box, mask and PAF heads.
        num_classes: number of classes; sizes every predictor.
        input_mode: stored unchanged; when equal to ``config.INPUT_RGBD``
            an attention block is created as well.
        acf_head: selects the extra keypoint branch. One of
            ``'endpoints'``, ``'scatters'`` or ``'norm_vector'``.
        fg_iou_thresh, bg_iou_thresh: stored, and forwarded positionally to
            ``det_utils.Matcher`` (``allow_low_quality_matches=False``).
        batch_size_per_image, positive_fraction: stored, and forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: weights given to ``det_utils.BoxCoder``;
            ``None`` selects ``(10., 10., 5., 5.)``.
        box_score_thresh, box_nms_thresh, box_detections_per_img: stored
            as ``score_thresh``, ``nms_thresh`` and ``detections_per_img``.

    Raises:
        ValueError: if ``acf_head`` is not one of the supported values.
    """
    super(RoIHeadsExtend, self).__init__()

    self.in_channels = out_channels
    self.input_mode = input_mode
    self.score_thresh = box_score_thresh
    self.nms_thresh = box_nms_thresh
    self.detections_per_img = box_detections_per_img
    self.fg_iou_thresh = fg_iou_thresh
    self.bg_iou_thresh = bg_iou_thresh
    self.batch_size_per_image = batch_size_per_image
    self.positive_fraction = positive_fraction
    self.num_classes = num_classes

    # Detection
    self.box_similarity = box_ops.box_iou
    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh,
        bg_iou_thresh,
        allow_low_quality_matches=False)
    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction)

    if bbox_reg_weights is None:
        bbox_reg_weights = (10., 10., 5., 5.)
    self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

    self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                           output_size=7,
                                           sampling_ratio=2)
    representation_size = 1024
    resolution = self.box_roi_pool.output_size[0]
    self.box_head = TwoMLPHead(out_channels * resolution**2,
                               representation_size)
    self.box_predictor = FastRCNNPredictor(representation_size, num_classes)

    # Segmentation
    self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=14,
                                              sampling_ratio=2)
    resolution = self.shared_roi_pool.output_size[0]

    mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
    mask_dilation = 1
    self.mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)

    mask_predictor_in_channels = 256  # == mask_layers[-1]
    mask_dim_reduced = 256
    self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                            mask_dim_reduced, num_classes)

    self.with_paf_branch = True
    if self.with_paf_branch:
        self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                      mask_dilation)
        self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                               mask_dim_reduced,
                                               2 * (num_classes - 1))

    # NOTE(review): the flattened source does not show where this branch
    # ends. ``global_feature_dim`` is read unconditionally below, so it is
    # assumed to sit outside the RGBD check - confirm against the original.
    if self.input_mode == config.INPUT_RGBD:
        self.attention_block = ContextBlock(256, 2)
    self.global_feature_dim = 256

    self.with_3d_keypoints = True
    self.with_axis_keypoints = False
    self.regress_axis = False
    self.estimate_norm_vector = False

    if acf_head == 'endpoints':
        self.with_axis_keypoints = True
    elif acf_head == 'scatters':
        self.regress_axis = True
    elif acf_head == 'norm_vector':
        self.estimate_norm_vector = True
    else:
        # Raise instead of print + exit(): exit() would end the process
        # with a success status and depends on the ``site`` module.
        raise ValueError(
            "Invalid acf_head {!r}: expected 'endpoints', 'scatters' "
            "or 'norm_vector'".format(acf_head))

    keypoint_layers = (256, ) * 4
    self.keypoint_dim_reduced = keypoint_layers[-1]

    if self.with_3d_keypoints:
        self.vote_keypoint_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 3 * (num_classes - 1))

    if self.with_axis_keypoints:
        self.orientation_keypoint_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 6 * (num_classes - 1))

    if self.regress_axis:
        self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                           keypoint_layers, "conv2d")
        self.axis_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 4 * (num_classes - 1))

    if self.estimate_norm_vector:
        self.norm_vector_head = Vote_Kpoints_head(
            self.global_feature_dim, keypoint_layers, "conv2d")
        self.norm_vector_predictor = Vote_Kpoints_Predictor(
            self.keypoint_dim_reduced, 3 * (num_classes - 1))
def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh, bg_iou_thresh,
        batch_size_per_image, positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
):
    """Build the cascade RoI heads.

    Args:
        box_roi_pool, box_predictor: stored unchanged.
        box_head: stored unchanged; its ``len()`` becomes
            ``self.num_cascade_stages``.
        fg_iou_thresh, bg_iou_thresh: indexable thresholds; entries 0..2
            build one ``det_utils.Matcher`` each, and entry 0 additionally
            builds ``self.proposal_matcher``.
        batch_size_per_image, positive_fraction: forwarded to
            ``det_utils.BalancedPositiveNegativeSampler``.
        bbox_reg_weights: indexable collection of weight tuples, one per
            box coder; ``None`` selects three built-in tuples.
        score_thresh, nms_thresh, detections_per_img: inference settings,
            stored unchanged.
        mask_* / keypoint_*: optional components, stored unchanged.
    """
    super(CascadeRoIHeads, self).__init__()

    self.num_cascade_stages = len(box_head)

    # Inference settings.
    self.score_thresh = score_thresh
    self.nms_thresh = nms_thresh
    self.detections_per_img = detections_per_img

    self.box_similarity = box_ops.box_iou

    # assign ground-truth boxes for each proposal
    self.proposal_matcher = det_utils.Matcher(
        fg_iou_thresh[0],
        bg_iou_thresh[0],
        allow_low_quality_matches=False,
    )
    # NOTE(review): three matchers/coders are always built, regardless of
    # ``num_cascade_stages`` - confirm the stage count is meant to be fixed.
    self.proposal_matchers = [
        det_utils.Matcher(
            fg_iou_thresh[stage],
            bg_iou_thresh[stage],
            allow_low_quality_matches=False,
        )
        for stage in range(3)
    ]

    self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
        batch_size_per_image,
        positive_fraction,
    )

    if bbox_reg_weights is None:
        bbox_reg_weights = [
            (10., 10., 5., 5.),
            (20., 20., 10., 10.),
            (30., 30., 15., 15.),
        ]
    self.box_coders = [
        det_utils.BoxCoder(bbox_reg_weights[stage]) for stage in range(3)
    ]

    # Box branch.
    self.box_roi_pool = box_roi_pool
    self.box_head = box_head
    self.box_predictor = box_predictor

    # Optional mask branch.
    self.mask_roi_pool = mask_roi_pool
    self.mask_head = mask_head
    self.mask_predictor = mask_predictor

    # Optional keypoint branch.
    self.keypoint_roi_pool = keypoint_roi_pool
    self.keypoint_head = keypoint_head
    self.keypoint_predictor = keypoint_predictor