Пример #1
0
	def __init__(self,
				 RelDN,
				 box_roi_pool,
				 box_head,
				 box_predictor,
				 # Faster R-CNN training
				 fg_iou_thresh, bg_iou_thresh,
				 batch_size_per_image, positive_fraction,
				 bbox_reg_weights,
				 # Faster R-CNN inference
				 score_thresh,
				 nms_thresh,
				 detections_per_img,
				 # Mask
				 mask_roi_pool=None,
				 mask_head=None,
				 mask_predictor=None,
				 keypoint_roi_pool=None,
				 keypoint_head=None,
				 keypoint_predictor=None,
				 # Settings of the secondary ("_so") sampler
				 batch_size_per_image_so=64,
				 positive_fraction_so=0.5,
				 ):
		"""Store the RoI-head components and build matching/sampling helpers.

		Args:
			RelDN: stored as ``self.RelDN``.
			box_roi_pool, box_head, box_predictor: box branch components,
				stored as attributes of the same name.
			fg_iou_thresh, bg_iou_thresh: thresholds handed to
				``det_utils.Matcher`` (low-quality matches disabled).
			batch_size_per_image, positive_fraction: arguments of the main
				``BalancedPositiveNegativeSampler`` (``self.fg_bg_sampler``).
			bbox_reg_weights: weights for ``det_utils.BoxCoder``; ``None``
				selects ``(10., 10., 5., 5.)``.
			score_thresh, nms_thresh, detections_per_img: inference
				settings, stored unchanged.
			mask_roi_pool, mask_head, mask_predictor: optional mask branch.
			keypoint_roi_pool, keypoint_head, keypoint_predictor: optional
				keypoint branch.
			batch_size_per_image_so, positive_fraction_so: arguments of the
				second sampler (``self.fg_bg_sampler_so``). The defaults
				reproduce the values that used to be hard-coded here.
				NOTE(review): "so" presumably means subject/object -- confirm.
		"""
		super(RoIHeads, self).__init__()

		self.box_similarity = box_ops.box_iou
		self.RelDN = RelDN
		# assign ground-truth boxes for each proposal
		self.proposal_matcher = det_utils.Matcher(
			fg_iou_thresh,
			bg_iou_thresh,
			allow_low_quality_matches=False)

		self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
			batch_size_per_image,
			positive_fraction)

		# Second sampler with its own (now configurable) budget.
		self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(
			batch_size_per_image_so,
			positive_fraction_so)

		if bbox_reg_weights is None:
			bbox_reg_weights = (10., 10., 5., 5.)
		self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

		self.box_roi_pool = box_roi_pool
		self.box_head = box_head
		self.box_predictor = box_predictor

		self.score_thresh = score_thresh
		self.nms_thresh = nms_thresh
		self.detections_per_img = detections_per_img

		self.mask_roi_pool = mask_roi_pool
		self.mask_head = mask_head
		self.mask_predictor = mask_predictor

		self.keypoint_roi_pool = keypoint_roi_pool
		self.keypoint_head = keypoint_head
		self.keypoint_predictor = keypoint_predictor
Пример #2
0
 def __init__(
         self,
         feature_extractor: Im2VecEncoder,
         pooler_resolution: int = 7,
         pooler_sampling_ratio: int = 2,
         decoder_thresh: float = 0.1,
         decoder_nms_thresh: float = 0.5,
         decoder_detections_per_image: int = 100,
         matcher_high_thresh: float = 0.5,
         matcher_low_thresh: float = 0.5,
         allow_low_quality_matches: bool = True,
         batch_size_per_image: int = 256,
         balance_sampling_fraction: float = 0.25):
     """Build the RoI pooler, box coder, matcher and sampler of the head.

     ``pooler_resolution`` and ``pooler_sampling_ratio`` configure the
     ``MultiScaleRoIAlign`` pooler; the ``decoder_*`` values are stored
     unchanged; the ``matcher_*`` thresholds and
     ``allow_low_quality_matches`` go to ``det_utils.Matcher``;
     ``batch_size_per_image`` and ``balance_sampling_fraction`` go to
     ``det_utils.BalancedPositiveNegativeSampler``.
     """
     super(FasterRCNNROIHead, self).__init__()

     # RoI pooling over the four feature maps keyed 0..3.
     pooler = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                 output_size=pooler_resolution,
                                 sampling_ratio=pooler_sampling_ratio)
     self.roi_pool = pooler
     self.feature_extractor = feature_extractor

     # Box encoding/decoding with fixed regression weights.
     coder_weights = (10., 10., 5., 5.)
     self.box_coder = det_utils.BoxCoder(weights=coder_weights)

     # Decoding settings, kept as given.
     self.decoder_thresh = decoder_thresh
     self.decoder_nms_thresh = decoder_nms_thresh
     self.decoder_detections_per_image = decoder_detections_per_image

     # Proposal-to-target matching and balanced sampling.
     matcher = det_utils.Matcher(
         high_threshold=matcher_high_thresh,
         low_threshold=matcher_low_thresh,
         allow_low_quality_matches=allow_low_quality_matches)
     self.proposal_matcher = matcher

     balanced_sampler = det_utils.BalancedPositiveNegativeSampler(
         batch_size_per_image,
         positive_fraction=balance_sampling_fraction)
     self.sampler = balanced_sampler
Пример #3
0
    def __init__(self,
                 anchor_generator,
                 head,
                 # matching / sampling
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 # proposal filtering
                 pre_nms_top_n,
                 post_nms_top_n,
                 nms_thresh):
        """Store the RPN components and create its matcher, sampler and coder.

        ``anchor_generator`` and ``head`` are kept as attributes. The IoU
        thresholds feed ``det_utils.Matcher`` (low-quality matches off) and
        ``batch_size_per_image`` / ``positive_fraction`` feed the balanced
        sampler. The NMS-related values are stored for later use, the top-n
        values under underscore-prefixed names.
        """
        super(RegionProposalNetwork, self).__init__()

        self.anchor_generator = anchor_generator
        self.head = head

        unit_weights = (1.0, 1.0, 1.0, 1.0)
        self.box_coder = det_utils.BoxCoder(weights=unit_weights)

        # Helpers used during training.
        self.box_similarity = box_ops.box_iou

        anchor_matcher = det_utils.Matcher(fg_iou_thresh,
                                           bg_iou_thresh,
                                           allow_low_quality_matches=False)
        self.proposal_matcher = anchor_matcher

        anchor_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction,
        )
        self.fg_bg_sampler = anchor_sampler

        # Settings used during testing.
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
        self.min_size = 1e-3
Пример #4
0
    def __init__(
            self,
            box_roi_pool,
            box_head,
            box_predictor,
            # Faster R-CNN training
            fg_iou_thresh,
            bg_iou_thresh,
            batch_size_per_image,
            positive_fraction,
            bbox_reg_weights,
            # Faster R-CNN inference
            score_thresh,
            nms_thresh,
            detections_per_img,
            # Mask
            mask_coarse_head=None,
            mask_point_head=None,
    ):
        """Set up the box branch and the coarse/point mask branch settings.

        The box-branch arguments are stored unchanged; the IoU thresholds and
        sampling arguments build the matcher and sampler. ``bbox_reg_weights``
        falls back to ``(10., 10., 5., 5.)`` when ``None``. All mask
        hyper-parameters below are fixed constants, only the two mask heads
        are supplied by the caller.
        """
        super(PointRendRoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        gt_matcher = det_utils.Matcher(fg_iou_thresh,
                                       bg_iou_thresh,
                                       allow_low_quality_matches=False)
        self.proposal_matcher = gt_matcher

        roi_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)
        self.fg_bg_sampler = roi_sampler

        coder_weights = (10., 10., 5., 5.) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Coarse mask head: "0" corresponds to P2 in the FPN feature maps.
        self.mask_coarse_in_features = ("0",)
        self.mask_coarse_side_size = 14
        # Ratio of each FPN stage's feature size to the original image size.
        self._feature_scales = {
            '0': 0.25,
            '1': 0.125,
            '2': 0.0625,
            '3': 0.03125,
            '4': 0.015625,
        }

        self.mask_coarse_head = mask_coarse_head

        # Point head: "0" corresponds to P2 in the FPN.
        self.mask_point_in_features = ["0"]
        self.mask_point_train_num_points = 14 * 14
        self.mask_point_oversample_ratio = 3
        self.mask_point_importance_sample_ratio = 0.75

        # The next two parameters are used in the adaptive subdivision
        # inference procedure.
        self.mask_point_subdivision_steps = 5
        self.mask_point_subdivision_num_points = 28 * 28
        self.mask_point_head = mask_point_head
Пример #5
0
 def test_balanced_positive_negative_sampler(self):
     """Check how many positives/negatives are sampled and where they lie.

     With a budget of 4 and a positive fraction of 0.25 the test expects one
     positive and three negatives to be selected.
     """
     sampler = _utils.BalancedPositiveNegativeSampler(4, 0.25)
     # Layout: six negatives (0), then three positives (1), then two
     # ignored entries (-1).
     labels = torch.tensor([0] * 6 + [1] * 3 + [-1] * 2)
     pos, neg = sampler([labels])
     pos_mask = pos[0]
     neg_mask = neg[0]
     # Exactly one positive is picked, and it lies in the positive slots.
     assert pos_mask.sum() == 1
     assert pos_mask[6:9].sum() == 1
     # Exactly three negatives are picked, all from the negative slots.
     assert neg_mask.sum() == 3
     assert neg_mask[0:6].sum() == 3
Пример #6
0
    def __init__(self):
        """Build every component of the head from constants and ``cfg``.

        Creates the RoI pooler, the box head and a deep copy of it
        (``rlp_head``), the box predictor, the RelDN head, the proposal
        matcher, three balanced samplers and the box coder.
        """
        super(RoIHeads, self).__init__()

        representation_size = 1024

        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                               output_size=7,
                                               sampling_ratio=2)

        # Flattened pooled feature size feeds the two-layer MLP head.
        pooled_side = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(256 * pooled_side**2, representation_size)
        self.rlp_head = copy.deepcopy(self.box_head)

        self.box_predictor = FastRCNNPredictor(representation_size,
                                               cfg.BOX.NUM_CLASSES)

        # Input width is three times the fc7 output (concat of SPO).
        reldn_in_features = self.box_head.fc7.out_features * 3
        self.RelDN = reldn_heads.reldn_head(reldn_in_features)

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        self.proposal_matcher = det_utils.Matcher(cfg.BOX.FG_IOU_THRESH,
                                                  cfg.BOX.BG_IOU_THRESH,
                                                  allow_low_quality_matches=False)

        # One balanced sampler per configured budget.
        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            cfg.BOX.BATCH_SIZE_PER_IMAGE,
            cfg.BOX.POSITIVE_FRACTION)
        self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_SO,
            cfg.MODEL.POSITIVE_FRACTION_SO)
        self.fg_bg_sampler_rlp = det_utils.BalancedPositiveNegativeSampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_REL,
            cfg.MODEL.POSITIVE_FRACTION_REL)

        self.box_coder = det_utils.BoxCoder((10., 10., 5., 5.))
Пример #7
0
    def __init__(self,
                 backbone: ImageEncoder,
                 positive_fraction: float = 0.5,
                 match_thresh_low: float = 0.3,
                 match_thresh_high: float = 0.7,
                 anchor_sizes: List[int] = (128, 256, 512),
                 anchor_aspect_ratios: List[float] = (0.5, 1.0, 2.0),
                 batch_size_per_image: int = 256,
                 pre_nms_top_n: int = 6000,
                 post_nms_top_n: int = 300,
                 nms_thresh: float = 0.7,
                 min_size: int = 0,
                 fpn_post_nms_top_n: int = 1000,
                 fpn_post_nms_per_batch: bool = True,
                 allow_low_quality_matches: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator()) -> None:
        """Build the region proposal network around ``backbone``.

        Args:
            backbone: image encoder, stored as ``self.backbone``.
            positive_fraction: positive fraction handed to the balanced
                sampler.
            match_thresh_low: low threshold of the anchor matcher.
            match_thresh_high: high threshold of the anchor matcher.
            anchor_sizes: sizes passed to ``AnchorGenerator``.
            anchor_aspect_ratios: aspect ratios passed to ``AnchorGenerator``.
            batch_size_per_image: number of boxes the sampler selects per
                image.
            pre_nms_top_n: stored as ``self.pre_nms_top_n``.
            post_nms_top_n: stored as ``self.post_nms_top_n``.
            nms_thresh: stored as ``self.nms_thresh``.
            min_size: stored as ``self.min_size``.
            fpn_post_nms_top_n: accepted but not used in this constructor.
            fpn_post_nms_per_batch: accepted but not used in this
                constructor. It is a flag (default ``True``), so it is
                annotated ``bool`` rather than ``int``.
            allow_low_quality_matches: forwarded to the matcher.
            initializer: applied to ``self`` as the last step.
        """
        super(RPN, self).__init__(None)
        self._rpn_head = RPNHead(256, 3)
        self.min_size = min_size
        self.pre_nms_top_n = pre_nms_top_n
        self.nms_thresh = nms_thresh
        self.post_nms_top_n = post_nms_top_n

        # the BoxCoder just converts the relative regression offsets into absolute
        # coordinates
        self.box_coder = det_utils.BoxCoder(weights=(1., 1., 1., 1.))

        # sampler is responsible for selecting a subset of anchor boxes for computing the loss
        # this makes sure each batch has reasonable balance of foreground/background labels
        # it selects `batch_size_per_image` total boxes
        self.sampler = det_utils.BalancedPositiveNegativeSampler(batch_size_per_image,
                                                                 positive_fraction)

        # matcher decides if an anchor box is a foreground or background based on how much
        # it overlaps with the nearest target box
        self.proposal_matcher = det_utils.Matcher(
                match_thresh_high,
                match_thresh_low,
                allow_low_quality_matches=allow_low_quality_matches)

        self.backbone = backbone
        self.anchor_generator = AnchorGenerator(anchor_sizes, anchor_aspect_ratios)
        # Anchor count of the first entry reported by the generator.
        self.num_anchors = self.anchor_generator.num_anchors_per_location()[0]

        self._loss_meters = {'rpn_cls_loss': Average(), 'rpn_reg_loss': Average()}

        # Run last so it sees every attribute registered above.
        initializer(self)
Пример #8
0
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # new
                 weight_loss=False,
                 use_context=False,
                 track_embedding=None):
        """Store the box branch, the tracking options and build the helpers.

        The IoU thresholds build the proposal matcher (low-quality matches
        disabled); ``batch_size_per_image`` and ``positive_fraction`` build
        the balanced sampler. ``bbox_reg_weights`` defaults to
        ``(10., 10., 5., 5.)`` when ``None``. ``weight_loss``,
        ``use_context`` and ``track_embedding`` are stored unchanged.
        """
        super(TrackHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        gt_matcher = det_utils.Matcher(fg_iou_thresh,
                                       bg_iou_thresh,
                                       allow_low_quality_matches=False)
        self.proposal_matcher = gt_matcher

        roi_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)
        self.fg_bg_sampler = roi_sampler

        coder_weights = (10., 10., 5., 5.) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Tracking-specific options.
        self.weight_loss = weight_loss
        self.use_context = use_context
        self.track_embedding = track_embedding
    def __init__(self,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights):
        """Store the box predictor and build matcher, sampler and box coder.

        ``bbox_reg_weights`` falls back to ``(10., 10., 5., 5.)`` when
        ``None``. The matcher is created with low-quality matches disabled.
        """
        super(SSDHead, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        gt_matcher = det_utils.Matcher(fg_iou_thresh,
                                       bg_iou_thresh,
                                       allow_low_quality_matches=False)
        self.proposal_matcher = gt_matcher

        box_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)
        self.fg_bg_sampler = box_sampler

        coder_weights = (10., 10., 5., 5.) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(coder_weights)

        self.box_predictor = box_predictor
Пример #10
0
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # Faster R-CNN inference
                 score_thresh,
                 nms_thresh,
                 detections_per_img,
                 out_channels,
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 keypoint_roi_pool=None,
                 keypoint_head=None,
                 keypoint_predictor=None,
                 pose_mean=None,
                 pose_stddev=None,
                 threed_68_points=None,
                 threed_5_points=None,
                 bbox_x_factor=1.1,
                 bbox_y_factor=1.1,
                 expand_forehead=0.3):
        """Store the head components and build an extra classification branch.

        Besides the box, mask and keypoint components (stored unchanged),
        this creates ``class_roi_pool``, ``class_head`` and
        ``class_predictor`` for a fixed two-class output. The pose statistics,
        3D reference points and box expansion factors are kept as given.
        ``bbox_reg_weights`` defaults to ``(10.0, 10.0, 5.0, 5.0)`` when
        ``None``.
        """
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        gt_matcher = det_utils.Matcher(fg_iou_thresh,
                                       bg_iou_thresh,
                                       allow_low_quality_matches=False)
        self.proposal_matcher = gt_matcher

        roi_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)
        self.fg_bg_sampler = roi_sampler

        coder_weights = (10.0, 10.0, 5.0, 5.0) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Classification branch with its own pooler, head and predictor.
        num_classes = 2
        representation_size = 1024
        self.class_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"],
                                                 output_size=7,
                                                 sampling_ratio=2)
        # NOTE(review): the head is sized from ``box_roi_pool``'s resolution,
        # not from ``class_roi_pool`` (fixed at 7) -- confirm the two always
        # agree.
        pooled_side = box_roi_pool.output_size[0]
        self.class_head = TwoMLPHead(out_channels * pooled_side**2,
                                     representation_size)
        self.class_predictor = FastRCNNClassPredictor(representation_size,
                                                      num_classes)

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Optional mask branch.
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        # Optional keypoint branch.
        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        # Pose statistics and 3D reference points.
        self.pose_mean = pose_mean
        self.pose_stddev = pose_stddev
        self.threed_68_points = threed_68_points
        self.threed_5_points = threed_5_points

        # Box expansion factors.
        self.bbox_x_factor = bbox_x_factor
        self.bbox_y_factor = bbox_y_factor
        self.expand_forehead = expand_forehead
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # Faster R-CNN inference
                 score_thresh,
                 nms_thresh,
                 detections_per_img,
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 keypoint_roi_pool=None,
                 keypoint_head=None,
                 keypoint_predictor=None,
                 # new
                 use_soft_nms=False,
                 weight_loss=False,
                 use_context=False,
                 use_track_branch=False,
                 track_embedding=None):
        """Store the head components, the extra flags and build the helpers.

        The box, mask and keypoint components and the inference settings are
        stored unchanged. ``bbox_reg_weights`` defaults to
        ``(10., 10., 5., 5.)`` when ``None``.

        NOTE(review): ``use_track_branch`` and ``track_embedding`` are
        accepted but not stored anywhere in this constructor -- confirm
        whether that is intended.
        """
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou

        # Matcher that assigns a ground-truth box to each proposal.
        gt_matcher = det_utils.Matcher(fg_iou_thresh,
                                       bg_iou_thresh,
                                       allow_low_quality_matches=False)
        self.proposal_matcher = gt_matcher

        roi_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)
        self.fg_bg_sampler = roi_sampler

        coder_weights = (10., 10., 5., 5.) if bbox_reg_weights is None else bbox_reg_weights
        self.box_coder = det_utils.BoxCoder(coder_weights)

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Optional mask branch.
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        # Optional keypoint branch.
        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        # Extra flags.
        self.weight_loss = weight_loss
        self.use_soft_nms = use_soft_nms
        self.use_context = use_context
    def __init__(self,
                 out_channels,
                 num_classes,
                 input_mode,
                 acf_head,
                 fg_iou_thresh=0.5,
                 bg_iou_thresh=0.5,
                 batch_size_per_image=512,
                 positive_fraction=0.25,
                 bbox_reg_weights=None,
                 box_score_thresh=0.05,
                 box_nms_thresh=0.5,
                 box_detections_per_img=100):
        """Build the detection, segmentation, PAF and (RGB-D only) ACF branches.

        Args:
            out_channels: channel count used to size the box head and the
                mask/PAF heads; also stored as ``self.in_channels``.
            num_classes: number of classes for the box and mask predictors;
                the other predictors use multiples of ``num_classes - 1``.
            input_mode: the RGB-D specific branches are only built when this
                equals ``config.INPUT_RGBD``.
            acf_head: one of ``'endpoints'``, ``'scatters'`` or
                ``'norm_vector'``; only inspected in RGB-D mode.
            fg_iou_thresh, bg_iou_thresh: proposal matcher thresholds.
            batch_size_per_image, positive_fraction: balanced sampler
                arguments.
            bbox_reg_weights: box coder weights; ``None`` selects
                ``(10., 10., 5., 5.)``.
            box_score_thresh, box_nms_thresh, box_detections_per_img:
                inference settings, stored as ``score_thresh``,
                ``nms_thresh`` and ``detections_per_img``.

        Raises:
            ValueError: in RGB-D mode, when ``acf_head`` is not one of the
                supported names.
        """
        super(RoIHeadsExtend, self).__init__()

        self.in_channels = out_channels
        self.input_mode = input_mode
        self.score_thresh = box_score_thresh
        self.nms_thresh = box_nms_thresh
        self.detections_per_img = box_detections_per_img
        self.fg_iou_thresh = fg_iou_thresh
        self.bg_iou_thresh = bg_iou_thresh
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction
        self.num_classes = num_classes

        # Detection
        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                               output_size=7,
                                               sampling_ratio=2)

        representation_size = 1024
        resolution = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(out_channels * resolution**2,
                                   representation_size)

        self.box_predictor = FastRCNNPredictor(representation_size,
                                               num_classes)

        # Segmentation
        self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                                  output_size=14,
                                                  sampling_ratio=2)

        mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
        mask_dilation = 1
        self.mask_head = MaskRCNNHeads(out_channels, mask_layers,
                                       mask_dilation)

        mask_predictor_in_channels = 256  # == mask_layers[-1]
        mask_dim_reduced = 256
        self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                mask_dim_reduced, num_classes)

        # PAF branch reuses the mask head/predictor architecture.
        self.with_paf_branch = True
        if self.with_paf_branch:
            self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                          mask_dilation)
            self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                   mask_dim_reduced,
                                                   2 * (num_classes - 1))

        if self.input_mode == config.INPUT_RGBD:
            self.attention_block = ContextBlock(256, 2)
            self.global_feature_dim = 256
            self.with_3d_keypoints = True
            self.with_axis_keypoints = False
            self.regress_axis = False
            self.estimate_norm_vector = False
            # Exactly one ACF variant is enabled, chosen by name.
            if acf_head == 'endpoints':
                self.with_axis_keypoints = True
            elif acf_head == 'scatters':
                self.regress_axis = True
            elif acf_head == 'norm_vector':
                self.estimate_norm_vector = True
            else:
                # Fail loudly instead of print() + exit(): exit() ends the
                # interpreter with a success status and cannot be handled
                # sensibly by callers.
                raise ValueError(
                    "Invalid acf_head {!r}; expected 'endpoints', "
                    "'scatters' or 'norm_vector'".format(acf_head))
            keypoint_layers = (256, ) * 4
            self.keypoint_dim_reduced = keypoint_layers[-1]
            if self.with_3d_keypoints:
                self.vote_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
            if self.with_axis_keypoints:
                self.orientation_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")

                self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 6 * (num_classes - 1))

            if self.regress_axis:
                self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                                   keypoint_layers, "conv2d")
                self.axis_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 4 * (num_classes - 1))

            if self.estimate_norm_vector:
                self.norm_vector_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.norm_vector_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
Пример #13
0
    def __init__(self,
                 box_roi_pool,
                 box_head,
                 box_predictor,
                 # Faster R-CNN training
                 fg_iou_thresh,
                 bg_iou_thresh,
                 batch_size_per_image,
                 positive_fraction,
                 bbox_reg_weights,
                 # Faster R-CNN inference
                 score_thresh,
                 nms_thresh,
                 detections_per_img,
                 # Mask
                 mask_roi_pool=None,
                 mask_head=None,
                 mask_predictor=None,
                 keypoint_roi_pool=None,
                 keypoint_head=None,
                 keypoint_predictor=None):
        """Store the cascade components and build per-stage matchers/coders.

        ``fg_iou_thresh`` and ``bg_iou_thresh`` are indexed per stage, and so
        is ``bbox_reg_weights``, which defaults to three weight tuples when
        ``None``. The box, mask and keypoint components and the inference
        settings are stored unchanged.

        NOTE(review): ``num_cascade_stages`` is ``len(box_head)`` while the
        matchers and coders are always built for exactly three stages --
        confirm the two are meant to agree.
        """
        super(CascadeRoIHeads, self).__init__()

        self.num_cascade_stages = len(box_head)
        self.box_similarity = box_ops.box_iou

        # Matcher for the first stage: assigns ground-truth boxes to proposals.
        self.proposal_matcher = det_utils.Matcher(fg_iou_thresh[0],
                                                  bg_iou_thresh[0],
                                                  allow_low_quality_matches=False)

        # One matcher per stage, each with that stage's thresholds.
        self.proposal_matchers = [
            det_utils.Matcher(fg_iou_thresh[stage],
                              bg_iou_thresh[stage],
                              allow_low_quality_matches=False)
            for stage in range(3)
        ]

        roi_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image,
            positive_fraction)
        self.fg_bg_sampler = roi_sampler

        if bbox_reg_weights is None:
            bbox_reg_weights = [
                (10., 10., 5., 5.),
                (20., 20., 10., 10.),
                (30., 30., 15., 15.),
            ]
        # One box coder per stage.
        self.box_coders = [
            det_utils.BoxCoder(bbox_reg_weights[stage]) for stage in range(3)
        ]

        # Box branch.
        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        # Inference settings.
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img

        # Optional mask branch.
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        # Optional keypoint branch.
        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor