def output_shape(self): """ Returns: ShapeSpec: the output feature shape """ o = self._output_size if isinstance(o, int): return ShapeSpec(channels=o) else: return ShapeSpec(channels=o[0], height=o[1], width=o[2])
def _init_point_head(self, cfg, input_shape):
    # fmt: off
    self.mask_point_on = True  # always on
    assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
    self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES
    self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
    # the next two parameters are used in the adaptive subdivision inference procedure
    self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
    self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
    # fmt: on

    in_channels = int(
        np.sum([input_shape[f].channels for f in self.mask_point_in_features]))
    self.point_head = build_point_head(
        cfg, ShapeSpec(channels=in_channels, width=1, height=1))
    self.num_params = self.point_head.num_params

    # inference parameters
    self.mask_point_subdivision_init_resolution = int(
        math.sqrt(self.mask_point_subdivision_num_points))
    assert (self.mask_point_subdivision_init_resolution
            * self.mask_point_subdivision_init_resolution
            == self.mask_point_subdivision_num_points)

def _init_point_head(self, cfg, input_shape):
    # fmt: off
    self.mask_point_on = cfg.MODEL.ROI_MASK_HEAD.POINT_HEAD_ON
    if not self.mask_point_on:
        return
    assert cfg.MODEL.ROI_HEADS.NUM_CLASSES == cfg.MODEL.POINT_HEAD.NUM_CLASSES
    self.mask_point_in_features = cfg.MODEL.POINT_HEAD.IN_FEATURES
    self.mask_point_train_num_points = cfg.MODEL.POINT_HEAD.TRAIN_NUM_POINTS
    self.mask_point_oversample_ratio = cfg.MODEL.POINT_HEAD.OVERSAMPLE_RATIO
    self.mask_point_importance_sample_ratio = cfg.MODEL.POINT_HEAD.IMPORTANCE_SAMPLE_RATIO
    # the next three parameters are used in the adaptive subdivision inference procedure
    self.mask_point_subdivision_init_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION
    self.mask_point_subdivision_steps = cfg.MODEL.POINT_HEAD.SUBDIVISION_STEPS
    self.mask_point_subdivision_num_points = cfg.MODEL.POINT_HEAD.SUBDIVISION_NUM_POINTS
    # fmt: on

    in_channels = int(
        np.sum([input_shape[f].channels for f in self.mask_point_in_features]))
    self.point_head = build_point_head(
        cfg, ShapeSpec(channels=in_channels, width=1, height=1))

    # An optimization to skip unused subdivision steps: if after subdivision, all pixels on
    # the mask will be selected and recomputed anyway, we should just double our init_resolution
    while (4 * self.mask_point_subdivision_init_resolution**2
           <= self.mask_point_subdivision_num_points):
        self.mask_point_subdivision_init_resolution *= 2
        self.mask_point_subdivision_steps -= 1

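# Standalone sketch of the optimization loop above with illustrative (assumed, not
# authoritative) values: starting from a 7x7 coarse prediction with a 784-point subdivision
# budget, the first two subdivision steps would select every pixel anyway, so they are folded
# into the initial resolution (7 -> 14 -> 28) and the step count drops accordingly.
init_resolution, steps, num_points = 7, 5, 28 * 28
while 4 * init_resolution**2 <= num_points:
    init_resolution *= 2
    steps -= 1
print(init_resolution, steps)  # 28, 3
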
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    super().__init__()
    self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()}
    # point head
    self._init_point_head(cfg, input_shape)
    # coarse mask head
    self.roi_pooler_in_features = cfg.MODEL.ROI_MASK_HEAD.IN_FEATURES
    self.roi_pooler_size = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
    self._feature_scales = {k: 1.0 / v.stride for k, v in input_shape.items()}
    in_channels = np.sum([input_shape[f].channels for f in self.roi_pooler_in_features])
    self._init_roi_head(
        cfg,
        ShapeSpec(
            channels=in_channels,
            width=self.roi_pooler_size,
            height=self.roi_pooler_size,
        ),
    )

def output_shape(self):
    return {
        name: ShapeSpec(
            channels=self._out_feature_channels[name],
            stride=self._out_feature_strides[name],
        )
        for name in self._out_features
    }

def _init_keypoint_head(cls, cfg, input_shape):
    if not cfg.MODEL.KEYPOINT_ON:
        return {}
    # fmt: off
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION
    pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)  # noqa
    sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO
    pooler_type = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_TYPE
    # fmt: on

    in_channels = [input_shape[f].channels for f in in_features][0]

    ret = {"keypoint_in_features": in_features}
    ret["keypoint_pooler"] = (
        ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        if pooler_type
        else None
    )
    if pooler_type:
        shape = ShapeSpec(
            channels=in_channels, width=pooler_resolution, height=pooler_resolution
        )
    else:
        shape = {f: input_shape[f] for f in in_features}
    ret["keypoint_head"] = build_keypoint_head(cfg, shape)
    return ret

def _init_box_head(cls, cfg, input_shape): # fmt: off in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features) sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE # fmt: on assert pooler_type in ["ROIAlignRotated"], pooler_type # assume all channel counts are equal in_channels = [input_shape[f].channels for f in in_features][0] box_pooler = ROIPooler( output_size=pooler_resolution, scales=pooler_scales, sampling_ratio=sampling_ratio, pooler_type=pooler_type, ) box_head = build_box_head( cfg, ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution)) # This line is the only difference v.s. StandardROIHeads box_predictor = RotatedFastRCNNOutputLayers(cfg, box_head.output_shape) return { "box_in_features": in_features, "box_pooler": box_pooler, "box_head": box_head, "box_predictor": box_predictor, }
def output_shape(self): """ Returns: dict[str->ShapeSpec] """ # this is a backward-compatible default return { name: ShapeSpec( channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] ) for name in self._out_features }
def from_config(cls, cfg, input_shape):
    # fmt: off
    ret = super().from_config(cfg)
    in_features = ret["in_features"] = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    pooler_scales = (1.0 / input_shape[in_features[0]].stride,)
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    mask_on = cfg.MODEL.MASK_ON
    # fmt: on
    assert not cfg.MODEL.KEYPOINT_ON
    assert len(in_features) == 1

    ret["pooler"] = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )

    # Compatibility with old moco code. Might be useful.
    # See notes in StandardROIHeads.from_config
    if not inspect.ismethod(cls._build_res5_block):
        logger.warning(
            "The behavior of _build_res5_block may change. "
            "Please do not depend on private methods."
        )
        cls._build_res5_block = classmethod(cls._build_res5_block)

    ret["res5"], out_channels = cls._build_res5_block(cfg)
    ret["box_predictor"] = FastRCNNOutputLayers(
        cfg, ShapeSpec(channels=out_channels, height=1, width=1)
    )

    if mask_on:
        ret["mask_head"] = build_mask_head(
            cfg,
            ShapeSpec(
                channels=out_channels, width=pooler_resolution, height=pooler_resolution
            ),
        )
    return ret

def _init_box_head(cls, cfg, input_shape):
    # fmt: off
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    cascade_bbox_reg_weights = cfg.MODEL.ROI_BOX_CASCADE_HEAD.BBOX_REG_WEIGHTS
    cascade_ious = cfg.MODEL.ROI_BOX_CASCADE_HEAD.IOUS
    assert len(cascade_bbox_reg_weights) == len(cascade_ious)
    assert cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG, \
        "CascadeROIHeads only support class-agnostic regression now!"
    assert cascade_ious[0] == cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS[0]
    # fmt: on

    in_channels = [input_shape[f].channels for f in in_features]
    # Check all channel counts are equal
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    pooled_shape = ShapeSpec(
        channels=in_channels, width=pooler_resolution, height=pooler_resolution
    )

    box_heads, box_predictors, proposal_matchers = [], [], []
    for match_iou, bbox_reg_weights in zip(cascade_ious, cascade_bbox_reg_weights):
        box_head = build_box_head(cfg, pooled_shape)
        box_heads.append(box_head)
        box_predictors.append(
            FastRCNNOutputLayers(
                cfg,
                box_head.output_shape,
                box2box_transform=Box2BoxTransform(weights=bbox_reg_weights),
            )
        )
        proposal_matchers.append(
            Matcher([match_iou], [0, 1], allow_low_quality_matches=False)
        )
    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_heads": box_heads,
        "box_predictors": box_predictors,
        "proposal_matchers": proposal_matchers,
    }

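# Illustrative three-stage cascade configuration, written out only to show how the zip above
# pairs one matcher IoU with one set of box2box weights per stage. The numbers resemble
# detectron2's defaults but are assumptions here, not values read from any config.
cascade_ious = (0.5, 0.6, 0.7)
cascade_bbox_reg_weights = (
    (10.0, 10.0, 5.0, 5.0),
    (20.0, 20.0, 10.0, 10.0),
    (30.0, 30.0, 15.0, 15.0),
)
for stage, (iou, weights) in enumerate(zip(cascade_ious, cascade_bbox_reg_weights)):
    print(f"stage {stage}: matcher IoU {iou}, box2box weights {weights}")
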
def build_backbone(cfg, input_shape=None):
    """
    Build a backbone from `cfg.MODEL.BACKBONE.NAME`.

    Returns:
        an instance of :class:`Backbone`
    """
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))

    backbone_name = cfg.MODEL.BACKBONE.NAME
    backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone

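# Minimal usage sketch, assuming detectron2 is installed and using its default config
# (whose backbone is a randomly initialized ResNet); the exact keys in output_shape()
# depend on cfg.MODEL.BACKBONE.NAME and the backbone's OUT_FEATURES setting.
from detectron2.config import get_cfg
from detectron2.modeling import build_backbone

cfg = get_cfg()
backbone = build_backbone(cfg)   # input_shape defaults to 3 channels (len of PIXEL_MEAN)
print(backbone.output_shape())   # dict of feature name -> ShapeSpec, as in output_shape() above
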
def _init_box_head(cls, cfg, input_shape):
    # fmt: off
    in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
    pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
    pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
    sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
    pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
    # fmt: on

    # If StandardROIHeads is applied on multiple feature maps (as in FPN),
    # then we share the same predictors and therefore the channel counts must be the same
    in_channels = [input_shape[f].channels for f in in_features]
    # Check all channel counts are equal
    assert len(set(in_channels)) == 1, in_channels
    in_channels = in_channels[0]

    box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    # Here we split "box head" and "box predictor", which is mainly due to historical reasons.
    # They are used together so the "box predictor" layers should be part of the "box head".
    # New subclasses of ROIHeads do not need "box predictor"s.
    box_head = build_box_head(
        cfg,
        ShapeSpec(channels=in_channels, height=pooler_resolution, width=pooler_resolution),
    )
    box_predictor = FastRCNNOutputLayers(cfg, box_head.output_shape)
    return {
        "box_in_features": in_features,
        "box_pooler": box_pooler,
        "box_head": box_head,
        "box_predictor": box_predictor,
    }

def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter prediction results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class-agnostic bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        loss_weight (float|dict): weights to use for losses. Can be a single float for
            weighting all losses, or a dict of individual weightings. Valid dict keys are:

                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    self.num_classes = num_classes
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)
    # prediction layer for num_classes foreground classes and one background class (hence + 1)
    self.cls_score = nn.Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = nn.Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for l in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(l.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type
    if isinstance(loss_weight, float):
        loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
    self.loss_weight = loss_weight

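# Hedged construction sketch: builds the predictor directly from an explicit ShapeSpec
# rather than from a config object. The import paths follow detectron2's layout; the
# channel count, class count, and regression weights are illustrative assumptions only.
from detectron2.layers import ShapeSpec
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers

predictor = FastRCNNOutputLayers(
    ShapeSpec(channels=1024),  # e.g. the output shape of an fc box head
    box2box_transform=Box2BoxTransform(weights=(10.0, 10.0, 5.0, 5.0)),
    num_classes=80,            # COCO-style foreground classes; cls_score gets 80 + 1 outputs
    test_score_thresh=0.05,
)
# With these arguments, bbox_pred is Linear(1024, 80 * 4) since cls_agnostic_bbox_reg=False.
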