示例#1
0
    def __init__(self,
                 input_shape,
                 num_classes,
                 cls_agnostic_bbox_reg,
                 box_dim=4):
        """
        Build the classification and box-regression linear predictors.

        Args:
            input_shape (ShapeSpec or int): shape of the input feature; a bare int
                is treated as the channel count
            num_classes (int): number of foreground classes
            cls_agnostic_bbox_reg (bool): if True the regression layer outputs a
                single box (``box_dim`` values) instead of one per foreground class
            box_dim (int): the dimension of bounding boxes.
                Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
        """
        super().__init__()
        # Backward compatibility: accept a plain channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        # Flattened feature length; a missing width/height counts as 1.
        flat_dim = input_shape.channels * (input_shape.width or 1)
        flat_dim = flat_dim * (input_shape.height or 1)

        # One logit per foreground class plus one for the background class.
        self.cls_score = Linear(flat_dim, num_classes + 1)
        if cls_agnostic_bbox_reg:
            reg_classes = 1
        else:
            reg_classes = num_classes
        self.bbox_pred = Linear(flat_dim, reg_classes * box_dim)

        # Gaussian weights (smaller std for the box regressor), zero biases.
        for head, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(head.weight, std=std)
            nn.init.constant_(head.bias, 0)
示例#2
0
    def __init__(self,
                 input_size,
                 num_classes,
                 cls_agnostic_bbox_reg,
                 box_dim=4):
        """
        Build the classification and box-regression linear predictors.

        Args:
            input_size (int): channels, or (channels, height, width)
            num_classes (int): number of foreground classes
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            box_dim (int): the dimension of bounding boxes.
                Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
        """
        super(FastRCNNOutputLayers, self).__init__()

        if not isinstance(input_size, int):
            # np.prod returns a NumPy scalar; cast it so the Linear layers are
            # built with (and store) a plain Python int.
            input_size = int(np.prod(input_size))

        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)
        # Class-agnostic regression: the layer outputs a single set of box_dim
        # values instead of one set per foreground class.
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)
示例#3
0
    def __init__(
        self,
        input_shape,
        num_classes,
        pos_weights,
        test_score_thresh=0.0,
        test_topk_per_image=100,
    ):
        """
        Build a two-layer classifier over concatenated region features.

        Args:
            input_shape (ShapeSpec or int): shape of the input feature to this module;
                a bare int is treated as the channel count
            num_classes (int): number of action classes
            pos_weights: stored unchanged as ``self.pos_weights``
            test_score_thresh (float): threshold to filter predictions results.
            test_topk_per_image (int): number of top predictions to produce per image.
        """
        super().__init__()
        # Backward compatibility: accept a plain channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        feat_dim = input_shape.channels * (input_shape.width or 1)
        feat_dim = feat_dim * (input_shape.height or 1)

        # The first layer consumes person, object and union-region features
        # together, hence three times the per-region feature size.
        self.cls_fc1 = Linear(3 * feat_dim, feat_dim)
        # Prediction layer for num_classes classes (no background output).
        self.cls_score = Linear(feat_dim, num_classes)

        for fc in (self.cls_fc1, self.cls_score):
            nn.init.normal_(fc.weight, std=0.01)
            nn.init.constant_(fc.bias, 0)

        self.pos_weights = pos_weights
        self.test_score_thresh = test_score_thresh
        self.test_topk_per_image = test_topk_per_image
 def __init__(self, **kwargs):
     """
     NOTE: this interface is experimental.

     Forwards every keyword argument to the parent constructor, then adds two
     single-output linear predictors, ``z_pred`` and ``tilt_pred``.
     """
     super().__init__(**kwargs)
     # input_size is not assigned here; presumably the parent constructor sets it.
     feature_dim = self.input_size
     self.z_pred = Linear(feature_dim, 1)
     self.tilt_pred = Linear(feature_dim, 1)
示例#5
0
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            loss_weight (float|dict): weights to use for losses. Can be single number for
                weighting all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        self.num_classes = num_classes
        input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)
        # prediction layer for num_classes foreground classes and one background class (hence + 1)
        self.cls_score = Linear(input_size, num_classes + 1)
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        # The box dimension is taken from the number of transform weights.
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for l in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(l.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        # Expand a scalar weight into the per-loss dict. Integers are accepted
        # too, so that e.g. ``loss_weight=1`` is not left as a bare scalar.
        if isinstance(loss_weight, (int, float)):
            loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
        self.loss_weight = loss_weight
    def __init__(
        self,
        input_shape,
        *,
        box2box_transform,
        num_classes,
        num_attributes,
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
    ):
        """
        NOTE: this interface is experimental.

        Builds class, attribute and box-regression predictors on the same input feature.

        Args:
            input_shape (ShapeSpec or int): shape of the input feature to this module;
                a bare int is treated as the channel count
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            num_attributes (int): output size of the attribute branch (``attr_scores``)
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss.
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
        """
        # Only these three arguments go to the parent constructor; the
        # remaining options are applied directly on this instance below.
        super().__init__(input_shape,
                         box2box_transform=box2box_transform,
                         num_classes=num_classes)
        # Backward compatibility: accept a plain channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        flat_dim = input_shape.channels * (input_shape.width or 1)
        flat_dim = flat_dim * (input_shape.height or 1)

        # num_classes foreground classes plus one background class.
        self.cls_score = Linear(flat_dim, num_classes + 1)

        # Attribute branch.
        self.attr_scores = Linear(flat_dim, num_attributes)

        if cls_agnostic_bbox_reg:
            reg_classes = 1
        else:
            reg_classes = num_classes
        # The box dimension is taken from the number of transform weights.
        self.bbox_pred = Linear(flat_dim,
                                reg_classes * len(box2box_transform.weights))

        weight_stds = (
            (self.cls_score, 0.01),
            (self.attr_scores, 0.01),
            (self.bbox_pred, 0.001),
        )
        for head, std in weight_stds:
            nn.init.normal_(head.weight, std=std)
            nn.init.constant_(head.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
示例#7
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        Build a stack of fully connected layers on the flattened input feature.

        The following attributes are parsed from config:
            num_fc: the number of fc layers
            fc_dim: the dimension of the fc layers

        Args:
            cfg: config node; MODEL.HOI_BOX_HEAD.NUM_FC and FC_DIM are read.
            input_shape (ShapeSpec): its channels, height and width give the
                size of the flattened input.
        """
        super().__init__()

        # fmt: off
        num_fc = cfg.MODEL.HOI_BOX_HEAD.NUM_FC
        fc_dim = cfg.MODEL.HOI_BOX_HEAD.FC_DIM
        # fmt: on
        assert num_fc > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)
        self.fcs = []
        for k in range(num_fc):
            # np.prod returns a NumPy scalar; cast so the layer is built with
            # a plain Python int.
            fc = Linear(int(np.prod(self._output_size)), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            # Every later layer takes the previous layer's output width.
            self._output_size = fc_dim

        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
示例#8
0
 def __init__(self, cfg, input_shape):
     """
     Build the ROI pooler, the box head and a linear classifier on top of it.

     Args:
         cfg: config node; reads MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.* and
             MODEL.ROI_HEADS.NUM_CLASSES (build_box_head receives the whole cfg).
         input_shape: mapping from feature name to an object exposing
             ``stride`` and ``channels``.
     """
     super().__init__()
     head_cfg = cfg.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD
     feature_names = head_cfg.IN_FEATURES
     resolution = head_cfg.POOLER_RESOLUTION
     # One scale per input feature map: the reciprocal of its stride.
     scales = tuple(1.0 / input_shape[name].stride for name in feature_names)
     sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
     pooler_type = head_cfg.POOLER_TYPE
     # Only the channel count of the first listed feature is used.
     first_channels = [input_shape[name].channels for name in feature_names][0]

     self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
     self.box_in_features = feature_names
     self.meta_box_pooler = ROIPooler(
         output_size=resolution,
         scales=scales,
         sampling_ratio=sampling_ratio,
         pooler_type=pooler_type,
     )
     pooled_shape = ShapeSpec(channels=first_channels,
                              height=resolution,
                              width=resolution)
     self.meta_box_head = build_box_head(cfg, pooled_shape)

     head_out = self.meta_box_head.output_shape
     # Backward compatibility: the head may report a plain channel count.
     if isinstance(head_out, int):
         head_out = ShapeSpec(channels=head_out)
     self.input_size = head_out.channels * (head_out.width or 1) * (head_out.height or 1)
     # Constant term 0.5 * d * log(2 * pi), with d the flattened feature size.
     self.pi_normalizer = 0.5 * self.input_size * np.log(2 * np.pi)

     # num_classes + 1 outputs, Gaussian weights and zero bias.
     self.rank_loss_classifier = Linear(self.input_size, self.num_classes + 1)
     nn.init.normal_(self.rank_loss_classifier.weight, std=0.01)
     nn.init.constant_(self.rank_loss_classifier.bias, 0.0)
示例#9
0
 def __init__(self, cfg, input_shape):
     """
     Replace the parent's ``rank_loss_classifier`` with a bias-free square
     linear map (``sim_matrix``) initialised to the identity.

     Args:
         cfg: forwarded unchanged to the parent constructor.
         input_shape: forwarded unchanged to the parent constructor.
     """
     super().__init__(cfg, input_shape)
     # This variant does not keep the classifier left in place by the parent.
     del self.rank_loss_classifier

     feature_dim = self.input_size
     self.sim_matrix = Linear(feature_dim, feature_dim, bias=False)
     # Identity initialisation: zero everything, then set the diagonal to one.
     with torch.no_grad():
         self.sim_matrix.weight.fill_(0.)
         self.sim_matrix.weight.fill_diagonal_(1.)
示例#10
0
    def __init__(self, input_shape, fine_bone_name, fine_bone_emb_dim,
                 std_category_num, std_cls_loss_type, arc_softmax_loss_weights,
                 **kwargs):
        """
        Add a third branch (fine bone + linear classifier) for the "standard" prediction.

        Args:
            input_shape: only ``channels`` is read; the parent is given a
                ShapeSpec with that channel count and width = height = 1.
            fine_bone_name: passed to ``build_fine_bone`` to select the branch.
            fine_bone_emb_dim (int): passed to ``build_fine_bone`` as ``emb_dim``
                and used as the input size of ``standard_cls_score``.
            std_category_num (int): output size of ``standard_cls_score``.
            std_cls_loss_type: stored unchanged on the module.
            arc_softmax_loss_weights: stored unchanged on the module.
            **kwargs: forwarded to the parent constructor.
        """
        parent_shape = ShapeSpec(channels=input_shape.channels, width=1, height=1)
        super(TripleBranchOutputLayer, self).__init__(parent_shape, **kwargs)

        self.std_category_num = std_category_num
        self.std_cls_loss_type = std_cls_loss_type
        self.arc_softmax_loss_weights = arc_softmax_loss_weights
        self.input_channels = input_shape.channels

        # Newly added third classification branch: predicts whether the item is standard.
        # @Will Lee: the standard prediction ignores background, because the
        # main classification branch already takes care of it.
        self.fine_bone = self.build_fine_bone(fine_bone_name,
                                              emb_dim=fine_bone_emb_dim)
        self.standard_cls_score = Linear(fine_bone_emb_dim,
                                         self.std_category_num)

        # Parameters are matched by name: "weight" gets Gaussian init, "bias" gets zeros.
        for param_name, tensor in self.fine_bone.named_parameters():
            if 'weight' in param_name:
                nn.init.normal_(tensor, std=0.01)
            if 'bias' in param_name:
                nn.init.constant_(tensor, 0)

        nn.init.normal_(self.standard_cls_score.weight, std=0.01)
        nn.init.constant_(self.standard_cls_score.bias, 0)
示例#11
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        Build one shared FC layer followed by parallel regression and
        classification FC stacks.

        Layer widths are fixed in code: the regression stack is 256 -> 256 -> 2
        and the classification stack is 256 -> 256 -> ``cfg.K * cfg.K * 2``.
        No conv layers are created.

        Args:
            cfg: config node; only ``cfg.K`` is read.
            input_shape (ShapeSpec): its channels, height and width give the
                size of the flattened input to the shared layer.
        """
        super().__init__()

        num_fc = 3
        fc_dim_regr = [256, 256, 2]
        fc_dim_cls = [256, 256, cfg.K * cfg.K * 2]
        assert num_fc > 0

        self._output_size_regr = (input_shape.channels, input_shape.height,
                                  input_shape.width)
        self._output_size_cls = (input_shape.channels, input_shape.height,
                                 input_shape.width)

        self.fcs_regr = []
        self.fcs_cls = []
        # np.prod returns a NumPy scalar; cast so the layer is built with a
        # plain Python int.
        self.fc_shared = Linear(int(np.prod(self._output_size_regr)),
                                fc_dim_regr[0])
        # Both stacks start from the shared layer's output.
        self._output_size_regr = fc_dim_regr[0]
        self._output_size_cls = fc_dim_cls[0]
        for k in range(num_fc - 1):
            fc_regr = Linear(self._output_size_regr, fc_dim_regr[k + 1])
            fc_cls = Linear(self._output_size_cls, fc_dim_cls[k + 1])
            self.add_module("fc_regr{}".format(k + 1), fc_regr)
            self.add_module("fc_cls{}".format(k + 1), fc_cls)
            self.fcs_regr.append(fc_regr)
            self.fcs_cls.append(fc_cls)
            self._output_size_regr = fc_dim_regr[k + 1]
            self._output_size_cls = fc_dim_cls[k + 1]

        weight_init.c2_xavier_fill(self.fc_shared)
        for layer in self.fcs_regr:
            weight_init.c2_xavier_fill(layer)
        for layer in self.fcs_cls:
            weight_init.c2_xavier_fill(layer)
示例#12
0
    def __init__(self, cfg, input_shape):
        """
        Build the box classification and regression predictors from a config.

        Args:
            cfg: config node. Values read here:
                MODEL.ROI_BOX_HEAD.{BBOX_REG_WEIGHTS, CLS_AGNOSTIC_BBOX_REG, SMOOTH_L1_BETA},
                MODEL.ROI_HEADS.{NUM_CLASSES, SCORE_THRESH_TEST, NMS_THRESH_TEST},
                TEST.DETECTIONS_PER_IMAGE and ZERO_SHOT.ZERO_SHOT_ON.
                The whole cfg is also passed to ``_init_zero_shot`` when zero-shot is on.
            input_shape (ShapeSpec or int): shape of the input feature to this module;
                a bare int is treated as the channel count
        """
        super(BoxOutputLayers, self).__init__()

        box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
        self.box2box_transform = Box2BoxTransform(
            weights=box_head_cfg.BBOX_REG_WEIGHTS)
        roi_heads_cfg = cfg.MODEL.ROI_HEADS
        self.num_classes = roi_heads_cfg.NUM_CLASSES
        self.cls_agnostic_bbox_reg = box_head_cfg.CLS_AGNOSTIC_BBOX_REG
        self.smooth_l1_beta = box_head_cfg.SMOOTH_L1_BETA
        self.test_score_thresh = roi_heads_cfg.SCORE_THRESH_TEST
        self.test_nms_thresh = roi_heads_cfg.NMS_THRESH_TEST
        self.test_topk_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
        self.zero_shot_on = cfg.ZERO_SHOT.ZERO_SHOT_ON

        # Backward compatibility: accept a plain channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        flat_dim = input_shape.channels * (input_shape.width or 1)
        flat_dim = flat_dim * (input_shape.height or 1)

        # num_classes foreground classes plus one background class.
        self.cls_score = Linear(flat_dim, self.num_classes + 1)
        if self.cls_agnostic_bbox_reg:
            reg_classes = 1
        else:
            reg_classes = self.num_classes
        # The box dimension is taken from the number of transform weights.
        self.bbox_pred = Linear(
            flat_dim, reg_classes * len(self.box2box_transform.weights))

        for head, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
            nn.init.normal_(head.weight, std=std)
            nn.init.constant_(head.bias, 0)

        if self.zero_shot_on:
            self._init_zero_shot(cfg)
示例#13
0
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 conv_dims: List[int],
                 fc_dims: List[int],
                 conv_norm=""):
        """
        NOTE: this interface is experimental.

        Builds 3x3 conv layers followed by fully connected layers and logs the
        constructor arguments at INFO level.

        Args:
            input_shape (ShapeSpec): shape of the input feature.
            conv_dims (list[int]): the output dimensions of the conv layers
            fc_dims (list[int]): the output dimensions of the fc layers
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
        """

        logger = logging.getLogger(__name__)
        # Lazy %-style arguments: the message is only formatted if it is emitted.
        logger.info("FastRCNNConvFCHead input_shape: %s", input_shape)
        logger.info("FastRCNNConvFCHead conv_dims: %s", conv_dims)
        logger.info("FastRCNNConvFCHead fc_dims: %s", fc_dims)
        logger.info("FastRCNNConvFCHead conv_norm: %s", conv_norm)

        super().__init__()
        assert len(conv_dims) + len(fc_dims) > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for k, conv_dim in enumerate(conv_dims):
            conv = Conv2d(
                self._output_size[0],
                conv_dim,
                kernel_size=3,
                padding=1,
                # The conv bias is only enabled when no normalization is given.
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("conv{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            # 3x3 kernel with padding 1: only the channel count is updated.
            self._output_size = (conv_dim, self._output_size[1],
                                 self._output_size[2])

        self.fcs = []
        for k, fc_dim in enumerate(fc_dims):
            # np.prod returns a NumPy scalar; cast so the layer is built with
            # a plain Python int.
            fc = Linear(int(np.prod(self._output_size)), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for layer in self.conv_norm_relus:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
    def __init__(self,
                 input_shape: ShapeSpec,
                 *,
                 conv_dims: List[int],
                 fc_dims: List[int],
                 conv_norm=""):
        """
        NOTE: this interface is experimental.

        Builds 3x3 conv layers followed by a flatten and fc / ReLU / dropout groups.

        Args:
            input_shape (ShapeSpec): shape of the input feature.
            conv_dims (list[int]): the output dimensions of the conv layers
            fc_dims (list[int]): the output dimensions of the fc layers
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
        """
        super().__init__()
        assert len(conv_dims) + len(fc_dims) > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for idx, out_channels in enumerate(conv_dims, start=1):
            conv = Conv2d(
                self._output_size[0],
                out_channels,
                kernel_size=3,
                padding=1,
                # The conv bias is only enabled when no normalization is given.
                bias=not conv_norm,
                norm=get_norm(conv_norm, out_channels),
                activation=nn.ReLU(inplace=True),
            )
            self.add_module("conv" + str(idx), conv)
            self.conv_norm_relus.append(conv)
            # 3x3 kernel with padding 1: only the channel count is updated.
            self._output_size = (out_channels,) + self._output_size[1:]

        self.fcs = []
        # A single flatten module is registered ahead of the first fc layer.
        if len(fc_dims) > 0:
            self.add_module("flatten", nn.Flatten())
        for idx, width in enumerate(fc_dims, start=1):
            suffix = str(idx)
            fc = Linear(int(np.prod(self._output_size)), width)
            self.add_module("fc" + suffix, fc)
            self.add_module("fc_relu" + suffix, nn.ReLU(inplace=True))
            self.add_module("fc_dropout" + suffix,
                            nn.Dropout(p=0.5, inplace=False))
            self.fcs.append(fc)
            self._output_size = width

        for conv in self.conv_norm_relus:
            weight_init.c2_msra_fill(conv)
        # The fc layers use a small Gaussian for weights and a constant 0.1
        # bias (not the xavier fill).
        for fc in self.fcs:
            torch.nn.init.normal_(fc.weight, std=0.005)
            torch.nn.init.constant_(fc.bias, 0.1)
示例#15
0
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 loss_weight=1.0,
                 weak_detector_head=None,
                 regression_branch=False,
                 terms=None,
                 freeze_layers=None,
                 embedding_path=''):
        """
        Extend the parent output layers with zero-initialised fine-tuning predictors.

        All arguments are forwarded by keyword to the parent constructor. Two
        extra linear layers, ``cls_score_ft`` and ``bbox_pred_ft``, are then
        created with weights and biases set to zero.

        Args:
            terms (dict or None): forwarded to the parent; ``None`` (default)
                means a fresh empty dict.
            freeze_layers (list or None): forwarded to the parent; ``None``
                (default) means a fresh empty list.
        """
        # Avoid mutable default arguments: a ``{}`` / ``[]`` default would be
        # one object shared by every instance built with the defaults.
        terms = {} if terms is None else terms
        freeze_layers = [] if freeze_layers is None else freeze_layers

        super(SupervisedDetectorOutputsFineTune,
              self).__init__(input_shape=input_shape,
                             box2box_transform=box2box_transform,
                             num_classes=num_classes,
                             test_score_thresh=test_score_thresh,
                             test_nms_thresh=test_nms_thresh,
                             test_topk_per_image=test_topk_per_image,
                             cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
                             smooth_l1_beta=smooth_l1_beta,
                             box_reg_loss_type=box_reg_loss_type,
                             loss_weight=loss_weight,
                             weak_detector_head=weak_detector_head,
                             regression_branch=regression_branch,
                             terms=terms,
                             freeze_layers=freeze_layers,
                             embedding_path=embedding_path)

        # Define delta predictors. input_size, num_classes, num_bbox_reg_classes
        # and box_dim are not assigned here; presumably the parent sets them.
        self.cls_score_ft = Linear(self.input_size, self.num_classes + 1)
        self.bbox_pred_ft = Linear(self.input_size,
                                   self.num_bbox_reg_classes * self.box_dim)
        # Init predictors: all-zero weights and biases.
        for l in [self.cls_score_ft, self.bbox_pred_ft]:
            nn.init.constant_(l.weight, 0.)
            nn.init.constant_(l.bias, 0.)
示例#16
0
    def __init__(self, cfg, input_shape: ShapeSpec):
        """
        Build 3x3 conv layers followed by fully connected layers, with an
        optional dropout module.

        The following attributes are parsed from config:
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the dimension of the conv/fc layers
            norm: normalization for the conv layers
            dropout: when truthy, ``self.dropout = nn.Dropout(p=0.5)`` is created

        Args:
            cfg: config node; values are read from MODEL.ROI_BOX_HEAD.
            input_shape (ShapeSpec): shape of the input feature.
        """
        super().__init__()

        # fmt: off
        num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
        conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
        num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
        fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
        norm = cfg.MODEL.ROI_BOX_HEAD.NORM
        dropout = cfg.MODEL.ROI_BOX_HEAD.DROP_OUT
        # fmt: on
        assert num_conv + num_fc > 0
        # Jamie
        self.dropout_en = dropout
        if self.dropout_en:
            self.dropout = nn.Dropout(p=0.5)

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for k in range(num_conv):
            conv = Conv2d(
                self._output_size[0],
                conv_dim,
                kernel_size=3,
                padding=1,
                # The conv bias is only enabled when no normalization is given.
                bias=not norm,
                norm=get_norm(norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("conv{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            # 3x3 kernel with padding 1: only the channel count is updated.
            self._output_size = (conv_dim, self._output_size[1],
                                 self._output_size[2])

        self.fcs = []
        for k in range(num_fc):
            # np.prod returns a NumPy scalar; cast so the layer is built with
            # a plain Python int.
            fc = Linear(int(np.prod(self._output_size)), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for layer in self.conv_norm_relus:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
示例#17
0
    def __init__(
        self,
        input_shape: ShapeSpec,
        num_conv: int,
        conv_dim: int,
        num_fc: int,
        fc_dim: int,
        conv_norm="",
    ):
        """
        Build ``num_conv`` 3x3 conv layers followed by ``num_fc`` fully connected layers.

        Args:
            input_shape (ShapeSpec): shape of the input feature.
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the output dimension of the conv/fc layers
            conv_norm (str or callable): normalization for the conv layers.
                See :func:`detectron2.layers.get_norm` for supported types.
        """
        super().__init__()
        assert num_conv + num_fc > 0

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        for k in range(num_conv):
            conv = Conv2d(
                self._output_size[0],
                conv_dim,
                kernel_size=3,
                padding=1,
                # The conv bias is only enabled when no normalization is given.
                bias=not conv_norm,
                norm=get_norm(conv_norm, conv_dim),
                activation=F.relu,
            )
            self.add_module("conv{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            # 3x3 kernel with padding 1: only the channel count is updated.
            self._output_size = (conv_dim, self._output_size[1],
                                 self._output_size[2])

        self.fcs = []
        for k in range(num_fc):
            # np.prod returns a NumPy scalar; cast so the layer is built with
            # a plain Python int.
            fc = Linear(int(np.prod(self._output_size)), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        for layer in self.conv_norm_relus:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)
示例#18
0
 def __init__(self,
              in_features,
              out_classes,
              mode='softmax',
              s=30.0,
              m=0.50,
              easy_margin=False):
     """
     Create the sub-layers selected by ``mode``.

     Args:
         in_features: input size of the created layers.
         out_classes: output size of the created layers.
         mode (str): one of 'arc', 'softmax', 'arc+softmax', 'cross_entropy'.
             An ArcLayer (``arc_ly``) is created when the mode contains 'arc';
             a Linear layer (``soft_ly``) when it contains 'softmax' or
             'cross_entropy'.
         s, m, easy_margin: passed through to ArcLayer.
     """
     super(ArcSoftLayer, self).__init__()
     self.mode = mode
     assert mode in ('arc', 'softmax', 'arc+softmax', 'cross_entropy')

     wants_arc = 'arc' in mode
     if wants_arc:
         self.arc_ly = ArcLayer(in_features, out_classes, s, m, easy_margin)

     wants_linear = any(key in mode for key in ('softmax', 'cross_entropy'))
     if wants_linear:
         linear = Linear(in_features, out_classes)
         self.soft_ly = linear
         # Gaussian weights, zero bias.
         nn.init.normal_(linear.weight, std=0.01)
         nn.init.constant_(linear.bias, 0)
 def __init__(self,
              input_shape: ShapeSpec,
              *args,
              num_classes: int,
              prior_prob: float = 0.001,
              **kwargs):
     """
     Build the parent output layers, then replace ``cls_score`` with a
     classifier that has no background output.

     Args:
         input_shape (ShapeSpec or int): shape of the input feature; a bare
             int is treated as the channel count.
         *args: extra positional arguments forwarded to the parent constructor.
         num_classes (int): number of outputs of the new ``cls_score`` layer.
         prior_prob (float): the classifier bias is set to
             ``-log((1 - prior_prob) / prior_prob)``.
         **kwargs: extra keyword arguments forwarded to the parent constructor.
     """
     # input_shape is passed positionally: combining ``input_shape=...`` with
     # ``*args`` raises "got multiple values for argument" as soon as any
     # extra positional argument is supplied.
     super().__init__(input_shape,
                      *args,
                      num_classes=num_classes,
                      **kwargs)
     # re-init the out dimension of the last FC layer to exclude the bg class
     if isinstance(input_shape, int):  # some backward compatibility
         input_shape = ShapeSpec(channels=input_shape)
     input_size = input_shape.channels * (input_shape.width
                                          or 1) * (input_shape.height or 1)
     self.cls_score = Linear(input_size,
                             num_classes)  # no +1 since no BG class
     nn.init.normal_(self.cls_score.weight, std=0.01)
     # init the bias with prior prob for stabler training
     bias_value = -math.log((1 - prior_prob) / prior_prob)
     nn.init.constant_(self.cls_score.bias, bias_value)
示例#20
0
    def __init__(self, input_shape, conv_dims, fc_dims, conv_norm=""):
        """
        Build 3x3 conv layers followed by a flatten and fc / ReLU pairs.

        Args:
            input_shape: indexable shape; entries 1, 2 and 3 are used as the
                initial output size (the first of them feeds the conv input
                channels).
            conv_dims: output channels of each conv layer, in order.
            fc_dims: output width of each fully connected layer, in order.
            conv_norm: normalization passed to ``get_norm``; when it is falsy
                the conv layers are created with a bias.
        """
        super().__init__()
        assert len(conv_dims) + len(fc_dims) > 0

        self._output_size = tuple(input_shape[axis] for axis in (1, 2, 3))

        self.conv_norm_relus = []
        for idx, out_channels in enumerate(conv_dims, start=1):
            conv = Conv2d(
                self._output_size[0],
                out_channels,
                kernel_size=3,
                padding=1,
                bias=not conv_norm,
                norm=get_norm(conv_norm, out_channels),
                activation=nn.ReLU(),
            )
            self.add_module("conv" + str(idx), conv)
            self.conv_norm_relus.append(conv)
            # 3x3 kernel with padding 1: only the channel count is updated.
            self._output_size = (out_channels,) + self._output_size[1:]

        self.fcs = []
        # A single flatten module is registered ahead of the first fc layer.
        if len(fc_dims) > 0:
            self.add_module("flatten", nn.Flatten())
        for idx, width in enumerate(fc_dims, start=1):
            fc = Linear(int(np.prod(self._output_size)), width)
            self.add_module("fc" + str(idx), fc)
            self.add_module("fc_relu" + str(idx), nn.ReLU())
            self.fcs.append(fc)
            self._output_size = width

        for conv in self.conv_norm_relus:
            weight_init.c2_msra_fill(conv)
        for fc in self.fcs:
            weight_init.c2_xavier_fill(fc)
示例#21
0
    def build_std_bone(cls,
                       standard_cls_branch_name,
                       input_shape,
                       emb_dim=512,
                       reduction=8):
        """
        Build the embedding branch that feeds the standard-class classifier.

        Every variant ends in a linear projection to ``emb_dim`` followed by a
        ReLU; the final classification layer is not part of the returned module.
          * '2fc':       flatten --> linear
          * 'DWSE':      depthwise 3x3 --> 3x3 --> 1x1 (channels x2) --> SE -->
                         pad + 2x2 max-pool --> flatten --> linear
          * '131ConvSE': 1x1 --> 3x3 --> 1x1 (channels x2) --> SE -->
                         pad + 2x2 max-pool --> flatten --> linear
        For a 7x7x256 input the convolutional variants go
        7x7x256 --> 7x7x512 --> 4x4x512 --> flatten --> ``emb_dim``.

        Args:
            standard_cls_branch_name (str): one of '2fc', 'DWSE', '131ConvSE'.
            input_shape: shape descriptor exposing ``channels``, ``height`` and
                ``width`` of the incoming feature map.
            emb_dim (int): size of the produced embedding.
            reduction (int): ``reduction`` argument forwarded to ``SELayer``.

        Returns:
            nn.Sequential: the assembled branch.

        Raises:
            NotImplementedError: if ``standard_cls_branch_name`` is not supported.
        """
        def pooled_extent(extent):
            # ZeroPad2d((0, 1, 0, 1)) appends one column / row, then the 2x2
            # max-pool with stride 2 floors the halved size: (extent + 1) // 2.
            # An unspecified extent keeps the historical 7x7 --> 4x4 assumption.
            return 4 if extent is None else (extent + 1) // 2

        if standard_cls_branch_name == '2fc':
            std_cls_branch = nn.Sequential(
                Flatten(),
                Linear(
                    input_shape.height * input_shape.width *
                    input_shape.channels, emb_dim),
                nn.ReLU(inplace=True),
            )
        elif standard_cls_branch_name == 'DWSE':
            pooled_area = (pooled_extent(input_shape.height) *
                           pooled_extent(input_shape.width))
            std_cls_branch = nn.Sequential(
                # depthwise convolution (one group per input channel)
                nn.Conv2d(input_shape.channels,
                          input_shape.channels,
                          kernel_size=(3, 3),
                          padding=1,
                          groups=input_shape.channels),
                nn.BatchNorm2d(input_shape.channels),
                # channel transform: 3x3 conv, then a 1x1 conv that doubles
                # the channel count
                nn.Conv2d(input_shape.channels,
                          input_shape.channels, (3, 3),
                          padding=1,
                          stride=1),
                nn.BatchNorm2d(input_shape.channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(input_shape.channels,
                          input_shape.channels * 2, (1, 1),
                          padding=0,
                          stride=1),
                nn.BatchNorm2d(input_shape.channels * 2),
                nn.ReLU(inplace=True),
                # SE module
                SELayer(input_shape.channels * 2, reduction=reduction),
                # pad right/bottom by one, then halve the resolution
                nn.ZeroPad2d(padding=(0, 1, 0, 1)),  # ?x7x7 --> ?x8x8
                nn.MaxPool2d((2, 2), stride=2),
                # embedding
                Flatten(),
                Linear(pooled_area * input_shape.channels * 2, emb_dim),
                nn.ReLU(inplace=True),
            )
        elif standard_cls_branch_name == '131ConvSE':
            pooled_area = (pooled_extent(input_shape.height) *
                           pooled_extent(input_shape.width))
            std_cls_branch = nn.Sequential(
                # 1x1 --> 3x3 --> 1x1; only the last 1x1 doubles the channels
                nn.Conv2d(input_shape.channels, input_shape.channels, (1, 1)),
                nn.BatchNorm2d(input_shape.channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(input_shape.channels,
                          input_shape.channels, (3, 3),
                          padding=1,
                          stride=1),
                nn.BatchNorm2d(input_shape.channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(input_shape.channels, input_shape.channels * 2,
                          (1, 1)),
                nn.BatchNorm2d(input_shape.channels * 2),
                nn.ReLU(inplace=True),

                # SE module
                SELayer(input_shape.channels * 2, reduction=reduction),
                # max-pool lowers the resolution
                nn.ZeroPad2d(padding=(0, 1, 0, 1)),  # ?x7x7 --> ?x8x8
                nn.MaxPool2d((2, 2), stride=2),
                # embedding
                Flatten(),
                nn.Linear(pooled_area * input_shape.channels * 2, emb_dim),
                nn.ReLU(inplace=True))
        else:
            raise NotImplementedError('目前标准分类分支网络构建,仅支持2fc、DWSE、131ConvSE三种')

        return std_cls_branch
示例#22
0
    def __init__(
        self,
        input_shape,
        *,
        standard_cls_bone,
        std_num_classes,
        std_cls_emb_dim,
        box2box_transform,
        num_classes,
        arc_args=None,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
        category_loss_type='cross_entropy',
        std_cls_loss_type='softmax',
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        box_reg_loss_weight=1.0,
    ):
        """
        NOTE: this interface is experimental.

        Builds three heads on top of the same input feature: a coarse category
        classifier, a box regressor, and a fine-grained ("standard") classifier
        that runs on the embedding produced by ``standard_cls_bone``.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            standard_cls_bone (nn.Module): embedding branch of the fine-grained
                classifier; registered as a submodule and re-initialized here.
            std_num_classes (int): number of fine-grained classes; the
                fine-grained scorer has ``std_num_classes + 1`` outputs.
            std_cls_emb_dim (int): input size of the fine-grained scorer.
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            arc_args (dict or None): must provide the keys 's', 'm' and
                'easy_margin' when ``std_cls_loss_type`` is 'arc'; unused
                otherwise.
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            category_loss_type (str): stored on the module; not used by this
                constructor.
            std_cls_loss_type (str): 'softmax' (linear scorer) or 'arc'
                (``ArcLayer`` scorer).
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): Weight for box regression loss

        Raises:
            NotImplementedError: if ``std_cls_loss_type`` is neither 'softmax'
                nor 'arc'.
        """
        super(MlabelStandardFastRCNNOutputLayer2, self).__init__()
        # Avoid a shared mutable default; an omitted value behaves like {}.
        if arc_args is None:
            arc_args = {}
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        # The prediction layer for num_classes foreground classes and one background class
        # (hence + 1)
        # coarse category classification
        self.category_score = nn.Sequential(
            Flatten(), Linear(input_size, num_classes + 1))
        # box regression
        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        box_dim = len(box2box_transform.weights)
        self.bbox_pred = nn.Sequential(
            Flatten(), Linear(input_size, num_bbox_reg_classes * box_dim))
        # fine-grained classification
        self.standard_cls_bone = standard_cls_bone
        if std_cls_loss_type == 'softmax':
            self.std_cls_score = Linear(std_cls_emb_dim, std_num_classes + 1)
            nn.init.normal_(self.std_cls_score.weight, std=0.01)
            nn.init.constant_(self.std_cls_score.bias, 0)
        elif std_cls_loss_type == 'arc':
            self.std_cls_score = ArcLayer(std_cls_emb_dim,
                                          std_num_classes + 1,
                                          s=arc_args['s'],
                                          m=arc_args['m'],
                                          easy_margin=arc_args['easy_margin'])
        else:
            raise NotImplementedError('目前仅支持softmax、arc两种模式,暂不支持{}'.format(
                std_cls_loss_type, ))
        # Parameters are matched by name substring, so every parameter whose
        # name contains 'weight' (including those of any normalization layers
        # the bone may hold) gets the normal(std=0.01) init.
        for pairs in [
                self.standard_cls_bone.named_parameters(),
                self.category_score.named_parameters(),
                self.bbox_pred.named_parameters()
        ]:
            for name, params in pairs:
                if 'weight' in name:
                    nn.init.normal_(params, std=0.01)
                elif 'bias' in name:
                    nn.init.constant_(params, 0.)
        self.std_cls_loss_type = std_cls_loss_type
        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight
        self.category_loss_type = category_loss_type
示例#23
0
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 box_reg_loss_weight=1.0,
                 add_unlabeled_class=False,
                 label_converter=None,
                 reverse_label_converter=None,
                 num_centroid=256,
                 clustering_interval=1000,
                 cluster_obj_thresh=0.8,
                 coupled_cos_thresh=0.15,
                 coupled_obj_thresh=0.9,
                 cos_thresh=0.15,
                 pos_class_thresh=0.7,
                 nms_thresh=0.3,
                 n_sample=20,
                 output_dir='./'):
        """
        NOTE: this interface is experimental.

        Builds the classification / box-regression layers, extends the
        classifier with ``num_centroid`` extra outputs, and restores the most
        recent pseudo ground truth found under ``output_dir/pseudo_gts``.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): Weight for box regression loss
            add_unlabeled_class (bool): stored on the module; not used by this
                constructor.
            label_converter (Tensor): required in practice despite the ``None``
                default: its length and ``max()`` are read here and it is
                extended with ``num_centroid`` consecutive ids above its maximum.
            reverse_label_converter: when given, the number of classifier
                outputs becomes ``min(num_classes + 1, len(reverse_label_converter))``.
            num_centroid (int): number of extra outputs appended to the classifier.
            clustering_interval, cluster_obj_thresh, coupled_cos_thresh,
            coupled_obj_thresh, cos_thresh, pos_class_thresh, nms_thresh,
            n_sample: stored on the module for use outside this constructor.
            output_dir (str): directory searched for ``pseudo_gts/<step>.pth``.
        """
        super().__init__()
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        input_size = input_shape.channels * (input_shape.width
                                             or 1) * (input_shape.height or 1)
        self.label_converter = label_converter
        self.reverse_label_converter = reverse_label_converter
        self.original_num_classes = len(self.label_converter)
        # Reserve num_centroid new ids right above the largest existing one.
        addition = self.label_converter.max() + torch.arange(num_centroid) + 1
        self.label_converter = torch.cat((self.label_converter, addition))

        if self.reverse_label_converter is not None:
            num_classes = min(num_classes + 1, len(reverse_label_converter))
        num_cls = num_classes

        self.add_unlabeled_class = add_unlabeled_class
        self.num_classes = num_cls

        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_cls - 1
        box_dim = len(box2box_transform.weights)
        self.cls_score = Linear(input_size, num_cls + num_centroid)
        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)

        self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight

        self.feature_memory = []
        self.label_memory = []
        self.obj_score_memory = []
        self.path_memory = []
        self.bbox_memory = []

        self.num_centroid = num_centroid
        self.clustering_interval = clustering_interval
        # Frozen 0/1 table with one row per classifier output: the first
        # num_cls rows are 1, the centroid rows start at 0.
        weight = torch.zeros((num_centroid + num_cls, 1))
        weight[:num_cls] = 1
        # The throwaway nn.Embedding instance is kept on purpose: creating it
        # draws from the torch RNG, so removing it would change seeded runs.
        self.cls_weight = nn.Embedding(num_centroid + num_cls,
                                       1).from_pretrained(weight, freeze=True)
        self.turn_on = False
        self.step = 1
        self.cluster_count = 1
        self.pseudo_gt = None
        self.n_pseudo_gt = 0

        self.n_sample = n_sample
        self.cluster_obj_thresh = cluster_obj_thresh
        self.cos_thresh = cos_thresh
        self.coupled_cos_thresh = coupled_cos_thresh
        self.coupled_obj_thresh = coupled_obj_thresh
        self.pos_class_thresh = pos_class_thresh
        self.nms_thresh = nms_thresh
        self.pal = np.random.random((1024, 3)) * 255

        self.size_opt = 'lm'

        self.output_dir = output_dir

        # Resume from the newest pseudo ground truth; files are named <step>.pth.
        g_list = glob.glob(os.path.join(self.output_dir, 'pseudo_gts',
                                        '*.pth'))
        if len(g_list) > 0:
            # basename/splitext instead of splitting on '/' so the step is
            # parsed correctly whatever path separator glob returns.
            g = max(
                int(os.path.splitext(os.path.basename(x))[0]) for x in g_list)
            # Format only the file name so braces in output_dir stay untouched.
            path = os.path.join(self.output_dir, 'pseudo_gts',
                                '{}.pth'.format(g))
            self.pseudo_gt = torch.load(path)
            self.step = g + 1
            # Guard before len(): a stored None leaves n_pseudo_gt at 0.
            if self.pseudo_gt is not None:
                self.n_pseudo_gt = len(self.pseudo_gt)
                if self.n_pseudo_gt > 0:
                    # Column 1 presumably holds label ids (the original local
                    # is named `label`) -- verify against the code that
                    # writes these files.
                    label = int(self.pseudo_gt[:, 1].max())
                    weight[:label] = 1
                    self.cls_weight = nn.Embedding(
                        num_centroid + num_cls,
                        1).from_pretrained(weight, freeze=True)
示例#24
0
 def init_pred_layers(self):
     """
     Create the two prediction layers fed by ``self.input_size`` features.

     Returns:
         tuple: ``(cls_score, bbox_pred)`` where the classifier has
             ``self.num_classes + 1`` outputs and the regressor has
             ``self.pred_reg`` outputs.
     """
     in_features = self.input_size
     # The classifier is built first, then the regressor.
     classifier = Linear(in_features, self.num_classes + 1)
     regressor = Linear(in_features, self.pred_reg)
     return classifier, regressor
示例#25
0
class FastRCNNOutputLayers_baseline(nn.Module):
    """
    Fast R-CNN output head built from two linear layers that share one input:
      (1) classification scores for every box
      (2) proposal-to-detection box regression deltas
    """
    @configurable
    def __init__(
        self,
        input_shape,
        *,
        box2box_transform,
        num_classes,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        box_reg_loss_weight=1.0,
        add_unlabeled_class=False,
        label_converter=None,
        reverse_label_converter=None,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): Weight for box regression loss
            add_unlabeled_class (bool): when true, one more classifier output
                is added before the ``reverse_label_converter`` adjustment.
            label_converter: stored and forwarded to ``FastRCNNOutputs`` in
                :meth:`losses`.
            reverse_label_converter: when given, one more classifier output is
                added and the total is capped at its length; also forwarded to
                ``fast_rcnn_inference`` in :meth:`inference`.
        """
        super().__init__()
        # Backward compatibility: a bare int is taken as the channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        feat_w = input_shape.width or 1
        feat_h = input_shape.height or 1
        in_features = input_shape.channels * feat_w * feat_h

        self.label_converter = label_converter
        self.reverse_label_converter = reverse_label_converter
        self.add_unlabeled_class = add_unlabeled_class

        # Number of classifier outputs. The unlabeled class is counted before
        # the reverse-converter cap (older jobs, prior to the run of
        # 1027 16:05, expected it after).
        n_outputs = num_classes
        if add_unlabeled_class:
            n_outputs = n_outputs + 1
        if reverse_label_converter is not None:
            n_outputs = min(n_outputs + 1, len(reverse_label_converter))

        n_reg_groups = 1 if cls_agnostic_bbox_reg else n_outputs - 1
        box_dim = len(box2box_transform.weights)

        # Layers are created and initialized one after the other so the order
        # of random draws stays fixed.
        self.cls_score = Linear(in_features, n_outputs)
        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)

        self.bbox_pred = Linear(in_features, n_reg_groups * box_dim)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image

    @classmethod
    def from_config(cls,
                    cfg,
                    input_shape,
                    label_converter=None,
                    reverse_label_converter=None):
        """
        Translate a config node into the keyword arguments of ``__init__``.

        Returns:
            dict: constructor arguments read from ``cfg`` plus the arguments
                passed through unchanged.
        """
        box_head = cfg.MODEL.ROI_BOX_HEAD
        roi_heads = cfg.MODEL.ROI_HEADS
        eopsn = cfg.MODEL.EOPSN
        # The unlabeled class is added only when unlabeled regions are enabled
        # and not ignored.
        use_unlabeled = eopsn.UNLABELED_REGION and (
            not eopsn.IGNORE_UNLABELED_REGION)
        kwargs = {
            "input_shape": input_shape,
            "box2box_transform":
            Box2BoxTransform(weights=box_head.BBOX_REG_WEIGHTS),
        }
        kwargs["num_classes"] = roi_heads.NUM_CLASSES
        kwargs["cls_agnostic_bbox_reg"] = box_head.CLS_AGNOSTIC_BBOX_REG
        kwargs["smooth_l1_beta"] = box_head.SMOOTH_L1_BETA
        kwargs["test_score_thresh"] = roi_heads.SCORE_THRESH_TEST
        kwargs["test_nms_thresh"] = roi_heads.NMS_THRESH_TEST
        kwargs["test_topk_per_image"] = cfg.TEST.DETECTIONS_PER_IMAGE
        kwargs["box_reg_loss_type"] = box_head.BBOX_REG_LOSS_TYPE
        kwargs["box_reg_loss_weight"] = box_head.BBOX_REG_LOSS_WEIGHT
        kwargs["add_unlabeled_class"] = use_unlabeled
        kwargs["label_converter"] = label_converter
        kwargs["reverse_label_converter"] = reverse_label_converter
        return kwargs

    def forward(self, x):
        """
        Returns:
            Tensor: shape (N,K+1), scores for each of the N box. Each row contains the scores for
                K object categories and 1 background class.
            Tensor: bounding box regression deltas for each box. Shape is shape (N,Kx4), or (N,4)
                for class-agnostic regression.
        """
        # Inputs with more than two dimensions are flattened per box.
        flat = torch.flatten(x, start_dim=1) if x.dim() > 2 else x
        class_scores = self.cls_score(flat)
        box_deltas = self.bbox_pred(flat)
        return class_scores, box_deltas

    def get_logits(self, x):
        """
        Return the output of ``self.cls_score.forward_freeze`` for ``x``,
        flattening ``x`` per box first when it has more than two dimensions.
        """
        flat = torch.flatten(x, start_dim=1) if x.dim() > 2 else x
        return self.cls_score.forward_freeze(flat)

    # TODO: move the implementation to this class.
    def losses(self, predictions, proposals):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features
                that were used to compute predictions.

        Returns:
            the result of ``FastRCNNOutputs(...).losses()``.
        """
        class_scores, box_deltas = predictions
        outputs = FastRCNNOutputs(
            self.box2box_transform,
            class_scores,
            box_deltas,
            proposals,
            self.smooth_l1_beta,
            self.box_reg_loss_type,
            self.box_reg_loss_weight,
            self.label_converter,
            add_unlabeled_class=self.add_unlabeled_class,
        )
        return outputs.losses()

    def inference(self, predictions, proposals, use_unknown=False):
        """
        Note: ``len(self.reverse_label_converter)`` is read here, so this
        method needs a ``reverse_label_converter`` to have been provided.

        Returns:
            list[Instances]: same as `fast_rcnn_inference`.
            list[Tensor]: same as `fast_rcnn_inference`.
        """
        boxes = self.predict_boxes(predictions, proposals)
        probs = self.predict_probs(predictions, proposals)
        objectness = [inst.objectness_logits for inst in proposals]
        sizes = [inst.image_size for inst in proposals]
        n_classes = len(self.reverse_label_converter) - 2
        return fast_rcnn_inference(
            boxes,
            probs,
            sizes,
            objectness,
            self.test_score_thresh,
            self.test_nms_thresh,
            self.test_topk_per_image,
            use_unknown,
            reverse_label_converter=self.reverse_label_converter,
            num_classes=n_classes,
        )

    def predict_boxes_for_gt_classes(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted boxes for GT classes in case of
                class-specific box head. Element i of the list has shape (Ri, B), where Ri is
                the number of predicted objects for image i and B is the box dimension (4 or 5)
        """
        if len(proposals) == 0:
            return []
        _, box_deltas = predictions
        boxes_per_image = [inst.proposal_boxes for inst in proposals]
        all_boxes = boxes_per_image[0].cat(boxes_per_image).tensor
        n_boxes, box_dim = all_boxes.shape
        decoded = self.box2box_transform.apply_deltas(box_deltas,
                                                      all_boxes)  # Nx(KxB)

        n_groups = decoded.shape[1] // box_dim
        if n_groups > 1:
            # Class-specific head: pick, for every box, the prediction that
            # belongs to its ground-truth class.
            targets = torch.cat([inst.gt_classes for inst in proposals], dim=0)
            # Ignored / background proposals carry labels that are not valid
            # indices, so clamp them into range.
            targets = targets.clamp_(0, n_groups - 1)
            rows = torch.arange(n_boxes,
                                dtype=torch.long,
                                device=decoded.device)
            decoded = decoded.view(n_boxes, n_groups, box_dim)[rows, targets]
        counts = [len(inst) for inst in proposals]
        return decoded.split(counts)

    def predict_boxes(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes
                for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is
                the number of predicted objects for image i and B is the box dimension (4 or 5)
        """
        if len(proposals) == 0:
            return []
        box_deltas = predictions[1]
        counts = [len(inst) for inst in proposals]
        boxes_per_image = [inst.proposal_boxes for inst in proposals]
        all_boxes = boxes_per_image[0].cat(boxes_per_image).tensor
        decoded = self.box2box_transform.apply_deltas(box_deltas,
                                                      all_boxes)  # Nx(KxB)
        return decoded.split(counts)

    def predict_probs(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class probabilities for each image.
                Element i has shape (Ri, K + 1), where Ri is the number of predicted objects
                for image i.
        """
        class_scores = predictions[0]
        counts = [len(inst) for inst in proposals]
        return F.softmax(class_scores, dim=-1).split(counts, dim=0)
示例#26
0
    def __init__(self,
                 cfg,
                 input_shape,
                 num_classes,
                 cls_agnostic_bbox_reg,
                 box_dim=4):
        """
        Build a conv/fc box head followed by its two prediction layers.

        The following attributes are parsed from config:
            num_conv, num_fc: the number of conv/fc layers
            conv_dim/fc_dim: the dimension of the conv/fc layers
            norm: normalization for the conv layers

        The conv stack doubles the channel count at every layer, capped at
        ``conv_dim``; each layer takes the previous layer's output channels.

        Args:
            cfg: config node providing ``MODEL.ROI_BOX_HEAD``.
            input_shape: shape descriptor exposing ``channels``, ``height``
                and ``width``.
            num_classes (int): number of foreground classes
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            box_dim (int): the dimension of bounding boxes.
        """
        super().__init__()

        # fmt: off
        num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
        conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
        num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
        fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
        norm = cfg.MODEL.ROI_BOX_HEAD.NORM
        # fmt: on
        assert num_conv + num_fc > 0

        num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        self.num_classes = num_classes
        self.pred_reg = num_bbox_reg_classes * box_dim

        self._output_size = (input_shape.channels, input_shape.height,
                             input_shape.width)

        self.conv_norm_relus = []
        base_channels = self._output_size[0]
        # Chain the channel counts so that every conv consumes exactly what
        # the previous one produced (the first consumes the input channels).
        dim_in = base_channels
        for k in range(num_conv):
            dim_out = min(conv_dim, base_channels * 2**(k + 1))
            conv = Conv2d(
                dim_in,
                dim_out,
                kernel_size=3,
                padding=1,
                bias=not norm,
                norm=get_norm(norm, dim_out),
                activation=F.relu,
            )
            self.add_module("conv{}".format(k + 1), conv)
            self.conv_norm_relus.append(conv)
            dim_in = dim_out
        # Record the channels the conv stack really emits: the input channels
        # when there is no conv layer, otherwise the last conv's output (which
        # can stay below conv_dim when the doubling does not reach it).
        self._output_size = (dim_in, self._output_size[1],
                             self._output_size[2])

        self.fcs = []
        for k in range(num_fc):
            # int() turns the numpy integer returned by np.prod into a plain int.
            fc = Linear(int(np.prod(self._output_size)), fc_dim)
            self.add_module("fc{}".format(k + 1), fc)
            self.fcs.append(fc)
            self._output_size = fc_dim

        # `output_size` is defined elsewhere on this class -- presumably it
        # exposes `_output_size`; verify. With no fc layer it is a
        # (channels, height, width) tuple and only the channel count is kept.
        self.input_size = self.output_size
        if not isinstance(self.input_size, int):
            self.input_size = self.input_size[0]

        self.cls_score, self.bbox_pred = self.init_pred_layers()

        for layer in self.conv_norm_relus:
            weight_init.c2_msra_fill(layer)
        for layer in self.fcs:
            weight_init.c2_xavier_fill(layer)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        for layer in [self.cls_score, self.bbox_pred]:
            nn.init.constant_(layer.bias, 0)
示例#27
0
    def __init__(
        self,
        input_shape,
        *,
        box2box_transform,
        num_classes,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        box_reg_loss_weight=1.0,
        add_unlabeled_class=False,
        label_converter=None,
        reverse_label_converter=None,
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            box_reg_loss_weight (float): Weight for box regression loss
            add_unlabeled_class (bool): when true, one more classifier output
                is added before the ``reverse_label_converter`` adjustment.
            label_converter: stored on the module; not used by this constructor.
            reverse_label_converter: when given, one more classifier output is
                added and the total is capped at its length.
        """
        super().__init__()
        # Backward compatibility: a bare int is taken as the channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        feat_w = input_shape.width or 1
        feat_h = input_shape.height or 1
        in_features = input_shape.channels * feat_w * feat_h

        self.label_converter = label_converter
        self.reverse_label_converter = reverse_label_converter
        self.add_unlabeled_class = add_unlabeled_class

        # Number of classifier outputs. The unlabeled class is counted before
        # the reverse-converter cap (older jobs, prior to the run of
        # 1027 16:05, expected it after).
        n_outputs = num_classes
        if add_unlabeled_class:
            n_outputs = n_outputs + 1
        if reverse_label_converter is not None:
            n_outputs = min(n_outputs + 1, len(reverse_label_converter))

        n_reg_groups = 1 if cls_agnostic_bbox_reg else n_outputs - 1
        box_dim = len(box2box_transform.weights)

        # Each layer is created and initialized before the next one so the
        # order of random draws stays fixed.
        self.cls_score = Linear(in_features, n_outputs)
        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)

        self.bbox_pred = Linear(in_features, n_reg_groups * box_dim)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
示例#28
0
    def __init__(
        self,
        input_shape: ShapeSpec,
        *,
        box2box_transform,
        clustering_items_per_class,
        clustering_start_iter,
        clustering_update_mu_iter,
        clustering_momentum,
        clustering_z_dimension,
        enable_clustering,
        prev_intro_cls,
        curr_intro_cls,
        max_iterations,
        output_dir,
        feat_store_path,
        margin,
        num_classes: int,
        test_score_thresh: float = 0.0,
        test_nms_thresh: float = 0.5,
        test_topk_per_image: int = 100,
        cls_agnostic_bbox_reg: bool = False,
        smooth_l1_beta: float = 0.0,
        box_reg_loss_type: str = "smooth_l1",
        loss_weight: Union[float, Dict[str, float]] = 1.0,
    ):
        """
        NOTE: this interface is experimental.

        Besides the two prediction layers, this constructor stores the
        clustering settings and loads the feature store from
        ``output_dir/feat_store_path/feat.pt`` when that file exists, creating
        a new store otherwise.

        Args:
            input_shape (ShapeSpec): shape of the input feature to this module
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            clustering_items_per_class: second argument of a newly created
                ``Store``.
            clustering_start_iter, clustering_update_mu_iter,
            clustering_momentum, enable_clustering, max_iterations, margin:
                stored on the module for use outside this constructor.
            clustering_z_dimension: accepted but not used by this constructor.
            prev_intro_cls, curr_intro_cls: their sum is stored as
                ``seen_classes``; ids from ``seen_classes`` up to (excluding)
                ``num_classes - 1`` form ``invalid_class_range``.
            output_dir, feat_store_path: path components of the feature store file.
            num_classes (int): number of foreground classes
            test_score_thresh (float): threshold to filter predictions results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions to produce per image.
            cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                `box_reg_loss_type` is "smooth_l1"
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
            loss_weight (float|dict): weights to use for losses. Can be single float for weighting
                all losses, or a dict of individual weightings. Valid dict keys are:
                    * "loss_cls": applied to classification loss
                    * "loss_box_reg": applied to box regression loss
        """
        super().__init__()
        logger = logging.getLogger(__name__)

        # Backward compatibility: a bare int is taken as the channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        feat_w = input_shape.width or 1
        feat_h = input_shape.height or 1
        in_features = input_shape.channels * feat_w * feat_h

        # One score per foreground class plus one for background (hence + 1).
        self.cls_score = Linear(in_features, num_classes + 1)
        box_dim = len(box2box_transform.weights)
        n_reg_groups = 1 if cls_agnostic_bbox_reg else num_classes
        self.bbox_pred = Linear(in_features, n_reg_groups * box_dim)

        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.cls_score.bias, 0)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image
        self.box_reg_loss_type = box_reg_loss_type
        # A single float weights both losses equally.
        if isinstance(loss_weight, float):
            loss_weight = dict(loss_cls=loss_weight, loss_box_reg=loss_weight)
        self.loss_weight = loss_weight

        self.num_classes = num_classes
        self.clustering_start_iter = clustering_start_iter
        self.clustering_update_mu_iter = clustering_update_mu_iter
        self.clustering_momentum = clustering_momentum

        self.hingeloss = nn.HingeEmbeddingLoss(2)
        self.enable_clustering = enable_clustering

        self.prev_intro_cls = prev_intro_cls
        self.curr_intro_cls = curr_intro_cls
        self.seen_classes = self.prev_intro_cls + self.curr_intro_cls
        self.invalid_class_range = list(
            range(self.seen_classes, self.num_classes - 1))
        logger.info("Invalid class range: " + str(self.invalid_class_range))

        self.max_iterations = max_iterations
        self.feature_store_is_stored = False
        self.output_dir = output_dir
        self.feat_store_path = feat_store_path
        self.feature_store_save_loc = os.path.join(self.output_dir,
                                                   self.feat_store_path,
                                                   'feat.pt')

        # Reuse a previously saved feature store when one is on disk.
        store_path = self.feature_store_save_loc
        if not os.path.isfile(store_path):
            logger.info('Feature store not found in ' + store_path +
                        '. Creating new feature store.')
            self.feature_store = Store(num_classes + 1,
                                       clustering_items_per_class)
        else:
            logger.info('Trying to load feature store from ' + store_path)
            self.feature_store = torch.load(store_path)
        self.means = [None] * (num_classes + 1)
        self.margin = margin
示例#29
0
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 loss_weight=1.0,
                 oicr_iter=3,
                 fg_threshold=0.5,
                 bg_threshold=0.1,
                 freeze_layers=None,
                 embedding_path='',
                 terms=None,
                 mode='Pre_Softmax',
                 mil_multiplier=4.0,
                 detector_temp=1.0,
                 classifier_temp=1.0):
        """Set up the parent box head, then swap its predictors for new ones.

        The parent initializer runs with the standard box-head arguments.
        The ``cls_score`` and ``bbox_pred`` layers it defines are then deleted
        and replaced by the linear layers created here: ``classifier_stream``,
        ``detection_stream``, ``oicr_predictors``, ``cls_score_delta`` and
        ``bbox_pred_delta``. Finally a frozen embedding table is loaded from
        ``embedding_path`` and ``self._freeze_layers`` is called.

        Args:
            input_shape: an int (treated as a channel count) or an object with
                ``channels``, ``width`` and ``height`` attributes. Every linear
                layer takes ``channels * (width or 1) * (height or 1)`` inputs.
            box2box_transform: forwarded to the parent; ``len(.weights)`` is
                used as the box dimension.
            num_classes (int): ``classifier_stream`` and ``detection_stream``
                have ``num_classes`` outputs; each OICR predictor and
                ``cls_score_delta`` have ``num_classes + 1`` outputs.
            test_score_thresh, test_nms_thresh, test_topk_per_image,
            smooth_l1_beta, box_reg_loss_type, loss_weight: forwarded
                unchanged to the parent initializer.
            cls_agnostic_bbox_reg (bool): when true, ``bbox_pred_delta``
                predicts one set of box deltas instead of one per class.
            oicr_iter (int): number of heads created in ``oicr_predictors``.
            fg_threshold, bg_threshold: stored on the instance only.
                NOTE(review): presumably foreground/background overlap
                thresholds used elsewhere -- confirm against the callers.
            freeze_layers: passed to ``self._freeze_layers``. ``None`` (the
                default) is treated as an empty list.
            embedding_path: path handed to ``torch.load``; the loaded object
                must provide an ``'embeddings'`` entry.
            terms: stored as ``self.terms``. ``None`` (the default) yields a
                fresh empty dict per instance.
            mode, mil_multiplier, detector_temp, classifier_temp: stored on
                the instance; not otherwise used in this method.
        """
        super(FastRCNNOutputsBase, self).__init__(
            input_shape=input_shape,
            box2box_transform=box2box_transform,
            num_classes=num_classes,
            test_score_thresh=test_score_thresh,
            test_nms_thresh=test_nms_thresh,
            test_topk_per_image=test_topk_per_image,
            cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
            smooth_l1_beta=smooth_l1_beta,
            box_reg_loss_type=box_reg_loss_type,
            loss_weight=loss_weight)

        # Mutable defaults are created per call so instances never share
        # (and accidentally mutate) the same dict or list object.
        if terms is None:
            terms = {}
        if freeze_layers is None:
            freeze_layers = []

        self.num_classes = num_classes
        self.oicr_iter = oicr_iter
        self.fg_threshold = fg_threshold
        self.bg_threshold = bg_threshold
        self.terms = terms
        self.box_dim = len(box2box_transform.weights)
        self.num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        self.mode = mode
        self.mil_multiplier = mil_multiplier
        self.detector_temp = detector_temp
        self.classifier_temp = classifier_temp

        # Drop the predictors registered by the parent initializer; this head
        # uses the layers defined below instead.
        del self.cls_score
        del self.bbox_pred

        # Flattened feature size fed to every linear predictor.
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        self.input_size = (input_shape.channels
                           * (input_shape.width or 1)
                           * (input_shape.height or 1))

        # Layer creation and the random inits below keep a fixed order so that
        # seeded runs produce the same initial weights.
        self.classifier_stream = Linear(self.input_size, self.num_classes)
        self.detection_stream = Linear(self.input_size, self.num_classes)
        self.oicr_predictors = nn.ModuleList([
            Linear(self.input_size, self.num_classes + 1)
            for _ in range(self.oicr_iter)
        ])
        self.cls_score_delta = Linear(self.input_size, self.num_classes + 1)
        self.bbox_pred_delta = Linear(
            self.input_size, self.num_bbox_reg_classes * self.box_dim)

        # Initialise the predictors.
        nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
        nn.init.normal_(self.classifier_stream.weight, std=0.01)
        nn.init.normal_(self.detection_stream.weight, std=0.01)
        for predictor in self.oicr_predictors:
            nn.init.normal_(predictor.weight, std=0.01)
            nn.init.constant_(predictor.bias, 0.)
        # The classification delta starts with all-zero weights.
        nn.init.constant_(self.cls_score_delta.weight, 0.)
        for layer in (self.cls_score_delta, self.bbox_pred_delta,
                      self.detection_stream, self.classifier_stream):
            nn.init.constant_(layer.bias, 0.)

        # NOTE(review): the default embedding_path ('') is passed straight to
        # torch.load, so callers presumably always supply a real file. Because
        # torch.load may unpickle arbitrary objects, only trusted files should
        # be loaded here.
        pretrained_embeddings = torch.load(embedding_path)['embeddings']
        self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings,
                                                       freeze=True)
        self._freeze_layers(layers=freeze_layers)
示例#30
0
    def __init__(self,
                 input_shape,
                 *,
                 box2box_transform,
                 num_classes,
                 test_score_thresh=0.0,
                 test_nms_thresh=0.5,
                 test_topk_per_image=100,
                 cls_agnostic_bbox_reg=False,
                 smooth_l1_beta=0.0,
                 box_reg_loss_type="smooth_l1",
                 loss_weight=1.0,
                 weak_detector_head=None,
                 regression_branch=False,
                 terms=None,
                 freeze_layers=None,
                 embedding_path=''):
        """Set up the parent box head, then swap its predictors for delta heads.

        The parent initializer runs with the standard box-head arguments.
        The ``cls_score`` and ``bbox_pred`` layers it defines are then deleted
        and replaced by ``cls_score_delta`` and ``bbox_pred_delta``. Finally a
        frozen embedding table is loaded from ``embedding_path`` and
        ``self._freeze_layers`` is called.

        Args:
            input_shape: an int (treated as a channel count) or an object with
                ``channels``, ``width`` and ``height`` attributes. Both linear
                layers take ``channels * (width or 1) * (height or 1)`` inputs.
            box2box_transform: forwarded to the parent; ``len(.weights)`` is
                used as the box dimension.
            num_classes (int): ``cls_score_delta`` has ``num_classes + 1``
                outputs.
            test_score_thresh, test_nms_thresh, test_topk_per_image,
            smooth_l1_beta, box_reg_loss_type, loss_weight: forwarded
                unchanged to the parent initializer.
            cls_agnostic_bbox_reg (bool): when true, ``bbox_pred_delta``
                predicts one set of box deltas instead of one per class.
            weak_detector_head: stored as ``self.weak_detector_head``; not
                otherwise used in this method.
            regression_branch (bool): when true, ``bbox_pred_delta`` weights
                start at zero; otherwise they are drawn from a normal
                distribution with ``std=0.001``.
            terms: stored as ``self.terms``. ``None`` (the default) yields a
                fresh empty dict per instance.
            freeze_layers: passed to ``self._freeze_layers``. ``None`` (the
                default) is treated as an empty list.
            embedding_path: path handed to ``torch.load``; the loaded object
                must provide an ``'embeddings'`` entry.
        """
        super(SupervisedDetectorOutputsBase, self).__init__(
            input_shape=input_shape,
            box2box_transform=box2box_transform,
            num_classes=num_classes,
            test_score_thresh=test_score_thresh,
            test_nms_thresh=test_nms_thresh,
            test_topk_per_image=test_topk_per_image,
            cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
            smooth_l1_beta=smooth_l1_beta,
            box_reg_loss_type=box_reg_loss_type,
            loss_weight=loss_weight)

        # Mutable defaults are created per call so instances never share
        # (and accidentally mutate) the same dict or list object.
        if terms is None:
            terms = {}
        if freeze_layers is None:
            freeze_layers = []

        self.num_classes = num_classes
        self.terms = terms
        self.box_dim = len(box2box_transform.weights)
        self.num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
        self.weak_detector_head = weak_detector_head
        self.regression_branch = regression_branch

        # Drop the predictors registered by the parent initializer; this head
        # uses the delta layers defined below instead.
        del self.cls_score
        del self.bbox_pred

        # Flattened feature size fed to both linear predictors.
        if isinstance(input_shape, int):  # some backward compatibility
            input_shape = ShapeSpec(channels=input_shape)
        self.input_size = (input_shape.channels
                           * (input_shape.width or 1)
                           * (input_shape.height or 1))

        self.cls_score_delta = Linear(self.input_size, self.num_classes + 1)
        self.bbox_pred_delta = Linear(
            self.input_size, self.num_bbox_reg_classes * self.box_dim)

        # Initialise the predictors: the classification delta always starts at
        # zero, the box delta only when the regression branch is enabled.
        nn.init.constant_(self.cls_score_delta.weight, 0.)
        if self.regression_branch:
            nn.init.constant_(self.bbox_pred_delta.weight, 0.)
        else:
            nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
        for layer in (self.cls_score_delta, self.bbox_pred_delta):
            nn.init.constant_(layer.bias, 0.)

        # NOTE(review): the default embedding_path ('') is passed straight to
        # torch.load, so callers presumably always supply a real file. Because
        # torch.load may unpickle arbitrary objects, only trusted files should
        # be loaded here.
        pretrained_embeddings = torch.load(embedding_path)['embeddings']
        self.embeddings = nn.Embedding.from_pretrained(pretrained_embeddings,
                                                       freeze=True)
        self._freeze_layers(layers=freeze_layers)