def __init__(self, input_shape, num_classes, cls_agnostic_bbox_reg, box_dim=4):
    """
    Create the classification and box-regression linear predictors.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature. A plain
            int is interpreted as a channel count.
        num_classes (int): number of foreground classes
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        box_dim (int): the dimension of bounding boxes.
            Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
    """
    super().__init__()

    # Backward compatibility: callers may still pass a bare channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    flat_dim = input_shape.channels * width * height

    # One score per foreground class plus one for the background class.
    self.cls_score = Linear(flat_dim, num_classes + 1)
    # Class-agnostic regression predicts a single box for all classes.
    reg_groups = 1 if cls_agnostic_bbox_reg else num_classes
    self.bbox_pred = Linear(flat_dim, reg_groups * box_dim)

    for predictor, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
        nn.init.normal_(predictor.weight, std=std)
    for predictor in (self.cls_score, self.bbox_pred):
        nn.init.constant_(predictor.bias, 0)
def __init__(self, input_size, num_classes, cls_agnostic_bbox_reg, box_dim=4):
    """
    Create the classification and box-regression linear predictors.

    Args:
        input_size (int): channels, or (channels, height, width)
        num_classes (int): number of foreground classes
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        box_dim (int): the dimension of bounding boxes.
            Example box dimensions: 4 for regular XYXY boxes and 5 for rotated XYWHA boxes
    """
    super(FastRCNNOutputLayers, self).__init__()

    if not isinstance(input_size, int):
        # np.prod returns a NumPy scalar; cast it so the layers below are
        # built with a plain Python int.
        input_size = int(np.prod(input_size))

    # The prediction layer for num_classes foreground classes and one background class
    # (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)
    # "Class agnostic" means a single set of box deltas is predicted and shared
    # by every class, instead of one set of deltas per foreground class.
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)
def __init__(
    self,
    input_shape,
    num_classes,
    pos_weights,
    test_score_thresh=0.0,
    test_topk_per_image=100,
):
    """
    Create a two-layer action classifier.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature to this
            module. A plain int is interpreted as a channel count.
        num_classes (int): number of action classes
        pos_weights: stored unchanged as ``self.pos_weights``.
        test_score_thresh (float): threshold to filter predictions results.
        test_topk_per_image (int): number of top predictions to produce per image.
    """
    super().__init__()

    # Backward compatibility: callers may still pass a bare channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    feature_dim = input_shape.channels * width * height

    # The first layer consumes the person, object and union-region features
    # together, hence three times the per-region feature size.
    self.cls_fc1 = Linear(feature_dim * 3, feature_dim)
    self.cls_score = Linear(feature_dim, num_classes)

    nn.init.normal_(self.cls_fc1.weight, std=0.01)
    nn.init.constant_(self.cls_fc1.bias, 0)
    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.constant_(self.cls_score.bias, 0)

    self.test_score_thresh = test_score_thresh
    self.test_topk_per_image = test_topk_per_image
    self.pos_weights = pos_weights
def __init__(self, **kwargs):
    """
    NOTE: this interface is experimental.

    Forwards every keyword argument to the parent constructor, then adds two
    single-output linear predictors (``z_pred`` and ``tilt_pred``) that take
    ``self.input_size`` features.
    """
    super().__init__(**kwargs)
    feature_dim = self.input_size
    self.z_pred = Linear(feature_dim, 1)
    self.tilt_pred = Linear(feature_dim, 1)
def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        loss_weight (float|dict): weights to use for losses. Can be single float for
            weighting all losses, or a dict of individual weightings. Valid dict keys are:
                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    self.num_classes = num_classes
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    # prediction layer for num_classes foreground classes and one background class (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    # The box dimension follows the number of regression weights of the transform.
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type

    # Expand any scalar weight into the per-loss dict. Integers are accepted
    # too: a float-only check would leave e.g. `loss_weight=1` as a bare
    # number instead of the documented dict form.
    if isinstance(loss_weight, (int, float)):
        loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
    self.loss_weight = loss_weight
def __init__(
    self,
    input_shape,
    *,
    box2box_transform,
    num_classes,
    num_attributes,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
):
    """
    NOTE: this interface is experimental.

    Initializes the parent with ``input_shape``, ``box2box_transform`` and
    ``num_classes`` only, then builds the class, attribute and box predictors
    and stores the remaining settings on this instance.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        num_attributes (int): output size of the attribute branch
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss.
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
    """
    super().__init__(input_shape, box2box_transform=box2box_transform, num_classes=num_classes)

    # Backward compatibility: callers may still pass a bare channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    feature_dim = input_shape.channels * width * height

    # Foreground classes plus one background class.
    self.cls_score = Linear(feature_dim, num_classes + 1)
    # Additional attribute branch.
    self.attr_scores = Linear(feature_dim, num_attributes)
    reg_groups = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(feature_dim, reg_groups * box_dim)

    weight_stds = (
        (self.cls_score, 0.01),
        (self.attr_scores, 0.01),
        (self.bbox_pred, 0.001),
    )
    for predictor, std in weight_stds:
        nn.init.normal_(predictor.weight, std=std)
    for predictor, _ in weight_stds:
        nn.init.constant_(predictor.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    Build a stack of fully connected layers on top of the flattened input.

    The following attributes are parsed from config:
        num_fc: the number of fc layers
        fc_dim: the dimension of the fc layers
    """
    super().__init__()

    # fmt: off
    num_fc = cfg.MODEL.HOI_BOX_HEAD.NUM_FC
    fc_dim = cfg.MODEL.HOI_BOX_HEAD.FC_DIM
    # fmt: on
    assert num_fc > 0

    # Starts as (C, H, W) and becomes the plain int `fc_dim` after the first layer.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.fcs = []
    for k in range(num_fc):
        # np.prod returns a NumPy scalar; cast it so Linear receives a plain int.
        fc = Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(self, cfg, input_shape):
    """
    Build the ROI pooler, the box head and a linear classifier from config.

    Args:
        cfg: config node; reads ``MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD`` and
            ``MODEL.ROI_HEADS.NUM_CLASSES``.
        input_shape: mapping from feature name to an object exposing
            ``stride`` and ``channels``.
    """
    super().__init__()

    head_cfg = cfg.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD
    in_features = head_cfg.IN_FEATURES
    pooler_resolution = head_cfg.POOLER_RESOLUTION
    pooler_scales = tuple(1.0 / input_shape[name].stride for name in in_features)
    sampling_ratio = head_cfg.POOLER_SAMPLING_RATIO
    pooler_type = head_cfg.POOLER_TYPE

    # Only the channel count of the first input feature is used for the head.
    in_channels = input_shape[in_features[0]].channels

    self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
    self.box_in_features = in_features

    self.meta_box_pooler = ROIPooler(
        output_size=pooler_resolution,
        scales=pooler_scales,
        sampling_ratio=sampling_ratio,
        pooler_type=pooler_type,
    )
    pooled_shape = ShapeSpec(
        channels=in_channels, height=pooler_resolution, width=pooler_resolution
    )
    self.meta_box_head = build_box_head(cfg, pooled_shape)

    head_out = self.meta_box_head.output_shape
    # Backward compatibility: the head may report a bare channel count.
    if isinstance(head_out, int):
        head_out = ShapeSpec(channels=head_out)
    out_width = head_out.width or 1
    out_height = head_out.height or 1
    feature_dim = head_out.channels * out_width * out_height

    self.input_size = feature_dim
    self.pi_normalizer = 0.5 * feature_dim * np.log(2 * np.pi)

    # Foreground classes plus one extra output.
    classifier = Linear(feature_dim, self.num_classes + 1)
    self.rank_loss_classifier = classifier
    nn.init.normal_(classifier.weight, std=0.01)
    nn.init.constant_(classifier.bias, 0.0)
def __init__(self, cfg, input_shape):
    """
    Replace the parent's ``rank_loss_classifier`` with a bias-free square
    linear layer ``sim_matrix`` whose weight starts as the identity matrix.
    """
    super().__init__(cfg, input_shape)
    del self.rank_loss_classifier

    dim = self.input_size
    sim = Linear(dim, dim, bias=False)
    # Zero everything, then set the diagonal to one without tracking gradients.
    nn.init.constant_(sim.weight, 0.)
    with torch.no_grad():
        sim.weight.fill_diagonal_(1.)
    self.sim_matrix = sim
def __init__(self, input_shape, fine_bone_name, fine_bone_emb_dim, std_category_num,
             std_cls_loss_type, arc_softmax_loss_weights, **kwargs):
    """
    Add a third classification branch on top of the parent output layer.

    The parent is initialized with a 1x1 spatial ShapeSpec that keeps only
    the channel count of ``input_shape``; remaining ``kwargs`` are forwarded
    to it.

    Args:
        input_shape: input feature shape; its ``channels`` attribute is used.
        fine_bone_name: passed to ``self.build_fine_bone``.
        fine_bone_emb_dim (int): embedding size produced by the fine bone and
            consumed by ``standard_cls_score``.
        std_category_num (int): output size of ``standard_cls_score``.
        std_cls_loss_type: stored as ``self.std_cls_loss_type``.
        arc_softmax_loss_weights: stored as ``self.arc_softmax_loss_weights``.
    """
    channels_only = ShapeSpec(channels=input_shape.channels, width=1, height=1)
    super(TripleBranchOutputLayer, self).__init__(channels_only, **kwargs)

    self.std_category_num = std_category_num
    self.std_cls_loss_type = std_cls_loss_type
    self.arc_softmax_loss_weights = arc_softmax_loss_weights
    self.input_channels = input_shape.channels

    # Newly added third classification branch: predicts whether the item is standard.
    # @Will Lee: the standard prediction does not consider background, because the
    # coarse category branch already takes care of that.
    self.fine_bone = self.build_fine_bone(fine_bone_name, emb_dim=fine_bone_emb_dim)
    self.standard_cls_score = Linear(fine_bone_emb_dim, self.std_category_num)

    for param_name, tensor in self.fine_bone.named_parameters():
        # Two independent checks: a name containing both substrings gets both inits.
        if 'weight' in param_name:
            nn.init.normal_(tensor, std=0.01)
        if 'bias' in param_name:
            nn.init.constant_(tensor, 0)

    nn.init.normal_(self.standard_cls_score.weight, std=0.01)
    nn.init.constant_(self.standard_cls_score.bias, 0)
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    Build one shared fc layer followed by two parallel fc branches
    (regression and classification).

    The layer widths are fixed in code: the regression branch uses
    [256, 256, 2] and the classification branch [256, 256, K * K * 2],
    where K is read from ``cfg.K``.
    """
    super().__init__()

    fc_dim_regr = [256, 256, 2]
    fc_dim_cls = [256, 256, cfg.K * cfg.K * 2]
    num_fc = 3

    # np.prod returns a NumPy scalar; cast it so Linear receives a plain int.
    flat_dim = int(
        np.prod((input_shape.channels, input_shape.height, input_shape.width))
    )

    self.fcs_regr = []
    self.fcs_cls = []

    # The shared layer feeds both branches, so each branch starts from the
    # first entry of its width list.
    self.fc_shared = Linear(flat_dim, fc_dim_regr[0])
    self._output_size_regr = fc_dim_regr[0]
    self._output_size_cls = fc_dim_cls[0]

    for k in range(num_fc - 1):
        fc_regr = Linear(self._output_size_regr, fc_dim_regr[k + 1])
        fc_cls = Linear(self._output_size_cls, fc_dim_cls[k + 1])
        self.add_module("fc_regr{}".format(k + 1), fc_regr)
        self.add_module("fc_cls{}".format(k + 1), fc_cls)
        self.fcs_regr.append(fc_regr)
        self.fcs_cls.append(fc_cls)
        self._output_size_regr = fc_dim_regr[k + 1]
        self._output_size_cls = fc_dim_cls[k + 1]

    weight_init.c2_xavier_fill(self.fc_shared)
    for layer in self.fcs_regr:
        weight_init.c2_xavier_fill(layer)
    for layer in self.fcs_cls:
        weight_init.c2_xavier_fill(layer)
def __init__(self, cfg, input_shape):
    """
    Build the class and box predictors, reading every setting from ``cfg``.

    Args:
        cfg: config node. Reads ``MODEL.ROI_BOX_HEAD`` (BBOX_REG_WEIGHTS,
            CLS_AGNOSTIC_BBOX_REG, SMOOTH_L1_BETA), ``MODEL.ROI_HEADS``
            (NUM_CLASSES, SCORE_THRESH_TEST, NMS_THRESH_TEST),
            ``TEST.DETECTIONS_PER_IMAGE`` and ``ZERO_SHOT.ZERO_SHOT_ON``.
        input_shape (ShapeSpec or int): shape of the input feature to this
            module. A plain int is interpreted as a channel count.
    """
    super(BoxOutputLayers, self).__init__()

    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    roi_heads_cfg = cfg.MODEL.ROI_HEADS

    self.box2box_transform = Box2BoxTransform(weights=box_head_cfg.BBOX_REG_WEIGHTS)
    self.num_classes = roi_heads_cfg.NUM_CLASSES
    self.cls_agnostic_bbox_reg = box_head_cfg.CLS_AGNOSTIC_BBOX_REG
    self.smooth_l1_beta = box_head_cfg.SMOOTH_L1_BETA
    self.test_score_thresh = roi_heads_cfg.SCORE_THRESH_TEST
    self.test_nms_thresh = roi_heads_cfg.NMS_THRESH_TEST
    self.test_topk_per_image = cfg.TEST.DETECTIONS_PER_IMAGE
    self.zero_shot_on = cfg.ZERO_SHOT.ZERO_SHOT_ON

    # Backward compatibility: callers may still pass a bare channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    feature_dim = input_shape.channels * width * height

    # Foreground classes plus one background class.
    self.cls_score = Linear(feature_dim, self.num_classes + 1)
    reg_groups = 1 if self.cls_agnostic_bbox_reg else self.num_classes
    box_dim = len(self.box2box_transform.weights)
    self.bbox_pred = Linear(feature_dim, reg_groups * box_dim)

    for predictor, std in ((self.cls_score, 0.01), (self.bbox_pred, 0.001)):
        nn.init.normal_(predictor.weight, std=std)
    for predictor in (self.cls_score, self.bbox_pred):
        nn.init.constant_(predictor.bias, 0)

    if self.zero_shot_on:
        self._init_zero_shot(cfg)
def __init__(self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm=""):
    """
    NOTE: this interface is experimental.

    Logs the constructor arguments, then builds the conv layers followed by
    the fc layers.

    Args:
        input_shape (ShapeSpec): shape of the input feature.
        conv_dims (list[int]): the output dimensions of the conv layers
        fc_dims (list[int]): the output dimensions of the fc layers
        conv_norm (str or callable): normalization for the conv layers.
            See :func:`detectron2.layers.get_norm` for supported types.
    """
    logger = logging.getLogger(__name__)
    logger.info("FastRCNNConvFCHead input_shape: {}".format(input_shape))
    logger.info("FastRCNNConvFCHead conv_dims: {}".format(conv_dims))
    logger.info("FastRCNNConvFCHead fc_dims: {}".format(fc_dims))
    logger.info("FastRCNNConvFCHead conv_norm: {}".format(conv_norm))

    super().__init__()
    assert len(conv_dims) + len(fc_dims) > 0

    # (C, H, W) while convs are stacked; becomes a plain int after the first fc.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k, conv_dim in enumerate(conv_dims):
        conv = Conv2d(
            self._output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            # The conv bias is only used when no normalization layer follows.
            bias=not conv_norm,
            norm=get_norm(conv_norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k, fc_dim in enumerate(fc_dims):
        # np.prod returns a NumPy scalar; cast it so Linear receives a plain int.
        fc = Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int], conv_norm=""):
    """
    NOTE: this interface is experimental.

    Builds conv layers, then a flatten step and fc layers, each fc followed
    by a ReLU and a dropout module registered under its own name.

    Args:
        input_shape (ShapeSpec): shape of the input feature.
        conv_dims (list[int]): the output dimensions of the conv layers
        fc_dims (list[int]): the output dimensions of the fc layers
        conv_norm (str or callable): normalization for the conv layers.
            See :func:`detectron2.layers.get_norm` for supported types.
    """
    super().__init__()
    assert len(conv_dims) + len(fc_dims) > 0

    # (C, H, W) while convs are stacked; becomes a plain int after the first fc.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for idx, conv_dim in enumerate(conv_dims, start=1):
        in_channels = self._output_size[0]
        conv = Conv2d(
            in_channels,
            conv_dim,
            kernel_size=3,
            padding=1,
            # The conv bias is only used when no normalization layer follows.
            bias=not conv_norm,
            norm=get_norm(conv_norm, conv_dim),
            activation=nn.ReLU(inplace=True),
        )
        self.add_module("conv{}".format(idx), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for idx, fc_dim in enumerate(fc_dims, start=1):
        if idx == 1:
            # Registered once, ahead of the first fc layer.
            self.add_module("flatten", nn.Flatten())
        flat_dim = int(np.prod(self._output_size))
        fc = Linear(flat_dim, fc_dim)
        self.add_module("fc{}".format(idx), fc)
        self.add_module("fc_relu{}".format(idx), nn.ReLU(inplace=True))
        self.add_module("fc_dropout{}".format(idx), nn.Dropout(p=0.5, inplace=False))
        self.fcs.append(fc)
        self._output_size = fc_dim

    for conv in self.conv_norm_relus:
        weight_init.c2_msra_fill(conv)
    # The fc layers use a small normal init with a constant 0.1 bias.
    for fc in self.fcs:
        torch.nn.init.normal_(fc.weight, std=0.005)
        torch.nn.init.constant_(fc.bias, 0.1)
def __init__(self, input_shape, *, box2box_transform, num_classes, test_score_thresh=0.0,
             test_nms_thresh=0.5, test_topk_per_image=100, cls_agnostic_bbox_reg=False,
             smooth_l1_beta=0.0, box_reg_loss_type="smooth_l1", loss_weight=1.0,
             weak_detector_head=None, regression_branch=False, terms=None,
             freeze_layers=None, embedding_path=''):
    """
    Initialize the parent output layers and add zero-initialized fine-tuning
    ("delta") predictors for class scores and box regression.

    All arguments are forwarded unchanged to the parent constructor.
    ``terms`` and ``freeze_layers`` default to None, which is replaced by a
    fresh empty dict / list for each instance.
    """
    # Create new containers per call. A mutable default object would be shared
    # by every instance constructed without these arguments.
    terms = {} if terms is None else terms
    freeze_layers = [] if freeze_layers is None else freeze_layers

    super(SupervisedDetectorOutputsFineTune, self).__init__(
        input_shape=input_shape,
        box2box_transform=box2box_transform,
        num_classes=num_classes,
        test_score_thresh=test_score_thresh,
        test_nms_thresh=test_nms_thresh,
        test_topk_per_image=test_topk_per_image,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        smooth_l1_beta=smooth_l1_beta,
        box_reg_loss_type=box_reg_loss_type,
        loss_weight=loss_weight,
        weak_detector_head=weak_detector_head,
        regression_branch=regression_branch,
        terms=terms,
        freeze_layers=freeze_layers,
        embedding_path=embedding_path,
    )

    # Define delta predictors; their sizes come from attributes set by the parent.
    self.cls_score_ft = Linear(self.input_size, self.num_classes + 1)
    self.bbox_pred_ft = Linear(self.input_size, self.num_bbox_reg_classes * self.box_dim)

    # Start from all-zero weights and biases, so both predictors output zero
    # until they are trained.
    for layer in [self.cls_score_ft, self.bbox_pred_ft]:
        nn.init.constant_(layer.weight, 0.)
        nn.init.constant_(layer.bias, 0.)
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    Build conv layers followed by fc layers, with an optional dropout module.

    The following attributes are parsed from config:
        num_conv, num_fc: the number of conv/fc layers
        conv_dim/fc_dim: the dimension of the conv/fc layers
        norm: normalization for the conv layers
        dropout: when truthy, an ``nn.Dropout(p=0.5)`` is created as ``self.dropout``
    """
    super().__init__()

    # fmt: off
    num_conv   = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
    conv_dim   = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
    num_fc     = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
    fc_dim     = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
    norm       = cfg.MODEL.ROI_BOX_HEAD.NORM
    dropout    = cfg.MODEL.ROI_BOX_HEAD.DROP_OUT
    # fmt: on
    assert num_conv + num_fc > 0

    # The dropout module only exists when enabled in the config.
    self.dropout_en = dropout
    if self.dropout_en:
        self.dropout = nn.Dropout(p=0.5)

    # (C, H, W) while convs are stacked; becomes a plain int after the first fc.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k in range(num_conv):
        conv = Conv2d(
            self._output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            # The conv bias is only used when no normalization layer follows.
            bias=not norm,
            norm=get_norm(norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k in range(num_fc):
        # np.prod returns a NumPy scalar; cast it so Linear receives a plain int.
        fc = Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(
    self,
    input_shape: ShapeSpec,
    num_conv: int,
    conv_dim: int,
    num_fc: int,
    fc_dim: int,
    conv_norm="",
):
    """
    Build ``num_conv`` conv layers followed by ``num_fc`` fc layers.

    Args:
        input_shape (ShapeSpec): shape of the input feature.
        num_conv, num_fc: the number of conv/fc layers
        conv_dim/fc_dim: the output dimension of the conv/fc layers
        conv_norm (str or callable): normalization for the conv layers.
            See :func:`detectron2.layers.get_norm` for supported types.
    """
    super().__init__()
    assert num_conv + num_fc > 0

    # (C, H, W) while convs are stacked; becomes a plain int after the first fc.
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k in range(num_conv):
        conv = Conv2d(
            self._output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            # The conv bias is only used when no normalization layer follows.
            bias=not conv_norm,
            norm=get_norm(conv_norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k in range(num_fc):
        # np.prod returns a NumPy scalar; cast it so Linear receives a plain int.
        fc = Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(self, in_features, out_classes, mode='softmax', s=30.0, m=0.50, easy_margin=False):
    """
    Create the sub-layers selected by ``mode``.

    Args:
        in_features (int): input feature size.
        out_classes (int): number of outputs.
        mode (str): one of 'arc', 'softmax', 'arc+softmax', 'cross_entropy'.
            An ``ArcLayer`` is created when the mode contains 'arc'; a linear
            layer when it contains 'softmax' or 'cross_entropy'.
        s, m, easy_margin: forwarded to ``ArcLayer``.
    """
    super(ArcSoftLayer, self).__init__()
    self.mode = mode
    assert mode in ('arc', 'softmax', 'arc+softmax', 'cross_entropy')

    wants_arc = 'arc' in mode
    wants_linear = 'softmax' in mode or 'cross_entropy' in mode

    if wants_arc:
        self.arc_ly = ArcLayer(in_features, out_classes, s, m, easy_margin)

    if wants_linear:
        linear = Linear(in_features, out_classes)
        self.soft_ly = linear
        nn.init.normal_(linear.weight, std=0.01)
        nn.init.constant_(linear.bias, 0)
def __init__(self, input_shape: ShapeSpec, *args, num_classes: int, prior_prob: float = 0.001, **kwargs):
    """
    Initialize the parent, then replace ``cls_score`` with a layer that has
    no background output and a bias derived from ``prior_prob``.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature. A plain
            int is interpreted as a channel count.
        num_classes (int): number of outputs of the new ``cls_score`` layer.
        prior_prob (float): prior probability used to set the initial bias.
        *args, **kwargs: forwarded to the parent constructor.
    """
    super().__init__(input_shape=input_shape, *args, num_classes=num_classes, **kwargs)

    # Backward compatibility: callers may still pass a bare channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    width = input_shape.width or 1
    height = input_shape.height or 1
    feature_dim = input_shape.channels * width * height

    # Rebuild the last FC layer without the "+ 1" background output.
    classifier = Linear(feature_dim, num_classes)
    self.cls_score = classifier
    nn.init.normal_(classifier.weight, std=0.01)

    # Initialize the bias from the prior probability for more stable training.
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    nn.init.constant_(classifier.bias, bias_value)
def __init__(self, input_shape, conv_dims, fc_dims, conv_norm=""):
    """
    Build conv layers, then a flatten step and fc layers, each fc followed
    by a ReLU module registered under its own name.

    Args:
        input_shape: indexable shape; elements 1, 2 and 3 are used as the
            channel, height and width sizes.
        conv_dims (list[int]): the output dimensions of the conv layers
        fc_dims (list[int]): the output dimensions of the fc layers
        conv_norm (str or callable): normalization for the conv layers.
    """
    super().__init__()
    assert len(conv_dims) + len(fc_dims) > 0

    # (C, H, W) while convs are stacked; becomes a plain int after the first fc.
    self._output_size = (input_shape[1], input_shape[2], input_shape[3])

    self.conv_norm_relus = []
    for idx, conv_dim in enumerate(conv_dims, start=1):
        in_channels = self._output_size[0]
        conv = Conv2d(
            in_channels,
            conv_dim,
            kernel_size=3,
            padding=1,
            # The conv bias is only used when no normalization layer follows.
            bias=not conv_norm,
            norm=get_norm(conv_norm, conv_dim),
            activation=nn.ReLU(),
        )
        self.add_module("conv{}".format(idx), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for idx, fc_dim in enumerate(fc_dims, start=1):
        if idx == 1:
            # Registered once, ahead of the first fc layer.
            self.add_module("flatten", nn.Flatten())
        flat_dim = int(np.prod(self._output_size))
        fc = Linear(flat_dim, fc_dim)
        self.add_module("fc{}".format(idx), fc)
        self.add_module("fc_relu{}".format(idx), nn.ReLU())
        self.fcs.append(fc)
        self._output_size = fc_dim

    for conv in self.conv_norm_relus:
        weight_init.c2_msra_fill(conv)
    for fc in self.fcs:
        weight_init.c2_xavier_fill(fc)
def build_std_bone(cls, standard_cls_branch_name, input_shape, emb_dim=512, reduction=8):
    """
    Build the embedding network of the standard-classification branch.

    Supported names are '2fc', 'DWSE' and '131ConvSE'; any other name raises
    NotImplementedError. Every variant ends with a linear layer producing
    ``emb_dim`` features followed by a ReLU.

    Args:
        standard_cls_branch_name (str): which variant to build.
        input_shape: object exposing ``channels``, ``height`` and ``width``.
        emb_dim (int): size of the output embedding.
        reduction (int): forwarded to ``SELayer``.

    Returns:
        nn.Sequential: the assembled branch.
    """
    # Shape notes carried over from the original comments:
    #   2fc:  7*7*256 --> flatten --> 512
    #   DWSE: 7*7*256 --> 7*7*512 --> 4*4*512 --> flatten --> 512
    if standard_cls_branch_name == '2fc':
        flat_dim = input_shape.height * input_shape.width * input_shape.channels
        layers = [
            Flatten(),
            Linear(flat_dim, emb_dim),
            nn.ReLU(inplace=True),
        ]
    elif standard_cls_branch_name == 'DWSE':
        ch = input_shape.channels
        layers = [
            # Depthwise convolution (groups == channels).
            nn.Conv2d(ch, ch, kernel_size=(3, 3), padding=1, groups=ch),
            nn.BatchNorm2d(ch),
            # Channel transform: a 3x3 conv, then a 1x1 conv that doubles the channels.
            nn.Conv2d(ch, ch, (3, 3), padding=1, stride=1),
            nn.BatchNorm2d(ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(ch, ch * 2, (1, 1), padding=0, stride=1),
            nn.BatchNorm2d(ch * 2),
            nn.ReLU(inplace=True),
            # SE module.
            SELayer(ch * 2, reduction=reduction),
            nn.ZeroPad2d(padding=(0, 1, 0, 1)),  # ?x7x7 --> ?x8x8
            nn.MaxPool2d((2, 2), stride=2),
            # Embedding.
            Flatten(),
            Linear(4 * 4 * ch * 2, emb_dim),
            nn.ReLU(inplace=True),
        ]
    elif standard_cls_branch_name == '131ConvSE':
        ch = input_shape.channels
        layers = [
            # 1x1 -> 3x3 -> 1x1 convs; the last one doubles the channels.
            nn.Conv2d(ch, ch, (1, 1)),
            nn.BatchNorm2d(ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(ch, ch, (3, 3), padding=1, stride=1),
            nn.BatchNorm2d(ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(ch, ch * 2, (1, 1)),
            nn.BatchNorm2d(ch * 2),
            nn.ReLU(inplace=True),
            # SE module.
            SELayer(ch * 2, reduction=reduction),
            # Max pooling lowers the resolution.
            nn.ZeroPad2d(padding=(0, 1, 0, 1)),  # ?x7x7 --> ?x8x8
            nn.MaxPool2d((2, 2), stride=2),
            # Embedding (this variant uses torch's nn.Linear directly).
            Flatten(),
            nn.Linear(4 * 4 * ch * 2, emb_dim),
            nn.ReLU(inplace=True),
        ]
    else:
        raise NotImplementedError('目前标准分类分支网络构建,仅支持2fc、DWSE、131ConvSE三种')

    std_cls_branch = nn.Sequential(*layers)
    return std_cls_branch
def __init__(
    self,
    input_shape,
    *,
    standard_cls_bone,
    std_num_classes,
    std_cls_emb_dim,
    box2box_transform,
    num_classes,
    arc_args=None,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    category_loss_type='cross_entropy',
    std_cls_loss_type='softmax',
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    box_reg_loss_weight=1.0,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module
        standard_cls_bone (nn.Module): embedding network of the fine-grained
            branch; stored on the module and re-initialized here.
        std_num_classes (int): number of fine-grained classes; the scoring
            layer has ``std_num_classes + 1`` outputs.
        std_cls_emb_dim (int): input size of the fine-grained scoring layer.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        arc_args (dict or None): must provide the keys 's', 'm' and
            'easy_margin' when `std_cls_loss_type` is "arc"; unused otherwise.
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        category_loss_type (str): stored as ``self.category_loss_type``.
        std_cls_loss_type (str): "softmax" or "arc"; selects the fine-grained
            scoring layer. Any other value raises NotImplementedError.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        box_reg_loss_weight (float): Weight for box regression loss
    """
    super(MlabelStandardFastRCNNOutputLayer2, self).__init__()

    # Use a fresh dict rather than a shared mutable default. A missing key
    # still raises KeyError in the "arc" branch below.
    if arc_args is None:
        arc_args = {}

    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    # Coarse category classification: num_classes foreground classes and one
    # background class (hence + 1).
    self.category_score = nn.Sequential(
        Flatten(),
        Linear(input_size, num_classes + 1),
    )

    # Box regression.
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = nn.Sequential(
        Flatten(),
        Linear(input_size, num_bbox_reg_classes * box_dim),
    )

    # Fine-grained classification.
    self.standard_cls_bone = standard_cls_bone
    if std_cls_loss_type == 'softmax':
        self.std_cls_score = Linear(std_cls_emb_dim, std_num_classes + 1)
        nn.init.normal_(self.std_cls_score.weight, std=0.01)
        nn.init.constant_(self.std_cls_score.bias, 0)
    elif std_cls_loss_type == 'arc':
        self.std_cls_score = ArcLayer(
            std_cls_emb_dim,
            std_num_classes + 1,
            s=arc_args['s'],
            m=arc_args['m'],
            easy_margin=arc_args['easy_margin'],
        )
    else:
        raise NotImplementedError('目前仅支持softmax、arc两种模式,暂不支持{}'.format(
            std_cls_loss_type, ))

    # Re-initialize the bone and both prediction heads by parameter name.
    # Note that bbox_pred weights also get std=0.01 here.
    for pairs in [
            self.standard_cls_bone.named_parameters(),
            self.category_score.named_parameters(),
            self.bbox_pred.named_parameters(),
    ]:
        for name, params in pairs:
            if 'weight' in name:
                nn.init.normal_(params, std=0.01)
            elif 'bias' in name:
                nn.init.constant_(params, 0.)

    self.std_cls_loss_type = std_cls_loss_type
    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type
    self.box_reg_loss_weight = box_reg_loss_weight
    self.category_loss_type = category_loss_type
def __init__(self, input_shape, *, box2box_transform, num_classes, test_score_thresh=0.0,
             test_nms_thresh=0.5, test_topk_per_image=100, cls_agnostic_bbox_reg=False,
             smooth_l1_beta=0.0, box_reg_loss_type="smooth_l1", box_reg_loss_weight=1.0,
             add_unlabeled_class=False, label_converter=None, reverse_label_converter=None,
             num_centroid=256, clustering_interval=1000, cluster_obj_thresh=0.8,
             coupled_cos_thresh=0.15, coupled_obj_thresh=0.9, cos_thresh=0.15,
             pos_class_thresh=0.7, nms_thresh=0.3, n_sample=20, output_dir='./'):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        box_reg_loss_weight (float): Weight for box regression loss
        add_unlabeled_class (bool): stored as ``self.add_unlabeled_class``.
        label_converter (Tensor): required despite the None default, because
            ``len()`` and ``.max()`` are called on it. It is extended with
            `num_centroid` new labels that follow its current maximum.
        reverse_label_converter: when not None, `num_classes` becomes
            ``min(num_classes + 1, len(reverse_label_converter))``.
        num_centroid (int): number of extra outputs appended to the classifier.
        clustering_interval, cluster_obj_thresh, coupled_cos_thresh,
        coupled_obj_thresh, cos_thresh, pos_class_thresh, nms_thresh, n_sample:
            stored unchanged as attributes of the same name.
        output_dir (str): directory whose ``pseudo_gts/*.pth`` files are
            scanned; the one with the highest numeric name is loaded.
    """
    super().__init__()
    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    self.label_converter = label_converter
    self.reverse_label_converter = reverse_label_converter
    self.original_num_classes = len(self.label_converter)
    # Append `num_centroid` new labels right after the current maximum label.
    addition = self.label_converter.max() + torch.arange(num_centroid) + 1
    self.label_converter = torch.cat((self.label_converter, addition))

    if self.reverse_label_converter is not None:
        num_classes = min(num_classes + 1, len(reverse_label_converter))
    num_cls = num_classes
    self.add_unlabeled_class = add_unlabeled_class
    self.num_classes = num_cls

    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_cls - 1
    box_dim = len(box2box_transform.weights)

    # The classifier covers the known classes plus one slot per centroid.
    self.cls_score = Linear(input_size, num_cls + num_centroid)
    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.constant_(self.cls_score.bias, 0)

    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    nn.init.constant_(self.bbox_pred.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type
    self.box_reg_loss_weight = box_reg_loss_weight

    self.feature_memory = []
    self.label_memory = []
    self.obj_score_memory = []
    self.path_memory = []
    self.bbox_memory = []
    self.num_centroid = num_centroid
    self.clustering_interval = clustering_interval

    # Frozen per-class weight column: 1 for the known classes, 0 for centroid
    # slots. Two earlier throwaway allocations of `weight` were removed.
    weight = torch.zeros((num_centroid + num_cls, 1))
    weight[:num_cls] = 1
    # NOTE(review): from_pretrained is a classmethod, so the Embedding it is
    # called on is discarded. The call form is kept unchanged so that
    # construction draws the same random numbers as before.
    self.cls_weight = nn.Embedding(num_centroid + num_cls, 1).from_pretrained(weight, freeze=True)

    self.turn_on = False
    self.step = 1
    self.cluster_count = 1
    self.pseudo_gt = None
    self.n_pseudo_gt = 0
    self.n_sample = n_sample
    self.cluster_obj_thresh = cluster_obj_thresh
    self.cos_thresh = cos_thresh
    self.coupled_cos_thresh = coupled_cos_thresh
    self.coupled_obj_thresh = coupled_obj_thresh
    self.pos_class_thresh = pos_class_thresh
    self.nms_thresh = nms_thresh
    self.pal = np.random.random((1024, 3)) * 255
    self.size_opt = 'lm'
    self.output_dir = output_dir

    # Resume from the saved pseudo ground truth with the highest step number.
    saved = glob.glob(os.path.join(self.output_dir, 'pseudo_gts', '*.pth'))
    if len(saved) > 0:
        # basename() handles the platform's path separator; splitting on '/'
        # fails for paths that use backslashes.
        steps = [int(os.path.basename(x).replace('.pth', '')) for x in saved]
        g = max(steps)
        # Format only the file name, so braces in output_dir cannot break it.
        path = os.path.join(self.output_dir, 'pseudo_gts', '{}.pth'.format(g))
        # NOTE(review): torch.load unpickles the file; only point output_dir
        # at trusted checkpoints.
        self.pseudo_gt = torch.load(path)
        self.n_pseudo_gt = len(self.pseudo_gt)
        self.step = g + 1

    if self.pseudo_gt is not None and len(self.pseudo_gt) > 0:
        # Column 1 of the pseudo ground truth is read as the label; enable
        # every weight row below the largest stored label.
        label = int(self.pseudo_gt[:, 1].max())
        weight[:label] = 1
        self.cls_weight = nn.Embedding(num_centroid + num_cls, 1).from_pretrained(weight, freeze=True)
def init_pred_layers(self):
    """Create the classification and box-regression prediction layers.

    Returns:
        tuple: ``(cls_score, bbox_pred)``. ``cls_score`` is a ``Linear`` from
        ``self.input_size`` features to ``self.num_classes + 1`` outputs;
        ``bbox_pred`` is a ``Linear`` from ``self.input_size`` features to
        ``self.pred_reg`` outputs.
    """
    in_features = self.input_size
    # The classifier is built first so parameter creation order is unchanged.
    classifier = Linear(in_features, self.num_classes + 1)
    regressor = Linear(in_features, self.pred_reg)
    return classifier, regressor
class FastRCNNOutputLayers_baseline(nn.Module):
    """
    Output head for Fast R-CNN made of two linear layers:

      (1) a classifier that scores every proposal
      (2) a regressor that predicts proposal-to-detection box deltas
    """

    @configurable
    def __init__(
        self,
        input_shape,
        *,
        box2box_transform,
        num_classes,
        test_score_thresh=0.0,
        test_nms_thresh=0.5,
        test_topk_per_image=100,
        cls_agnostic_bbox_reg=False,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
        box_reg_loss_weight=1.0,
        add_unlabeled_class=False,
        label_converter=None,
        reverse_label_converter=None
    ):
        """
        NOTE: this interface is experimental.

        Args:
            input_shape (ShapeSpec or int): shape of the input feature to this
                module. A bare int is interpreted as the channel count.
            box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
                its ``weights`` length defines the box dimension.
            num_classes (int): base number of classes (adjusted below).
            test_score_thresh (float): threshold to filter prediction results.
            test_nms_thresh (float): NMS threshold for prediction results.
            test_topk_per_image (int): number of top predictions per image.
            cls_agnostic_bbox_reg (bool): whether to use class-agnostic box
                regression (a single set of deltas per proposal).
            smooth_l1_beta (float): transition point from L1 to L2 loss. Only
                used if `box_reg_loss_type` is "smooth_l1".
            box_reg_loss_type (str): box regression loss type.
                One of: "smooth_l1", "giou".
            box_reg_loss_weight (float): weight for box regression loss.
            add_unlabeled_class (bool): when true, one extra classifier output
                is allocated.
            label_converter: stored and forwarded to ``FastRCNNOutputs`` by
                :meth:`losses`.
            reverse_label_converter: stored and forwarded to
                ``fast_rcnn_inference`` by :meth:`inference`. When given, the
                classifier width becomes
                ``min(num_classes + 1, len(reverse_label_converter))``.
        """
        super().__init__()

        # Backward compatibility: allow a plain channel count.
        if isinstance(input_shape, int):
            input_shape = ShapeSpec(channels=input_shape)
        in_features = (
            input_shape.channels
            * (input_shape.width or 1)
            * (input_shape.height or 1)
        )

        self.label_converter = label_converter
        self.reverse_label_converter = reverse_label_converter
        self.add_unlabeled_class = add_unlabeled_class

        # Work out the classifier width. The unlabeled-class increment is
        # applied before the reverse_label_converter cap (older jobs applied
        # the two steps in the opposite order).
        n_outputs = num_classes
        if add_unlabeled_class:
            n_outputs = n_outputs + 1
        if self.reverse_label_converter is not None:
            n_outputs = min(n_outputs + 1, len(reverse_label_converter))

        box_dim = len(box2box_transform.weights)
        n_reg_classes = 1 if cls_agnostic_bbox_reg else n_outputs - 1

        # Classifier head.
        self.cls_score = Linear(in_features, n_outputs)
        nn.init.normal_(self.cls_score.weight, std=0.01)
        nn.init.constant_(self.cls_score.bias, 0)

        # Box-delta head.
        self.bbox_pred = Linear(in_features, n_reg_classes * box_dim)
        nn.init.normal_(self.bbox_pred.weight, std=0.001)
        nn.init.constant_(self.bbox_pred.bias, 0)

        self.box2box_transform = box2box_transform
        self.smooth_l1_beta = smooth_l1_beta
        self.box_reg_loss_type = box_reg_loss_type
        self.box_reg_loss_weight = box_reg_loss_weight
        self.test_score_thresh = test_score_thresh
        self.test_nms_thresh = test_nms_thresh
        self.test_topk_per_image = test_topk_per_image

    @classmethod
    def from_config(cls, cfg, input_shape, label_converter=None, reverse_label_converter=None):
        """Translate a config node into the keyword arguments of ``__init__``."""
        box_head = cfg.MODEL.ROI_BOX_HEAD
        roi_heads = cfg.MODEL.ROI_HEADS
        eopsn = cfg.MODEL.EOPSN
        kwargs = {
            "input_shape": input_shape,
            "box2box_transform": Box2BoxTransform(weights=box_head.BBOX_REG_WEIGHTS),
            "num_classes": roi_heads.NUM_CLASSES,
            "cls_agnostic_bbox_reg": box_head.CLS_AGNOSTIC_BBOX_REG,
            "smooth_l1_beta": box_head.SMOOTH_L1_BETA,
            "test_score_thresh": roi_heads.SCORE_THRESH_TEST,
            "test_nms_thresh": roi_heads.NMS_THRESH_TEST,
            "test_topk_per_image": cfg.TEST.DETECTIONS_PER_IMAGE,
            "box_reg_loss_type": box_head.BBOX_REG_LOSS_TYPE,
            "box_reg_loss_weight": box_head.BBOX_REG_LOSS_WEIGHT,
            # The unlabeled class is only added when unlabeled regions are
            # enabled and not ignored.
            "add_unlabeled_class": eopsn.UNLABELED_REGION and (not eopsn.IGNORE_UNLABELED_REGION),
            "label_converter": label_converter,
            "reverse_label_converter": reverse_label_converter,
        }
        return kwargs

    def forward(self, x):
        """
        Returns:
            Tensor: classification scores, one row per box and one column per
                classifier output.
            Tensor: bounding box regression deltas for each box, as produced
                by the ``bbox_pred`` layer.
        """
        features = torch.flatten(x, start_dim=1) if x.dim() > 2 else x
        return self.cls_score(features), self.bbox_pred(features)

    def get_logits(self, x):
        """Return classifier scores computed through ``cls_score.forward_freeze``."""
        features = torch.flatten(x, start_dim=1) if x.dim() > 2 else x
        return self.cls_score.forward_freeze(features)

    # TODO: move the implementation to this class.
    def losses(self, predictions, proposals):
        """
        Args:
            predictions: return values of :meth:`forward()`.
            proposals (list[Instances]): proposals that match the features
                that were used to compute predictions.

        Returns:
            Whatever ``FastRCNNOutputs(...).losses()`` returns.
        """
        scores, proposal_deltas = predictions
        outputs = FastRCNNOutputs(
            self.box2box_transform,
            scores,
            proposal_deltas,
            proposals,
            self.smooth_l1_beta,
            self.box_reg_loss_type,
            self.box_reg_loss_weight,
            self.label_converter,
            add_unlabeled_class=self.add_unlabeled_class,
        )
        return outputs.losses()

    def inference(self, predictions, proposals, use_unknown=False):
        """
        Run post-processing on the raw predictions.

        NOTE: the length of ``self.reverse_label_converter`` is taken here, so
        the module must have been built with a ``reverse_label_converter``.

        Returns:
            list[Instances]: same as `fast_rcnn_inference`.
            list[Tensor]: same as `fast_rcnn_inference`.
        """
        boxes = self.predict_boxes(predictions, proposals)
        scores = self.predict_probs(predictions, proposals)
        objness_scores = [p.objectness_logits for p in proposals]
        image_shapes = [p.image_size for p in proposals]
        return fast_rcnn_inference(
            boxes,
            scores,
            image_shapes,
            objness_scores,
            self.test_score_thresh,
            self.test_nms_thresh,
            self.test_topk_per_image,
            use_unknown,
            reverse_label_converter=self.reverse_label_converter,
            num_classes=len(self.reverse_label_converter) - 2,
        )

    def predict_boxes_for_gt_classes(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: predicted boxes for the GT class of each proposal in
                the class-specific case. Element i has shape (Ri, B), where Ri
                is the number of proposals for image i and B is the box
                dimension (4 or 5).
        """
        if not len(proposals):
            return []

        _, proposal_deltas = predictions
        per_image_boxes = [p.proposal_boxes for p in proposals]
        all_boxes = per_image_boxes[0].cat(per_image_boxes).tensor
        num_boxes, box_dim = all_boxes.shape
        decoded = self.box2box_transform.apply_deltas(proposal_deltas, all_boxes)  # Nx(KxB)

        num_reg_classes = decoded.shape[1] // box_dim
        if num_reg_classes > 1:
            gt_classes = torch.cat([p.gt_classes for p in proposals], dim=0)
            # Ignored / background proposals carry labels that are not valid
            # indices, so clamp them into range before indexing.
            gt_classes = gt_classes.clamp_(0, num_reg_classes - 1)
            row_index = torch.arange(num_boxes, dtype=torch.long, device=decoded.device)
            decoded = decoded.view(num_boxes, num_reg_classes, box_dim)[row_index, gt_classes]

        counts = [len(p) for p in proposals]
        return decoded.split(counts)

    def predict_boxes(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: predicted class-specific or class-agnostic boxes for
                each image. Element i has shape (Ri, K * B) or (Ri, B), where
                Ri is the number of proposals for image i and B is the box
                dimension (4 or 5).
        """
        if not len(proposals):
            return []

        _, proposal_deltas = predictions
        counts = [len(p) for p in proposals]
        per_image_boxes = [p.proposal_boxes for p in proposals]
        all_boxes = per_image_boxes[0].cat(per_image_boxes).tensor
        decoded = self.box2box_transform.apply_deltas(proposal_deltas, all_boxes)  # Nx(KxB)
        return decoded.split(counts)

    def predict_probs(self, predictions, proposals):
        """
        Returns:
            list[Tensor]: softmax class probabilities for each image.
                Element i has Ri rows, where Ri is the number of proposals for
                image i, and one column per classifier output.
        """
        scores, _ = predictions
        counts = [len(p) for p in proposals]
        return F.softmax(scores, dim=-1).split(counts, dim=0)
def __init__(self, cfg, input_shape, num_classes, cls_agnostic_bbox_reg, box_dim=4):
    """
    Build a conv/fc trunk followed by the classification and box heads.

    The following attributes are parsed from config:
        num_conv, num_fc: the number of conv/fc layers
        conv_dim/fc_dim: the dimension of the conv/fc layers
        norm: normalization for the conv layers

    Args:
        cfg: config node; ``cfg.MODEL.ROI_BOX_HEAD`` supplies the values above.
        input_shape: object exposing ``channels``, ``height`` and ``width``.
        num_classes (int): number of classes; the classifier built by
            ``init_pred_layers`` uses ``self.num_classes``.
        cls_agnostic_bbox_reg (bool): when true a single set of box deltas is
            predicted instead of one per class.
        box_dim (int): number of values per box.

    Raises:
        AssertionError: if both ``NUM_CONV`` and ``NUM_FC`` are zero.
    """
    super().__init__()

    # fmt: off
    box_head_cfg = cfg.MODEL.ROI_BOX_HEAD
    num_conv = box_head_cfg.NUM_CONV
    conv_dim = box_head_cfg.CONV_DIM
    num_fc   = box_head_cfg.NUM_FC
    fc_dim   = box_head_cfg.FC_DIM
    norm     = box_head_cfg.NORM
    # fmt: on
    assert num_conv + num_fc > 0

    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    self.num_classes = num_classes
    self.pred_reg = num_bbox_reg_classes * box_dim

    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k in range(num_conv):
        # Each conv consumes exactly what the previous stage produced and
        # doubles the channel count until it reaches conv_dim. Tracking the
        # real channel count (instead of re-deriving it from
        # ``channels * 2**k``) keeps consecutive layers, and the first fc
        # layer, consistent for every combination of input channels,
        # conv_dim and num_conv. Layer shapes are unchanged whenever the
        # previous formula produced a consistent stack.
        dim_in = self._output_size[0]
        dim_out = min(conv_dim, dim_in * 2)
        conv = Conv2d(
            dim_in,
            dim_out,
            kernel_size=3,
            padding=1,
            bias=not norm,
            norm=get_norm(norm, dim_out),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (dim_out, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k in range(num_fc):
        # np.prod returns a NumPy scalar; hand the layer a plain int.
        fc = Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    # With no fc layers ``output_size`` is still a (C, H, W) tuple; only the
    # channel entry is used as the predictor input size in that case.
    self.input_size = self.output_size
    if not isinstance(self.input_size, int):
        self.input_size = self.input_size[0]

    self.cls_score, self.bbox_pred = self.init_pred_layers()

    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)
def __init__(
    self,
    input_shape,
    *,
    box2box_transform,
    num_classes,
    test_score_thresh=0.0,
    test_nms_thresh=0.5,
    test_topk_per_image=100,
    cls_agnostic_bbox_reg=False,
    smooth_l1_beta=0.0,
    box_reg_loss_type="smooth_l1",
    box_reg_loss_weight=1.0,
    add_unlabeled_class=False,
    label_converter=None,
    reverse_label_converter=None
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature to this
            module. A bare int is interpreted as the channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            its ``weights`` length defines the box dimension.
        num_classes (int): base number of classes (adjusted below).
        test_score_thresh (float): threshold to filter prediction results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions per image.
        cls_agnostic_bbox_reg (bool): whether to use class-agnostic box
            regression.
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only
            used if `box_reg_loss_type` is "smooth_l1".
        box_reg_loss_type (str): box regression loss type.
            One of: "smooth_l1", "giou".
        box_reg_loss_weight (float): weight for box regression loss.
        add_unlabeled_class (bool): when true, one extra classifier output is
            allocated.
        label_converter: stored on the module as-is.
        reverse_label_converter: stored on the module; when given, the
            classifier width becomes
            ``min(num_classes + 1, len(reverse_label_converter))``.
    """
    super().__init__()

    # Backward compatibility: allow a plain channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    spatial = (input_shape.width or 1) * (input_shape.height or 1)
    in_features = input_shape.channels * spatial

    self.label_converter = label_converter
    self.reverse_label_converter = reverse_label_converter
    self.add_unlabeled_class = add_unlabeled_class

    # Classifier width: the unlabeled-class increment comes first, then the
    # reverse_label_converter cap (older jobs used the opposite order).
    width = num_classes
    if add_unlabeled_class:
        width = width + 1
    if self.reverse_label_converter is not None:
        width = min(width + 1, len(reverse_label_converter))

    box_dim = len(box2box_transform.weights)
    reg_classes = 1 if cls_agnostic_bbox_reg else width - 1

    # Classifier head.
    self.cls_score = Linear(in_features, width)
    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.constant_(self.cls_score.bias, 0)

    # Box-delta head.
    self.bbox_pred = Linear(in_features, reg_classes * box_dim)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    nn.init.constant_(self.bbox_pred.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.box_reg_loss_type = box_reg_loss_type
    self.box_reg_loss_weight = box_reg_loss_weight
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
def __init__(
    self,
    input_shape: ShapeSpec,
    *,
    box2box_transform,
    clustering_items_per_class,
    clustering_start_iter,
    clustering_update_mu_iter,
    clustering_momentum,
    clustering_z_dimension,
    enable_clustering,
    prev_intro_cls,
    curr_intro_cls,
    max_iterations,
    output_dir,
    feat_store_path,
    margin,
    num_classes: int,
    test_score_thresh: float = 0.0,
    test_nms_thresh: float = 0.5,
    test_topk_per_image: int = 100,
    cls_agnostic_bbox_reg: bool = False,
    smooth_l1_beta: float = 0.0,
    box_reg_loss_type: str = "smooth_l1",
    loss_weight: Union[float, Dict[str, float]] = 1.0,
):
    """
    NOTE: this interface is experimental.

    Args:
        input_shape (ShapeSpec): shape of the input feature to this module.
            A bare int is interpreted as the channel count.
        box2box_transform (Box2BoxTransform or Box2BoxTransformRotated):
            its ``weights`` length defines the box dimension.
        clustering_items_per_class: second argument passed to ``Store`` when
            a new feature store is created.
        clustering_start_iter: stored on the module.
        clustering_update_mu_iter: stored on the module.
        clustering_momentum: stored on the module.
        clustering_z_dimension: accepted for interface compatibility; not
            used by this constructor.
        enable_clustering: stored on the module.
        prev_intro_cls, curr_intro_cls: their sum is stored as
            ``seen_classes``.
        max_iterations: stored on the module.
        output_dir, feat_store_path: joined with ``'feat.pt'`` to locate the
            persisted feature store.
        margin: stored on the module.
        num_classes (int): number of foreground classes
        test_score_thresh (float): threshold to filter predictions results.
        test_nms_thresh (float): NMS threshold for prediction results.
        test_topk_per_image (int): number of top predictions to produce per image.
        cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
        smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
            `box_reg_loss_type` is "smooth_l1"
        box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        loss_weight (float|dict): weights to use for losses. Can be single number for
            weighting all losses, or a dict of individual weightings. Valid dict keys are:
                * "loss_cls": applied to classification loss
                * "loss_box_reg": applied to box regression loss
    """
    super().__init__()
    logger = logging.getLogger(__name__)

    if isinstance(input_shape, int):  # some backward compatibility
        input_shape = ShapeSpec(channels=input_shape)
    input_size = input_shape.channels * (input_shape.width or 1) * (input_shape.height or 1)

    # prediction layer for num_classes foreground classes and one background class (hence + 1)
    self.cls_score = Linear(input_size, num_classes + 1)
    num_bbox_reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    box_dim = len(box2box_transform.weights)
    self.bbox_pred = Linear(input_size, num_bbox_reg_classes * box_dim)

    nn.init.normal_(self.cls_score.weight, std=0.01)
    nn.init.normal_(self.bbox_pred.weight, std=0.001)
    for layer in [self.cls_score, self.bbox_pred]:
        nn.init.constant_(layer.bias, 0)

    self.box2box_transform = box2box_transform
    self.smooth_l1_beta = smooth_l1_beta
    self.test_score_thresh = test_score_thresh
    self.test_nms_thresh = test_nms_thresh
    self.test_topk_per_image = test_topk_per_image
    self.box_reg_loss_type = box_reg_loss_type

    # A scalar weight applies to both losses. Integers are accepted as well:
    # an int is a valid value for a float-annotated parameter, and checking
    # for ``float`` alone would leave e.g. ``loss_weight=1`` as a bare int
    # instead of the per-loss dict.
    if isinstance(loss_weight, (int, float)):
        loss_weight = {"loss_cls": loss_weight, "loss_box_reg": loss_weight}
    self.loss_weight = loss_weight

    self.num_classes = num_classes
    self.clustering_start_iter = clustering_start_iter
    self.clustering_update_mu_iter = clustering_update_mu_iter
    self.clustering_momentum = clustering_momentum

    self.hingeloss = nn.HingeEmbeddingLoss(2)
    self.enable_clustering = enable_clustering

    self.prev_intro_cls = prev_intro_cls
    self.curr_intro_cls = curr_intro_cls
    self.seen_classes = self.prev_intro_cls + self.curr_intro_cls
    # Class ids from seen_classes up to (but excluding) num_classes - 1.
    self.invalid_class_range = list(range(self.seen_classes, self.num_classes - 1))
    logger.info("Invalid class range: " + str(self.invalid_class_range))

    self.max_iterations = max_iterations
    self.feature_store_is_stored = False
    self.output_dir = output_dir
    self.feat_store_path = feat_store_path
    self.feature_store_save_loc = os.path.join(self.output_dir, self.feat_store_path, 'feat.pt')

    if os.path.isfile(self.feature_store_save_loc):
        logger.info('Trying to load feature store from ' + self.feature_store_save_loc)
        # NOTE(review): torch.load unpickles the file; only point output_dir /
        # feat_store_path at locations whose contents are trusted.
        self.feature_store = torch.load(self.feature_store_save_loc)
    else:
        logger.info('Feature store not found in ' +
                    self.feature_store_save_loc + '. Creating new feature store.')
        self.feature_store = Store(num_classes + 1, clustering_items_per_class)

    # One slot per class plus one extra, all initially empty.
    self.means = [None for _ in range(num_classes + 1)]
    self.margin = margin
def __init__(self,
             input_shape,
             *,
             box2box_transform,
             num_classes,
             test_score_thresh=0.0,
             test_nms_thresh=0.5,
             test_topk_per_image=100,
             cls_agnostic_bbox_reg=False,
             smooth_l1_beta=0.0,
             box_reg_loss_type="smooth_l1",
             loss_weight=1.0,
             oicr_iter=3,
             fg_threshold=0.5,
             bg_threshold=0.1,
             freeze_layers=[],
             embedding_path='',
             terms={},
             mode='Pre_Softmax',
             mil_multiplier=4.0,
             detector_temp=1.0,
             classifier_temp=1.0):
    """
    Build the classifier/detection streams, the OICR refinement predictors
    and the delta predictors on top of the base output layers.

    The base constructor is run with the shared arguments; the ``cls_score``
    and ``bbox_pred`` layers it creates are then deleted and replaced by the
    layers defined here.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature; a bare
            int is interpreted as the channel count.
        box2box_transform, num_classes, test_score_thresh, test_nms_thresh,
        test_topk_per_image, cls_agnostic_bbox_reg, smooth_l1_beta,
        box_reg_loss_type, loss_weight: forwarded unchanged to the base
            constructor.
        oicr_iter (int): number of OICR predictors to create.
        fg_threshold, bg_threshold: stored on the module.
        freeze_layers: passed to ``self._freeze_layers``.
        embedding_path: file given to ``torch.load``; its ``'embeddings'``
            entry becomes a frozen ``nn.Embedding``.
        terms: stored on the module.
        mode, mil_multiplier, detector_temp, classifier_temp: stored on the
            module.

    NOTE(review): ``freeze_layers`` and ``terms`` use mutable defaults, so the
    default objects are shared between calls; they are kept for interface
    compatibility.
    """
    super(FastRCNNOutputsBase, self).__init__(
        input_shape=input_shape,
        box2box_transform=box2box_transform,
        num_classes=num_classes,
        test_score_thresh=test_score_thresh,
        test_nms_thresh=test_nms_thresh,
        test_topk_per_image=test_topk_per_image,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        smooth_l1_beta=smooth_l1_beta,
        box_reg_loss_type=box_reg_loss_type,
        loss_weight=loss_weight,
    )

    # Plain configuration values.
    self.num_classes = num_classes
    self.oicr_iter = oicr_iter
    self.fg_threshold = fg_threshold
    self.bg_threshold = bg_threshold
    self.terms = terms
    self.mode = mode
    self.mil_multiplier = mil_multiplier
    self.detector_temp = detector_temp
    self.classifier_temp = classifier_temp

    reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    n_box_values = len(box2box_transform.weights)
    self.box_dim = n_box_values
    self.num_bbox_reg_classes = reg_classes

    # Drop the predictors created by the base constructor.
    del self.cls_score
    del self.bbox_pred

    # Backward compatibility: allow a plain channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    in_features = (
        input_shape.channels
        * (input_shape.width or 1)
        * (input_shape.height or 1)
    )
    self.input_size = in_features

    # Layers are created in a fixed order so module registration is stable.
    self.classifier_stream = Linear(in_features, self.num_classes)
    self.detection_stream = Linear(in_features, self.num_classes)
    refinement_heads = [
        Linear(in_features, self.num_classes + 1)
        for _ in range(self.oicr_iter)
    ]
    self.oicr_predictors = nn.ModuleList(refinement_heads)
    self.cls_score_delta = Linear(in_features, self.num_classes + 1)
    self.bbox_pred_delta = Linear(in_features, reg_classes * n_box_values)

    # Weight initialisation.
    nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
    nn.init.normal_(self.classifier_stream.weight, std=0.01)
    nn.init.normal_(self.detection_stream.weight, std=0.01)
    for predictor in self.oicr_predictors:
        nn.init.normal_(predictor.weight, std=0.01)
        nn.init.constant_(predictor.bias, 0.)
    # The classification delta starts from all-zero weights.
    nn.init.constant_(self.cls_score_delta.weight, 0.)

    # Bias initialisation for the remaining layers.
    zero_bias_layers = (
        self.cls_score_delta,
        self.bbox_pred_delta,
        self.detection_stream,
        self.classifier_stream,
    )
    for layer in zero_bias_layers:
        nn.init.constant_(layer.bias, 0.)

    # Frozen embedding table loaded from disk.
    checkpoint = torch.load(embedding_path)
    self.embeddings = nn.Embedding.from_pretrained(checkpoint['embeddings'], freeze=True)

    self._freeze_layers(layers=freeze_layers)
def __init__(self,
             input_shape,
             *,
             box2box_transform,
             num_classes,
             test_score_thresh=0.0,
             test_nms_thresh=0.5,
             test_topk_per_image=100,
             cls_agnostic_bbox_reg=False,
             smooth_l1_beta=0.0,
             box_reg_loss_type="smooth_l1",
             loss_weight=1.0,
             weak_detector_head=None,
             regression_branch=False,
             terms={},
             freeze_layers=[],
             embedding_path=''):
    """
    Build the classification and box delta predictors on top of the base
    output layers.

    The base constructor is run with the shared arguments; the ``cls_score``
    and ``bbox_pred`` layers it creates are then deleted and replaced by
    ``cls_score_delta`` and ``bbox_pred_delta``.

    Args:
        input_shape (ShapeSpec or int): shape of the input feature; a bare
            int is interpreted as the channel count.
        box2box_transform, num_classes, test_score_thresh, test_nms_thresh,
        test_topk_per_image, cls_agnostic_bbox_reg, smooth_l1_beta,
        box_reg_loss_type, loss_weight: forwarded unchanged to the base
            constructor.
        weak_detector_head: stored on the module.
        regression_branch (bool): when true the box delta weights start at
            zero; otherwise they are drawn from a normal distribution with
            std 0.001.
        terms: stored on the module.
        freeze_layers: passed to ``self._freeze_layers``.
        embedding_path: file given to ``torch.load``; its ``'embeddings'``
            entry becomes a frozen ``nn.Embedding``.

    NOTE(review): ``terms`` and ``freeze_layers`` use mutable defaults, so the
    default objects are shared between calls; they are kept for interface
    compatibility.
    """
    super(SupervisedDetectorOutputsBase, self).__init__(
        input_shape=input_shape,
        box2box_transform=box2box_transform,
        num_classes=num_classes,
        test_score_thresh=test_score_thresh,
        test_nms_thresh=test_nms_thresh,
        test_topk_per_image=test_topk_per_image,
        cls_agnostic_bbox_reg=cls_agnostic_bbox_reg,
        smooth_l1_beta=smooth_l1_beta,
        box_reg_loss_type=box_reg_loss_type,
        loss_weight=loss_weight,
    )

    # Plain configuration values.
    self.num_classes = num_classes
    self.terms = terms
    self.weak_detector_head = weak_detector_head
    self.regression_branch = regression_branch

    reg_classes = 1 if cls_agnostic_bbox_reg else num_classes
    n_box_values = len(box2box_transform.weights)
    self.box_dim = n_box_values
    self.num_bbox_reg_classes = reg_classes

    # Drop the predictors created by the base constructor.
    del self.cls_score
    del self.bbox_pred

    # Backward compatibility: allow a plain channel count.
    if isinstance(input_shape, int):
        input_shape = ShapeSpec(channels=input_shape)
    in_features = (
        input_shape.channels
        * (input_shape.width or 1)
        * (input_shape.height or 1)
    )
    self.input_size = in_features

    self.cls_score_delta = Linear(in_features, self.num_classes + 1)
    self.bbox_pred_delta = Linear(in_features, reg_classes * n_box_values)

    # The classification delta always starts from all-zero weights; the box
    # delta does so only when the regression branch is enabled.
    nn.init.constant_(self.cls_score_delta.weight, 0.)
    if self.regression_branch:
        nn.init.constant_(self.bbox_pred_delta.weight, 0.)
    else:
        nn.init.normal_(self.bbox_pred_delta.weight, std=0.001)
    for layer in (self.cls_score_delta, self.bbox_pred_delta):
        nn.init.constant_(layer.bias, 0.)

    # Frozen embedding table loaded from disk.
    checkpoint = torch.load(embedding_path)
    self.embeddings = nn.Embedding.from_pretrained(checkpoint['embeddings'], freeze=True)

    self._freeze_layers(layers=freeze_layers)