def __init__(self, cfg):
    """Create the box coder, anchor generator and the RPN convolutions.

    Args:
        cfg: configuration object. This constructor reads ``rpn_stride``,
            ``rpn_channel``, ``rpn_in_features``, ``anchor_base_size``,
            ``anchor_scales``, ``anchor_aspect_ratios``, ``anchor_offset``
            and ``num_cell_anchors`` from it.
    """
    super().__init__()
    self.cfg = cfg
    self.box_coder = layers.BoxCoder()

    self.stride_list = cfg.rpn_stride
    self.in_features = cfg.rpn_in_features
    self.anchors_generator = layers.DefaultAnchorGenerator(
        cfg.anchor_base_size,
        cfg.anchor_scales,
        cfg.anchor_aspect_ratios,
        cfg.anchor_offset,
    )

    hidden_channels = cfg.rpn_channel
    # Both prediction layers are 1x1, stride-1 convolutions.
    pointwise = {"kernel_size": 1, "stride": 1}

    # NOTE(review): the 256 input channels are hard-coded; presumably this
    # matches the width of the incoming feature maps -- confirm.
    self.rpn_conv = M.Conv2d(
        256, hidden_channels, kernel_size=3, stride=1, padding=1
    )
    # num_cell_anchors * 2 output channels for the score branch.
    self.rpn_cls_score = M.Conv2d(
        hidden_channels, cfg.num_cell_anchors * 2, **pointwise
    )
    # num_cell_anchors * 4 output channels for the box-offset branch.
    self.rpn_bbox_offsets = M.Conv2d(
        hidden_channels, cfg.num_cell_anchors * 4, **pointwise
    )

    # Same initialisation for every conv: N(0, 0.01) weights, zero bias.
    for conv in (self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets):
        M.init.normal_(conv.weight, std=0.01)
        M.init.fill_(conv.bias, 0)
def __init__(self, cfg):
    """Create the box coder, the two-layer FC head and the box predictors.

    Args:
        cfg: configuration object. This constructor reads ``rcnn_reg_mean``,
            ``rcnn_reg_std``, ``rcnn_in_features``, ``rcnn_stride``,
            ``pooling_method``, ``pooling_size`` and ``num_classes`` from it.
    """
    super().__init__()
    self.cfg = cfg
    self.box_coder = layers.BoxCoder(cfg.rcnn_reg_mean, cfg.rcnn_reg_std)

    # ---- roi head ----
    self.in_features = cfg.rcnn_in_features
    self.stride = cfg.rcnn_stride
    self.pooling_method = cfg.pooling_method
    self.pooling_size = cfg.pooling_size

    hidden_dim = 1024
    # Flattened input size: 256 * pooling_size[0] * pooling_size[1].
    # NOTE(review): 256 is hard-coded; presumably the channel count of the
    # pooled features -- confirm.
    flat_dim = 256 * self.pooling_size[0] * self.pooling_size[1]

    self.fc1 = M.Linear(flat_dim, hidden_dim)
    self.fc2 = M.Linear(hidden_dim, hidden_dim)
    for fc in (self.fc1, self.fc2):
        M.init.normal_(fc.weight, std=0.01)
        M.init.fill_(fc.bias, 0)

    # ---- box predictor ----
    # num_classes + 1 outputs for classification, num_classes * 4 for deltas.
    self.pred_cls = M.Linear(hidden_dim, cfg.num_classes + 1)
    self.pred_delta = M.Linear(hidden_dim, cfg.num_classes * 4)
    # The delta predictor uses a smaller weight std (0.001) than the
    # classifier (0.01); both biases start at zero.
    for predictor, weight_std in (
        (self.pred_cls, 0.01),
        (self.pred_delta, 0.001),
    ):
        M.init.normal_(predictor.weight, std=weight_std)
        M.init.fill_(predictor.bias, 0)
def __init__(self, cfg, batch_size):
    """Assemble the anchor generator, box coder, ResNet-50 + FPN backbone,
    the RetinaNet head and a dict of random placeholder inputs.

    Args:
        cfg: configuration object. This constructor reads ``anchor_scales``,
            ``anchor_ratios``, ``reg_mean``, ``reg_std``, ``resnet_norm``
            and ``backbone_freeze_at`` from it; ``cfg`` is also handed to
            ``layers.RetinaNetHead``.
        batch_size: stored as ``self.batch_size``; not otherwise used here.
    """
    super().__init__()
    self.cfg = cfg
    self.batch_size = batch_size

    self.anchor_gen = layers.DefaultAnchorGenerator(
        base_size=4,
        anchor_scales=cfg.anchor_scales,
        anchor_ratios=cfg.anchor_ratios,
    )
    self.box_coder = layers.BoxCoder(reg_mean=cfg.reg_mean, reg_std=cfg.reg_std)

    self.stride_list = np.array([8, 16, 32, 64, 128], dtype=np.float32)
    self.in_features = ["p3", "p4", "p5", "p6", "p7"]

    # ----------------------- build the backbone ------------------------ #
    resnet_body = resnet50(norm=layers.get_norm(cfg.resnet_norm))

    # ------------ freeze the weights of resnet stage1 and stage 2 ------ #
    # backbone_freeze_at >= 1 freezes ``conv1``; >= 2 also freezes ``layer1``.
    freeze_at = cfg.backbone_freeze_at
    for threshold, stage_name in ((1, "conv1"), (2, "layer1")):
        if freeze_at >= threshold:
            for param in getattr(resnet_body, stage_name).parameters():
                param.requires_grad = False

    # ----------------------- build the FPN ----------------------------- #
    p6p7_in_channels = 2048
    fpn_channels = 256
    extra_levels = layers.LastLevelP6P7(p6p7_in_channels, fpn_channels)
    self.backbone = layers.FPN(
        bottom_up=resnet_body,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm="",
        top_block=extra_levels,
    )

    shapes_by_name = self.backbone.output_shape()
    head_input_shapes = [shapes_by_name[name] for name in self.in_features]

    # ----------------------- build the RetinaNet Head ------------------ #
    self.head = layers.RetinaNetHead(cfg, head_input_shapes)

    def _random_tensor(shape):
        # Uniform-random float32 tensor of the given shape.
        values = np.random.random(shape).astype(np.float32)
        return mge.tensor(values, dtype="float32")

    # NOTE(review): the leading dimension is hard-coded to 2 and does not
    # use ``batch_size`` -- confirm this is intended for these placeholders.
    self.inputs = {
        "image": _random_tensor([2, 3, 224, 224]),
        "im_info": _random_tensor([2, 5]),
        "gt_boxes": _random_tensor([2, 100, 5]),
    }
def __init__(self, cfg):
    """Create the box coder, anchor generator, matcher and RPN convolutions.

    Args:
        cfg: configuration object. This constructor reads ``rpn_reg_mean``,
            ``rpn_reg_std``, ``anchor_scales``, ``anchor_ratios``,
            ``rpn_stride``, ``rpn_channel``, ``rpn_in_features``,
            ``anchor_offset``, ``match_thresholds``, ``match_labels`` and
            ``match_allow_low_quality`` from it.

    Raises:
        AssertionError: if the entries of ``cfg.anchor_scales`` (or of
            ``cfg.anchor_ratios``) do not all have the same length.
    """
    super().__init__()
    self.cfg = cfg
    self.box_coder = layers.BoxCoder(cfg.rpn_reg_mean, cfg.rpn_reg_std)

    # check anchor settings: every entry must list the same number of
    # scales, and likewise the same number of ratios.
    scale_counts = {len(scales) for scales in cfg.anchor_scales}
    assert len(scale_counts) == 1
    ratio_counts = {len(ratios) for ratios in cfg.anchor_ratios}
    assert len(ratio_counts) == 1

    anchors_per_cell = len(cfg.anchor_scales[0]) * len(cfg.anchor_ratios[0])
    self.num_cell_anchors = anchors_per_cell

    raw_strides = np.array(cfg.rpn_stride)
    self.stride_list = raw_strides.astype(np.float32)
    self.in_features = cfg.rpn_in_features

    self.anchor_generator = layers.AnchorBoxGenerator(
        anchor_scales=cfg.anchor_scales,
        anchor_ratios=cfg.anchor_ratios,
        strides=cfg.rpn_stride,
        offset=cfg.anchor_offset,
    )

    self.matcher = layers.Matcher(
        cfg.match_thresholds,
        cfg.match_labels,
        cfg.match_allow_low_quality,
    )

    hidden_channels = cfg.rpn_channel
    # NOTE(review): the 256 input channels are hard-coded; presumably this
    # matches the width of the incoming feature maps -- confirm.
    self.rpn_conv = M.Conv2d(
        256, hidden_channels, kernel_size=3, stride=1, padding=1
    )
    # num_cell_anchors output channels for the score branch.
    self.rpn_cls_score = M.Conv2d(
        hidden_channels, anchors_per_cell, kernel_size=1, stride=1
    )
    # num_cell_anchors * 4 output channels for the box-offset branch.
    self.rpn_bbox_offsets = M.Conv2d(
        hidden_channels, anchors_per_cell * 4, kernel_size=1, stride=1
    )

    # Same initialisation for every conv: N(0, 0.01) weights, zero bias.
    for conv in (self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets):
        M.init.normal_(conv.weight, std=0.01)
        M.init.fill_(conv.bias, 0)
def __init__(self, cfg):
    """Assemble the anchor generator, box coder, ResNet + FPN backbone,
    box head and matcher, all driven by ``cfg``.

    Args:
        cfg: configuration object. This constructor reads ``anchor_scales``,
            ``anchor_ratios``, ``stride``, ``anchor_offset``, ``reg_mean``,
            ``reg_std``, ``in_features``, ``backbone``, ``backbone_norm``,
            ``backbone_pretrained``, ``fpn_in_features``,
            ``fpn_out_channels``, ``fpn_norm``, ``fpn_top_in_channel``,
            ``fpn_top_in_feature``, ``fpn_in_strides``, ``fpn_in_channels``,
            ``match_thresholds``, ``match_labels`` and
            ``match_allow_low_quality`` from it; ``cfg`` is also handed to
            ``layers.BoxHead``.
    """
    super().__init__()
    self.cfg = cfg

    self.anchor_generator = layers.AnchorBoxGenerator(
        anchor_scales=cfg.anchor_scales,
        anchor_ratios=cfg.anchor_ratios,
        strides=cfg.stride,
        offset=cfg.anchor_offset,
    )
    self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)

    self.in_features = cfg.in_features

    # ----------------------- build backbone ------------------------ #
    # ``cfg.backbone`` names the factory function looked up on ``resnet``.
    make_backbone = getattr(resnet, cfg.backbone)
    backbone_norm = layers.get_norm(cfg.backbone_norm)
    resnet_body = make_backbone(
        norm=backbone_norm, pretrained=cfg.backbone_pretrained
    )
    # Remove the ``fc`` attribute from the backbone before wrapping it.
    del resnet_body.fc

    # ----------------------- build FPN ----------------------------- #
    extra_levels = layers.LastLevelP6P7(
        cfg.fpn_top_in_channel,
        cfg.fpn_out_channels,
        cfg.fpn_top_in_feature,
    )
    self.backbone = layers.FPN(
        bottom_up=resnet_body,
        in_features=cfg.fpn_in_features,
        out_channels=cfg.fpn_out_channels,
        norm=cfg.fpn_norm,
        top_block=extra_levels,
        strides=cfg.fpn_in_strides,
        channels=cfg.fpn_in_channels,
    )

    shapes_by_name = self.backbone.output_shape()
    head_input_shapes = [shapes_by_name[name] for name in self.in_features]

    # ----------------------- build RetinaNet Head ------------------ #
    self.head = layers.BoxHead(cfg, head_input_shapes)

    self.matcher = layers.Matcher(
        cfg.match_thresholds,
        cfg.match_labels,
        cfg.match_allow_low_quality,
    )
def __init__(self, cfg):
    """Assemble the anchor generator, box coder, ResNet + FPN backbone and
    the box head.

    Args:
        cfg: configuration object. This constructor reads ``anchor_scales``,
            ``anchor_ratios``, ``stride``, ``anchor_offset``, ``reg_mean``,
            ``reg_std``, ``in_features``, ``backbone``, ``resnet_norm``,
            ``backbone_pretrained`` and ``fpn_norm`` from it; ``cfg`` is
            also handed to ``layers.BoxHead``.
    """
    super().__init__()
    self.cfg = cfg

    self.anchor_generator = layers.AnchorBoxGenerator(
        anchor_scales=cfg.anchor_scales,
        anchor_ratios=cfg.anchor_ratios,
        strides=cfg.stride,
        offset=cfg.anchor_offset,
    )
    self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)

    self.stride_list = np.array(cfg.stride, dtype=np.float32)
    self.in_features = cfg.in_features

    # ----------------------- build backbone ------------------------ #
    # ``cfg.backbone`` names the factory function looked up on ``resnet``.
    make_backbone = getattr(resnet, cfg.backbone)
    backbone_norm = layers.get_norm(cfg.resnet_norm)
    resnet_body = make_backbone(
        norm=backbone_norm, pretrained=cfg.backbone_pretrained
    )
    # Remove the ``fc`` attribute from the backbone before wrapping it.
    del resnet_body.fc

    # ----------------------- build FPN ----------------------------- #
    # Unlike the norm, the FPN widths are fixed here rather than read
    # from cfg.
    p6p7_in_channels = 2048
    fpn_channels = 256
    extra_levels = layers.LastLevelP6P7(p6p7_in_channels, fpn_channels)
    self.backbone = layers.FPN(
        bottom_up=resnet_body,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm=cfg.fpn_norm,
        top_block=extra_levels,
    )

    shapes_by_name = self.backbone.output_shape()
    head_input_shapes = [shapes_by_name[name] for name in self.in_features]

    # ----------------------- build head ------------------ #
    self.head = layers.BoxHead(cfg, head_input_shapes)