def __init__(self, cfg):
    """Assemble an anchor-point detector: ResNet backbone, FPN neck, point head."""
    super().__init__()
    self.cfg = cfg
    self.anchor_generator = layers.AnchorPointGenerator(
        cfg.num_anchors,
        strides=self.cfg.stride,
        offset=self.cfg.anchor_offset,
    )
    self.point_coder = layers.PointCoder()
    self.in_features = cfg.in_features

    # ----------------------- build backbone ------------------------ #
    trunk = getattr(resnet, cfg.backbone)(
        norm=layers.get_norm(cfg.resnet_norm),
        pretrained=cfg.backbone_pretrained,
    )
    # the classification head of the backbone is unused for detection
    del trunk.fc

    # ----------------------- build FPN ----------------------------- #
    p6p7_in_channels = 2048
    fpn_channels = 256
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm=cfg.fpn_norm,
        top_block=layers.LastLevelP6P7(p6p7_in_channels, fpn_channels),
    )
    shape_map = self.backbone.output_shape()
    head_shapes = [shape_map[name] for name in self.in_features]

    # ----------------------- build FCOS Head ----------------------- #
    self.head = layers.PointHead(cfg, head_shapes)
def __init__(self, cfg):
    """Assemble a two-stage detector (backbone + FPN + RPN + RCNN) from *cfg*."""
    super().__init__()
    self.cfg = cfg

    # ----------------------- build backbone ------------------------ #
    resnet_builder = getattr(resnet, cfg.backbone)
    trunk = resnet_builder(
        norm=layers.get_norm(cfg.backbone_norm),
        pretrained=cfg.backbone_pretrained,
    )
    # the classification head of the backbone is unused for detection
    del trunk.fc

    # ----------------------- build FPN ----------------------------- #
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=cfg.fpn_in_features,
        out_channels=cfg.fpn_out_channels,
        norm=cfg.fpn_norm,
        top_block=layers.FPNP6(),
        strides=cfg.fpn_in_strides,
        channels=cfg.fpn_in_channels,
    )

    # ----------------------- build RPN ----------------------------- #
    self.rpn = layers.RPN(cfg)

    # ----------------------- build RCNN head ----------------------- #
    self.rcnn = layers.RCNN(cfg)
def __init__(self, cfg):
    """Assemble a two-stage detector with the standard res2-res5 FPN layout."""
    super().__init__()
    self.cfg = cfg

    # ----------------------- build backbone ------------------------ #
    trunk = getattr(resnet, cfg.backbone)(
        norm=layers.get_norm(cfg.resnet_norm),
        pretrained=cfg.backbone_pretrained,
    )
    # the classification head of the backbone is unused for detection
    del trunk.fc

    # ----------------------- build FPN ----------------------------- #
    fpn_channels = 256
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm=cfg.fpn_norm,
        top_block=layers.FPNP6(),
        strides=[4, 8, 16, 32],
        channels=[256, 512, 1024, 2048],
    )

    # ----------------------- build RPN ----------------------------- #
    self.rpn = layers.RPN(cfg)

    # ----------------------- build RCNN head ----------------------- #
    self.rcnn = layers.RCNN(cfg)
def __init__(self, cfg, batch_size):
    """Two-stage detector wired with fixed random inputs (benchmark/trace model)."""
    super().__init__()
    self.cfg = cfg
    # record batch size both on the config and on the model itself
    cfg.batch_per_gpu = batch_size
    self.batch_size = batch_size

    # ----------------------- build the backbone ------------------------ #
    trunk = getattr(resnet, cfg.backbone)(
        norm=layers.get_norm(cfg.resnet_norm),
        pretrained=cfg.backbone_pretrained,
    )

    # ------------ freeze the weights of resnet stage1 and stage 2 ------ #
    freeze_at = self.cfg.backbone_freeze_at
    for level, module in ((1, trunk.conv1), (2, trunk.layer1)):
        if freeze_at >= level:
            for param in module.parameters():
                param.requires_grad = False

    # ----------------------- build the FPN ----------------------------- #
    fpn_channels = 256
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm=cfg.fpn_norm,
        top_block=layers.FPNP6(),
        strides=[4, 8, 16, 32],
        channels=[256, 512, 1024, 2048],
    )

    # ----------------------- build the RPN ----------------------------- #
    self.RPN = layers.RPN(cfg)

    # ----------------------- build the RCNN head ----------------------- #
    self.RCNN = layers.RCNN(cfg)

    # -------------------------- input Tensor --------------------------- #
    def _rand_tensor(shape):
        # random float32 placeholder input of the given shape
        return mge.tensor(
            np.random.random(shape).astype(np.float32), dtype="float32",
        )

    self.inputs = {
        "image": _rand_tensor([2, 3, 224, 224]),
        "im_info": _rand_tensor([2, 5]),
        "gt_boxes": _rand_tensor([2, 100, 5]),
    }
def __init__(self, cfg, batch_size):
    """RetinaNet wired with fixed random inputs (benchmark/trace model)."""
    super().__init__()
    self.cfg = cfg
    self.batch_size = batch_size
    self.anchor_gen = layers.DefaultAnchorGenerator(
        base_size=4,
        anchor_scales=self.cfg.anchor_scales,
        anchor_ratios=self.cfg.anchor_ratios,
    )
    self.box_coder = layers.BoxCoder(reg_mean=cfg.reg_mean, reg_std=cfg.reg_std)
    self.stride_list = np.array([8, 16, 32, 64, 128]).astype(np.float32)
    self.in_features = ["p3", "p4", "p5", "p6", "p7"]

    # ----------------------- build the backbone ------------------------ #
    trunk = resnet50(norm=layers.get_norm(self.cfg.resnet_norm))

    # ------------ freeze the weights of resnet stage1 and stage 2 ------ #
    freeze_at = self.cfg.backbone_freeze_at
    for level, module in ((1, trunk.conv1), (2, trunk.layer1)):
        if freeze_at >= level:
            for param in module.parameters():
                param.requires_grad = False

    # ----------------------- build the FPN ----------------------------- #
    p6p7_in_channels = 2048
    fpn_channels = 256
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm="",
        top_block=layers.LastLevelP6P7(p6p7_in_channels, fpn_channels),
    )
    shape_map = self.backbone.output_shape()
    head_shapes = [shape_map[name] for name in self.in_features]

    # ----------------------- build the RetinaNet Head ------------------ #
    self.head = layers.RetinaNetHead(cfg, head_shapes)

    def _rand_tensor(shape):
        # random float32 placeholder input of the given shape
        return mge.tensor(
            np.random.random(shape).astype(np.float32), dtype="float32",
        )

    self.inputs = {
        "image": _rand_tensor([2, 3, 224, 224]),
        "im_info": _rand_tensor([2, 5]),
        "gt_boxes": _rand_tensor([2, 100, 5]),
    }
def __init__(self, cfg):
    """Single-stage box detector: anchors, ResNet/FPN backbone, box head, matcher."""
    super().__init__()
    self.cfg = cfg
    self.anchor_generator = layers.AnchorBoxGenerator(
        anchor_scales=self.cfg.anchor_scales,
        anchor_ratios=self.cfg.anchor_ratios,
        strides=self.cfg.stride,
        offset=self.cfg.anchor_offset,
    )
    self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)
    self.in_features = cfg.in_features

    # ----------------------- build backbone ------------------------ #
    trunk = getattr(resnet, cfg.backbone)(
        norm=layers.get_norm(cfg.backbone_norm),
        pretrained=cfg.backbone_pretrained,
    )
    # the classification head of the backbone is unused for detection
    del trunk.fc

    # ----------------------- build FPN ----------------------------- #
    extra_block = layers.LastLevelP6P7(
        cfg.fpn_top_in_channel, cfg.fpn_out_channels, cfg.fpn_top_in_feature
    )
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=cfg.fpn_in_features,
        out_channels=cfg.fpn_out_channels,
        norm=cfg.fpn_norm,
        top_block=extra_block,
        strides=cfg.fpn_in_strides,
        channels=cfg.fpn_in_channels,
    )
    shape_map = self.backbone.output_shape()
    head_shapes = [shape_map[name] for name in self.in_features]

    # ----------------------- build RetinaNet Head ------------------ #
    self.head = layers.BoxHead(cfg, head_shapes)

    self.matcher = layers.Matcher(
        cfg.match_thresholds, cfg.match_labels, cfg.match_allow_low_quality
    )
def __init__(self, cfg):
    """Single-stage box detector with the fixed res3-res5 + P6/P7 FPN layout."""
    super().__init__()
    self.cfg = cfg
    self.anchor_generator = layers.AnchorBoxGenerator(
        anchor_scales=self.cfg.anchor_scales,
        anchor_ratios=self.cfg.anchor_ratios,
        strides=self.cfg.stride,
        offset=self.cfg.anchor_offset,
    )
    self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std)
    self.stride_list = np.array(cfg.stride, dtype=np.float32)
    self.in_features = cfg.in_features

    # ----------------------- build backbone ------------------------ #
    trunk = getattr(resnet, cfg.backbone)(
        norm=layers.get_norm(cfg.resnet_norm),
        pretrained=cfg.backbone_pretrained,
    )
    # the classification head of the backbone is unused for detection
    del trunk.fc

    # ----------------------- build FPN ----------------------------- #
    p6p7_in_channels = 2048
    fpn_channels = 256
    self.backbone = layers.FPN(
        bottom_up=trunk,
        in_features=["res3", "res4", "res5"],
        out_channels=fpn_channels,
        norm=cfg.fpn_norm,
        top_block=layers.LastLevelP6P7(p6p7_in_channels, fpn_channels),
    )
    shape_map = self.backbone.output_shape()
    head_shapes = [shape_map[name] for name in self.in_features]

    # ----------------------- build head ------------------ #
    self.head = layers.BoxHead(cfg, head_shapes)
def __init__(
    self,
    bottom_up: M.Module,
    in_features: List[str],
    out_channels: int = 256,
    norm: str = "",
    top_block: M.Module = None,
    strides: List[int] = None,
    channels: List[int] = None,
):
    """
    Args:
        bottom_up (M.Module): module representing the bottom up sub-network.
            it generates multi-scale feature maps which formatted as a dict like
            {'res3': res3_feature, 'res4': res4_feature}
        in_features (list[str]): list of input feature maps keys coming
            from the `bottom_up` which will be used in FPN.
            e.g. ['res3', 'res4', 'res5']
        out_channels (int): number of channels used in the output feature maps.
        norm (str): the normalization type.
        top_block (nn.Module or None): the module build upon FPN layers.
        strides (list[int] or None): strides of the `in_features` maps, one per
            feature. Defaults to [8, 16, 32] (the previous hard-coded values)
            when omitted, so existing callers are unaffected.
        channels (list[int] or None): channel counts of the `in_features` maps,
            one per feature. Defaults to [512, 1024, 2048] when omitted.
    """
    super(FPN, self).__init__()

    # `None` sentinels instead of mutable list defaults; fall back to the
    # previous hard-coded configuration for backward compatibility. Other
    # call sites in this project pass `strides=`/`channels=` explicitly.
    in_strides = [8, 16, 32] if strides is None else strides
    in_channels = [512, 1024, 2048] if channels is None else channels

    # convs carry a bias only when no normalization layer follows them
    use_bias = norm == ""
    self.lateral_convs = list()
    self.output_convs = list()

    for idx, in_ch in enumerate(in_channels):
        lateral_norm = layers.get_norm(norm, out_channels)
        output_norm = layers.get_norm(norm, out_channels)

        # 1x1 lateral conv aligns channels; 3x3 output conv smooths the
        # merged top-down + lateral map.
        lateral_conv = layers.Conv2d(
            in_ch,
            out_channels,
            kernel_size=1,
            bias=use_bias,
            norm=lateral_norm,
        )
        output_conv = layers.Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        M.init.msra_normal_(lateral_conv.weight, mode="fan_in")
        M.init.msra_normal_(output_conv.weight, mode="fan_in")
        if use_bias:
            M.init.fill_(lateral_conv.bias, 0)
            M.init.fill_(output_conv.bias, 0)

        # stage number is log2 of the stride (stride 8 -> stage 3, etc.)
        stage = int(math.log2(in_strides[idx]))
        setattr(self, "fpn_lateral{}".format(stage), lateral_conv)
        setattr(self, "fpn_output{}".format(stage), output_conv)
        # store top-down (highest stage first)
        self.lateral_convs.insert(0, lateral_conv)
        self.output_convs.insert(0, output_conv)

    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up

    # follow the common practices, FPN features are named to "p<stage>",
    # like ["p2", "p3", ..., "p6"]
    self._out_feature_strides = {
        "p{}".format(int(math.log2(s))): s for s in in_strides
    }

    # top block output feature maps: continue the stage numbering past the
    # last bottom-up stage, doubling the stride each level.
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)

    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {
        k: out_channels for k in self._out_features
    }