def __init__(self, cfg, input_shape: List[ShapeSpec]):
    super().__init__()
    num_classes = cfg.MODEL.NUM_CLASSES
    num_convs = cfg.FCOS.NUM_CONVS
    prior_prob = cfg.FCOS.PRIOR_PROB
    in_channels = input_shape[0].channels
    self.neck_strides = [x.stride for x in input_shape]
    self.norm_reg_targets = cfg.FCOS.NORM_REG_TARGETS
    self.centerness_on_reg = cfg.FCOS.CENTERNESS_ON_REG

    cls_subnet = []
    bbox_subnet = []
    for _ in range(num_convs):
        # 3x3, stride 1, padding 1 keeps the feature size through the towers
        cls_subnet.append(
            Conv2d(in_channels, in_channels, 3, 1, 1, norm="GN", activation="ReLU")
        )
        bbox_subnet.append(
            Conv2d(in_channels, in_channels, 3, 1, 1, norm="GN", activation="ReLU")
        )
    self.cls_subnet = nn.Sequential(*cls_subnet)
    self.bbox_subnet = nn.Sequential(*bbox_subnet)

    self.cls_score = nn.Conv2d(in_channels, num_classes, 3, 1, 1)
    self.bbox_pred = nn.Conv2d(in_channels, 4, 3, 1, 1)
    self.centerness = nn.Conv2d(in_channels, 1, 3, 1, 1)
    # one learnable scale per input feature level for the regression outputs
    self.scales = nn.ModuleList([Scale(1.0) for _ in range(len(input_shape))])

    for module in [self.cls_score, self.bbox_pred, self.centerness]:
        nn.init.normal_(module.weight, std=0.01)
        nn.init.constant_(module.bias, 0)
    # bias the classification logits so that initial scores equal prior_prob
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    nn.init.constant_(self.cls_score.bias, bias_value)
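# The classification bias above is chosen so that, at initialization, every location
# predicts a foreground probability close to PRIOR_PROB (the focal-loss prior). A
# minimal standalone check of that relationship, using only the standard library
# (the 0.01 prior is an assumed example value, not read from any config):
import math

prior_prob = 0.01
bias_value = -math.log((1 - prior_prob) / prior_prob)   # ~ -4.595
initial_score = 1 / (1 + math.exp(-bias_value))         # sigmoid(bias) == prior_prob
assert abs(initial_score - prior_prob) < 1e-12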
def __init__(self, input_shape, in_features, out_channels, norm="",
             top_block=None, fuse_type="sum"):
    super().__init__()
    in_strides = [input_shape[f].stride for f in in_features]
    in_channels = [input_shape[f].channels for f in in_features]
    _assert_strides_are_log2_contiguous(in_strides)

    lateral_convs = []
    output_convs = []
    use_bias = norm == ""
    for idx, in_channel in enumerate(in_channels):
        lateral_conv = Conv2d(in_channel, out_channels, 1, bias=use_bias, norm=norm)
        output_conv = Conv2d(out_channels, out_channels, 3, 1, 1, bias=use_bias, norm=norm)
        stage = int(math.log2(in_strides[idx]))
        self.add_module(f"fpn_lateral{stage}", lateral_conv)
        self.add_module(f"fpn_output{stage}", output_conv)
        lateral_convs.append(lateral_conv)
        output_convs.append(output_conv)

    self.lateral_convs = lateral_convs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features

    self._out_feature_strides = {f"p{int(math.log2(s))}": s for s in in_strides}
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides[f"p{s + 1}"] = 2 ** (s + 1)
    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]

    if fuse_type not in ("avg", "sum"):
        raise ValueError(f"Unsupported fuse_type, got '{fuse_type}'")
    self._fuse_type = fuse_type
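# The FPN output names and strides above follow directly from log2 of the input
# strides, plus any extra levels contributed by top_block. A small standalone sketch
# of that bookkeeping (the strides [8, 16, 32] and the 2 extra levels are assumed
# example values, not taken from a real config):
import math

in_strides = [8, 16, 32]                # e.g. three backbone stages
out_feature_strides = {f"p{int(math.log2(s))}": s for s in in_strides}
# -> {"p3": 8, "p4": 16, "p5": 32}

top_block_num_levels = 2                # e.g. an extra p6/p7 block
last_stage = int(math.log2(in_strides[-1]))
for s in range(last_stage, last_stage + top_block_num_levels):
    out_feature_strides[f"p{s + 1}"] = 2 ** (s + 1)
# -> adds {"p6": 64, "p7": 128}
assert list(out_feature_strides) == ["p3", "p4", "p5", "p6", "p7"]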
def __init__(self, in_channels, mid_channels, out_channels, dw_kernel_size=3,
             stride=1, se_ratio=0, norm="BN"):
    super().__init__()
    self.use_se = se_ratio is not None and se_ratio > 0
    self.stride = stride

    self.ghost1 = GhostModule(in_channels, mid_channels, relu=True)

    if stride > 1:
        self.dw_conv = Conv2d(
            mid_channels, mid_channels, dw_kernel_size, stride,
            (dw_kernel_size - 1) // 2, groups=mid_channels, bias=False, norm=norm
        )

    if self.use_se:
        self.se = SEModule(
            mid_channels, se_ratio, activation=("ReLU", "HardSigmoid"), divisor=4
        )

    self.ghost2 = GhostModule(mid_channels, out_channels, relu=False)

    if in_channels == out_channels and stride == 1:
        self.shortcut = nn.Sequential()
    else:
        self.shortcut = nn.Sequential(
            Conv2d(
                in_channels, in_channels, dw_kernel_size, stride,
                (dw_kernel_size - 1) // 2, groups=in_channels, bias=False, norm=norm
            ),
            Conv2d(in_channels, out_channels, 1, bias=False, norm=norm),
        )
def __init__(self, stem_channels, depths, widths, exp_ratios, se_ratio, strides,
             kernels, last_channels, norm="BN", activation="Swish",
             num_classes=1000, out_features=None):
    super().__init__()
    stage_params = list(zip(depths, widths, exp_ratios, strides, kernels))

    self.stem = Conv2d(3, stem_channels, 3, 2, 1, bias=False, norm=norm,
                       activation=activation)
    self._out_feature_channels = {"stem": stem_channels}
    stride = 2
    self._out_feature_strides = {"stem": stride}

    prev_channels = stem_channels
    self.stages = ["stem"]
    for i, (depth, width, expand_ratio, s, k) in enumerate(stage_params):
        name = f"stage{i + 1}"
        stage = EfficientStage(prev_channels, expand_ratio, k, s, se_ratio,
                               width, depth, norm, activation)
        self.add_module(name, stage)
        self.stages.append(name)
        prev_channels = width
        stride *= s
        self._out_feature_strides[name] = stride
        self._out_feature_channels[name] = width

    if not out_features:
        out_features = ["linear"]
    if "linear" in out_features and num_classes is not None:
        self.last_conv = Conv2d(prev_channels, last_channels, 1, bias=False,
                                norm=norm, activation=activation)
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(last_channels, num_classes)
    self._out_features = out_features
def __init__(self, cfg, input_shape: ShapeSpec):
    super(CoarseMaskHead, self).__init__()
    self.num_classes = cfg.MODEL.NUM_CLASSES
    conv_dim = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
    self.fc_dim = cfg.MODEL.ROI_MASK_HEAD.FC_DIM
    num_fc = cfg.MODEL.ROI_MASK_HEAD.NUM_FC
    self.output_side_resolution = cfg.MODEL.ROI_MASK_HEAD.OUTPUT_SIDE_RESOLUTION
    self.input_channels = input_shape.channels
    self.input_h = input_shape.height
    self.input_w = input_shape.width

    self.conv_layers = []
    if self.input_channels > conv_dim:
        self.reduce_channel_dim_conv = Conv2d(self.input_channels, conv_dim,
                                              kernel_size=1, activation="ReLU")
        self.conv_layers.append(self.reduce_channel_dim_conv)

    self.reduce_spatial_dim_conv = Conv2d(conv_dim, conv_dim, kernel_size=2, stride=2,
                                          padding=0, bias=True, activation="ReLU")
    self.conv_layers.append(self.reduce_spatial_dim_conv)

    input_dim = conv_dim * self.input_h * self.input_w
    input_dim //= 4

    self.fcs = []
    for k in range(num_fc):
        fc = nn.Linear(input_dim, self.fc_dim)
        self.add_module("coarse_mask_fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        input_dim = self.fc_dim

    output_dim = self.num_classes * self.output_side_resolution * self.output_side_resolution
    self.prediction = nn.Linear(self.fc_dim, output_dim)

    nn.init.normal_(self.prediction.weight, std=0.001)
    nn.init.constant_(self.prediction.bias, 0)
    for layer in self.conv_layers:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
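# The "input_dim //= 4" above relies on reduce_spatial_dim_conv (kernel 2, stride 2,
# padding 0) halving both H and W, so the flattened feature becomes 4x smaller. A
# quick standalone check with plain PyTorch (the 14x14 input is an assumed example
# size, typical of an ROI feature):
import torch
import torch.nn as nn

conv_dim = 256
reduce_spatial = nn.Conv2d(conv_dim, conv_dim, kernel_size=2, stride=2, padding=0)
x = torch.zeros(1, conv_dim, 14, 14)
y = reduce_spatial(x)
assert y.shape[-2:] == (7, 7)
assert y.numel() == x.numel() // 4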
def __init__(self, input_shape, *, num_keypoints, conv_dims, **kwargs):
    """
    NOTE: this interface is experimental.
    """
    super().__init__(num_keypoints=num_keypoints, **kwargs)

    up_scale = 2
    in_channels = input_shape.channels

    self.blocks = []
    for idx, layer_channels in enumerate(conv_dims, 1):
        module = Conv2d(in_channels, layer_channels, 3, stride=1, padding=1)
        self.add_module("conv_fcn{}".format(idx), module)
        self.blocks.append(module)
        in_channels = layer_channels

    deconv_kernel = 4
    self.score_lowres = nn.ConvTranspose2d(
        in_channels, num_keypoints, deconv_kernel, stride=2,
        padding=deconv_kernel // 2 - 1
    )
    self.up_scale = up_scale

    for name, param in self.named_parameters():
        if "bias" in name:
            nn.init.constant_(param, 0)
        elif "weight" in name:
            nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
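# score_lowres above is a transposed conv with kernel 4, stride 2, padding 1, which
# exactly doubles the spatial resolution of its input. A standalone check with plain
# PyTorch (the channel count, 17 keypoints, and 14x14 input are assumed example values):
import torch
import torch.nn as nn

deconv_kernel = 4
score_lowres = nn.ConvTranspose2d(512, 17, deconv_kernel, stride=2,
                                  padding=deconv_kernel // 2 - 1)
x = torch.zeros(1, 512, 14, 14)
assert score_lowres(x).shape == (1, 17, 28, 28)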
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], norm=""):
    super().__init__()
    input_size = cfg.SSD.SIZE
    extra_cfg = self.extra_setting[input_size]
    in_channels = 1024

    self._out_features = [f for f in input_shape]
    previous_channels = [input_shape[f].channels for f in input_shape]
    extra_channels = []
    for i, config in enumerate(extra_cfg):
        name = f"extra_{i + 1}"
        module = []
        for c, k, s, p in config:
            module.append(
                Conv2d(in_channels, c, k, s, p, norm=norm, activation="ReLU"))
            in_channels = c
        self.add_module(name, nn.Sequential(*module))
        self._out_features.append(name)
        extra_channels.append(in_channels)

    self._out_feature_strides = dict(zip(self._out_features, cfg.SSD.STRIDES))
    self._out_feature_channels = dict(
        zip(self._out_features, previous_channels + extra_channels))
    self.l2_norm = L2Norm(previous_channels[0])
def __init__(self, input_shape: ShapeSpec, *, num_classes, conv_dims, conv_norm="", **kwargs):
    """
    NOTE: this interface is experimental.
    """
    super().__init__(**kwargs)
    assert len(conv_dims) >= 1, "conv_dims have to be non-empty!"

    self.conv_norm_relus = []
    cur_channels = input_shape.channels
    for k, conv_dim in enumerate(conv_dims[:-1]):
        conv = Conv2d(
            cur_channels, conv_dim, 3, stride=1, padding=1,
            bias=not conv_norm, norm=conv_norm, activation="ReLU",
        )
        self.add_module("mask_fcn{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        cur_channels = conv_dim

    self.deconv = nn.ConvTranspose2d(cur_channels, conv_dims[-1],
                                     kernel_size=2, stride=2, padding=0)
    cur_channels = conv_dims[-1]
    self.predictor = Conv2d(cur_channels, num_classes, kernel_size=1)

    weight_init.c2_msra_fill(self.deconv)
    nn.init.normal_(self.predictor.conv.weight, std=0.001)
    if self.predictor.conv.bias is not None:
        nn.init.constant_(self.predictor.conv.bias, 0)
def __init__(self, in_channels, expand_ratio, kernel_size, stride, se_ratio,
             out_channels, norm="BN", activation="Swish"):
    super().__init__()
    expand_channels = int(in_channels * expand_ratio)
    self.expand = expand_channels != in_channels
    if self.expand:
        self.expand_conv = Conv2d(in_channels, expand_channels, 1, bias=False,
                                  norm=norm, activation=activation)

    padding = (kernel_size - 1) // 2
    self.dw = Conv2d(expand_channels, expand_channels, kernel_size, stride, padding,
                     groups=expand_channels, bias=False, norm=norm, activation=activation)

    self.use_se = se_ratio > 0
    if self.use_se:
        self.se = SEModule(expand_channels, se_channels=int(in_channels * se_ratio))

    self.pw = Conv2d(expand_channels, out_channels, 1, bias=False, norm=norm)
    self.use_res = (stride == 1) and (in_channels == out_channels)
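# The block above is an MBConv-style inverted residual: a 1x1 expansion (skipped when
# expand_ratio == 1), a depth-wise conv, optional squeeze-excitation sized from the
# *input* channels, and a 1x1 linear projection; the residual path is used only when
# the stride is 1 and the channel counts match. A small sketch of the channel
# arithmetic for one assumed example configuration (not taken from any real model):
in_channels, out_channels = 40, 80
expand_ratio, kernel_size, stride, se_ratio = 6, 5, 2, 0.25

expand_channels = int(in_channels * expand_ratio)          # 240
padding = (kernel_size - 1) // 2                           # 2, keeps size before the stride
se_channels = int(in_channels * se_ratio)                  # 10
use_res = (stride == 1) and (in_channels == out_channels)  # False: strided, channels change
assert (expand_channels, padding, se_channels, use_res) == (240, 2, 10, False)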
def __init__(self, cfg, input_shape: List[ShapeSpec]):
    super().__init__()
    in_channels = [x.channels for x in input_shape]
    num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
    num_classes = cfg.MODEL.NUM_CLASSES
    norm = cfg.SSD.HEAD.NORM

    cls_score = []
    bbox_pred = []
    for i, c in enumerate(in_channels):
        if i == len(in_channels) - 1:
            cls_score.append(
                nn.Conv2d(c, num_anchors[i] * (num_classes + 1), 1))
            bbox_pred.append(nn.Conv2d(c, num_anchors[i] * 4, 1))
        else:
            # 3x3 depth-wise conv (padding 1 keeps the anchor grid size) + 1x1 prediction
            cls_score.append(
                nn.Sequential(
                    Conv2d(c, c, 3, 1, 1, groups=c, norm=norm, activation="ReLU6"),
                    Conv2d(c, num_anchors[i] * (num_classes + 1), 1)))
            bbox_pred.append(
                nn.Sequential(
                    Conv2d(c, c, 3, 1, 1, groups=c, norm=norm, activation="ReLU6"),
                    Conv2d(c, num_anchors[i] * 4, 1)))

    self.cls_score = nn.ModuleList(cls_score)
    self.bbox_pred = nn.ModuleList(bbox_pred)
def __init__(self, in_channels, out_channels, norm="BN", activation="LeakyReLU", **kwargs): super().__init__() self.conv1 = Conv2d(in_channels, out_channels // 2, kernel_size=1, bias=False, norm=norm, activation=activation, **kwargs) self.conv2 = Conv2d(out_channels // 2, out_channels, kernel_size=3, bias=False, norm=norm, activation=activation, **kwargs)
def __init__(self, vgg_cfg, norm="", out_features=None):
    super().__init__()
    self._out_feature_channels = {}
    self._out_feature_strides = {}

    layers = []
    in_channels = 3
    idx = 0
    stride = 1
    for v in vgg_cfg:
        if v == "M":
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
            stride *= 2
        else:
            layers.append(Conv2d(in_channels, v, 3, 1, 1, norm=norm, activation="ReLU"))
            in_channels = v
        self._out_feature_channels[str(idx)] = in_channels
        self._out_feature_strides[str(idx)] = stride
        idx += 1

    # pool5: 3x3, stride 1 keeps the resolution for the dilated fc6/fc7 convs
    layers.append(nn.MaxPool2d(kernel_size=3, stride=1, padding=1))
    self._out_feature_channels[str(idx)] = in_channels
    self._out_feature_strides[str(idx)] = stride
    idx += 1

    # fc6 converted to a dilated 3x3 conv, fc7 to a 1x1 conv (SSD-style VGG)
    layers.append(
        Conv2d(in_channels, 1024, 3, padding=6, dilation=6, norm=norm, activation="ReLU")
    )
    self._out_feature_channels[str(idx)] = 1024
    self._out_feature_strides[str(idx)] = stride
    idx += 1

    layers.append(Conv2d(1024, 1024, 1, norm=norm, activation="ReLU"))
    self._out_feature_channels[str(idx)] = 1024
    self._out_feature_strides[str(idx)] = stride

    self.features = nn.Sequential(*layers)
    self._out_features = out_features
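# The dilated 3x3 conv above (padding=6, dilation=6) keeps the spatial size unchanged,
# which is what lets it replace VGG's fc6 without shrinking the feature map. A quick
# standalone check with plain PyTorch (the 1x512x19x19 input is an assumed example size):
import torch
import torch.nn as nn

fc6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
x = torch.zeros(1, 512, 19, 19)
assert fc6(x).shape == (1, 1024, 19, 19)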
def __init__(self, in_channels, out_channels, kernel_size=1, ratio=2, dw_size=3,
             stride=1, norm="BN", relu=True):
    super().__init__()
    self.out_channels = out_channels
    init_channels = math.ceil(out_channels / ratio)
    new_channels = init_channels * (ratio - 1)

    self.primary_conv = Conv2d(
        in_channels, init_channels, kernel_size, stride, (kernel_size - 1) // 2,
        bias=False, norm=norm, activation="ReLU" if relu else ""
    )
    self.cheap_operation = Conv2d(
        init_channels, new_channels, dw_size, 1, (dw_size - 1) // 2,
        groups=init_channels, bias=False, norm=norm,
        activation="ReLU" if relu else ""
    )
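# GhostModule generates init_channels "primary" features with a regular conv and
# new_channels "ghost" features with a cheap depth-wise conv; their concatenation is
# at least out_channels wide (the forward pass is expected to slice back down to
# self.out_channels). A small sketch of that arithmetic (out_channels=120 and ratio=2
# are assumed example values):
import math

out_channels, ratio = 120, 2
init_channels = math.ceil(out_channels / ratio)   # 60 primary channels
new_channels = init_channels * (ratio - 1)        # 60 ghost channels
assert init_channels + new_channels >= out_channels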
def _make_stage(layers, in_channels, out_channels, norm="BN", activation="LeakyReLU"):
    # 3x3 stride-2 conv downsamples, followed by `layers` BasicBlocks
    module = [
        Conv2d(in_channels, out_channels, 3, 2, 1, bias=False, norm=norm, activation=activation)
    ]
    for _ in range(layers):
        module.append(BasicBlock(out_channels, out_channels, norm, activation))
    return nn.Sequential(*module)
def __init__(self, layers, channels, stem_channels=32, norm="BN", activation="LeakyReLU",
             out_features=None, num_classes=1000):
    super().__init__()
    assert len(layers) == len(channels), \
        f"len(layers) should equal len(channels), given {len(layers)} vs {len(channels)}"

    self.stem = Conv2d(3, stem_channels, 3, 1, 1, bias=False, norm=norm, activation=activation)
    self.stage1 = _make_stage(layers[0], stem_channels, channels[0], norm, activation)
    self.stage2 = _make_stage(layers[1], channels[0], channels[1], norm, activation)
    self.stage3 = _make_stage(layers[2], channels[1], channels[2], norm, activation)
    self.stage4 = _make_stage(layers[3], channels[2], channels[3], norm, activation)
    self.stage5 = _make_stage(layers[4], channels[3], channels[4], norm, activation)

    self._out_feature_channels = {f"stage{i}": c for i, c in zip(range(1, 6), channels)}
    self._out_feature_strides = {f"stage{i}": 2 ** i for i in range(1, 6)}

    if not out_features:
        out_features = ["linear"]
    if "linear" in out_features and num_classes is not None:
        self.fc = nn.Linear(channels[4], num_classes)
    self._out_features = out_features
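# With a stride-1 stem and one stride-2 downsampling conv at the start of each stage,
# stage i ends up with a total stride of 2**i, which is exactly what
# _out_feature_strides records. A standalone sketch of that bookkeeping (five stages,
# matching the Darknet-style configuration above):
stride = 1                       # stem keeps full resolution
strides = {}
for i in range(1, 6):
    stride *= 2                  # each stage starts with a stride-2 conv
    strides[f"stage{i}"] = stride
assert strides == {f"stage{i}": 2 ** i for i in range(1, 6)}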
def __init__(self, input_shape: ShapeSpec, *, conv_dims: List[int], fc_dims: List[int],
             conv_norm=""):
    """
    NOTE: this interface is experimental.
    """
    super().__init__()
    assert len(conv_dims) + len(fc_dims) > 0
    self._output_size = (input_shape.channels, input_shape.height, input_shape.width)

    self.conv_norm_relus = []
    for k, conv_dim in enumerate(conv_dims):
        conv = Conv2d(
            self._output_size[0], conv_dim, 3, padding=1,
            bias=(conv_norm == ""), norm=conv_norm, activation="ReLU",
        )
        self.add_module(f"conv{k + 1}", conv)
        self.conv_norm_relus.append(conv)
        # 3x3 padding-1 convs keep H and W, so only the channel count changes
        self._output_size = (conv_dim, self._output_size[1], self._output_size[2])

    self.fcs = []
    for k, fc_dim in enumerate(fc_dims):
        fc = nn.Linear(int(np.prod(self._output_size)), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim

    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
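# The running _output_size tuple above is what determines the first nn.Linear's input
# width: channels * height * width of the last conv output. A small standalone sketch
# of that bookkeeping (256 channels, a 7x7 ROI feature, and two 256-wide convs are
# assumed example values):
import numpy as np

output_size = (256, 7, 7)               # (channels, height, width) of the ROI feature
for conv_dim in [256, 256]:             # 3x3 padding-1 convs keep height and width
    output_size = (conv_dim, output_size[1], output_size[2])

first_fc_in = int(np.prod(output_size)) # flattened input of the first FC layer
assert first_fc_in == 256 * 7 * 7       # 12544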
def __init__(self, ghostnet_cfg=None, multiplier=1.0, dropout=0.2, norm="BN",
             num_classes=1000, out_features=None):
    super().__init__()
    if ghostnet_cfg is None:
        # kernel size, expansion size, output channels, se_ratio, stride
        ghostnet_cfg = [
            [3, 16, 16, 0, 1],
            [3, 48, 24, 0, 2],
            [3, 72, 24, 0, 1],
            [5, 72, 40, 0.25, 2],
            [5, 120, 40, 0.25, 1],
            [3, 240, 80, 0, 2],
            [3, 200, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 184, 80, 0, 1],
            [3, 480, 112, 0.25, 1],
            [3, 672, 112, 0.25, 1],
            [5, 672, 160, 0.25, 2],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 0.25, 1],
            [5, 960, 160, 0, 1],
            [5, 960, 160, 0.25, 1],
        ]

    output_channel = make_divisible(16 * multiplier, 4)
    layers = [Conv2d(3, output_channel, 3, 2, 1, bias=False, norm=norm, activation="ReLU")]
    self._out_feature_channels = {"0": output_channel}
    stride = 2
    self._out_feature_strides = {"0": stride}

    input_channel = output_channel
    block = GhostBottleneck
    index = 1
    for k, exp_size, c, se_ratio, s in ghostnet_cfg:
        output_channel = make_divisible(c * multiplier, 4)
        hidden_channel = make_divisible(exp_size * multiplier, 4)
        layers.append(
            block(input_channel, hidden_channel, output_channel, k, s, se_ratio)
        )
        input_channel = output_channel
        stride *= s
        self._out_feature_channels[str(index)] = output_channel
        self._out_feature_strides[str(index)] = stride
        index += 1

    output_channel = make_divisible(exp_size * multiplier, 4)
    layers.append(Conv2d(input_channel, output_channel, 1, norm=norm, activation="ReLU"))
    self._out_feature_channels[str(index)] = output_channel
    self._out_feature_strides[str(index)] = stride
    self.features = nn.Sequential(*layers)

    if not out_features:
        out_features = ["linear"]
    if "linear" in out_features and num_classes is not None:
        last_channel = 1280
        self.conv_head = Conv2d(input_channel, last_channel, 1, activation="ReLU")
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(last_channel, num_classes),
        )
    self._out_features = out_features
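# make_divisible above is not defined in this file; the sketch below shows the
# rounding behaviour such a helper is commonly assumed to have (round to the nearest
# multiple of `divisor`, never below it, and never dropping more than about 10% of
# the original value). This is an assumption for illustration, not necessarily this
# repo's implementation:
def make_divisible(value, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    if new_value < 0.9 * value:   # avoid rounding down by more than ~10%
        new_value += divisor
    return new_value

assert make_divisible(16 * 1.0, 4) == 16
assert make_divisible(72 * 0.5, 4) == 36
assert make_divisible(10, 8) == 16        # 8 would drop >10%, so bump to the next multiple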