def __init__(self, top_feat, bot_feat, in_channels, plan_cfg):
    super().__init__()
    self.decode_cfg(plan_cfg)
    self.out_channels = in_channels

    # 1x1 projections for the key/query/value maps and the fused output.
    self.top_key = Conv2d(in_channels, self.num_heads * self.key_dim, 1, stride=1, bias=False)
    self.bot_query = Conv2d(in_channels, self.num_heads * self.key_dim, 1, stride=1, bias=False)
    self.bot_value = Conv2d(in_channels, self.num_heads * self.value_dim, 1, stride=1, bias=False)
    self.top_fuse = Conv2d(self.num_heads * self.value_dim, self.out_channels, 1, stride=1, bias=False)

    for layer in self.modules():
        if isinstance(layer, Conv2d):
            torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
            if layer.bias is not None:
                torch.nn.init.constant_(layer.bias, 0)
def __init__(self, in_channels, out_channels, norm='BN', fuse_type='fast_norm'):
    super().__init__()
    # Top-down path: walk the input features from the deepest level downward.
    self.in_features = list(in_channels.keys())[::-1]
    td_channels = in_channels[self.in_features[0]]
    for in_feat in self.in_features[1:]:
        # The Upsample is applied as the Conv2d "activation", i.e. after the 1x1 conv and norm.
        td_conv = Conv2d(
            td_channels,
            out_channels,
            kernel_size=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
            activation=nn.Upsample(scale_factor=2),
        )
        in_conv = Conv2d(
            in_channels[in_feat],
            out_channels,
            kernel_size=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
        )
        fuse = FuseBlock(out_channels, num_weights=2, norm=norm, fuse_type=fuse_type)
        self.add_module(f'{in_feat}_td', td_conv)
        self.add_module(f'{in_feat}_in', in_conv)
        self.add_module(f'{in_feat}_fuse', fuse)
        td_channels = out_channels
def __init__(
    self,
    in_channels,
    hidden_channels,
    out_channels,
    memory_efficient=True,
):
    super().__init__()
    self.reduce = Conv2d(
        in_channels,
        hidden_channels,
        kernel_size=1,
        stride=1,
        bias=True,
        activation=MemoryEfficientSwish() if memory_efficient else Swish(),
    )
    self.expand = Conv2d(
        hidden_channels,
        out_channels,
        kernel_size=1,
        stride=1,
        bias=True,
        activation=MemoryEfficientSwish() if memory_efficient else Swish(),
    )
def __init__(self, in_channels, out_channels):
    super().__init__()
    self.num_levels = 2
    self.in_feature = "res5"
    # Two extra strided 3x3 convs generate the P6 and P7 levels from res5.
    self.p6 = Conv2d(in_channels, out_channels, 3, 2, 1)
    self.p7 = Conv2d(out_channels, out_channels, 3, 2, 1)
    for module in [self.p6, self.p7]:
        weight_init.c2_xavier_fill(module)
def __init__(self, cfg, input_shape: List[ShapeSpec]):
    super().__init__()
    # fmt: off
    in_channels = input_shape[0].channels
    num_classes = cfg.META_ARCH.NUM_CLASSES
    num_convs = cfg.META_ARCH.RETINANET.HEAD.NUM_CONVS
    prior_prob = cfg.META_ARCH.RETINANET.HEAD.PRIOR_PROB
    num_anchors = build_anchor_generator(cfg, input_shape).num_cell_anchors
    # fmt: on

    assert (
        len(set(num_anchors)) == 1
    ), "Using different number of anchors between levels is not currently supported!"
    num_anchors = num_anchors[0]

    cls_subnet = []
    bbox_subnet = []
    for _ in range(num_convs):
        cls_subnet.append(
            Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))
        cls_subnet.append(nn.ReLU())
        bbox_subnet.append(
            Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1))
        bbox_subnet.append(nn.ReLU())

    self.cls_subnet = nn.Sequential(*cls_subnet)
    self.bbox_subnet = nn.Sequential(*bbox_subnet)
    self.cls_score = Conv2d(
        in_channels, num_anchors * num_classes, kernel_size=3, stride=1, padding=1)
    self.bbox_pred = Conv2d(
        in_channels, num_anchors * 4, kernel_size=3, stride=1, padding=1)

    # Initialization
    for modules in [
            self.cls_subnet, self.bbox_subnet, self.cls_score, self.bbox_pred
    ]:
        for layer in modules.modules():
            if isinstance(layer, Conv2d):
                torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
                torch.nn.init.constant_(layer.bias, 0)

    # Use prior in model initialization to improve stability
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    torch.nn.init.constant_(self.cls_score.bias, bias_value)
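# Illustrative check only (not part of the model): with the bias above and the
# near-zero weight init, each anchor's initial foreground probability is
# approximately prior_prob, which keeps the loss on the overwhelmingly negative
# anchors stable at the start of training. prior_prob = 0.01 is assumed here
# purely for the example.
if __name__ == "__main__":
    import math

    import torch

    prior_prob = 0.01
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    logits = torch.full((4,), bias_value)  # freshly initialized classification logits
    # sigmoid(-log((1 - p) / p)) == p
    assert torch.allclose(torch.sigmoid(logits), torch.tensor(prior_prob))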
def __init__(self, in_features, out_features, in_channels, out_channels, norm='BN'):
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    assert len(in_features) == len(in_channels)

    in_channel = in_channels[-1]
    # Copy so the caller's in_channels list is not mutated by the appends below.
    self.out_channels = list(in_channels)
    self.num_block = 0
    # Extra output levels beyond the input features: 1x1 conv followed by 2x max-pool.
    for out_feat in out_features[len(in_features):]:
        conv1x1 = Conv2d(
            in_channel,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
            activation=nn.MaxPool2d(kernel_size=2),
        )
        self.add_module(out_feat, conv1x1)
        self.num_block += 1
        in_channel = out_channels
        self.out_channels.append(out_channels)
    self.out_channels = dict(zip(self.out_features, self.out_channels))
def __init__(self, fpn, in_features, out_channels, use_bias=True, norm=""):
    super(PANetBase, self).__init__()
    assert isinstance(fpn, Backbone)
    self.fpn = fpn
    self.in_features = in_features

    fpn_shape = fpn.output_shape()
    in_channels = [fpn_shape[k].channels for k in self.in_features]
    in_strides = [fpn_shape[k].stride for k in self.in_features]

    self.lateral_convs = []
    self.output_convs = []
    # Bottom-up augmentation path: each lateral conv downsamples (stride 2) to the next level.
    for idx, in_channel in enumerate(in_channels[:-1], start=1):
        lateral_conv = Conv2d(
            in_channel,
            out_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=use_bias,
            norm=get_norm(norm, out_channels),
        )
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_conv)
        stage = int(math.log2(in_strides[idx]))
        self.add_module(f"panet_lateral{stage}", lateral_conv)
        self.add_module(f"panet_output{stage}", output_conv)
        self.lateral_convs.append(lateral_conv)
        self.output_convs.append(output_conv)

    self._out_feature_strides = {f"n{int(math.log2(s))}": s for s in in_strides}
    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]
def __init__(self, cfg, input_shape: ShapeSpec):
    super().__init__()
    num_classes = cfg.META_ARCH.NUM_CLASSES
    num_conv = cfg.META_ARCH.ROI.BOX_HEAD.NUM_CONV
    conv_dim = cfg.META_ARCH.ROI.BOX_HEAD.CONV_DIM
    conv_norm = cfg.META_ARCH.ROI.BOX_HEAD.CONV_NORM
    num_fc = cfg.META_ARCH.ROI.BOX_HEAD.NUM_FC
    fc_dim = cfg.META_ARCH.ROI.BOX_HEAD.FC_DIM

    conv_dims = [conv_dim] * num_conv
    fc_dims = [fc_dim] * num_fc
    assert len(conv_dims) + len(fc_dims) > 0
    output_size = (input_shape.channels, input_shape.height, input_shape.width)

    # Conv Subnet
    self.conv_subnet = []
    for k, conv_dim in enumerate(conv_dims):
        conv = Conv2d(
            output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            bias=not conv_norm,
            norm=get_norm(conv_norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_subnet.append(conv)
        output_size = (conv_dim, output_size[1], output_size[2])

    # FC Subnet
    self.fc_subnet = []
    for k, fc_dim in enumerate(fc_dims):
        fc = Linear(np.prod(output_size), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fc_subnet.append(fc)
        output_size = fc_dim

    # Classification and Localization
    if isinstance(output_size, int):
        input_size = output_size
    else:
        input_size = output_size[0] * output_size[1] * output_size[2]
    box_dim = len(cfg.META_ARCH.ROI.TEST.BBOX_REG_WEIGHTS)
    self.cls_score = Linear(input_size, num_classes + 1)
    self.bbox_pred = Linear(input_size, num_classes * box_dim)

    # Initialization
    for layer in self.conv_subnet:
        weight_init.c2_msra_fill(layer)
    for layer in self.fc_subnet:
        weight_init.c2_xavier_fill(layer)
def __init__(self, in_channels, out_channels, norm='BN', fuse_type='fast_norm'):
    super().__init__()
    self.in_features = list(in_channels.keys())

    # Bottom-up path: each level after the first gets a downsampling conv
    # (2x max-pool applied as the "activation") and a 1x1 input projection.
    for in_feat in self.in_features[1:]:
        bu_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
            activation=nn.MaxPool2d(kernel_size=2),
        )
        in_conv = Conv2d(
            in_channels[in_feat],
            out_channels,
            kernel_size=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
        )
        self.add_module(f'{in_feat}_bu', bu_conv)
        self.add_module(f'{in_feat}_in', in_conv)

    # Intermediate levels also fuse a top-down branch (three inputs); the last level fuses two.
    for in_feat in self.in_features[1:-1]:
        td_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=1,
            padding=0,
            bias=True,
            norm=get_norm(norm, out_channels),
        )
        fuse = FuseBlock(out_channels, num_weights=3, norm=norm, fuse_type=fuse_type)
        self.add_module(f'{in_feat}_td', td_conv)
        self.add_module(f'{in_feat}_fuse', fuse)

    fuse = FuseBlock(out_channels, num_weights=2, norm=norm, fuse_type=fuse_type)
    self.add_module(f'{self.in_features[-1]}_fuse', fuse)
def __init__(self, in_channels=3, out_channels=32, norm="BN"):
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
def __init__(self, top_channels, bot_channels, out_channels, plan_cfg):
    super().__init__()
    self.out_channels = out_channels
    self.decode_cfg(plan_cfg)

    self.conv_keys = Conv2d(top_channels, self.num_heads * self.hidden_channels, 1, stride=1, bias=False)
    self.conv_queries = Conv2d(bot_channels, self.num_heads * self.hidden_channels, 1, stride=1, bias=False)
    self.conv_values = Conv2d(bot_channels, self.num_heads * self.hidden_channels, 1, stride=1, bias=False)

    # Stack of 1x1 convs that fuses the multi-head output back to out_channels.
    self.conv_fuse = []
    prev_channels = self.num_heads * self.hidden_channels
    for _ in range(self.num_fuse):
        self.conv_fuse.append(
            Conv2d(prev_channels, self.out_channels, 1, stride=1, bias=False, activation=nn.ReLU()))
        prev_channels = self.out_channels
    self.conv_fuse = nn.Sequential(*self.conv_fuse)

    for layer in self.modules():
        if isinstance(layer, Conv2d):
            torch.nn.init.normal_(layer.weight, mean=0, std=0.01)
            if layer.bias is not None:
                torch.nn.init.constant_(layer.bias, 0)
def __init__(self, cfg, input_shape):
    """
    NOTE: this interface is experimental.
    """
    super().__init__()
    in_channels = [s.channels for s in input_shape]
    assert len(set(in_channels)) == 1, "Each level must have the same channel!"
    in_channels = in_channels[0]

    anchor_generator = build_anchor_generator(cfg, input_shape)
    num_anchors = anchor_generator.num_cell_anchors
    assert (
        len(set(num_anchors)) == 1
    ), "Each level must have the same number of anchors per spatial position"
    num_anchors = num_anchors[0]
    box_dim = anchor_generator.box_dim

    # Shared 3x3 conv (with ReLU) followed by 1x1 heads for objectness and anchor deltas.
    self.conv = Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1, activation=F.relu)
    self.cls_subnet = Conv2d(in_channels, num_anchors, kernel_size=1, stride=1)
    self.bbox_subnet = Conv2d(in_channels, num_anchors * box_dim, kernel_size=1, stride=1)

    for layer in [self.conv, self.cls_subnet, self.bbox_subnet]:
        nn.init.normal_(layer.weight, std=0.01)
        nn.init.constant_(layer.bias, 0)
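# Sketch of a typical forward pass for this RPN head, assuming it receives a
# list of per-level feature maps. self.conv already applies ReLU through its
# `activation` argument, so no extra nonlinearity is needed here. This is an
# illustration, not necessarily the exact forward defined elsewhere in the repo.
def forward(self, features):
    pred_objectness_logits = []  # per level: (N, num_anchors, H, W)
    pred_anchor_deltas = []      # per level: (N, num_anchors * box_dim, H, W)
    for x in features:
        t = self.conv(x)
        pred_objectness_logits.append(self.cls_subnet(t))
        pred_anchor_deltas.append(self.bbox_subnet(t))
    return pred_objectness_logits, pred_anchor_deltas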
def __init__(self, input_shape: ShapeSpec, norm='BN', use_bias=True):
    super().__init__()
    in_channel = input_shape.channels
    self.conv = Conv2d(
        in_channel,
        in_channel,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=use_bias,
        norm=get_norm(norm, in_channel),
    )
    weight_init.c2_xavier_fill(self.conv)
def __init__(self, feat, in_channels, out_channels):
    super().__init__()
    self.feat = feat
    self.in_channels = in_channels
    self.out_channels = out_channels
    # Squeeze-and-excitation on the input, followed by a 1x1 channel projection.
    self.SE_block = SqueezeExcitation2d(in_channels, 32, in_channels)
    self.conv_block = Conv2d(
        in_channels, out_channels, stride=1, kernel_size=1, padding=0, bias=False)
def __init__(self, cfg):
    super().__init__(cfg)
    self.in_features = cfg.META_ARCH.RESNET.IN_FEATURES
    self.num_classes = cfg.META_ARCH.NUM_CLASSES
    assert isinstance(self.in_features, str)

    # Backbone Network
    self.backbone = build_backbone(cfg)
    assert self.in_features in self.backbone.out_features, (
        f"'{self.in_features}' is not in backbone({self.backbone.out_features})")
    backbone_shape = self.backbone.output_shape()
    feature_shapes = backbone_shape[self.in_features]

    # Classification head: global average pooling, then a 1x1 conv acting as the linear classifier.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.linear = Conv2d(feature_shapes.channels, self.num_classes, 1, 1)
def __init__(self, in_channels=3, out_channels=64, norm="BN", stem_bias=True):
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=stem_bias,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
def __init__(
    self,
    in_channels,
    out_channels,
    norm="BN",
    memory_efficient=True,
):
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
        activation=MemoryEfficientSwish() if memory_efficient else Swish(),
    )
def __init__(
    self,
    in_channels,
    out_channels,
    kernel_size,
    *,
    stride=1,
    expand_dim=1,
    num_groups=1,
    norm="BN",
    dilation=1,
    se_ratio=0.25,
    drop_connect_prob=0.2,
    memory_efficient=True,
    is_skip=True,
):
    super().__init__(in_channels, out_channels, stride)
    # The identity skip is only valid when the block changes neither resolution nor channel count.
    self.is_skip = (is_skip and stride == 1 and in_channels == out_channels)
    self.drop_connect_prob = drop_connect_prob

    if expand_dim > 1:
        self.expand_conv = Conv2d(
            in_channels,
            in_channels * expand_dim,
            kernel_size=1,
            stride=1,
            bias=False,
            norm=get_norm(norm, in_channels * expand_dim),
            activation=MemoryEfficientSwish() if memory_efficient else Swish(),
        )

    self.depthwise_conv = Conv2d(
        in_channels * expand_dim,
        in_channels * expand_dim,
        kernel_size=kernel_size,
        stride=stride,
        padding=int((kernel_size - 1) / 2) * dilation,
        bias=False,
        groups=in_channels * expand_dim,
        dilation=dilation,
        norm=get_norm(norm, in_channels * expand_dim),
        activation=MemoryEfficientSwish() if memory_efficient else Swish(),
    )

    if se_ratio > 0:
        self.SEblock = SqueezeExcitation2d(
            in_channels * expand_dim,
            int(in_channels * se_ratio),
            in_channels * expand_dim,
            memory_efficient=memory_efficient,
        )

    self.project_conv = Conv2d(
        in_channels * expand_dim,
        out_channels,
        kernel_size=1,
        stride=1,
        bias=False,
        norm=get_norm(norm, out_channels),
        activation=None,
    )
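# Sketch only: a generic drop-connect (stochastic depth) helper of the kind
# that usually accompanies `drop_connect_prob` and the `is_skip` flag above.
# The name and exact behavior are assumptions; the repo may implement this
# differently.
import torch


def drop_connect(x: torch.Tensor, drop_prob: float, training: bool) -> torch.Tensor:
    """Randomly drop the residual branch of a whole sample during training."""
    if not training or drop_prob == 0.0:
        return x
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast over (C, H, W); rescale to keep the expectation.
    mask = (torch.rand(x.shape[0], 1, 1, 1, dtype=x.dtype, device=x.device) < keep_prob).to(x.dtype)
    return x / keep_prob * mask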
def __init__(self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"):
    super(FPNBase, self).__init__()
    assert isinstance(bottom_up, Backbone)

    in_strides = [bottom_up.out_feature_strides[f] for f in in_features]
    in_channels = [bottom_up.out_feature_channels[f] for f in in_features]
    _assert_strides_are_log2_contiguous(in_strides)

    lateral_convs = []
    output_convs = []
    use_bias = norm == ""
    for idx, in_channels in enumerate(in_channels):
        lateral_norm = get_norm(norm, out_channels)
        output_norm = get_norm(norm, out_channels)
        lateral_conv = Conv2d(
            in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm)
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_conv)
        stage = int(math.log2(in_strides[idx]))
        self.add_module(f"fpn_lateral{stage}", lateral_conv)
        self.add_module(f"fpn_output{stage}", output_conv)
        lateral_convs.append(lateral_conv)
        output_convs.append(output_conv)

    # Reverse so the convs are ordered top-down (low to high resolution), matching the fuse order.
    self.lateral_convs = lateral_convs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up

    self._out_feature_strides = {f"p{int(math.log2(s))}": s for s in in_strides}
    # The top block adds extra levels (e.g. p6, p7) above the last FPN stage.
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides[f"p{s + 1}"] = 2 ** (s + 1)

    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]
    assert fuse_type in {"avg", "sum"}
    self._fuse_type = fuse_type
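# The constructor above relies on `_assert_strides_are_log2_contiguous`. A
# minimal sketch of what that helper is expected to check (each stride is
# exactly twice the previous one), in case it is defined in another module:
def _assert_strides_are_log2_contiguous(strides):
    """Assert that each stride is 2x its preceding stride, e.g. [4, 8, 16, 32]."""
    for i, stride in enumerate(strides[1:], start=1):
        assert stride == 2 * strides[i - 1], \
            f"Strides {stride} and {strides[i - 1]} are not log2 contiguous"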
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    stride_in_1x1=False,
    dilation=1,
):
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv.
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations have
    # stride in the 3x3 conv.
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:
            weight_init.c2_msra_fill(layer)
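# A typical forward for this bottleneck block (sketch; the repo's actual
# forward may differ): 1x1 reduce -> 3x3 -> 1x1 expand, with the (possibly
# projected) shortcut added before the final ReLU.
def forward(self, x):
    out = F.relu_(self.conv1(x))
    out = F.relu_(self.conv2(out))
    out = self.conv3(out)
    shortcut = self.shortcut(x) if self.shortcut is not None else x
    return F.relu_(out + shortcut)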