def __init__(self, cfg, input_shape: List[ShapeSpec]):
    """
    Build the EfficientDet classification / box-regression head.

    Args:
        cfg: config node. Reads ``MODEL.EFFICIENTDET.NUM_CLASSES`` and the
            ``MODEL.EFFICIENTDET.HEAD.*`` options, and calls
            ``cfg.build_anchor_generator`` to query anchors per location.
        input_shape (List[ShapeSpec]): shape of each input feature level.
            All levels are assumed to have the channel count of
            ``input_shape[0]``.
    """
    super().__init__()
    in_channels = input_shape[0].channels
    num_classes = cfg.MODEL.EFFICIENTDET.NUM_CLASSES
    norm = cfg.MODEL.EFFICIENTDET.HEAD.NORM
    bn_momentum = cfg.MODEL.EFFICIENTDET.HEAD.BN_MOMENTUM
    bn_eps = cfg.MODEL.EFFICIENTDET.HEAD.BN_EPS
    prior_prob = cfg.MODEL.EFFICIENTDET.HEAD.PRIOR_PROB
    memory_efficient = cfg.MODEL.EFFICIENTDET.HEAD.MEMORY_EFFICIENT_SWISH
    num_conv_layers = cfg.MODEL.EFFICIENTDET.HEAD.NUM_CONV
    num_anchors = cfg.build_anchor_generator(cfg, input_shape).num_cell_anchors
    self.bn_momentum = bn_momentum
    self.bn_eps = bn_eps
    self.prior_prob = prior_prob
    assert (
        len(set(num_anchors)) == 1
    ), "Using different number of anchors between levels is not currently supported!"
    num_anchors = num_anchors[0]
    # Conv towers: one stack of separable convs per subnet. The convs are
    # shared across pyramid levels (see the per-level BN lists below).
    self.cls_subnet = nn.ModuleList([])
    self.bbox_subnet = nn.ModuleList([])
    for _ in range(num_conv_layers):
        self.cls_subnet.append(
            SeparableConvBlock(in_channels, in_channels, kernel_size=3, padding="SAME"))
        self.bbox_subnet.append(
            SeparableConvBlock(in_channels, in_channels, kernel_size=3, padding="SAME"))
    num_levels = len(input_shape)
    # Per-level, per-layer norms: unlike the convs, normalization statistics
    # are NOT shared across pyramid levels.
    self.bn_cls_subnet = nn.ModuleList()
    self.bn_bbox_subnet = nn.ModuleList()
    for _ in range(num_levels):
        self.bn_cls_subnet.append(
            nn.ModuleList([
                get_norm(norm, in_channels) for _ in range(num_conv_layers)
            ]))
        self.bn_bbox_subnet.append(
            nn.ModuleList([
                get_norm(norm, in_channels) for _ in range(num_conv_layers)
            ]))
    # Output projections: A*K class logits and A*4 box deltas per location.
    self.cls_score = SeparableConvBlock(in_channels,
                                        num_anchors * num_classes,
                                        kernel_size=3,
                                        padding="SAME")
    self.bbox_pred = SeparableConvBlock(in_channels,
                                        num_anchors * 4,
                                        kernel_size=3,
                                        padding="SAME")
    self.act = MemoryEfficientSwish() if memory_efficient else Swish()
    self._init_weights()
def __init__(self, block_args, global_params):
    """
    Args:
        block_args (EasyDict): block args, see: class: `EfficientNet`.
        global_params (EasyDict): global args, see: class: `EfficientNet`.
    """
    super().__init__()
    self._block_args = block_args
    # SE is active only for a ratio strictly inside (0, 1].
    self.has_se = (block_args.se_ratio is not None) and (0 < block_args.se_ratio <= 1)
    self.id_skip = block_args.id_skip

    # Expansion phase
    # number of input channels
    inp = block_args.in_channels
    # number of output channels
    oup = block_args.in_channels * block_args.expand_ratio
    # The 1x1 expansion conv is skipped entirely when expand_ratio == 1
    # (then oup == inp and the depthwise conv consumes the input directly).
    if block_args.expand_ratio != 1:
        self._expand_conv = Conv2d(in_channels=inp,
                                   out_channels=oup,
                                   kernel_size=1,
                                   padding=0,
                                   bias=False)
        self._bn0 = get_norm(global_params.norm, out_channels=oup)

    # Depthwise convolution phase
    k = block_args.kernel_size
    s = block_args.stride
    # groups == channels makes this a depthwise conv.
    self._depthwise_conv = Conv2d(in_channels=oup,
                                  out_channels=oup,
                                  groups=oup,
                                  kernel_size=k,
                                  stride=s,
                                  padding="SAME",
                                  bias=False)
    self._bn1 = get_norm(global_params.norm, out_channels=oup)

    # Squeeze and Excitation layer, if desired
    if self.has_se:
        # Squeeze width is computed from the block INPUT channels, not the
        # expanded width (matches the reference EfficientNet implementation).
        num_squeezed_channels = max(
            1, int(block_args.in_channels * block_args.se_ratio))
        self._se_reduce = Conv2d(in_channels=oup,
                                 out_channels=num_squeezed_channels,
                                 kernel_size=1,
                                 padding=0)
        self._se_expand = Conv2d(in_channels=num_squeezed_channels,
                                 out_channels=oup,
                                 kernel_size=1,
                                 padding=0)

    # Output phase: linear 1x1 projection (no activation after _bn2).
    final_oup = block_args.out_channels
    self._project_conv = Conv2d(in_channels=oup,
                                out_channels=final_oup,
                                kernel_size=1,
                                padding=0,
                                bias=False)
    self._bn2 = get_norm(global_params.norm, final_oup)
    self._swish = MemoryEfficientSwish()
def __init__(self, in_channels, out_channels, *, stride=1, norm="BN", activation=None, **kwargs):
    """
    The standard block type for ResNet18 and ResNet34.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        stride (int): Stride for the first conv.
        norm (str or callable): A callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string (one of
            {"FrozenBN", "BN", "GN"}).
    """
    super().__init__(in_channels, out_channels, stride)

    # Projection shortcut only when the channel count changes; otherwise
    # the identity is used (shortcut stays None).
    self.shortcut = None
    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )

    self.activation = get_activation(activation)

    def _conv3x3(cin, cout, s):
        # 3x3 conv + norm; bias is omitted because the norm provides it.
        return Conv2d(
            cin,
            cout,
            kernel_size=3,
            stride=s,
            padding=1,
            bias=False,
            norm=get_norm(norm, cout),
        )

    self.conv1 = _conv3x3(in_channels, out_channels, stride)
    self.conv2 = _conv3x3(out_channels, out_channels, 1)

    # MSRA (He) init for every conv actually created.
    for layer in (self.conv1, self.conv2, self.shortcut):
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
def __init__(self, in_channels, channels, kernel_size, stride=(1, 1), padding=(0, 0),
             dilation=(1, 1), groups=1, bias=True, radix=2, reduction_factor=4,
             rectify=False, rectify_avg=False, norm=None, dropblock_prob=0.0, **kwargs):
    """
    Split-Attention Conv2d (ResNeSt).

    Args:
        in_channels (int): input channel count.
        channels (int): per-split output channel count; the internal conv
            produces ``channels * radix`` channels.
        radix (int): number of splits within a cardinal group.
        reduction_factor (int): reduction for the attention bottleneck
            (``inter_channels`` is floored at 32).
        rectify (bool): use RFConv2d instead of Conv2d (only has an effect
            when padding is non-zero).
        norm (str or callable or None): normalization spec for `get_norm`.
        dropblock_prob (float): if > 0, a DropBlock2D module is created.
    """
    super(SplAtConv2d, self).__init__()
    padding = _pair(padding)
    # Rectified conv only matters when some padding is applied.
    self.rectify = rectify and (padding[0] > 0 or padding[1] > 0)
    self.rectify_avg = rectify_avg
    inter_channels = max(in_channels * radix // reduction_factor, 32)
    self.radix = radix
    self.cardinality = groups
    self.channels = channels
    self.dropblock_prob = dropblock_prob
    if self.rectify:
        self.conv = RFConv2d(in_channels,
                             channels * radix,
                             kernel_size,
                             stride,
                             padding,
                             dilation,
                             groups=groups * radix,
                             bias=bias,
                             average_mode=rectify_avg,
                             **kwargs)
    else:
        self.conv = Conv2d(in_channels,
                           channels * radix,
                           kernel_size,
                           stride,
                           padding,
                           dilation,
                           groups=groups * radix,
                           bias=bias,
                           **kwargs)
    self.use_bn = norm is not None
    # NOTE(review): bn0/bn1 are created even when norm is None — presumably
    # get_norm(None, ...) returns None/identity; confirm against get_norm.
    self.bn0 = get_norm(norm, channels * radix)
    self.relu = ReLU(inplace=True)
    # Attention bottleneck: channels -> inter_channels -> channels * radix.
    self.fc1 = Conv2d(channels, inter_channels, 1, groups=self.cardinality)
    self.bn1 = get_norm(norm, inter_channels)
    self.fc2 = Conv2d(inter_channels, channels * radix, 1, groups=self.cardinality)
    if dropblock_prob > 0.0:
        self.dropblock = DropBlock2D(dropblock_prob, 3)
def __init__(self, C_in, C_out, kernel_size, stride, padding, norm_layer,
             affine=True, input_size=None):
    """
    Separable convolution op (two stacked depthwise-separable convs).

    Args:
        C_in (int): input channel count.
        C_out (int): output channel count.
        kernel_size (int): depthwise kernel size.
        stride (int): stride of the first depthwise conv.
        norm_layer (str or callable): normalization spec for `get_norm`.
        affine (bool): passed through to the FLOPs computation only.
        input_size: (H, W) of the input, required for FLOPs accounting.
    """
    super(SepConv, self).__init__()
    self.op = nn.Sequential(
        # depth wise
        Conv2d(C_in,
               C_in,
               kernel_size=kernel_size,
               stride=stride,
               padding=padding,
               groups=C_in,
               bias=False),
        # point wise
        Conv2d(C_in,
               C_in,
               kernel_size=1,
               padding=0,
               bias=False,
               norm=get_norm(norm_layer, C_in),
               activation=nn.ReLU()),
        # stack 2 separate depthwise-conv.
        Conv2d(C_in,
               C_in,
               kernel_size=kernel_size,
               stride=1,
               padding=padding,
               groups=C_in,
               bias=False),
        Conv2d(C_in,
               C_out,
               kernel_size=1,
               padding=0,
               bias=False,
               norm=get_norm(norm_layer, C_out)))
    # Record FLOPs for NAS cost accounting.
    self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                               C_out, affine, input_size[0], input_size[1])
    # using Kaiming init
    for m in self.op.modules():
        if isinstance(m, nn.Conv2d):
            weight_init.kaiming_init(m, mode='fan_in')
        elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
def __init__(self, input_channels, output_channels, stride, expand_ratio, norm,
             activation, use_shortcut=True):
    """
    MobileNetV2-style inverted residual block.

    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        stride (int): stride of the depthwise conv; must be 1 or 2.
        expand_ratio: channel expansion ratio for the hidden layer.
        norm (str or callable): normalization spec for `get_norm`.
        activation (str): activation spec for `get_activation`.
        use_shortcut (bool): whether to use the residual path; requires
            stride == 1 and matching input/output channels.
    """
    super(InvertedResBlock, self).__init__()
    self.stride = stride
    assert stride in [1, 2]
    mid_channels = int(round(input_channels * expand_ratio))
    self.use_shortcut = use_shortcut
    if self.use_shortcut:
        assert stride == 1
        assert input_channels == output_channels
    # deepcopy so each conv gets its own norm/activation module instances.
    conv_kwargs = {
        "norm": get_norm(norm, mid_channels),
        "activation": get_activation(activation)
    }
    layers = []
    if expand_ratio > 1:
        layers.append(
            Conv2d(
                input_channels,
                mid_channels,
                1,
                bias=False,
                # Pixel-wise non-linear
                **deepcopy(conv_kwargs)))
    layers += [
        Conv2d(
            mid_channels,
            mid_channels,
            3,
            padding=1,
            bias=False,
            # Depth-wise 3x3
            stride=stride,
            groups=mid_channels,
            **deepcopy(conv_kwargs)),
        Conv2d(
            mid_channels,
            output_channels,
            1,
            bias=False,
            # Pixel-wise linear
            norm=get_norm(norm, output_channels))
    ]
    self.conv = nn.Sequential(*layers)
def __init__(self, C_in, C_out, kernel_size, stride, padding, norm_layer,
             expansion=4, affine=True, input_size=None):
    """
    Mobile inverted bottleneck conv (MBConv) op for NAS search spaces.

    Args:
        C_in (int): input channel count.
        C_out (int): output channel count.
        expansion (int): hidden-dim multiplier (hidden = expansion * C_in).
        norm_layer (str or callable): normalization spec for `get_norm`.
        affine (bool): passed through to the FLOPs computation only.
        input_size: (H, W) of the input, required for FLOPs accounting.
    """
    super(MBConv, self).__init__()
    self.hidden_dim = expansion * C_in
    self.op = nn.Sequential(
        # pw
        Conv2d(C_in,
               self.hidden_dim,
               1,
               1,
               0,
               bias=False,
               norm=get_norm(norm_layer, self.hidden_dim),
               activation=nn.ReLU()),
        # dw
        Conv2d(self.hidden_dim,
               self.hidden_dim,
               kernel_size,
               stride,
               padding,
               groups=self.hidden_dim,
               bias=False,
               norm=get_norm(norm_layer, self.hidden_dim),
               activation=nn.ReLU()),
        # pw-linear without ReLU!
        Conv2d(self.hidden_dim,
               C_out,
               1,
               1,
               0,
               bias=False,
               norm=get_norm(norm_layer, C_out)))
    # Record FLOPs for NAS cost accounting.
    self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                               C_out, affine, input_size[0], input_size[1])
    # using Kaiming init
    for m in self.op.modules():
        if isinstance(m, nn.Conv2d):
            weight_init.kaiming_init(m, mode='fan_in')
        elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): super(MaskBranch, self).__init__() # fmt: off norm = cfg.MODEL.CONDINST.MASK_BRANCH.NORM in_features = cfg.MODEL.CONDINST.MASK_BRANCH.IN_FEATURES num_convs = cfg.MODEL.CONDINST.MASK_BRANCH.NUM_CONVS channels = cfg.MODEL.CONDINST.MASK_BRANCH.BRANCH_CHANNELS fpn_out_features = cfg.MODEL.FCOS.IN_FEATURES self.num_outputs = cfg.MODEL.CONDINST.MASK_BRANCH.OUT_CHANNELS # fmt: on self.in_features_inds = [ fpn_out_features.index(in_fea_key) for in_fea_key in in_features ] self.out_stride = input_shape[in_features[0]].stride self.refine = nn.ModuleList() for in_feature in in_features: refine_i = [ nn.Conv2d(input_shape[in_feature].channels, channels, kernel_size=3, stride=1, padding=1, bias=(norm is None)), get_norm(norm, channels), nn.ReLU(inplace=True) ] self.refine.append(nn.Sequential(*refine_i)) mask_subnet = [] for _ in range(num_convs): mask_subnet.append( nn.Conv2d(channels, channels, kernel_size=3, stride=1, padding=1, bias=(norm is None))) mask_subnet.append(get_norm(norm, channels)) mask_subnet.append(nn.ReLU(inplace=True)) mask_subnet.append(nn.Conv2d(channels, max(self.num_outputs, 1), 1)) self.mask_subnet = nn.Sequential(*mask_subnet) # Initialization for modules in [self.refine, self.mask_subnet]: for layer in modules.modules(): if isinstance(layer, nn.Conv2d): torch.nn.init.kaiming_uniform_(layer.weight, a=1)
def _init_layers(self):
    """
    Build the SOLO head towers.

    Creates:
      - ``self.ins_convs``: the instance (mask) tower; its first conv takes
        ``in_channels + 2`` inputs — the extra 2 presumably being coordinate
        channels (CoordConv-style; confirm against the forward pass).
      - ``self.cate_convs``: the category tower.
      - ``self.solo_ins_list``: one 1x1 conv per grid size, predicting
        ``grid**2`` instance maps.
      - ``self.solo_cate``: 3x3 conv producing per-class category scores.

    Convs drop their bias exactly when a norm layer follows them.
    """
    ins_convs = []
    cate_convs = []
    for i in range(self.stacked_convs):
        # Mask branch
        chn = self.in_channels + 2 if i == 0 else self.seg_feat_channels
        ins_convs.append(
            nn.Conv2d(chn,
                      self.seg_feat_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=not self.norm)  # idiomatic form of `False if norm else True`
        )
        if self.norm:
            ins_convs.append(get_norm(self.norm, self.seg_feat_channels))
        ins_convs.append(nn.ReLU(inplace=True))
        # Category branch
        chn = self.in_channels if i == 0 else self.seg_feat_channels
        cate_convs.append(
            nn.Conv2d(chn,
                      self.seg_feat_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=not self.norm)
        )
        if self.norm:
            cate_convs.append(get_norm(self.norm, self.seg_feat_channels))
        cate_convs.append(nn.ReLU(inplace=True))
    self.ins_convs = nn.Sequential(*ins_convs)
    self.cate_convs = nn.Sequential(*cate_convs)
    # One instance predictor per grid resolution.
    self.solo_ins_list = nn.ModuleList()
    for seg_num_grid in self.seg_num_grids:
        self.solo_ins_list.append(
            nn.Conv2d(self.seg_feat_channels, seg_num_grid ** 2, kernel_size=1)
        )
    self.solo_cate = nn.Conv2d(
        self.seg_feat_channels,
        self.num_classes,
        kernel_size=3,
        stride=1,
        padding=1,
    )
def __init__(self, C_in, C_out, kernel_size, stride, padding, norm_layer,
             affine=True, input_size=None):
    """
    Plain conv+norm op for NAS search spaces, with FLOPs accounting.

    Args:
        C_in (int): input channel count.
        C_out (int): output channel count.
        norm_layer (str or callable): normalization spec for `get_norm`.
        affine (bool): passed through to the FLOPs computation only.
        input_size: (H, W) of the input, required for FLOPs accounting.
    """
    super(BasicResBlock, self).__init__()
    self.op = Conv2d(C_in,
                     C_out,
                     kernel_size,
                     stride=stride,
                     padding=padding,
                     bias=False,
                     norm=get_norm(norm_layer, C_out))
    # Record FLOPs for NAS cost accounting.
    self.flops = self.get_flop([kernel_size, kernel_size], stride, C_in,
                               C_out, affine, input_size[0], input_size[1])
    # using Kaiming init
    for m in self.op.modules():
        if isinstance(m, nn.Conv2d):
            weight_init.kaiming_init(m, mode='fan_in')
        elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
def __init__(self, in_channels=3, out_channels=64, norm="BN", activation=None):
    """
    Standard ResNet stem: 7x7 stride-2 conv + norm + 3x3 stride-2 max pool
    (overall stride 4).

    Args:
        norm (str or callable): a callable that takes the number of channels
            and return a `nn.Module`, or a pre-defined string (one of
            {"FrozenBN", "BN", "GN"}).
    """
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self, cfg):
    """
    Image-classification model wrapper.

    Builds the configured backbone, replaces its stem with a 3x3 stride-1
    conv (the CIFAR-style stem), and sets up the loss and input normalizer.

    Args:
        cfg: config node; reads MODEL.DEVICE, MODEL.PIXEL_MEAN/STD and uses
            cfg.build_backbone.
    """
    super(Classification, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.network = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    # Small-image stem: 3x3 stride-1 instead of the 7x7 stride-2 default.
    self.network.stem = nn.Sequential(
        Conv2d(3,
               64,
               kernel_size=3,
               stride=1,
               padding=1,
               bias=False,
               norm=get_norm("BN", 64)),
        nn.ReLU(),
    )
    self.loss_evaluator = nn.CrossEntropyLoss()
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        3, 1, 1)
    # NOTE(review): unlike the sibling Classification variant in this file,
    # this normalizer does NOT divide by 255 and uses a (3,1,1) view —
    # presumably mean/std here are in raw pixel scale; confirm.
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
def _make_layers(self, num_blocks, **kwargs):
    """
    Create a vgg-net stage by creating many blocks(conv layers).

    Args:
        num_blocks (int): the number of conv layer in the stage.
        kwargs: other arguments, see: method:`__init__`.

    Returns:
        list[nn.Module]: a list of block module.
    """
    blocks = []
    for _ in range(num_blocks):
        conv2d = nn.Conv2d(kwargs["in_channels"],
                           kwargs["out_channels"],
                           kernel_size=3,
                           padding=1)
        blocks.append(conv2d)
        # Insert a norm layer between conv and ReLU only when requested.
        if kwargs["norm"]:
            blocks.append(get_norm(kwargs["norm"], kwargs["out_channels"]))
        blocks.append(nn.ReLU(inplace=True))
        # Subsequent convs in the stage consume the stage's output width.
        kwargs["in_channels"] = kwargs["out_channels"]
    pool_kernel, pool_stride, pool_padding, pool_ceil = kwargs["pool_args"]
    blocks.append(
        nn.MaxPool2d(kernel_size=pool_kernel,
                     stride=pool_stride,
                     padding=pool_padding,
                     ceil_mode=pool_ceil))
    return blocks
def __init__(self, cfg):
    """
    Frozen-backbone classification model (linear-probe style).

    Builds the backbone with a 3x3 stride-1 stem, freezes it, puts it in
    eval mode, and re-initializes only the final linear layer.

    Args:
        cfg: config node; reads MODEL.DEVICE, MODEL.RESNETS.NORM,
            MODEL.PIXEL_MEAN/STD and uses cfg.build_backbone.
    """
    super(Classification, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.network = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    # Small-image stem: 3x3 stride-1 instead of the 7x7 stride-2 default.
    self.network.stem = nn.Sequential(
        Conv2d(3,
               64,
               kernel_size=3,
               stride=1,
               padding=1,
               bias=False,
               norm=get_norm(cfg.MODEL.RESNETS.NORM, 64)),
        nn.ReLU(),
    )
    # Backbone weights are frozen; only the fc head is trained.
    self.freeze()
    self.network.eval()
    # init the fc layer
    self.network.linear.weight.data.normal_(mean=0.0, std=0.01)
    self.network.linear.bias.data.zero_()
    self.loss_evaluator = nn.CrossEntropyLoss()
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        1, 3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        1, 3, 1, 1)
    # Inputs are expected in [0, 255]; scale to [0, 1] before normalizing.
    self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(self, input_channels, output_channels, stride, expand_ratio, norm,
             activation, use_shortcut=True):
    """
    MobileNetV2 inverted residual block.

    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        stride (int): the stride of the current block.
        expand_ratio(int): the channel expansion ratio for `mid_channels`
            in InvertedResBlock.
        norm (str or callable): a callable that takes the number of channels
            and return a `nn.Module`, or a pre-defined string
            (See cvpods.layer.get_norm for more details).
        activation (str): a pre-defined string
            (See cvpods.layer.get_activation for more details).
        use_shortcut (bool): whether to use the residual path.
    """
    super(InvertedResBlock, self).__init__()
    self.stride = stride
    assert stride in [1, 2]
    mid_channels = int(round(input_channels * expand_ratio))
    self.use_shortcut = use_shortcut
    if self.use_shortcut:
        # A residual connection requires identical in/out shapes.
        assert stride == 1
        assert input_channels == output_channels

    # Each conv receives its own deepcopy so norm/activation modules
    # are not shared between layers.
    conv_cfg = {
        "norm": get_norm(norm, mid_channels),
        "activation": get_activation(activation)
    }

    blocks = []
    if expand_ratio > 1:
        # 1x1 pixel-wise expansion (non-linear).
        blocks.append(
            Conv2d(input_channels, mid_channels, 1, bias=False,
                   **deepcopy(conv_cfg)))
    # 3x3 depth-wise conv (non-linear), carries the block stride.
    blocks.append(
        Conv2d(mid_channels, mid_channels, 3, padding=1, bias=False,
               stride=stride, groups=mid_channels,
               **deepcopy(conv_cfg)))
    # 1x1 pixel-wise projection (linear: norm only, no activation).
    blocks.append(
        Conv2d(mid_channels, output_channels, 1, bias=False,
               norm=get_norm(norm, output_channels)))
    self.conv = nn.Sequential(*blocks)
def __init__(self, cfg):
    """
    SimSiam self-supervised model: encoder + projection head + predictor.

    Args:
        cfg: config node; reads MODEL.DEVICE, MODEL.BYOL.{PROJ,PRED,OUT}_DIM
            (NOTE(review): BYOL keys are reused for SimSiam — confirm this
            is intentional config sharing), SOLVER.* and MODEL.PIXEL_MEAN/STD.
    """
    super(SimSiam, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)
    self.proj_dim = cfg.MODEL.BYOL.PROJ_DIM
    self.pred_dim = cfg.MODEL.BYOL.PRED_DIM
    self.out_dim = cfg.MODEL.BYOL.OUT_DIM
    self.total_steps = cfg.SOLVER.LR_SCHEDULER.MAX_ITER * cfg.SOLVER.BATCH_SUBDIVISIONS
    # create the encoders
    # num_classes is the output fc dimension
    cfg.MODEL.RESNETS.NUM_CLASSES = self.out_dim
    self.encoder = cfg.build_backbone(
        cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
    # Small-image stem: 3x3 stride-1 instead of the 7x7 stride-2 default.
    self.encoder.stem = nn.Sequential(
        Conv2d(3,
               64,
               kernel_size=3,
               stride=1,
               padding=1,
               bias=False,
               norm=get_norm(cfg.MODEL.RESNETS.NORM, 64)),
        nn.ReLU(),
    )
    self.size_divisibility = self.encoder.size_divisibility
    dim_mlp = self.encoder.linear.weight.shape[1]
    # Projection Head (replaces the encoder's linear classifier)
    self.encoder.linear = nn.Sequential(
        nn.Linear(dim_mlp, self.proj_dim),
        nn.SyncBatchNorm(self.proj_dim),
        nn.ReLU(),
        nn.Linear(self.proj_dim, self.proj_dim),
        nn.SyncBatchNorm(self.proj_dim),
    )
    # Predictor
    self.predictor = nn.Sequential(
        nn.Linear(self.proj_dim, self.pred_dim),
        nn.SyncBatchNorm(self.pred_dim),
        nn.ReLU(),
        nn.Linear(self.pred_dim, self.out_dim),
    )
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        1, 3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        1, 3, 1, 1)
    # Inputs are expected in [0, 255]; scale to [0, 1] before normalizing.
    self.normalizer = lambda x: (x / 255.0 - pixel_mean) / pixel_std
    self.to(self.device)
def __init__(
    self,
    in_channels=3,
    out_channels=64,
    norm="BN",
    activation=None,
    deep_stem=False,
    stem_width=32,
):
    """
    ResNet "deep stem": three stacked 3x3 convs replacing the single 7x7
    (ResNet-D / ResNeSt style), followed by a stride-2 max pool.

    Args:
        in_channels (int): number of input image channels.
        out_channels (int): kept for interface compatibility.
            NOTE(review): the actual stem output width is ``stem_width * 2``,
            not ``out_channels`` — confirm callers pass consistent values.
        norm (str or callable): normalization spec for `get_norm`.
        activation: activation spec for `get_activation`.
        deep_stem (bool): kept for interface compatibility (unused here).
        stem_width (int): width of the first two stem convs.
    """
    super().__init__()
    # BUG FIX: the first conv previously hard-coded 3 input channels,
    # silently ignoring the `in_channels` parameter.
    self.conv1_1 = Conv2d(
        in_channels,
        stem_width,
        kernel_size=3,
        stride=2,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_2 = Conv2d(
        stem_width,
        stem_width,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_3 = Conv2d(
        stem_width,
        stem_width * 2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width * 2),
    )
    for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
        if layer is not None:
            weight_init.c2_msra_fill(layer)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self, in_channels, out_channels, stride=1, norm="BN", activation=None):
    """
    Basic residual block (two 3x3 convs) with an optional 1x1 projection
    shortcut when the channel count changes.

    NOTE(review): unlike the keyword-only BasicBlock variant in this file,
    this one performs no explicit weight initialization and calls
    ``super().__init__()`` with no arguments — presumably a different base
    class or deliberate reliance on default init; confirm.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        stride (int): stride for the first conv (and the shortcut).
        norm (str or callable): normalization spec for `get_norm`.
        activation: activation spec for `get_activation`.
    """
    super().__init__()
    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        # identity shortcut
        self.shortcut = None
    self.activation = get_activation(activation)
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    The following attributes are parsed from config:
        num_conv: the number of conv layers
        conv_dim: the dimension of the conv layers
        norm: normalization for the conv layers
    """
    super(MaskRCNNConvUpsampleHead, self).__init__()
    # fmt: off
    num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
    conv_dims = cfg.MODEL.ROI_MASK_HEAD.CONV_DIM
    self.norm = cfg.MODEL.ROI_MASK_HEAD.NORM
    num_conv = cfg.MODEL.ROI_MASK_HEAD.NUM_CONV
    input_channels = input_shape.channels
    cls_agnostic_mask = cfg.MODEL.ROI_MASK_HEAD.CLS_AGNOSTIC_MASK
    # fmt: on
    # conv tower: each conv is registered as "mask_fcnK" and kept in a
    # plain list for ordered iteration in forward / init.
    self.conv_norm_relus = []
    for k in range(num_conv):
        conv = Conv2d(
            input_channels if k == 0 else conv_dims,
            conv_dims,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=not self.norm,  # bias only when no norm layer follows
            norm=get_norm(self.norm, conv_dims),
            activation=F.relu,
        )
        self.add_module("mask_fcn{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
    # 2x upsampling deconv; consumes the raw input when num_conv == 0.
    self.deconv = ConvTranspose2d(
        conv_dims if num_conv > 0 else input_channels,
        conv_dims,
        kernel_size=2,
        stride=2,
        padding=0,
    )
    num_mask_classes = 1 if cls_agnostic_mask else num_classes
    self.predictor = Conv2d(conv_dims,
                            num_mask_classes,
                            kernel_size=1,
                            stride=1,
                            padding=0)
    for layer in self.conv_norm_relus + [self.deconv]:
        weight_init.c2_msra_fill(layer)
    # use normal distribution initialization for mask prediction layer
    nn.init.normal_(self.predictor.weight, std=0.001)
    if self.predictor.bias is not None:
        nn.init.constant_(self.predictor.bias, 0)
def __init__(self, input_channels, output_channels, norm, activation):
    """
    Stride-2 3x3 conv downsampling block (halves spatial resolution).

    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        norm (str or callable): normalization spec for `get_norm`.
        activation (str): activation spec for `get_activation`.
    """
    super().__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    # Fixed downsampling factor of 2.
    self.stride = 2
    self.conv = Conv2d(input_channels,
                       output_channels,
                       3,
                       stride=2,
                       padding=1,
                       bias=False,
                       norm=get_norm(norm, output_channels),
                       activation=get_activation(activation))
def __init__(self, cfg, input_shape: ShapeSpec):
    """
    The following attributes are parsed from config:
        num_conv, num_fc: the number of conv/fc layers
        conv_dim/fc_dim: the dimension of the conv/fc layers
        norm: normalization for the conv layers
    """
    super().__init__()
    # fmt: off
    num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV
    conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM
    num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC
    fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM
    norm = cfg.MODEL.ROI_BOX_HEAD.NORM
    # fmt: on
    assert num_conv + num_fc > 0
    # _output_size tracks the running (C, H, W) — or flat dim after FCs.
    self._output_size = (input_shape.channels, input_shape.height,
                         input_shape.width)
    self.conv_norm_relus = []
    for k in range(num_conv):
        conv = Conv2d(
            self._output_size[0],
            conv_dim,
            kernel_size=3,
            padding=1,
            bias=not norm,  # bias only when no norm layer follows
            norm=get_norm(norm, conv_dim),
            activation=F.relu,
        )
        self.add_module("conv{}".format(k + 1), conv)
        self.conv_norm_relus.append(conv)
        self._output_size = (conv_dim, self._output_size[1],
                             self._output_size[2])
    self.fcs = []
    for k in range(num_fc):
        # First FC flattens the (C, H, W) conv output.
        fc = nn.Linear(np.prod(self._output_size), fc_dim)
        self.add_module("fc{}".format(k + 1), fc)
        self.fcs.append(fc)
        self._output_size = fc_dim
    # Caffe2-style initialization: MSRA for convs, Xavier for FCs.
    for layer in self.conv_norm_relus:
        weight_init.c2_msra_fill(layer)
    for layer in self.fcs:
        weight_init.c2_xavier_fill(layer)
def __init__(self, in_channels, out_channels, norm="BN"):
    """
    Extra top block producing two additional pyramid levels from "stage8":
    a 1x1 projection conv followed by a stride-2 max pool.

    Args:
        in_channels (int): the number of input tensor channels.
        out_channels (int): the number of output tensor channels.
        norm (str): the normalization to use.
    """
    super().__init__()
    # This block contributes two levels on top of its input feature.
    self.num_levels = 2
    self.in_feature = "stage8"
    projection = Conv2d(in_channels,
                        out_channels,
                        kernel_size=1,
                        stride=1,
                        padding=0,
                        norm=get_norm(norm, out_channels),
                        activation=None)
    self.p6_conv = projection
    self.down_sampling = MaxPool2d(kernel_size=3, stride=2, padding="SAME")
def __init__(self,
             in_channels: int,
             out_channels: int,
             num_convs: int,
             kernel_size: int = 1,
             padding: int = 0,
             stride: int = 1,
             num_groups: int = 1,
             norm: str = "GN",
             gate_activation: str = "ReTanH",
             gate_activation_kargs: dict = None,
             depthwise: bool = False):
    """
    Spatially-gated dynamic conv stack.

    Args:
        in_channels (int): input channel count.
        out_channels (int): output channel count; must equal in_channels
            when ``depthwise`` is True.
        num_convs (int): number of conv+norm pairs in the stack.
        norm (str): normalization spec for `get_norm`.
        gate_activation / gate_activation_kargs: forwarded to `SpatialGate`.
        depthwise (bool): use per-channel (grouped) convolutions.
    """
    super(DynamicConv2D, self).__init__()
    if depthwise:
        assert in_channels == out_channels
    self.num_groups = num_groups
    self.norm = norm
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    self.depthwise = depthwise
    convs = []
    for _ in range(num_convs):
        convs += [
            nn.Conv2d(in_channels,
                      out_channels,
                      kernel_size,
                      stride=stride,
                      padding=padding,
                      groups=in_channels if depthwise else 1),
            # BUG FIX: the norm follows a conv that outputs `out_channels`
            # channels, so it must be built for out_channels (previously
            # in_channels, which only worked in the depthwise in==out case).
            get_norm(norm, out_channels)
        ]
        in_channels = out_channels
    self.convs = nn.Sequential(*convs)
    self.gate = SpatialGate(in_channels,
                            num_groups=num_groups,
                            kernel_size=kernel_size,
                            padding=padding,
                            stride=stride,
                            gate_activation=gate_activation,
                            gate_activation_kargs=gate_activation_kargs,
                            get_running_cost=self.get_running_cost)
    self.init_parameters()
def __init__(self, input_channels, output_channels, norm, activation):
    """
    Downsampling block: a single 3x3 conv with stride 2 (conv + norm +
    activation), halving the spatial resolution.

    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        norm (str or callable): a callable that takes the number of channels
            and return a `nn.Module`, or a pre-defined string (one of
            {"FrozenBN", "BN", "GN"}).
        activation (str): a pre-defined string
            (See cvpods.layer.get_activation for more details).
    """
    super().__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = 2  # fixed 2x spatial downsampling
    norm_module = get_norm(norm, output_channels)
    act_module = get_activation(activation)
    self.conv = Conv2d(input_channels,
                       output_channels,
                       3,
                       stride=2,
                       padding=1,
                       bias=False,
                       norm=norm_module,
                       activation=act_module)
def __init__(self, C_in, C_out, norm_layer, affine=True, input_size=None):
    """
    Identity op for NAS search spaces: a no-op when channel counts match,
    otherwise a 1x1 conv projection (with FLOPs accounting).

    Args:
        C_in (int): input channel count.
        C_out (int): output channel count.
        norm_layer (str or callable): normalization spec for `get_norm`.
        affine (bool): passed through to the FLOPs computation only.
        input_size: (H, W) of the input, required for FLOPs accounting
            when a projection conv is created.
    """
    super(Identity, self).__init__()
    if C_in == C_out:
        self.change = False
        self.flops = 0.0
    else:
        self.change = True
        self.op = Conv2d(C_in,
                         C_out,
                         kernel_size=1,
                         padding=0,
                         bias=False,
                         norm=get_norm(norm_layer, C_out))
        self.flops = self.get_flop([1, 1], 1, C_in, C_out, affine,
                                   input_size[0], input_size[1])
    # BUG FIX: `self.op` only exists when a projection was created; the
    # init loop previously ran unconditionally and raised AttributeError
    # in the pure-identity (C_in == C_out) case.
    if self.change:
        # using Kaiming init
        for m in self.op.modules():
            if isinstance(m, nn.Conv2d):
                weight_init.kaiming_init(m, mode='fan_in')
            elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]):
    """
    Simple semantic-segmentation decoder head with FLOPs accounting.

    Builds one 1x1 conv-bn-relu per input feature level (halving channels
    except for 'layer_0') and a final 3x3 predictor over 'layer_0'.

    Args:
        cfg: config node; reads MODEL.SEM_SEG_HEAD.* and MODEL.CAL_FLOPS.
        input_shape (Dict[str, ShapeSpec]): per-feature shapes, including
            height/width (used for FLOPs accounting).
    """
    super().__init__()
    # fmt: off
    self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES
    feature_strides = {k: v.stride for k, v in input_shape.items()}  # noqa:F841
    feature_channels = {k: v.channels for k, v in input_shape.items()}
    feature_resolution = {
        k: np.array([v.height, v.width])
        for k, v in input_shape.items()
    }
    self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
    num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES
    norm = cfg.MODEL.SEM_SEG_HEAD.NORM
    self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT
    self.cal_flops = cfg.MODEL.CAL_FLOPS
    self.real_flops = 0.0
    # fmt: on
    self.layer_decoder_list = nn.ModuleList()
    # set affine in BatchNorm
    if 'Sync' in norm:
        affine = True
    else:
        affine = False
    # use simple decoder
    for _feat in self.in_features:
        res_size = feature_resolution[_feat]
        in_channel = feature_channels[_feat]
        # 'layer_0' keeps its width; deeper features are halved.
        if _feat == 'layer_0':
            out_channel = in_channel
        else:
            out_channel = in_channel // 2
        conv_1x1 = Conv2d(in_channel,
                          out_channel,
                          kernel_size=1,
                          stride=1,
                          padding=0,
                          bias=False,
                          norm=get_norm(norm, out_channel),
                          activation=nn.ReLU())
        self.real_flops += cal_op_flops.count_ConvBNReLU_flop(
            res_size[0],
            res_size[1],
            in_channel,
            out_channel, [1, 1],
            is_affine=affine)
        self.layer_decoder_list.append(conv_1x1)
    # using Kaiming init
    for layer in self.layer_decoder_list:
        for m in layer.modules():
            if isinstance(m, nn.Conv2d):
                weight_init.kaiming_init(m, mode='fan_in')
            elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
                if m.weight is not None:
                    nn.init.constant_(m.weight, 1)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    in_channel = feature_channels['layer_0']
    # the output layer
    self.predictor = Conv2d(in_channels=in_channel,
                            out_channels=num_classes,
                            kernel_size=3,
                            stride=1,
                            padding=1)
    self.real_flops += cal_op_flops.count_Conv_flop(
        feature_resolution['layer_0'][0], feature_resolution['layer_0'][1],
        in_channel, num_classes, [3, 3])
    # using Kaiming init
    for m in self.predictor.modules():
        if isinstance(m, nn.Conv2d):
            weight_init.kaiming_init(m, mode='fan_in')
        elif isinstance(m, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            if m.weight is not None:
                nn.init.constant_(m.weight, 1)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
def __init__(
    self, bottom_up, in_features, out_channels, norm="", top_block=None, fuse_type="sum"
):
    """
    Args:
        bottom_up (Backbone): module representing the bottom up subnetwork.
            Must be a subclass of :class:`Backbone`. The multi-scale feature
            maps generated by the bottom up network, and listed in
            `in_features`, are used to generate FPN levels.
        in_features (list[str]): names of the input feature maps coming
            from the backbone to which FPN is attached. For example, if the
            backbone produces ["res2", "res3", "res4"], any *contiguous*
            sublist of these may be used; order must be from high to low
            resolution.
        out_channels (int): number of channels in the output feature maps.
        norm (str): the normalization to use.
        top_block (nn.Module or None): if provided, an extra operation will
            be performed on the output of the last (smallest resolution)
            FPN output, and the result will extend the result list. The
            top_block further downsamples the feature map. It must have an
            attribute "num_levels", meaning the number of extra FPN levels
            added by this block, and "in_feature", which is a string
            representing its input feature (e.g., p5).
        fuse_type (str): types for fusing the top down features and the
            lateral ones. It can be "sum" (default), which sums up
            element-wise; or "avg", which takes the element-wise mean of
            the two.
    """
    super(FPN, self).__init__()
    assert isinstance(bottom_up, Backbone)
    # Feature map strides and channels from the bottom up network (e.g. ResNet)
    input_shapes = bottom_up.output_shape()
    in_strides = [input_shapes[f].stride for f in in_features]
    # FIX: keep the per-level channel list under its own name instead of
    # shadowing it with the loop scalar (`for idx, in_channels in
    # enumerate(in_channels)` previously rebound the list mid-loop).
    in_channels_per_feature = [input_shapes[f].channels for f in in_features]
    _assert_strides_are_log2_contiguous(in_strides)
    lateral_convs = []
    output_convs = []
    # Convs keep a bias only when no norm layer is attached.
    use_bias = norm == ""
    for idx, in_channels in enumerate(in_channels_per_feature):
        lateral_norm = get_norm(norm, out_channels)
        output_norm = get_norm(norm, out_channels)
        # 1x1 lateral conv projects the backbone feature to out_channels;
        # 3x3 output conv smooths the fused top-down result.
        lateral_conv = Conv2d(
            in_channels, out_channels, kernel_size=1, bias=use_bias, norm=lateral_norm
        )
        output_conv = Conv2d(
            out_channels,
            out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=use_bias,
            norm=output_norm,
        )
        weight_init.c2_xavier_fill(lateral_conv)
        weight_init.c2_xavier_fill(output_conv)
        stage = int(math.log2(in_strides[idx]))
        self.add_module("fpn_lateral{}".format(stage), lateral_conv)
        self.add_module("fpn_output{}".format(stage), output_conv)
        lateral_convs.append(lateral_conv)
        output_convs.append(output_conv)
    # Place convs into top-down order (from low to high resolution)
    # to make the top-down computation in forward clearer.
    self.lateral_convs = lateral_convs[::-1]
    self.output_convs = output_convs[::-1]
    self.top_block = top_block
    self.in_features = in_features
    self.bottom_up = bottom_up
    # Return feature names are "p<stage>", like ["p2", "p3", ..., "p6"]
    self._out_feature_strides = {"p{}".format(int(math.log2(s))): s for s in in_strides}
    # top block output feature maps.
    if self.top_block is not None:
        for s in range(stage, stage + self.top_block.num_levels):
            self._out_feature_strides["p{}".format(s + 1)] = 2 ** (s + 1)
    self._out_features = list(self._out_feature_strides.keys())
    self._out_feature_channels = {k: out_channels for k in self._out_features}
    self._size_divisibility = in_strides[-1]
    assert fuse_type in {"avg", "sum"}
    self._fuse_type = fuse_type
def __init__(self, in_channels, channels, num_classes=None, dropout=False,
             out_features=None, norm="BN"):
    """
    Build a ShuffleNetV2 backbone. See: https://arxiv.org/pdf/1903.11752.pdf

    Args:
        in_channels (int): the input channel number.
        channels (list[int]): output channel numbers for stem and every stage;
            `channels[0]` is the stem width, `channels[1:4]` are the three
            stage widths, and an optional extra entry is the width of a final
            1x1 conv (see below).
        num_classes (None or int): if None, will not perform classification.
        dropout (bool): whether to use dropout before the classifier.
        out_features (list[str]): name of the layers whose outputs should be
            returned in forward. Can be anything in "stem", "linear", or
            "snet3" ... If None, will return the output of the last layer.
        norm (str or callable): a callable that takes the number of channels
            and return a `nn.Module`, or a pre-defined string (See
            cvpods.layer.get_norm for more details).
    """
    super(ShuffleNetV2, self).__init__()
    self.stage_out_channels = channels
    self.num_classes = num_classes

    # ---------------- Stem ---------------------- #
    # 3x3 stride-2 conv followed by a stride-2 max pool -> total stride 4.
    input_channels = self.stage_out_channels[0]
    self.stem = nn.Sequential(*[
        Conv2d(
            in_channels,
            input_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, input_channels),
            activation=nn.ReLU(inplace=True),
        ),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    ])

    # TODO: use a stem class and property stride
    current_stride = 4
    self._out_feature_strides = {"stem": current_stride}
    self._out_feature_channels = {"stem": input_channels}

    # ---------------- Stages --------------------- #
    # Standard ShuffleNetV2 repeat counts for stages 2/3/4 (named snet3..5).
    self.stage_num_blocks = [4, 8, 4]
    self.stages_and_names = []
    for i in range(len(self.stage_num_blocks)):
        num_blocks = self.stage_num_blocks[i]
        output_channels = self.stage_out_channels[i + 1]
        name = "snet" + str(i + 3)
        block_list = make_stage(num_blocks, input_channels, output_channels,
                                norm)
        # Accumulate the stride contributed by each block of this stage.
        current_stride = current_stride * np.prod(
            [block.stride for block in block_list])
        stages = nn.Sequential(*block_list)
        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = output_channels
        self.add_module(name, stages)
        self.stages_and_names.append((stages, name))
        input_channels = output_channels

    # An extra trailing entry in `channels` requests a final 1x1 conv that
    # widens the last stage output (e.g. the 1024-wide conv5 in the paper).
    if len(self.stage_out_channels) == len(self.stage_num_blocks) + 2:
        name = "snet" + str(len(self.stage_num_blocks) + 2) + "-last"
        last_output_channels = self.stage_out_channels[-1]
        last_conv = Conv2d(output_channels,
                           last_output_channels,
                           kernel_size=1,
                           bias=False,
                           norm=get_norm(norm, last_output_channels),
                           activation=nn.ReLU(inplace=True))
        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = last_output_channels
        self.add_module(name, last_conv)
        self.stages_and_names.append((last_conv, name))

    # ---------------- Classifer ------------------- #
    if num_classes is not None:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # NOTE(review): `self.dropout` is first the bool flag, then rebound
        # to an nn.Dropout module when enabled — callers must handle both.
        self.dropout = dropout
        if dropout:
            self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(self.stage_out_channels[-1],
                                    num_classes,
                                    bias=False)
        name = "linear"

    # Default output is the last layer built above ("linear" when
    # classifying, otherwise the last conv/stage name).
    self._out_features = [name] if out_features is None else out_features
    self._initialize_weights()
def __init__(self, in_channels=3, out_channels=64, norm="BN",
             activation=None, deep_stem=False, stem_width=32):
    """
    ResNet-style stem: a strided conv (or three stacked 3x3 convs for the
    "deep stem" variant) followed by a stride-2 max pool.

    Args:
        in_channels (int): number of channels of the input image.
        out_channels (int): output channels of the 7x7 conv (non-deep
            variant). The deep variant outputs ``stem_width * 2`` channels.
        norm (str or callable): a callable that takes the number of channels
            and return a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        activation: activation spec, resolved through `get_activation`.
        deep_stem (bool): if True, replace the single 7x7 conv with three
            3x3 convs (ResNet-D / ResNeSt style).
        stem_width (int): channel width of the first two deep-stem convs.
    """
    super().__init__()
    self.deep_stem = deep_stem

    if self.deep_stem:
        # BUGFIX: this conv previously hard-coded 3 input channels, silently
        # ignoring the `in_channels` parameter in the deep-stem path.
        self.conv1_1 = Conv2d(
            in_channels,
            stem_width,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_2 = Conv2d(
            stem_width,
            stem_width,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width),
        )
        self.conv1_3 = Conv2d(
            stem_width,
            stem_width * 2,
            kernel_size=3,
            stride=1,
            padding=1,
            bias=False,
            norm=get_norm(norm, stem_width * 2),
        )
        for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
            if layer is not None:
                weight_init.c2_msra_fill(layer)
    else:
        self.conv1 = Conv2d(
            in_channels,
            out_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
        weight_init.c2_msra_fill(self.conv1)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    deform_modulated=False,
    deform_num_groups=1,
):
    """
    Bottleneck residual block whose 3x3 convolution is a (modulated)
    deformable convolution; otherwise identical to :class:`BottleneckBlock`.
    """
    super().__init__(in_channels, out_channels, stride)
    self.deform_modulated = deform_modulated

    # Projection shortcut only when the channel count changes.
    self.shortcut = (
        None
        if in_channels == out_channels
        else Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    )

    # Decide which of the two pointwise/depth stages carries the stride.
    if stride_in_1x1:
        first_stride, middle_stride = stride, 1
    else:
        first_stride, middle_stride = 1, stride

    self.activation = get_activation(activation)

    # 1x1 reduce.
    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=first_stride,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )

    # Offset channels per group: 2 (x, y) or 3 (x, y, mask) per kernel tap
    # of the 3x3 kernel, depending on modulation.
    if deform_modulated:
        deform_op, offset_ch = ModulatedDeformConv, 27
    else:
        deform_op, offset_ch = DeformConv, 18

    # Plain conv predicting the sampling offsets (and masks, if modulated).
    self.conv2_offset = Conv2d(
        bottleneck_channels,
        offset_ch * deform_num_groups,
        kernel_size=3,
        stride=middle_stride,
        padding=1 * dilation,
        dilation=dilation,
    )
    # The deformable 3x3 conv itself.
    self.conv2 = deform_op(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=middle_stride,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        deformable_groups=deform_num_groups,
        norm=get_norm(norm, bottleneck_channels),
    )

    # 1x1 expand.
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    # MSRA-fill every conv (shortcut may be absent); the offset predictor
    # starts at zero so the block initially behaves like a regular conv.
    for module in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if module is not None:
            weight_init.c2_msra_fill(module)
    nn.init.constant_(self.conv2_offset.weight, 0)
    nn.init.constant_(self.conv2_offset.bias, 0)