def __init__(self, in_channels=3, out_channels=64, norm="BN", activation=None):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__()
    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=7,
        stride=2,
        padding=3,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    weight_init.c2_msra_fill(self.conv1)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self, in_channels, out_channels, *, stride=1, norm="BN", activation=None, **kwargs):
    """
    The standard residual block for ResNet-18 and ResNet-34.

    Args:
        in_channels (int): number of input channels.
        out_channels (int): number of output channels.
        stride (int): stride for the first conv.
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
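# The forward pass of this block is not shown above. A minimal sketch of
# what it presumably looks like, following the usual detectron2/cvpods
# BasicBlock convention (activation after each conv and after the residual
# add); treat this as an assumption, not the verbatim library code.
def forward(self, x):
    out = self.conv1(x)
    out = self.activation(out)
    out = self.conv2(out)

    if self.shortcut is not None:
        shortcut = self.shortcut(x)
    else:
        shortcut = x

    out += shortcut
    out = self.activation(out)
    return out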
def __init__(self, input_channels, output_channels, stride, expand_ratio,
             norm, activation, use_shortcut=True):
    """
    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        stride (int): the stride of the current block.
        expand_ratio (int): the channel expansion ratio for `mid_channels`
            in InvertedResBlock.
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (see cvpods.layer.get_norm for more details).
        activation (str): a pre-defined string
            (see cvpods.layer.get_activation for more details).
        use_shortcut (bool): whether to use the residual path.
    """
    super(InvertedResBlock, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    mid_channels = int(round(input_channels * expand_ratio))
    self.use_shortcut = use_shortcut
    if self.use_shortcut:
        assert stride == 1
        assert input_channels == output_channels

    conv_kwargs = {
        "norm": get_norm(norm, mid_channels),
        "activation": get_activation(activation),
    }
    layers = []
    if expand_ratio > 1:
        layers.append(
            Conv2d(input_channels, mid_channels, 1, bias=False,  # pixel-wise non-linear
                   **deepcopy(conv_kwargs))
        )
    layers += [
        Conv2d(mid_channels, mid_channels, 3, padding=1, bias=False,  # depth-wise 3x3
               stride=stride, groups=mid_channels,
               **deepcopy(conv_kwargs)),
        Conv2d(mid_channels, output_channels, 1, bias=False,  # pixel-wise linear
               norm=get_norm(norm, output_channels)),
    ]
    self.conv = nn.Sequential(*layers)
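# The forward pass is not shown here. For a MobileNetV2 inverted residual
# it is presumably just a gated residual add; a sketch assuming the
# standard behavior (as in torchvision's MobileNetV2), not verbatim
# cvpods code:
def forward(self, x):
    if self.use_shortcut:
        return x + self.conv(x)
    return self.conv(x)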
def __init__(
    self,
    in_channels=3,
    out_channels=64,
    norm="BN",
    activation=None,
    deep_stem=False,
    stem_width=32,
):
    super().__init__()
    self.conv1_1 = Conv2d(
        in_channels,
        stem_width,
        kernel_size=3,
        stride=2,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_2 = Conv2d(
        stem_width,
        stem_width,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width),
    )
    self.conv1_3 = Conv2d(
        stem_width,
        stem_width * 2,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, stem_width * 2),
    )
    for layer in [self.conv1_1, self.conv1_2, self.conv1_3]:
        if layer is not None:
            weight_init.c2_msra_fill(layer)
    self.activation = get_activation(activation)
    self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(self, input_channels, output_channels, norm, activation):
    """
    Args:
        input_channels (int): the input channel number.
        output_channels (int): the output channel number.
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        activation (str): a pre-defined string
            (see cvpods.layer.get_activation for more details).
    """
    super().__init__()
    self.input_channels = input_channels
    self.output_channels = output_channels
    self.stride = 2
    self.conv = Conv2d(
        input_channels,
        output_channels,
        3,
        stride=2,
        padding=1,
        bias=False,
        norm=get_norm(norm, output_channels),
        activation=get_activation(activation),
    )
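# The forward pass for this stem is presumably a single strided conv;
# a sketch under that assumption (the actual cvpods stem may differ):
def forward(self, x):
    return self.conv(x)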
def __init__(self, in_channels, out_channels, stride=1, norm="BN", activation=None):
    super().__init__()

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
    self.conv2 = Conv2d(
        out_channels,
        out_channels,
        kernel_size=3,
        stride=1,
        padding=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )
def __init__(self, in_channels=3, out_channels=64, norm="BN", activation=None, deep_stem=False, stem_width=32): """ Args: norm (str or callable): a callable that takes the number of channels and return a `nn.Module`, or a pre-defined string (one of {"FrozenBN", "BN", "GN"}). """ super().__init__() self.deep_stem = deep_stem if self.deep_stem: self.conv1_1 = Conv2d( 3, stem_width, kernel_size=3, stride=2, padding=1, bias=False, norm=get_norm(norm, stem_width), ) self.conv1_2 = Conv2d( stem_width, stem_width, kernel_size=3, stride=1, padding=1, bias=False, norm=get_norm(norm, stem_width), ) self.conv1_3 = Conv2d( stem_width, stem_width * 2, kernel_size=3, stride=1, padding=1, bias=False, norm=get_norm(norm, stem_width * 2), ) for layer in [self.conv1_1, self.conv1_2, self.conv1_3]: if layer is not None: weight_init.c2_msra_fill(layer) else: self.conv1 = Conv2d( in_channels, out_channels, kernel_size=7, stride=2, padding=3, bias=False, norm=get_norm(norm, out_channels), ) weight_init.c2_msra_fill(self.conv1) self.activation = get_activation(activation) self.max_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    deform_modulated=False,
    deform_num_groups=1,
):
    """
    Similar to :class:`BottleneckBlock`, but with deformable conv in the
    3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)
    self.deform_modulated = deform_modulated

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )

    if deform_modulated:
        deform_conv_op = ModulatedDeformConv
        # offset channels are 2 or 3 (if with modulated) * kernel_size * kernel_size
        offset_channels = 27
    else:
        deform_conv_op = DeformConv
        offset_channels = 18

    self.conv2_offset = Conv2d(
        bottleneck_channels,
        offset_channels * deform_num_groups,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        dilation=dilation,
    )
    self.conv2 = deform_conv_op(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        deformable_groups=deform_num_groups,
        norm=get_norm(norm, bottleneck_channels),
    )

    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)

    nn.init.constant_(self.conv2_offset.weight, 0)
    nn.init.constant_(self.conv2_offset.bias, 0)
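# The forward pass (not shown) presumably follows the detectron2
# DeformBottleneckBlock pattern: conv2_offset predicts the sampling
# offsets (plus a sigmoid modulation mask when deform_modulated), which
# are fed to the deformable conv2. A hedged sketch, not verbatim code:
def forward(self, x):
    out = self.conv1(x)
    out = self.activation(out)

    if self.deform_modulated:
        offset_mask = self.conv2_offset(out)
        offset_x, offset_y, mask = torch.chunk(offset_mask, 3, dim=1)
        offset = torch.cat((offset_x, offset_y), dim=1)
        mask = mask.sigmoid()
        out = self.conv2(out, offset, mask)
    else:
        offset = self.conv2_offset(out)
        out = self.conv2(out, offset)
    out = self.activation(out)

    out = self.conv3(out)

    shortcut = self.shortcut(x) if self.shortcut is not None else x
    out += shortcut
    out = self.activation(out)
    return out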
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
    """
    super().__init__(in_channels, out_channels, stride)

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv.
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
    # have stride in the 3x3 conv.
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = Conv2d(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        padding=1 * dilation,
        bias=False,
        groups=num_groups,
        dilation=dilation,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
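# Hedged sketch of the usual bottleneck forward (1x1 -> 3x3 -> 1x1, with
# activation between convs and after the residual add), assuming the
# cvpods convention of a configurable self.activation; not verbatim code:
def forward(self, x):
    out = self.conv1(x)
    out = self.activation(out)
    out = self.conv2(out)
    out = self.activation(out)
    out = self.conv3(out)

    shortcut = self.shortcut(x) if self.shortcut is not None else x
    out += shortcut
    out = self.activation(out)
    return out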
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    dilation=1,
    avd=False,
    avg_down=False,
    radix=1,
    bottleneck_width=64,
):
    """
    Args:
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (one of {"FrozenBN", "BN", "GN"}).
        stride_in_1x1 (bool): when stride==2, whether to put stride in the
            first 1x1 convolution or the bottleneck 3x3 convolution.
        avd (bool): whether to downsample with an AvgPool2d layer instead of
            striding the 3x3 conv (ResNeSt).
        avg_down (bool): whether to downsample the shortcut with an
            AvgPool2d before its 1x1 conv (ResNeSt).
        radix (int): number of splits in the split-attention conv; radix > 1
            switches the 3x3 conv to `SplAtConv2d`.
        bottleneck_width (int): base width used to compute the grouped
            channel count.
    """
    super().__init__(in_channels, out_channels, stride)
    self.avd = avd and (stride > 1)
    self.avg_down = avg_down
    self.radix = radix

    cardinality = num_groups
    group_width = int(bottleneck_channels * (bottleneck_width / 64.)) * cardinality

    if in_channels != out_channels:
        if self.avg_down:
            self.shortcut_avgpool = nn.AvgPool2d(
                kernel_size=stride, stride=stride,
                ceil_mode=True, count_include_pad=False)
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=1,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
        else:
            self.shortcut = Conv2d(
                in_channels,
                out_channels,
                kernel_size=1,
                stride=stride,
                bias=False,
                norm=get_norm(norm, out_channels),
            )
    else:
        self.shortcut = None

    # The original MSRA ResNet models have stride in the first 1x1 conv.
    # The subsequent fb.torch.resnet and Caffe2 ResNe[X]t implementations
    # have stride in the 3x3 conv.
    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    # conv1 outputs group_width channels (== bottleneck_channels when
    # num_groups == 1 and bottleneck_width == 64) so that it matches its
    # norm layer and the input of conv2.
    self.conv1 = Conv2d(
        in_channels,
        group_width,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, group_width),
    )

    if self.radix > 1:
        from .splat import SplAtConv2d

        self.conv2 = SplAtConv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=1 if self.avd else stride_3x3,
            padding=dilation,
            dilation=dilation,
            groups=cardinality,
            bias=False,
            radix=self.radix,
            norm=norm,
        )
    else:
        self.conv2 = Conv2d(
            group_width,
            group_width,
            kernel_size=3,
            stride=stride_3x3,
            padding=1 * dilation,
            bias=False,
            groups=num_groups,
            dilation=dilation,
            norm=get_norm(norm, group_width),
        )

    if self.avd:
        self.avd_layer = nn.AvgPool2d(3, stride, padding=1)

    self.conv3 = Conv2d(
        group_width,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    if self.radix > 1:
        for layer in [self.conv1, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
    else:
        for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
            if layer is not None:  # shortcut can be None
                weight_init.c2_msra_fill(layer)
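# Hedged sketch of the ResNeSt-style forward, assuming SplAtConv2d applies
# its own norm/activation internally when radix > 1, avd inserts the
# AvgPool2d after conv2, and the avg_down shortcut pools before its 1x1
# conv. An assumption about unseen code, not a verbatim implementation:
def forward(self, x):
    out = self.conv1(x)
    out = self.activation(out)

    out = self.conv2(out)
    if self.radix == 1:
        out = self.activation(out)
    if self.avd:
        out = self.avd_layer(out)

    out = self.conv3(out)

    if self.shortcut is not None:
        if self.avg_down:
            x = self.shortcut_avgpool(x)
        shortcut = self.shortcut(x)
    else:
        shortcut = x

    out += shortcut
    out = self.activation(out)
    return out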
def __init__(
    self,
    in_channels,
    out_channels,
    *,
    bottleneck_channels,
    stride=1,
    num_groups=1,
    norm="BN",
    activation=None,
    stride_in_1x1=False,
    num_branch=3,
    dilations=(1, 2, 3),
    concat_output=False,
    test_branch_idx=-1,
):
    """
    Args:
        num_branch (int): the number of branches in TridentNet.
        dilations (tuple): the dilations of multiple branches in TridentNet.
        concat_output (bool): whether to concatenate the outputs of the
            multiple branches in TridentNet. Use 'True' for the last
            trident block.
        test_branch_idx (int): the index of the branch used during
            inference; -1 to use all branches.
    """
    super().__init__(in_channels, out_channels, stride)
    assert num_branch == len(dilations)

    self.num_branch = num_branch
    self.concat_output = concat_output
    self.test_branch_idx = test_branch_idx

    if in_channels != out_channels:
        self.shortcut = Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=stride,
            bias=False,
            norm=get_norm(norm, out_channels),
        )
    else:
        self.shortcut = None

    stride_1x1, stride_3x3 = (stride, 1) if stride_in_1x1 else (1, stride)

    self.activation = get_activation(activation)

    self.conv1 = Conv2d(
        in_channels,
        bottleneck_channels,
        kernel_size=1,
        stride=stride_1x1,
        bias=False,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv2 = TridentConv(
        bottleneck_channels,
        bottleneck_channels,
        kernel_size=3,
        stride=stride_3x3,
        paddings=dilations,
        bias=False,
        groups=num_groups,
        dilations=dilations,
        num_branch=num_branch,
        test_branch_idx=test_branch_idx,
        norm=get_norm(norm, bottleneck_channels),
    )
    self.conv3 = Conv2d(
        bottleneck_channels,
        out_channels,
        kernel_size=1,
        bias=False,
        norm=get_norm(norm, out_channels),
    )

    for layer in [self.conv1, self.conv2, self.conv3, self.shortcut]:
        if layer is not None:  # shortcut can be None
            weight_init.c2_msra_fill(layer)
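# The forward pass (not shown) presumably follows the TridentNet pattern:
# the input is broadcast into one copy per branch, the shared-weight
# TridentConv processes all branches, and the branch outputs are
# optionally concatenated along the batch dimension. A hedged sketch
# adapted to this class's self.activation; not verbatim library code:
def forward(self, x):
    num_branch = self.num_branch if self.training or self.test_branch_idx == -1 else 1
    if not isinstance(x, list):
        x = [x] * num_branch

    out = [self.conv1(b) for b in x]
    out = [self.activation(b) for b in out]
    out = self.conv2(out)  # TridentConv takes/returns a list of tensors
    out = [self.activation(b) for b in out]
    out = [self.conv3(b) for b in out]

    if self.shortcut is not None:
        shortcut = [self.shortcut(b) for b in x]
    else:
        shortcut = x

    out = [o + s for o, s in zip(out, shortcut)]
    out = [self.activation(b) for b in out]
    if self.concat_output:
        out = torch.cat(out)
    return out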
def __init__(
    self,
    stem,
    inverted_residual_setting,
    norm,
    activation,
    num_classes=None,
    out_features=None,
):
    """
    See: https://arxiv.org/pdf/1801.04381.pdf

    Args:
        stem (nn.Module): a stem module.
        inverted_residual_setting (list of list): network structure
            (see https://arxiv.org/pdf/1801.04381.pdf Table 2).
        norm (str or callable): a callable that takes the number of channels
            and returns a `nn.Module`, or a pre-defined string
            (see cvpods.layer.get_norm for more details).
        activation (str): a pre-defined string
            (see cvpods.layer.get_activation for more details).
        num_classes (None or int): if None, will not perform classification.
        out_features (list[str]): name of the layers whose outputs should be
            returned in forward. Can be anything in "stem", "linear", or
            "mobile3", "mobile4", ... (stage names are "mobile" followed by
            log2 of the current stride). If None, will return the output of
            the last layer.
    """
    super(MobileNetV2, self).__init__()
    self.num_classes = num_classes

    # only check the first element, assuming user knows t, c, n, s are required
    if len(inverted_residual_setting[0]) != 4:
        raise ValueError(
            "inverted_residual_setting should be a list of 4-element lists, "
            "got {}".format(inverted_residual_setting))

    self.stem = stem
    self.last_channel = 1280

    input_channels = stem.output_channels
    current_stride = stem.stride

    self._out_feature_strides = {"stem": current_stride}
    self._out_feature_channels = {"stem": input_channels}

    # ---------------- Stages --------------------- #
    ext = 0
    self.stages_and_names = []
    for i, (t, c, n, s) in enumerate(inverted_residual_setting):
        # t: expand ratio
        # c: output channels
        # n: block number
        # s: stride
        # See https://arxiv.org/pdf/1801.04381.pdf Table 2 for more details
        if s == 1 and i > 0:
            ext += 1
        else:
            ext = 0

        current_stride *= s
        assert int(np.log2(current_stride)) == np.log2(current_stride)

        name = "mobile" + str(int(np.log2(current_stride)))
        if ext != 0:
            name += "-{}".format(ext + 1)

        stage = nn.Sequential(
            *make_stage(n, input_channels, c, s, t, norm, activation))

        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = c

        self.add_module(name, stage)
        self.stages_and_names.append((stage, name))
        input_channels = c

    name = "mobile" + str(int(np.log2(current_stride))) + "-last"
    stage = Conv2d(input_channels, self.last_channel, kernel_size=1,
                   bias=False, norm=get_norm("BN", self.last_channel),
                   activation=get_activation(activation))
    self.stages_and_names.append((stage, name))
    self.add_module(name, stage)
    self._out_feature_strides[name] = current_stride
    self._out_feature_channels[name] = self.last_channel

    # ---------------- Classifier ------------------- #
    if num_classes is not None:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(self.last_channel, num_classes)
        name = "linear"

    self._out_features = [name] if out_features is None else out_features

    self._initialize_weights()
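# The forward pass is not shown. A hedged sketch following the common
# cvpods backbone pattern (a dict of named feature maps, plus an optional
# classification head); an assumption, not verbatim library code:
def forward(self, x):
    outputs = {}
    x = self.stem(x)
    if "stem" in self._out_features:
        outputs["stem"] = x
    for stage, name in self.stages_and_names:
        x = stage(x)
        if name in self._out_features:
            outputs[name] = x
    if self.num_classes is not None:
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.classifier(x)
        if "linear" in self._out_features:
            outputs["linear"] = x
    return outputs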
def __init__(self, stem, inverted_residual_setting, norm, activation,
             num_classes=None, out_features=None, init_model=False):
    """
    MobileNet V2 main class.

    Args:
        stem (nn.Module): a stem module.
        inverted_residual_setting (list of list): network structure
            (see https://arxiv.org/pdf/1801.04381.pdf Table 2).
        num_classes (int): number of classes; if None, no classifier is built.
        init_model (bool): whether to run `_initialize_weights` after building.
    """
    super(MobileNetV2, self).__init__()
    self.num_classes = num_classes

    # only check the first element, assuming user knows t, c, n, s are required
    if len(inverted_residual_setting[0]) != 4:
        raise ValueError(
            "inverted_residual_setting should be a list of 4-element lists, "
            "got {}".format(inverted_residual_setting))

    self.stem = stem
    self.last_channel = 1280

    input_channels = stem.output_channels
    current_stride = stem.stride

    self._out_feature_strides = {"stem": current_stride}
    self._out_feature_channels = {"stem": input_channels}

    # ---------------- Stages --------------------- #
    ext = 0
    self.stages_and_names = []
    for i, (t, c, n, s) in enumerate(inverted_residual_setting):
        # t: expand ratio
        # c: output channels
        # n: block number
        # s: stride
        # See https://arxiv.org/pdf/1801.04381.pdf Table 2 for more details
        if s == 1 and i > 0:
            ext += 1
        else:
            ext = 0

        current_stride *= s
        assert int(np.log2(current_stride)) == np.log2(current_stride)

        name = "mobile" + str(int(np.log2(current_stride)))
        if ext != 0:
            name += "-{}".format(ext + 1)

        stage = nn.Sequential(
            *make_stage(n, input_channels, c, s, t, norm, activation))

        self._out_feature_strides[name] = current_stride
        self._out_feature_channels[name] = c

        self.add_module(name, stage)
        self.stages_and_names.append((stage, name))
        input_channels = c

    name = "mobile" + str(int(np.log2(current_stride))) + "-last"
    stage = Conv2d(input_channels, self.last_channel, kernel_size=1,
                   bias=False, norm=get_norm("BN", self.last_channel),
                   activation=get_activation(activation))
    self.stages_and_names.append((stage, name))
    self.add_module(name, stage)
    self._out_feature_strides[name] = current_stride
    self._out_feature_channels[name] = self.last_channel

    # ---------------- Classifier ------------------- #
    if num_classes is not None:
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(self.last_channel, num_classes)
        name = "linear"

    self._out_features = [name] if out_features is None else out_features

    if init_model:
        self._initialize_weights()
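# _initialize_weights is referenced above but not shown. A hedged sketch of
# the usual MobileNetV2 initialization (Kaiming for convs, ones/zeros for
# norm layers, small normal for the classifier), as in torchvision; the
# actual cvpods implementation may differ:
def _initialize_weights(self):
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode="fan_out")
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.ones_(m.weight)
            nn.init.zeros_(m.bias)
        elif isinstance(m, nn.Linear):
            nn.init.normal_(m.weight, 0, 0.01)
            nn.init.zeros_(m.bias)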