def __init__(
    self,
    in_channels,
    out_channels,
    dw_kernel_size=3,
    stride=1,
    dilation=1,
    use_se=False,
    expand_ratio=1.0,  # expansion ratio
    keep_prob=1,  # drop connect param
    noskip=False,
    norm_layer=ABN,
    norm_act="relu",
):
    super().__init__()
    mid_chs = make_divisible(in_channels * expand_ratio)
    self.has_residual = (in_channels == out_channels and stride == 1) and not noskip
    self.has_expansion = expand_ratio != 1
    if self.has_expansion:
        # 1x1 pointwise expansion
        self.conv_pw = conv1x1(in_channels, mid_chs)
        self.bn1 = norm_layer(mid_chs, activation=norm_act)
    # depthwise conv; padding preserves spatial size for stride 1
    self.conv_dw = nn.Conv2d(
        mid_chs,
        mid_chs,
        dw_kernel_size,
        stride=stride,
        groups=mid_chs,
        dilation=dilation,
        bias=False,
        padding=dilation * (dw_kernel_size - 1) // 2,
    )
    self.bn2 = norm_layer(mid_chs, activation=norm_act)
    # some models like MobileNet use mid_chs instead of in_channels for the SE reduction; not handled here
    self.se = SEModule(mid_chs, in_channels // 4, norm_act) if use_se else nn.Identity()
    # linear 1x1 projection back to out_channels
    self.conv_pw1 = conv1x1(mid_chs, out_channels)
    self.bn3 = norm_layer(out_channels, activation="identity")
    self.drop_connect = DropConnect(keep_prob) if keep_prob < 1 else nn.Identity()
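# A minimal sketch of the forward pass implied by the modules registered above
# (an assumption for illustration, not necessarily the exact upstream implementation):
# optional pointwise expansion, depthwise conv, squeeze-and-excite, linear pointwise
# projection, then a drop-connected residual sum when input and output shapes match.
def forward(self, x):
    residual = x
    if self.has_expansion:
        x = self.bn1(self.conv_pw(x))  # 1x1 expansion + activated norm
    x = self.bn2(self.conv_dw(x))  # depthwise conv + activated norm
    x = self.se(x)  # squeeze-and-excite (or identity)
    x = self.bn3(self.conv_pw1(x))  # linear 1x1 projection (identity activation)
    if self.has_residual:
        x = self.drop_connect(x) + residual
    return x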
def __init__(
    self,
    blocks_args=None,
    width_multiplier=None,
    depth_multiplier=None,
    pretrained=None,  # not used; kept for a consistent signature
    num_classes=1000,
    in_channels=3,
    output_stride=32,
    encoder=False,
    drop_rate=0,
    drop_connect_rate=0,
    stem_size=32,
    norm_layer="abn",
    norm_act="swish",
    match_tf_same_padding=False,
):
    super().__init__()
    norm_layer = bn_from_name(norm_layer)
    self.norm_layer = norm_layer
    self.norm_act = norm_act
    self.width_multiplier = width_multiplier
    self.depth_multiplier = depth_multiplier

    # Stem
    stem_size = make_divisible(stem_size * width_multiplier)
    self.conv_stem = conv3x3(in_channels, stem_size, stride=2)
    self.bn1 = norm_layer(stem_size, activation=norm_act)
    in_channels = stem_size

    self.blocks = nn.ModuleList([])
    # modify block args to account for output_stride strategy
    blocks_args = _patch_block_args(blocks_args, output_stride)
    for block_idx, block_arg in enumerate(blocks_args):
        block = []
        block_arg["in_channels"] = make_divisible(block_arg["in_channels"] * self.width_multiplier)
        block_arg["out_channels"] = make_divisible(block_arg["out_channels"] * self.width_multiplier)
        block_arg["norm_layer"] = norm_layer
        block_arg["norm_act"] = norm_act
        # linearly scale keep prob
        block_arg["keep_prob"] = 1 - drop_connect_rate * block_idx / len(blocks_args)
        repeats = block_arg.pop("num_repeat")
        repeats = int(math.ceil(repeats * self.depth_multiplier))
        # when dilating a conv with stride 2 we want it to have dilation // 2;
        # this prevents checkerboard artifacts with OS=16 and OS=8
        dilation = block_arg.get("dilation", 1)  # save dilation for the remaining layers in this block
        if block_arg.pop("no_first_dilation", False):
            block_arg["dilation"] = max(1, block_arg["dilation"] // 2)
        block.append(InvertedResidual(**block_arg))
        # only the first layer in a block is strided
        block_arg["stride"] = 1
        block_arg["dilation"] = dilation
        block_arg["in_channels"] = block_arg["out_channels"]
        for _ in range(repeats - 1):
            block.append(InvertedResidual(**block_arg))
        self.blocks.append(nn.Sequential(*block))

    # Head
    if encoder:
        self.forward = self.encoder_features
    else:
        out_channels = block_arg["out_channels"]
        num_features = make_divisible(1280 * width_multiplier)
        self.conv_head = conv1x1(out_channels, num_features)
        self.bn2 = norm_layer(num_features, activation=norm_act)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(drop_rate, inplace=True)
        self.classifier = nn.Linear(num_features, num_classes)

    patch_bn(self)  # adjust BN epsilon
    initialize(self)
    if match_tf_same_padding:
        conv_to_same_conv(self)
        maxpool_to_same_maxpool(self)
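# A minimal sketch of the classifier path implied by the head built above (an
# assumption for illustration; the upstream code may factor this into separate
# `features`/`logits` helpers). In encoder mode `self.forward` is rebound to
# `encoder_features`, so this path only applies when the model is built as a classifier.
def forward(self, x):
    x = self.bn1(self.conv_stem(x))  # strided 3x3 stem + activated norm
    for block in self.blocks:  # stacked InvertedResidual stages
        x = block(x)
    x = self.bn2(self.conv_head(x))  # 1x1 head conv + activated norm
    x = self.global_pool(x).flatten(1)  # global average pool -> (N, num_features)
    x = self.dropout(x)
    return self.classifier(x)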