def forward(self, x, reduction=None):
    assert reduction is not None, "users should give reduction"
    if reduction == 0:
        return x
    in_channel = x.size(1)
    num_mid = make_divisible(int(in_channel // reduction), divisor=8)
    if self.weight_sharing:
        # squeeze: global average pool
        y = x.mean(3, keepdim=True).mean(2, keepdim=True)
        # reduce: slice the shared weights down to the active channel count
        reduce_conv = self.fc.reduce
        reduce_filter = reduce_conv.weight[:num_mid, :in_channel, :, :].contiguous()
        reduce_bias = reduce_conv.bias[:num_mid] if reduce_conv.bias is not None else None
        y = F.conv2d(y, reduce_filter, reduce_bias, 1, 0, 1, 1)
        # relu
        y = self.fc.relu(y)
        # expand
        expand_conv = self.fc.expand
        expand_filter = expand_conv.weight[:in_channel, :num_mid, :, :].contiguous()
        expand_bias = expand_conv.bias[:in_channel] if expand_conv.bias is not None else None
        y = F.conv2d(y, expand_filter, expand_bias, 1, 0, 1, 1)
        # hard sigmoid
        y = self.fc.h_sigmoid(y)
    else:
        assert in_channel in self.channel_list, "in_channel should be in channel_list"
        assert reduction in self.reduction_list, "reduction should be in reduction_list"
        name = "{}_{}".format(in_channel, reduction)
        # squeeze, then run the dedicated SE branch for this configuration
        y = x.mean(3, keepdim=True).mean(2, keepdim=True)
        y = self.fc[name](y)
    return x * y
def __init__(self, channel_list, reduction_list, weight_sharing=True):
    super(DynamicSE, self).__init__()
    self.channel_list = channel_list
    self.reduction_list = reduction_list
    self.max_channel = max(self.channel_list)
    self.min_reduction = min([i for i in self.reduction_list if i > 0])
    self.weight_sharing = weight_sharing
    if weight_sharing:
        # one SE block sized for the largest channel / smallest reduction;
        # smaller configurations slice into these weights at forward time
        num_mid = make_divisible(int(self.max_channel // self.min_reduction), divisor=8)
        self.fc = nn.Sequential(OrderedDict([
            ('reduce', nn.Conv2d(self.max_channel, num_mid, 1, 1, 0, bias=True)),
            ('relu', nn.ReLU(inplace=True)),
            ('expand', nn.Conv2d(num_mid, self.max_channel, 1, 1, 0, bias=True)),
            ('h_sigmoid', Hsigmoid(inplace=True)),
        ]))
    else:
        # one independent SE branch per (channel, reduction) pair
        self.fc = nn.ModuleDict()
        for _channel in channel_list:
            for _reduction in reduction_list:
                if _reduction == 0:
                    continue
                num_mid = make_divisible(int(_channel // _reduction), divisor=8)
                name = "{}_{}".format(_channel, _reduction)
                self.fc[name] = nn.Sequential(OrderedDict([
                    ('reduce', nn.Conv2d(_channel, num_mid, 1, 1, 0, bias=True)),
                    ('relu', nn.ReLU(inplace=True)),
                    ('expand', nn.Conv2d(num_mid, _channel, 1, 1, 0, bias=True)),
                    ('h_sigmoid', Hsigmoid(inplace=True)),
                ]))
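# A minimal, self-contained sketch (not part of this repo; all names below are
# illustrative) of the weight-sharing idea used by DynamicSE.forward above: one
# SE block is built for the largest channel count, and smaller sub-networks
# reuse a slice of its weights.
#
#   import torch
#   import torch.nn as nn
#   import torch.nn.functional as F
#
#   max_channel, max_mid = 64, 16
#   reduce = nn.Conv2d(max_channel, max_mid, 1, bias=True)   # shared "reduce" conv
#   expand = nn.Conv2d(max_mid, max_channel, 1, bias=True)   # shared "expand" conv
#
#   in_channel, mid = 32, 8                                  # a smaller sub-network
#   x = torch.randn(2, in_channel, 14, 14)
#   y = x.mean(3, keepdim=True).mean(2, keepdim=True)        # squeeze
#   y = F.conv2d(y, reduce.weight[:mid, :in_channel].contiguous(), reduce.bias[:mid])
#   y = F.relu(y)
#   y = F.conv2d(y, expand.weight[:in_channel, :mid].contiguous(), expand.bias[:in_channel])
#   y = torch.sigmoid(y)                                     # plain sigmoid; the repo uses Hsigmoid
#   out = x * y                                              # excite: (2, 32, 14, 14)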
def __init__(self, channel, reduction=0.25):
    super(SEModule, self).__init__()
    self.channel = channel
    self.reduction = reduction
    num_mid = make_divisible(int(self.channel * self.reduction), divisor=8)
    self.fc = nn.Sequential(OrderedDict([
        ('reduce', nn.Conv2d(self.channel, num_mid, 1, 1, 0, bias=True)),
        ('relu', nn.ReLU(inplace=True)),
        ('expand', nn.Conv2d(num_mid, self.channel, 1, 1, 0, bias=True)),
        ('h_sigmoid', Hsigmoid(inplace=True)),
    ]))
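# Usage note (an assumption about how this is called, run inside this repo so
# SEModule, make_divisible and Hsigmoid are in scope): with the default
# reduction=0.25 the bottleneck width is make_divisible(int(channel * 0.25), 8),
# e.g. channel=96 gives num_mid=24. MBConv below passes its `se` argument here.
#
#   se = SEModule(channel=96, reduction=0.25)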
def __init__(self, in_channel, expand_ratio, kernel_size, stride, act_func, se, out_channel):
    # expansion, 3x3 dwise, BN, Swish, SE, 1x1, BN, skip_connection
    super(MBConv, self).__init__()
    middle_channel = int(in_channel * expand_ratio)
    middle_channel = make_divisible(middle_channel, 8)
    if middle_channel != in_channel:
        self.expand = True
        self.inverted_bottleneck_conv = nn.Conv2d(in_channel, middle_channel, 1,
                                                  stride=1, padding=0, bias=False)
        self.inverted_bottleneck_bn = nn.BatchNorm2d(middle_channel, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
        self.inverted_bottleneck_act = build_activation(act_func)
    else:
        self.expand = False
    self.depth_conv = nn.Conv2d(middle_channel, middle_channel, kernel_size, stride=stride,
                                groups=middle_channel, padding=get_same_padding(kernel_size), bias=False)
    self.depth_bn = nn.BatchNorm2d(middle_channel, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
    self.depth_act = build_activation(act_func)
    if se > 0:
        self.depth_se = SEModule(middle_channel, se)
    self.point_linear_conv = nn.Conv2d(middle_channel, out_channel, 1, stride=1, padding=0, bias=False)
    self.point_linear_bn = nn.BatchNorm2d(out_channel, eps=cfg.BN.EPS, momentum=cfg.BN.MOM)
    # Skip connection if in and out shapes are the same (MN-V2 style)
    self.has_skip = stride == 1 and in_channel == out_channel
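# MBConv's forward is not shown in this snippet. A hedged sketch of the data flow
# implied by the constructor above (the repo's actual forward may differ, e.g. in
# drop-connect handling), using only the attributes defined in __init__:
#
#   def forward(self, x):
#       out = x
#       if self.expand:                                                  # 1x1 expansion
#           out = self.inverted_bottleneck_act(
#               self.inverted_bottleneck_bn(self.inverted_bottleneck_conv(out)))
#       out = self.depth_act(self.depth_bn(self.depth_conv(out)))       # k x k depthwise
#       if hasattr(self, 'depth_se'):
#           out = self.depth_se(out)                                    # squeeze-and-excitation
#       out = self.point_linear_bn(self.point_linear_conv(out))         # 1x1 linear projection
#       if self.has_skip:
#           out = out + x                                               # residual connection
#       return out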
def __init__(self, n_classes=1000, width_mult=1.2, depth=4):
    super(MobileNetV3, self).__init__()
    self.width_mult = width_mult
    self.depth = depth
    self.conv_candidates = [
        '3x3_MBConv3', '3x3_MBConv6',
        '5x5_MBConv3', '5x5_MBConv6',
        '7x7_MBConv3', '7x7_MBConv6',
    ] if len(cfg.MB.BASIC_OP) == 0 else cfg.MB.BASIC_OP

    # OFA-style stage widths
    self.base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
    final_expand_width = make_divisible(self.base_stage_width[-2] * self.width_mult, 8)
    last_channel = make_divisible(self.base_stage_width[-1] * self.width_mult, 8)

    self.stride_stages = [1, 2, 2, 2, 1, 2] if len(cfg.MB.STRIDE_STAGES) == 0 else cfg.MB.STRIDE_STAGES
    self.act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish'] \
        if len(cfg.MB.ACT_STAGES) == 0 else cfg.MB.ACT_STAGES
    self.se_stages = [False, False, True, False, True, True] if len(cfg.MB.SE_STAGES) == 0 else cfg.MB.SE_STAGES

    n_block_list = [1] + [self.depth] * 5
    width_list = []
    for base_width in self.base_stage_width[:-2]:
        width = make_divisible(base_width * self.width_mult, 8)
        width_list.append(width)
    input_channel = width_list[0]

    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2, act_func='h_swish')

    # first block
    first_block_conv = MBInvertedConvLayer(
        in_channels=input_channel,
        out_channels=input_channel,
        kernel_size=3,
        stride=self.stride_stages[0],
        expand_ratio=1,
        act_func=self.act_stages[0],
        use_se=self.se_stages[0],
    )
    first_block = MobileInvertedResidualBlock(first_block_conv,
                                              IdentityLayer(input_channel, input_channel))

    # inverted residual blocks
    blocks = nn.ModuleList()
    blocks.append(first_block)
    feature_dim = input_channel
    self.candidate_ops = []
    for width, n_block, s, act_func, use_se in zip(width_list[1:], n_block_list[1:],
                                                   self.stride_stages[1:], self.act_stages[1:],
                                                   self.se_stages[1:]):
        for i in range(n_block):
            stride = s if i == 0 else 1
            # conv: searchable blocks get 'Zero' (skip) when a residual path exists,
            # otherwise a plain '3x3_MBConv1' candidate
            if stride == 1 and feature_dim == width:
                modified_conv_candidates = self.conv_candidates + ['Zero']
            else:
                modified_conv_candidates = self.conv_candidates + ['3x3_MBConv1']
            self.candidate_ops.append(modified_conv_candidates)
            conv_op = MixedEdge(candidate_ops=build_candidate_ops(
                modified_conv_candidates, feature_dim, width, stride,
                'weight_bn_act', act_func=act_func, use_se=use_se), )
            if stride == 1 and feature_dim == width:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            blocks.append(MobileInvertedResidualBlock(conv_op, shortcut))
            feature_dim = width

    # final expand layer, feature mix layer & classifier
    final_expand_layer = ConvLayer(feature_dim, final_expand_width, kernel_size=1, act_func='h_swish')
    feature_mix_layer = ConvLayer(
        final_expand_width, last_channel,
        kernel_size=1, bias=False, use_bn=False, act_func='h_swish',
    )
    classifier = LinearLayer(last_channel, n_classes)

    self.first_conv = first_conv
    self.blocks = blocks
    self.final_expand_layer = final_expand_layer
    self.feature_mix_layer = feature_mix_layer
    self.classifier = classifier
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)

    self.all_edges = len(self.blocks) - 1
    self.num_edges = len(self.blocks) - 1
    self.num_ops = len(self.conv_candidates) + 1
def __init__(self, n_classes=1000, space_name='proxyless', width_mult=1.3, depth=4):
    super(ProxylessNASNets, self).__init__()
    self.width_mult = width_mult
    self.depth = depth
    self.conv_candidates = [
        '3x3_MBConv3', '3x3_MBConv6',
        '5x5_MBConv3', '5x5_MBConv6',
        '7x7_MBConv3', '7x7_MBConv6',
    ] if len(cfg.MB.BASIC_OP) == 0 else cfg.MB.BASIC_OP

    if space_name == 'google':
        self.base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    elif space_name == 'proxyless':
        self.base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = make_divisible(self.base_stage_width[0] * width_mult, 8)
    first_block_width = make_divisible(self.base_stage_width[1] * width_mult, 8)
    last_channel = make_divisible(self.base_stage_width[-1] * width_mult, 8)

    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2,
                           use_bn=True, act_func='relu6', ops_order='weight_bn_act')

    # first block
    first_block_conv = MBInvertedConvLayer(
        in_channels=input_channel,
        out_channels=first_block_width,
        kernel_size=3,
        stride=1,
        expand_ratio=1,
        act_func='relu6',
    )
    first_block = MobileInvertedResidualBlock(first_block_conv, None)
    input_channel = first_block_width

    # inverted residual blocks
    blocks = nn.ModuleList()
    blocks.append(first_block)

    self.stride_stages = [2, 2, 2, 1, 2, 1] if len(cfg.MB.STRIDE_STAGES) == 0 else cfg.MB.STRIDE_STAGES
    n_block_list = [self.depth] * 5 + [1]
    width_list = []
    for base_width in self.base_stage_width[2:-1]:
        width = make_divisible(base_width * self.width_mult, 8)
        width_list.append(width)

    feature_dim = input_channel
    self.candidate_ops = []
    for width, n_block, s in zip(width_list, n_block_list, self.stride_stages):
        for i in range(n_block):
            stride = s if i == 0 else 1
            # searchable blocks get 'Zero' (skip) when a residual path exists,
            # otherwise a plain '3x3_MBConv1' candidate
            if stride == 1 and feature_dim == width:
                modified_conv_candidates = self.conv_candidates + ['Zero']
            else:
                modified_conv_candidates = self.conv_candidates + ['3x3_MBConv1']
            self.candidate_ops.append(modified_conv_candidates)
            conv_op = MixedEdge(candidate_ops=build_candidate_ops(
                modified_conv_candidates, feature_dim, width, stride,
                'weight_bn_act', act_func='relu6', use_se=False), )
            if stride == 1 and feature_dim == width:
                shortcut = IdentityLayer(feature_dim, feature_dim)
            else:
                shortcut = None
            mb_inverted_block = MobileInvertedResidualBlock(conv_op, shortcut)
            blocks.append(mb_inverted_block)
            feature_dim = width

    # 1x1 conv before global average pooling
    feature_mix_layer = ConvLayer(
        feature_dim, last_channel,
        kernel_size=1, use_bn=True, act_func='relu6',
    )
    classifier = LinearLayer(last_channel, n_classes)

    self.first_conv = first_conv
    self.blocks = blocks
    self.feature_mix_layer = feature_mix_layer
    self.classifier = classifier
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)

    self.all_edges = len(self.blocks) - 1
    self.num_edges = len(self.blocks) - 1
    self.num_ops = len(self.conv_candidates) + 1
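# Hedged usage sketch (the exact controller lives elsewhere in this repo): both
# supernets expose num_edges searchable blocks (the fixed stem block is excluded)
# and num_ops candidates per edge (the base candidates plus 'Zero' or
# '3x3_MBConv1'), which is the natural shape for an architecture-parameter
# matrix in a gradient- or sampling-based NAS controller.
#
#   import torch
#   net = ProxylessNASNets(n_classes=1000, space_name='proxyless', width_mult=1.3, depth=4)
#   arch_params = torch.zeros(net.num_edges, net.num_ops, requires_grad=True)  # one row per MixedEdge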