Example #1
    def __init__(self, n_classes=1000, width_mult=1, bn_param=(0.1, 1e-3), dropout_rate=0.2,
                 ks=None, expand_ratio=None, depth_param=None, stage_width_list=None):
        input_channel = 16
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult, 8)
        last_channel = make_divisible(last_channel * width_mult, 8) if width_mult > 1.0 else last_channel

        cfg = {
            #    k,     exp,    c,      se,         nl,         s,      e,
            '0': [
                [3,     16,     16,     False,      'relu',     1,      1],
            ],
            '1': [
                [3,     64,     24,     False,      'relu',     2,      None],  # 4
                [3,     72,     24,     False,      'relu',     1,      None],  # 3
            ],
            '2': [
                [5,     72,     40,     True,       'relu',     2,      None],  # 3
                [5,     120,    40,     True,       'relu',     1,      None],  # 3
                [5,     120,    40,     True,       'relu',     1,      None],  # 3
            ],
            '3': [
                [3,     240,    80,     False,      'h_swish',  2,      None],  # 6
                [3,     200,    80,     False,      'h_swish',  1,      None],  # 2.5
                [3,     184,    80,     False,      'h_swish',  1,      None],  # 2.3
                [3,     184,    80,     False,      'h_swish',  1,      None],  # 2.3
            ],
            '4': [
                [3,     480,    112,    True,       'h_swish',  1,      None],  # 6
                [3,     672,    112,    True,       'h_swish',  1,      None],  # 6
            ],
            '5': [
                [5,     672,    160,    True,       'h_swish',  2,      None],  # 6
                [5,     960,    160,    True,       'h_swish',  1,      None],  # 6
                [5,     960,    160,    True,       'h_swish',  1,      None],  # 6
            ]
        }

        cfg = self.adjust_cfg(cfg, ks, expand_ratio, depth_param, stage_width_list)
        # apply the width multiplier: scale the `exp` (index 1) and `c` (index 2) entries of each block config
        for stage_id, block_config_list in cfg.items():
            for block_config in block_config_list:
                if block_config[1] is not None:
                    block_config[1] = make_divisible(block_config[1] * width_mult, 8)
                block_config[2] = make_divisible(block_config[2] * width_mult, 8)

        first_conv, blocks, final_expand_layer, feature_mix_layer, classifier = self.build_net_via_cfg(
            cfg, input_channel, last_channel, n_classes, dropout_rate
        )
        super(MobileNetV3Large, self).__init__(first_conv, blocks, final_expand_layer, feature_mix_layer, classifier)
        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
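
The examples above round every channel count with make_divisible. A minimal sketch of the usual helper, consistent with how it is called here (the repo's exact implementation may differ slightly):

def make_divisible(v, divisor, min_val=None):
    """Round v to the nearest multiple of divisor, never reducing it by more than ~10%."""
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    # rounding down must not drop more than 10% of the original value
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

# e.g. make_divisible(16 * 0.75, 8) -> 16, make_divisible(1280 * 1.4, 8) -> 1792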
Example #2
    def forward(self, x):
        in_channel = x.size(1)
        num_mid = make_divisible(in_channel // self.reduction, divisor=8)

        # squeeze: global average pooling over the spatial dimensions
        y = x.mean(3, keepdim=True).mean(2, keepdim=True)
        # reduce
        reduce_conv = self.fc.reduce
        reduce_filter = reduce_conv.weight[:num_mid, :in_channel, :, :].contiguous()
        reduce_bias = reduce_conv.bias[:num_mid] if reduce_conv.bias is not None else None
        y = F.conv2d(y, reduce_filter, reduce_bias, 1, 0, 1, 1)
        # relu
        y = self.fc.relu(y)
        # expand
        expand_conv = self.fc.expand
        expand_filter = expand_conv.weight[:in_channel, :num_mid, :, :].contiguous()
        expand_bias = expand_conv.bias[:in_channel] if expand_conv.bias is not None else None
        y = F.conv2d(y, expand_filter, expand_bias, 1, 0, 1, 1)
        # hard sigmoid
        y = self.fc.h_sigmoid(y)

        return x * y
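
A quick self-contained check of the channel-slicing logic above: slicing a full conv weight down to the active channel counts and calling F.conv2d directly (positional args are stride, padding, dilation, groups) behaves like a smaller conv. Shapes below are illustrative only:

import torch
import torch.nn.functional as F

weight = torch.randn(64, 32, 1, 1)                # full reduce conv: 32 -> 64 channels
x = torch.randn(2, 16, 1, 1)                      # active input has only 16 channels
num_mid = 8
reduce_filter = weight[:num_mid, :16, :, :].contiguous()
y = F.conv2d(x, reduce_filter, None, 1, 0, 1, 1)  # stride=1, padding=0, dilation=1, groups=1
assert y.shape == (2, 8, 1, 1)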
Example #3
    def forward(self, x):
        in_channel = x.size(1)

        if self.inverted_bottleneck is not None:
            self.inverted_bottleneck.conv.active_out_channel = \
                make_divisible(round(in_channel * self.active_expand_ratio), 8)

        self.depth_conv.conv.active_kernel_size = self.active_kernel_size
        self.point_linear.conv.active_out_channel = self.active_out_channel

        if self.inverted_bottleneck is not None:
            x = self.inverted_bottleneck(x)
        x = self.depth_conv(x)
        x = self.point_linear(x)
        return x
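
For concreteness, the middle-channel computation above with illustrative numbers (using the make_divisible sketch from Example #1):

in_channel, active_expand_ratio = 24, 6
make_divisible(round(in_channel * active_expand_ratio), 8)   # 144 (already a multiple of 8)
make_divisible(round(24 * 3.5), 8)                           # 84 rounds up to 88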
Example #4
    def get_active_subnet(self, in_channel, preserve_weight=True):
        middle_channel = make_divisible(
            round(in_channel * self.active_expand_ratio), 8)

        # build the new layer
        sub_layer = MBInvertedQConvLayer(
            in_channel,
            self.active_out_channel,
            self.active_kernel_size,
            self.stride,
            self.active_expand_ratio,
            act_func=self.act_func,
            mid_channels=middle_channel,
            pw_w_bit=self.point_linear.conv.w_bit,
            pw_a_bit=self.point_linear.conv.a_bit,
            dw_w_bit=self.depth_conv.conv.w_bit,
            dw_a_bit=self.depth_conv.conv.a_bit,
        )
        sub_layer = sub_layer.to(get_net_device(self))

        if not preserve_weight:
            return sub_layer

        # copy weight from current layer
        if sub_layer.inverted_bottleneck is not None:
            sub_layer.inverted_bottleneck.conv.weight.data.copy_(
                self.inverted_bottleneck.conv.conv.weight.data[:middle_channel, :in_channel, :, :])
            copy_bn(sub_layer.inverted_bottleneck.bn,
                    self.inverted_bottleneck.bn.bn)

        sub_layer.depth_conv.conv.weight.data.copy_(
            self.depth_conv.conv.get_active_filter(
                middle_channel, self.active_kernel_size).data)
        copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

        sub_layer.point_linear.conv.weight.data.copy_(
            self.point_linear.conv.conv.weight.data[:self.active_out_channel, :middle_channel, :, :])
        copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

        return sub_layer
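
copy_bn is not shown in these examples. A plausible sketch, assuming it simply copies the leading num_features entries of the affine parameters and running statistics (an assumption, not the repo's exact helper):

def copy_bn(target_bn, src_bn):
    # assumed behavior: copy the first `num_features` entries of weight/bias and running stats
    feature_dim = target_bn.num_features
    target_bn.weight.data.copy_(src_bn.weight.data[:feature_dim])
    target_bn.bias.data.copy_(src_bn.bias.data[:feature_dim])
    target_bn.running_mean.data.copy_(src_bn.running_mean.data[:feature_dim])
    target_bn.running_var.data.copy_(src_bn.running_var.data[:feature_dim])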
Example #5
    def __init__(self,
                 n_classes=1000,
                 width_mult=1,
                 bn_param=(0.1, 1e-3),
                 dropout_rate=0.2,
                 ks=None,
                 expand_ratio=None,
                 depth_param=None,
                 stage_width_list=None):

        if ks is None:
            ks = 3
        if expand_ratio is None:
            expand_ratio = 6

        input_channel = 32
        last_channel = 1280

        input_channel = make_divisible(input_channel * width_mult, 8)
        last_channel = make_divisible(last_channel * width_mult,
                                      8) if width_mult > 1.0 else last_channel

        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [expand_ratio, 24, 2, 2],
            [expand_ratio, 32, 3, 2],
            [expand_ratio, 64, 4, 2],
            [expand_ratio, 96, 3, 1],
            [expand_ratio, 160, 3, 2],
            [expand_ratio, 320, 1, 1],
        ]

        if depth_param is not None:
            assert isinstance(depth_param, int)
            for i in range(1, len(inverted_residual_setting) - 1):
                inverted_residual_setting[i][2] = depth_param

        if stage_width_list is not None:
            for i in range(len(inverted_residual_setting)):
                inverted_residual_setting[i][1] = stage_width_list[i]

        # one kernel-size entry per block; the first block (t == 1) always uses a fixed 3x3 kernel, hence the -1
        ks = int2list(ks,
                      sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
        _pt = 0

        # first conv layer
        first_conv = ConvLayer(3,
                               input_channel,
                               kernel_size=3,
                               stride=2,
                               use_bn=True,
                               act_func='relu6',
                               ops_order='weight_bn_act')
        # inverted residual blocks
        blocks = []
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * width_mult, 8)
            for i in range(n):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                if t == 1:
                    kernel_size = 3
                else:
                    kernel_size = ks[_pt]
                    _pt += 1
                mobile_inverted_conv = MBInvertedConvLayer(
                    in_channels=input_channel,
                    out_channels=output_channel,
                    kernel_size=kernel_size,
                    stride=stride,
                    expand_ratio=t,
                )
                if stride == 1:
                    if input_channel == output_channel:
                        shortcut = IdentityLayer(input_channel, input_channel)
                    else:
                        shortcut = None
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                input_channel = output_channel
        # 1x1_conv before global average pooling
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
            ops_order='weight_bn_act',
        )

        classifier = LinearLayer(last_channel,
                                 n_classes,
                                 dropout_rate=dropout_rate)

        super(MobileNetV2, self).__init__(first_conv, blocks,
                                          feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
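
int2list(val, n) is used throughout these examples to normalize scalar arguments into lists. A minimal sketch consistent with the call sites (assumed, not the repo's exact code):

def int2list(val, repeat_time=1):
    # lists/tuples pass through unchanged, scalars are repeated repeat_time times
    if isinstance(val, (list, tuple)):
        return list(val)
    return [val for _ in range(repeat_time)]

# e.g. int2list(3, 4) -> [3, 3, 3, 3]; int2list([3, 5, 7], 4) -> [3, 5, 7]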
Example #6
    def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1, base_stage_width=None,
                 width_mult_list=1.0, ks_list=3, expand_ratio_list=6, depth_list=4,
                 depth_ensemble_list=None, depth_ensemble_mode='avg'):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)

        self.depth_ensemble_list = depth_ensemble_list
        self.depth_ensemble_mode = depth_ensemble_mode

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        if base_stage_width == 'v2':
            base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
        elif base_stage_width == 'old':
            base_stage_width = [32, 16, 32, 40, 80, 96, 192, 320, 1280]
        else:
            # ProxylessNAS Stage Width
            base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

        input_channel = [make_divisible(base_stage_width[0] * width_mult, 8) for width_mult in self.width_mult_list]
        first_block_width = [make_divisible(base_stage_width[1] * width_mult, 8) for width_mult in self.width_mult_list]
        last_channel = [
            make_divisible(base_stage_width[-1] * width_mult, 8) if width_mult > 1.0 else base_stage_width[-1]
            for width_mult in self.width_mult_list
        ]

        # first conv layer
        if len(input_channel) == 1:
            first_conv = QConvLayer(
                3, max(input_channel), kernel_size=3, stride=2, use_bn=True, act_func='relu6',
                ops_order='weight_bn_act',
                w_bit=8, a_bit=-1, half_wave=False
            )
        else:
            first_conv = DynamicQConvLayer(
                in_channel_list=int2list(3, len(input_channel)), out_channel_list=input_channel, kernel_size=3,
                stride=2, act_func='relu6', w_bit=8, a_bit=8, half_wave=False
            )
        # first block
        if len(first_block_width) == 1:
            first_block_conv = MBInvertedQConvLayer(
                in_channels=max(input_channel), out_channels=max(first_block_width), kernel_size=3, stride=1,
                expand_ratio=1, act_func='relu6', pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
            )
        else:
            first_block_conv = DynamicMBQConvLayer(
                in_channel_list=input_channel, out_channel_list=first_block_width, kernel_size_list=3,
                expand_ratio_list=1, stride=1, act_func='relu6',
                # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
                pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
            )
        first_block = MobileInvertedResidualBlock(first_block_conv, None)

        input_channel = first_block_width

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1

        stride_stages = [2, 2, 2, 1, 2, 1]
        if depth_list is None:
            n_block_list = [2, 3, 4, 3, 3, 1]
            self.depth_list = [4]
        else:
            n_block_list = [max(self.depth_list)] * 5 + [1]

        width_list = []
        for base_width in base_stage_width[2:-1]:
            width = [make_divisible(base_width * width_mult, 8) for width_mult in self.width_mult_list]
            width_list.append(width)

        for width, n_block, s in zip(width_list, n_block_list, stride_stages):
            self.block_group_info.append(
                ([_block_index + i for i in range(n_block)], width)
            )
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1

                mobile_inverted_conv = DynamicMBQConvLayer(
                    in_channel_list=int2list(input_channel, 1), out_channel_list=int2list(output_channel, 1),
                    kernel_size_list=ks_list, expand_ratio_list=expand_ratio_list, stride=stride, act_func='relu6',
                    # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
                    pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8
                )

                if stride == 1 and input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None

                mb_inverted_block = MobileInvertedResidualBlock(mobile_inverted_conv, shortcut)

                blocks.append(mb_inverted_block)
                input_channel = output_channel
        # 1x1_conv before global average pooling
        if len(last_channel) == 1:
            feature_mix_layer = QConvLayer(
                max(input_channel), max(last_channel), kernel_size=1, use_bn=True, act_func='relu6',
                w_bit=8, a_bit=8, half_wave=False
            )
            classifier = QLinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate, w_bit=8, a_bit=8)
        else:
            feature_mix_layer = DynamicMBQConvLayer(
                in_channel_list=input_channel, out_channel_list=last_channel, kernel_size=1, stride=1, act_func='relu6',
                # pw_w_bit=4, pw_a_bit=4, dw_w_bit=4, dw_a_bit=4
                pw_w_bit=8, pw_a_bit=8, dw_w_bit=8, dw_a_bit=8, half_wave=False
            )
            classifier = DynamicQLinearLayer(
                in_features_list=last_channel, out_features=n_classes, bias=True, dropout_rate=dropout_rate,
                w_bit=8, a_bit=8
            )

        super(DynamicQuantizedProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx, _ in self.block_group_info
        ]

        if self.depth_ensemble_list is not None:
            self.depth_ensemble_list.sort()
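
A hypothetical construction of the dynamic quantized network above; argument values are illustrative only and assume the layer classes used in the constructor are importable from the same codebase:

net = DynamicQuantizedProxylessNASNets(
    n_classes=1000,
    bn_param=(0.1, 1e-3),
    dropout_rate=0.1,
    width_mult_list=[1.0],
    ks_list=[3, 5, 7],         # kernel-size search space
    expand_ratio_list=[3, 6],  # expand-ratio search space
    depth_list=[2, 3, 4],      # per-stage depth search space
)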
Example #7
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):

        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]

        final_expand_width = [
            make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        self.final_expand_width = final_expand_width
        last_channel = [
            make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        self.last_channel = last_channel

        # stride_stages = [1, 2, 2, 2, 1, 2]
        stride_stages = [1, 2, 2, 2, 1, 1]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = [1] + [max(self.depth_list)] * 5
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        input_channel = width_list[0]
        # first conv layer

        # use static layers when every width-multiplier candidate yields the same input channel count
        if len(set(input_channel)) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   act_func='h_swish')
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(input_channel),
                kernel_size=3,
                stride=stride_stages[0],
                expand_ratio=1,
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)),
                out_channel_list=input_channel,
                kernel_size=3,
                stride=2,
                act_func='h_swish',
            )
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=input_channel,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=stride_stages[0],
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        first_block = MobileInvertedResidualBlock(
            first_block_conv, IdentityLayer(input_channel, input_channel))

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = input_channel

        for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        if len(final_expand_width) == 1:
            final_expand_layer = ConvLayer(max(feature_dim),
                                           max(final_expand_width),
                                           kernel_size=1,
                                           act_func='h_swish')
            feature_mix_layer = ConvLayer(
                max(final_expand_width),
                max(last_channel),
                kernel_size=1,
                bias=False,
                use_bn=False,
                act_func='h_swish',
            )
        else:
            final_expand_layer = DynamicConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=final_expand_width,
                kernel_size=1,
                act_func='h_swish')
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=final_expand_width,
                out_channel_list=last_channel,
                kernel_size=1,
                use_bn=False,
                act_func='h_swish',
            )
        if len(set(last_channel)) == 1:
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)
        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
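
These constructors define runtime_depth but not how it is consumed. A sketch of the typical pattern for this family of dynamic networks (an assumption; the forward pass is not shown in these examples): only the first runtime_depth[stage] blocks of each stage are executed.

# hypothetical forward-time use of runtime_depth / block_group_info
x = self.first_conv(x)
x = self.blocks[0](x)
for stage_id, block_idx in enumerate(self.block_group_info):
    depth = self.runtime_depth[stage_id]
    for idx in block_idx[:depth]:   # skip the trailing blocks of the stage
        x = self.blocks[idx](x)
x = self.final_expand_layer(x)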
Example #8
    def __init__(self,
                 n_classes=1000,
                 bn_param=(0.1, 1e-5),
                 dropout_rate=0.1,
                 base_stage_width=None,
                 width_mult_list=1.0,
                 ks_list=3,
                 expand_ratio_list=6,
                 depth_list=4):
        """
        Args:
            n_classes: 分类类数
            bn_param: bn参数
            dropout_rate: 用在哪些层里面呢
            width_mult_list: 在单层layer重复一些操作[~~网络基础宽度缩放 X 并不是~~]
            ks_list: 卷积核的候选大小
            expand_ratio_list: 网络宽度/channel数的扩大倍数
            depth_list: 网络深度/layer的重复/堆叠次数
        """

        # int2list normalizes a list, tuple, or int into a list
        self.width_mult_list = int2list(width_mult_list, 1)
        self.ks_list = int2list(ks_list, 1)
        self.expand_ratio_list = int2list(expand_ratio_list, 1)
        self.depth_list = int2list(depth_list, 1)
        self.base_stage_width = base_stage_width

        self.width_mult_list.sort()
        self.ks_list.sort()
        self.expand_ratio_list.sort()
        self.depth_list.sort()

        base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
        # make_divisible rounds the conv channel count to a multiple of 8
        final_expand_width = [
            make_divisible(base_stage_width[-2] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        last_channel = [
            make_divisible(base_stage_width[-1] * max(self.width_mult_list), 8)
            for _ in self.width_mult_list
        ]
        # per-stage strides (control downsampling), activation functions, and whether Squeeze-and-Excitation (SE) is used
        stride_stages = [1, 2, 2, 2, 1, 2]
        act_stages = ['relu', 'relu', 'relu', 'h_swish', 'h_swish', 'h_swish']
        se_stages = [False, False, True, False, True, True]
        # depth setting: every stage except the first conv stage can have its depth expanded
        if depth_list is None:
            n_block_list = [1, 2, 3, 4, 2, 3]
            self.depth_list = [4, 4]
            print('Use MobileNetV3 Depth Setting')
        else:
            n_block_list = [1] + [max(self.depth_list)] * 5
        # per-stage width / channel-count configuration
        width_list = []
        for base_width in base_stage_width[:-2]:
            width = [
                make_divisible(base_width * width_mult, 8)
                for width_mult in self.width_mult_list
            ]
            width_list.append(width)

        # width_list holds the per-stage channel candidates (not expansion multiples of the initial channels)
        input_channel = width_list[0]
        # first conv layer
        if len(set(input_channel)) == 1:
            first_conv = ConvLayer(3,
                                   max(input_channel),
                                   kernel_size=3,
                                   stride=2,
                                   act_func='h_swish')
            first_block_conv = MBInvertedConvLayer(
                in_channels=max(input_channel),
                out_channels=max(input_channel),
                kernel_size=3,
                stride=stride_stages[0],
                expand_ratio=1,
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        else:
            first_conv = DynamicConvLayer(
                in_channel_list=int2list(3, len(input_channel)),
                out_channel_list=input_channel,
                kernel_size=3,
                stride=2,
                act_func='h_swish',
            )
            first_block_conv = DynamicMBConvLayer(
                in_channel_list=input_channel,
                out_channel_list=input_channel,
                kernel_size_list=3,
                expand_ratio_list=1,
                stride=stride_stages[0],
                act_func=act_stages[0],
                use_se=se_stages[0],
            )
        first_block = MobileInvertedResidualBlock(
            first_block_conv, IdentityLayer(input_channel, input_channel))

        # inverted residual blocks
        self.block_group_info = []
        blocks = [first_block]
        _block_index = 1
        feature_dim = input_channel

        for width, n_block, s, act_func, use_se in zip(width_list[1:],
                                                       n_block_list[1:],
                                                       stride_stages[1:],
                                                       act_stages[1:],
                                                       se_stages[1:]):
            self.block_group_info.append(
                [_block_index + i for i in range(n_block)])
            _block_index += n_block

            output_channel = width
            for i in range(n_block):
                if i == 0:
                    stride = s
                else:
                    stride = 1
                mobile_inverted_conv = DynamicMBConvLayer(
                    in_channel_list=feature_dim,
                    out_channel_list=output_channel,
                    kernel_size_list=ks_list,
                    expand_ratio_list=expand_ratio_list,
                    stride=stride,
                    act_func=act_func,
                    use_se=use_se,
                )
                if stride == 1 and feature_dim == output_channel:
                    shortcut = IdentityLayer(feature_dim, feature_dim)
                else:
                    shortcut = None
                blocks.append(
                    MobileInvertedResidualBlock(mobile_inverted_conv,
                                                shortcut))
                feature_dim = output_channel
        # final expand layer, feature mix layer & classifier
        if len(final_expand_width) == 1:
            final_expand_layer = ConvLayer(max(feature_dim),
                                           max(final_expand_width),
                                           kernel_size=1,
                                           act_func='h_swish')
            feature_mix_layer = ConvLayer(
                max(final_expand_width),
                max(last_channel),
                kernel_size=1,
                bias=False,
                use_bn=False,
                act_func='h_swish',
            )
        else:
            final_expand_layer = DynamicConvLayer(
                in_channel_list=feature_dim,
                out_channel_list=final_expand_width,
                kernel_size=1,
                act_func='h_swish')
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=final_expand_width,
                out_channel_list=last_channel,
                kernel_size=1,
                use_bn=False,
                act_func='h_swish',
            )
        if len(set(last_channel)) == 1:
            classifier = LinearLayer(max(last_channel),
                                     n_classes,
                                     dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(in_features_list=last_channel,
                                            out_features=n_classes,
                                            bias=True,
                                            dropout_rate=dropout_rate)
        super(OFAMobileNetV3,
              self).__init__(first_conv, blocks, final_expand_layer,
                             feature_mix_layer, classifier)

        # set bn param
        self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

        # runtime_depth
        self.runtime_depth = [
            len(block_idx) for block_idx in self.block_group_info
        ]
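
For concreteness, the width_list computation above with a hypothetical width_mult_list of [0.65, 1.0] (and the make_divisible sketch from Example #1) yields:

base_stage_width = [16, 24, 40, 80, 112, 160, 960, 1280]
width_mult_list = [0.65, 1.0]   # illustrative candidates, not taken from the source
width_list = [[make_divisible(w * m, 8) for m in width_mult_list] for w in base_stage_width[:-2]]
# -> [[16, 16], [16, 24], [24, 40], [56, 80], [72, 112], [104, 160]]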