def get_active_subnet(self, in_channel, preserve_weight=True):
    middle_channel = make_divisible(
        round(in_channel * self.active_expand_ratio), 8)

    # build the new layer
    sub_layer = MBInvertedConvLayer(
        in_channel,
        self.active_out_channel,
        self.active_kernel_size,
        self.stride,
        self.active_expand_ratio,
        act_func=self.act_func,
        mid_channels=middle_channel,
        use_se=self.use_se,
    )
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    # copy weight from current layer
    if sub_layer.inverted_bottleneck is not None:
        sub_layer.inverted_bottleneck.conv.weight.data.copy_(
            self.inverted_bottleneck.conv.conv.weight
            .data[:middle_channel, :in_channel, :, :])
        copy_bn(sub_layer.inverted_bottleneck.bn,
                self.inverted_bottleneck.bn.bn)

    sub_layer.depth_conv.conv.weight.data.copy_(
        self.depth_conv.conv.get_active_filter(
            middle_channel, self.active_kernel_size).data)
    copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

    if self.use_se:
        se_mid = make_divisible(middle_channel // SEModule.REDUCTION,
                                divisor=8)
        sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
            self.depth_conv.se.fc.reduce.weight
            .data[:se_mid, :middle_channel, :, :])
        sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
            self.depth_conv.se.fc.reduce.bias.data[:se_mid])

        sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
            self.depth_conv.se.fc.expand.weight
            .data[:middle_channel, :se_mid, :, :])
        sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
            self.depth_conv.se.fc.expand.bias.data[:middle_channel])

    sub_layer.point_linear.conv.weight.data.copy_(
        self.point_linear.conv.conv.weight
        .data[:self.active_out_channel, :middle_channel, :, :])
    copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

    return sub_layer
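# Usage sketch (illustrative, not from this file): after fixing the active
# configuration on the dynamic layer, get_active_subnet() materializes a
# static MBInvertedConvLayer with the matching weight slices copied in.
# `dynamic_layer` and the concrete values below are hypothetical.
dynamic_layer.active_kernel_size = 5
dynamic_layer.active_expand_ratio = 4
dynamic_layer.active_out_channel = 64
static_layer = dynamic_layer.get_active_subnet(in_channel=32, preserve_weight=True)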
def set_active(self, mode: str, sync=False, generator=None):
    # input stem
    first_conv, first_block = self.backbone["input_stem"].op_list
    if mode in ["min", "min_w"]:
        first_conv.conv.active_out_channels = min(first_conv.out_channels_list)
        first_block.conv.point_conv.conv.active_out_channels = min(
            first_block.conv.point_conv.out_channels_list
        )
    elif mode in ["random", "min_e"]:
        first_conv.conv.active_out_channels = torch_random_choices(
            first_conv.out_channels_list,
            generator,
        )
        first_block.conv.point_conv.conv.active_out_channels = torch_random_choices(
            first_block.conv.point_conv.out_channels_list,
            generator,
        )
    else:
        raise NotImplementedError
    if sync:
        first_conv.conv.active_out_channels = sync_width(
            first_conv.conv.active_out_channels
        )
        first_block.conv.point_conv.conv.active_out_channels = sync_width(
            first_block.conv.point_conv.conv.active_out_channels
        )

    # stages
    in_channels = first_block.conv.point_conv.conv.active_out_channels
    for block in self.all_blocks:
        if block.shortcut is None:
            if mode in ["min", "min_w"]:
                active_out_channels = min(block.conv.point_conv.out_channels_list)
            elif mode in ["random", "min_e"]:
                active_out_channels = torch_random_choices(
                    block.conv.point_conv.out_channels_list,
                    generator,
                )
            else:
                raise NotImplementedError
        else:
            active_out_channels = in_channels

        if mode in ["min", "min_e"]:
            active_expand_ratio = min(block.conv.expand_ratio_list)
        elif mode in ["min_w", "random"]:
            active_expand_ratio = torch_random_choices(
                block.conv.expand_ratio_list,
                generator,
            )
        else:
            raise NotImplementedError

        active_mid_channels = make_divisible(active_expand_ratio * in_channels, 1)
        if sync:
            active_mid_channels = sync_width(active_mid_channels)
            active_out_channels = sync_width(active_out_channels)

        block.conv.inverted_conv.conv.active_out_channels = active_mid_channels
        block.conv.point_conv.conv.active_out_channels = active_out_channels
        in_channels = active_out_channels
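# Usage sketch (illustrative): sample a random sub-network configuration
# before a training step. The network instance `net` is hypothetical;
# set_active() itself only accepts the modes checked above
# ("min", "min_w", "min_e", "random").
g = torch.Generator()
g.manual_seed(0)
net.set_active("random", sync=False, generator=g)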
def _pad_target(target, max_target_len):
    # pad the (T, num_mels) mel target so its length is a multiple of
    # outputs_per_step, then fold outputs_per_step frames into each decoder step
    max_target_len = make_divisible(max_target_len, hp.outputs_per_step)
    padded = np.zeros((max_target_len - len(target), hp.num_mels)) + hp.target_padding
    return np.concatenate((target, padded), axis=0).reshape(
        max_target_len // hp.outputs_per_step, hp.num_mels * hp.outputs_per_step)
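# Usage sketch (illustrative): pad a (T, num_mels) mel target. The hp values
# below are assumptions, not taken from this file:
# hp.num_mels = 80, hp.outputs_per_step = 2, hp.target_padding = 0.0
target = np.random.randn(101, 80)
batched = _pad_target(target, max_target_len=103)
# expected shape: (52, 160), assuming make_divisible(103, 2) rounds up to 104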
def forward(self, x):
    in_channel = x.size(1)

    if self.inverted_bottleneck is not None:
        self.inverted_bottleneck.conv.active_out_channel = \
            make_divisible(round(in_channel * self.active_expand_ratio), 8)

    self.depth_conv.conv.active_kernel_size = self.active_kernel_size
    self.point_linear.conv.active_out_channel = self.active_out_channel

    if self.inverted_bottleneck is not None:
        x = self.inverted_bottleneck(x)
    x = self.depth_conv(x)
    x = self.point_linear(x)
    return x
def __init__(
    self,
    in_channels: int,
    out_channels: List[int],
    kernel_size: int,
    expand_ratio: List[float],
    stride=1,
    act_func=("relu6", "relu6", None),
    norm=("bn_2d", "bn_2d", "bn_2d"),
):
    nn.Module.__init__(self)
    mid_channels = make_divisible(in_channels * max(expand_ratio), 1)

    self.inverted_conv = DynamicConvLayer(
        in_channels=in_channels,
        out_channels=[mid_channels],
        kernel_size=1,
        norm=norm[0],
        act_func=act_func[0],
    )
    self.depth_conv = DynamicDepthwiseConvLayer(
        in_channels=mid_channels,
        kernel_size=kernel_size,
        stride=stride,
        norm=norm[1],
        act_func=act_func[1],
    )
    self.point_conv = DynamicConvLayer(
        in_channels=mid_channels,
        out_channels=out_channels,
        kernel_size=1,
        norm=norm[2],
        act_func=act_func[2],
    )

    self.expand_ratio_list = copy.deepcopy(expand_ratio)
def __init__(self, width_mult=1.0, channel_divisor=8, n_classes=1000, dropout_rate=0):
    super(MobileNetV3, self).__init__()

    stage_width_list = [16, 24, 40, 80, 112, 160]
    head_width_list = [960, 1280]
    block_configs = [
        # mid_channels list, kernel size, num blocks, stride, act_func, use_se
        [[64, 72], 3, 2, 2, "relu", False],
        [[72, 120, 120], 5, 3, 2, "relu", True],
        [[240, 200, 184, 184], 3, 4, 2, "h_swish", False],
        [[480, 672], 3, 2, 1, "h_swish", True],
        [[672, 960, 960], 5, 3, 2, "h_swish", True],
    ]
    for i, w in enumerate(stage_width_list):
        stage_width_list[i] = make_divisible(w * width_mult, channel_divisor)
    for i, w in enumerate(head_width_list):
        head_width_list[i] = make_divisible(w * width_mult, channel_divisor)
    head_width_list[1] = max(head_width_list[1], 1280)

    input_stem = OpSequential([
        ConvLayer(3, stage_width_list[0], 3, 2, act_func="h_swish"),
        ResidualBlock(
            DsConvLayer(
                stage_width_list[0],
                stage_width_list[0],
                3,
                1,
                ("relu", None),
            ),
            shortcut=nn.Identity(),
        ),
    ])

    # stages
    stages = []
    in_channels = stage_width_list[0]
    for (mid_c_list, ks, n, s, act_func, use_se), c in zip(block_configs, stage_width_list[1:]):
        blocks = []
        for i in range(n):
            stride = s if i == 0 else 1
            mid_channels = make_divisible(
                round(mid_c_list[i] * width_mult), channel_divisor)
            if use_se:
                conv = SeInvertedBlock(
                    in_channels,
                    c,
                    ks,
                    stride,
                    mid_channels=mid_channels,
                    act_func=(act_func, act_func, None),
                    se_config={
                        "act_func": "relu",
                        "mid_channels": max(
                            make_divisible(mid_channels / 4, channel_divisor), 16),
                    },
                )
            else:
                conv = InvertedBlock(
                    in_channels,
                    c,
                    ks,
                    stride,
                    mid_channels=mid_channels,
                    act_func=(act_func, act_func, None),
                )
            mb_conv = ResidualBlock(
                conv,
                shortcut=nn.Identity() if (stride == 1 and in_channels == c and i != 0) else None,
            )
            blocks.append(mb_conv)
            in_channels = c
        stages.append(OpSequential(blocks))

    # head
    head = OpSequential([
        ConvLayer(in_channels, head_width_list[0], 1, act_func="h_swish"),
        nn.AdaptiveAvgPool2d(1),
        ConvLayer(
            head_width_list[0],
            head_width_list[1],
            1,
            act_func="h_swish",
            norm=None,
            use_bias=True,
        ),
        LinearLayer(head_width_list[1], n_classes, dropout_rate=dropout_rate),
    ])

    self.backbone = nn.ModuleDict({
        "input_stem": input_stem,
        "stages": nn.ModuleList(stages),
    })
    self.head = head
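# Usage sketch (illustrative): build the model and run a dummy input.
# Assumes the parent class wires backbone + head into forward(); the
# 224x224 input resolution is an assumption, not specified in this file.
net = MobileNetV3(width_mult=1.0, n_classes=1000, dropout_rate=0.2)
x = torch.randn(1, 3, 224, 224)
logits = net(x)  # expected shape: (1, 1000)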
def __init__(
    self,
    base_net: MobileNetV3,
    aug_expand_list: List[float],
    aug_width_mult_list: List[float],
    n_classes: int,
    dropout_rate=0.0,
):
    nn.Module.__init__(self)
    max_width_mult = max(aug_width_mult_list)

    # input stem
    base_input_stem = base_net.backbone["input_stem"]
    aug_input_stem = OpSequential([
        DynamicConvLayer(
            3,
            aug_width(base_input_stem.op_list[0].out_channels, aug_width_mult_list, 1),
            stride=2,
            act_func="h_swish",
        ),
        ResidualBlock(
            DynamicDsConvLayer(
                make_divisible(
                    base_input_stem.op_list[1].conv.in_channels * max_width_mult,
                    1,
                ),
                aug_width(
                    base_input_stem.op_list[1].conv.out_channels,
                    aug_width_mult_list,
                    1,
                ),
                act_func=("relu", None),
            ),
            shortcut=nn.Identity(),
        ),
    ])

    # stages
    aug_stages = []
    for base_stage in base_net.backbone["stages"]:
        stage = []
        for base_block in base_stage.op_list:
            if isinstance(base_block.conv, SeInvertedBlock):
                se_config = {
                    "reduction": (base_block.conv.se_layer.in_channels
                                  / base_block.conv.se_layer.mid_channels + 1.0e-10),
                    "act_func": base_block.conv.se_layer.act,
                }
                dynamic_block_cls = partial(DynamicSeInvertedBlock, se_config=se_config)
            elif isinstance(base_block.conv, InvertedBlock):
                dynamic_block_cls = DynamicInvertedBlock
            else:
                raise NotImplementedError
            stage.append(
                ResidualBlock(
                    dynamic_block_cls(
                        in_channels=make_divisible(
                            base_block.conv.in_channels * max_width_mult, 1),
                        out_channels=aug_width(
                            base_block.conv.out_channels, aug_width_mult_list, 1),
                        kernel_size=base_block.conv.kernel_size,
                        expand_ratio=aug_width(
                            base_block.conv.expand_ratio, aug_expand_list),
                        stride=base_block.conv.stride,
                        act_func=(
                            base_block.conv.inverted_conv.act,
                            base_block.conv.depth_conv.act,
                            base_block.conv.point_conv.act,
                        ),
                    ),
                    shortcut=base_block.shortcut,
                ))
        aug_stages.append(OpSequential(stage))

    # head
    base_head = base_net.head
    aug_head = OpSequential([
        DynamicConvLayer(
            make_divisible(base_head.op_list[0].in_channels * max_width_mult, 1),
            aug_width(base_head.op_list[0].out_channels, aug_width_mult_list, 1),
            1,
            act_func=base_head.op_list[0].act,
        ),
        nn.AdaptiveAvgPool2d(1),
        DynamicConvLayer(
            make_divisible(base_head.op_list[2].in_channels * max_width_mult, 1),
            aug_width(base_head.op_list[2].out_channels, aug_width_mult_list, 1),
            1,
            use_bias=True,
            norm=None,
            act_func=base_head.op_list[2].act,
        ),
        DynamicLinearLayer(
            make_divisible(base_head.op_list[-1].in_features * max_width_mult, 1),
            n_classes,
            dropout_rate=dropout_rate,
        ),
    ])

    self.backbone = nn.ModuleDict({
        "input_stem": aug_input_stem,
        "stages": nn.ModuleList(aug_stages),
    })
    self.head = aug_head
def __init__(self, channel_divisor=8, n_classes=1000, dropout_rate=0):
    super(MCUNet, self).__init__()

    stage_width_list = [16, 8, 16, 24, 40, 48, 96]
    head_width_list = [160]
    act_func = "relu6"
    block_configs = [
        # expand ratio list, kernel size list, num blocks, stride
        [[3, 5, 5, 4], [7, 3, 7, 5], 4, 2],
        [[5, 5, 5], [5, 5, 5], 3, 2],
        [[5, 6, 4], [3, 7, 5], 3, 2],
        [[5, 5, 5], [5, 7, 3], 3, 1],
        [[6, 5, 4], [3, 7, 3], 3, 2],
    ]

    input_stem = OpSequential([
        ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func),
        ResidualBlock(
            DsConvLayer(
                stage_width_list[0],
                stage_width_list[1],
                3,
                1,
                (act_func, None),
            ),
            shortcut=None,
        ),
    ])

    # stages
    stages = []
    in_channels = stage_width_list[1]
    for (e_list, ks_list, n, s), c in zip(block_configs, stage_width_list[2:]):
        blocks = []
        for i in range(n):
            stride = s if i == 0 else 1
            mid_channels = make_divisible(round(e_list[i] * in_channels), channel_divisor)
            mb_conv = ResidualBlock(
                InvertedBlock(
                    in_channels,
                    c,
                    ks_list[i],
                    stride,
                    mid_channels=mid_channels,
                    act_func=(act_func, act_func, None),
                ),
                shortcut=nn.Identity() if (stride == 1 and in_channels == c and i != 0) else None,
            )
            blocks.append(mb_conv)
            in_channels = c
        stages.append(OpSequential(blocks))

    # head
    head = OpSequential([
        ResidualBlock(
            InvertedBlock(
                in_channels,
                head_width_list[0],
                7,
                mid_channels=480,
                act_func=(act_func, act_func, None),
            ),
            shortcut=None,
        ),
        nn.AdaptiveAvgPool2d(1),
        LinearLayer(head_width_list[0], n_classes, dropout_rate=dropout_rate),
    ])

    self.backbone = nn.ModuleDict({
        "input_stem": input_stem,
        "stages": nn.ModuleList(stages),
    })
    self.head = head
def __init__(
    self,
    base_net: MobileNetV2,
    aug_expand_list: List[float],
    aug_width_mult_list: List[float],
    n_classes: int,
    dropout_rate=0.0,
):
    nn.Module.__init__(self)
    max_width_mult = max(aug_width_mult_list)

    # input stem
    base_input_stem = base_net.backbone["input_stem"]
    aug_input_stem = OpSequential([
        DynamicConvLayer(
            3,
            aug_width(base_input_stem.op_list[0].out_channels, aug_width_mult_list, 1),
            stride=2,
            act_func="relu6",
        ),
        ResidualBlock(
            DynamicDsConvLayer(
                make_divisible(
                    base_input_stem.op_list[0].out_channels * max_width_mult, 1),
                aug_width(
                    base_input_stem.op_list[1].conv.out_channels,
                    aug_width_mult_list,
                    1,
                ),
                act_func=("relu6", None),
            ),
            shortcut=None,
        ),
    ])

    # stages
    aug_stages = []
    for base_stage in base_net.backbone["stages"]:
        stage = []
        for base_block in base_stage.op_list:
            stage.append(
                ResidualBlock(
                    DynamicInvertedBlock(
                        in_channels=make_divisible(
                            base_block.conv.in_channels * max_width_mult, 1),
                        out_channels=aug_width(
                            base_block.conv.out_channels, aug_width_mult_list, 1),
                        kernel_size=base_block.conv.kernel_size,
                        expand_ratio=aug_width(
                            base_block.conv.expand_ratio, aug_expand_list),
                        stride=base_block.conv.stride,
                        act_func=(
                            base_block.conv.inverted_conv.act,
                            base_block.conv.depth_conv.act,
                            base_block.conv.point_conv.act,
                        ),
                    ),
                    shortcut=base_block.shortcut,
                ))
        aug_stages.append(OpSequential(stage))

    # head
    base_head = base_net.head
    aug_head = OpSequential([
        ResidualBlock(
            DynamicInvertedBlock(
                make_divisible(
                    base_head.op_list[0].conv.in_channels * max_width_mult, 1),
                aug_width(
                    base_head.op_list[0].conv.out_channels,
                    aug_width_mult_list,
                    1,
                ),
                base_head.op_list[0].conv.kernel_size,
                expand_ratio=aug_width(
                    base_head.op_list[0].conv.expand_ratio, aug_expand_list),
                act_func=("relu6", "relu6", None),
            ),
            shortcut=None,
        ),
        DynamicConvLayer(
            make_divisible(base_head.op_list[1].in_channels * max_width_mult, 1),
            aug_width(base_head.op_list[1].out_channels, aug_width_mult_list, 1),
            1,
            act_func=base_head.op_list[1].act,
        ),
        nn.AdaptiveAvgPool2d(1),
        DynamicLinearLayer(
            make_divisible(base_head.op_list[-1].in_features * max_width_mult, 1),
            n_classes,
            dropout_rate=dropout_rate,
        ),
    ])

    self.backbone = nn.ModuleDict({
        "input_stem": aug_input_stem,
        "stages": nn.ModuleList(aug_stages),
    })
    self.head = aug_head
def __init__(self, width_mult=1.0, channel_divisor=8, n_classes=1000, dropout_rate=0):
    super(MobileNetV2, self).__init__()

    stage_width_list = [32, 16, 24, 32, 64, 96, 160]
    head_width_list = [320, 1280]
    act_func = "relu6"
    block_configs = [
        # t, n, s
        [6, 2, 2],
        [6, 3, 2],
        [6, 4, 2],
        [6, 3, 1],
        [6, 3, 2],
    ]
    for i, w in enumerate(stage_width_list):
        stage_width_list[i] = make_divisible(w * width_mult, channel_divisor)
    for i, w in enumerate(head_width_list):
        head_width_list[i] = make_divisible(w * width_mult, channel_divisor)
    head_width_list[1] = max(head_width_list[1], 1280)

    input_stem = OpSequential([
        ConvLayer(3, stage_width_list[0], 3, 2, act_func=act_func),
        ResidualBlock(
            DsConvLayer(
                stage_width_list[0],
                stage_width_list[1],
                3,
                1,
                (act_func, None),
            ),
            shortcut=None,
        ),
    ])

    # stages
    stages = []
    in_channels = stage_width_list[1]
    for (t, n, s), c in zip(block_configs, stage_width_list[2:]):
        blocks = []
        for i in range(n):
            stride = s if i == 0 else 1
            mid_channels = make_divisible(round(t * in_channels), channel_divisor)
            mb_conv = ResidualBlock(
                InvertedBlock(
                    in_channels,
                    c,
                    3,
                    stride,
                    mid_channels=mid_channels,
                    act_func=(act_func, act_func, None),
                ),
                shortcut=nn.Identity() if (stride == 1 and in_channels == c and i != 0) else None,
            )
            blocks.append(mb_conv)
            in_channels = c
        stages.append(OpSequential(blocks))

    # head
    head = OpSequential([
        ResidualBlock(
            InvertedBlock(
                in_channels,
                head_width_list[0],
                3,
                expand_ratio=6,
                act_func=(act_func, act_func, None),
            ),
            shortcut=None,
        ),
        ConvLayer(head_width_list[0], head_width_list[1], 1, act_func=act_func),
        nn.AdaptiveAvgPool2d(1),
        LinearLayer(head_width_list[1], n_classes, dropout_rate=dropout_rate),
    ])

    self.backbone = nn.ModuleDict({
        "input_stem": input_stem,
        "stages": nn.ModuleList(stages),
    })
    self.head = head
def __init__(self, n_classes=1000, width_mult=1, bn_param=(0.1, 1e-3), dropout_rate=0.2,
             ks=None, expand_ratio=None, depth_param=None, stage_width_list=None,
             no_mix_layer=False, disable_keep_last_channel=False):
    if ks is None:
        ks = 3
    if expand_ratio is None:
        expand_ratio = 6

    input_channel = 32
    last_channel = 1280

    input_channel = make_divisible(input_channel * width_mult, 8)
    if disable_keep_last_channel:
        last_channel = make_divisible(last_channel * width_mult, 8)
    else:
        last_channel = make_divisible(
            last_channel * width_mult, 8) if width_mult > 1.0 else last_channel

    inverted_residual_setting = [
        # t, c, n, s
        [1, 16, 1, 1],
        [expand_ratio, 24, 2, 2],
        [expand_ratio, 32, 3, 2],
        [expand_ratio, 64, 4, 2],
        [expand_ratio, 96, 3, 1],
        [expand_ratio, 160, 3, 2],
        [expand_ratio, 320, 1, 1],
    ]

    if depth_param is not None:
        assert isinstance(depth_param, int)
        for i in range(1, len(inverted_residual_setting) - 1):
            inverted_residual_setting[i][2] = depth_param

    if stage_width_list is not None:
        for i in range(len(inverted_residual_setting)):
            inverted_residual_setting[i][1] = stage_width_list[i]

    ks = val2list(ks, sum([n for _, _, n, _ in inverted_residual_setting]) - 1)
    _pt = 0

    # first conv layer
    first_conv = ConvLayer(3, input_channel, kernel_size=3, stride=2,
                           use_bn=True, act_func='relu6', ops_order='weight_bn_act')

    # inverted residual blocks
    blocks = []
    for t, c, n, s in inverted_residual_setting:
        output_channel = make_divisible(c * width_mult, 8)
        for i in range(n):
            if i == 0:
                stride = s
            else:
                stride = 1
            if t == 1:
                kernel_size = 3
            else:
                kernel_size = ks[_pt]
                _pt += 1
            mobile_inverted_conv = MBInvertedConvLayer(
                in_channels=input_channel,
                out_channels=output_channel,
                kernel_size=kernel_size,
                stride=stride,
                expand_ratio=t,
            )
            if t > 1 and stride == 1:
                # NOTICE: we enforce no residual for the first block
                if input_channel == output_channel:
                    shortcut = IdentityLayer(input_channel, input_channel)
                else:
                    shortcut = None
            else:
                shortcut = None
            blocks.append(
                MobileInvertedResidualBlock(mobile_inverted_conv, shortcut))
            input_channel = output_channel

    # 1x1_conv before global average pooling
    if no_mix_layer:
        feature_mix_layer = None
        classifier = LinearLayer(input_channel, n_classes, dropout_rate=dropout_rate)
    else:
        feature_mix_layer = ConvLayer(
            input_channel,
            last_channel,
            kernel_size=1,
            use_bn=True,
            act_func='relu6',
            ops_order='weight_bn_act',
        )
        classifier = LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    super(MobileNetV2, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
def __init__(self, n_classes=1000, bn_param=(0.1, 1e-3), dropout_rate=0.1,
             base_stage_width=None, width_mult_list=1.0, ks_list=3,
             expand_ratio_list=6, depth_list=4, no_mix_layer=False):
    self.width_mult_list = val2list(width_mult_list, 1)
    self.ks_list = val2list(ks_list, 1)
    self.expand_ratio_list = val2list(expand_ratio_list, 1)
    self.depth_list = val2list(depth_list, 1)
    self.base_stage_width = base_stage_width

    self.width_mult_list.sort()
    self.ks_list.sort()
    self.expand_ratio_list.sort()
    self.depth_list.sort()

    if base_stage_width == 'google':
        base_stage_width = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    else:
        # ProxylessNAS Stage Width
        base_stage_width = [32, 16, 24, 40, 80, 96, 192, 320, 1280]

    input_channel = [
        make_divisible(base_stage_width[0] * width_mult, 8)
        for width_mult in self.width_mult_list
    ]
    first_block_width = [
        make_divisible(base_stage_width[1] * width_mult, 8)
        for width_mult in self.width_mult_list
    ]
    last_channel = [
        make_divisible(base_stage_width[-1] * width_mult, 8)
        if width_mult > 1.0 else base_stage_width[-1]
        for width_mult in self.width_mult_list
    ]

    # first conv layer
    if len(input_channel) == 1:
        first_conv = ConvLayer(3, max(input_channel), kernel_size=3, stride=2,
                               use_bn=True, act_func='relu6', ops_order='weight_bn_act')
    else:
        first_conv = DynamicConvLayer(
            in_channel_list=val2list(3, len(input_channel)),
            out_channel_list=input_channel,
            kernel_size=3,
            stride=2,
            act_func='relu6',
        )
    # first block
    if len(first_block_width) == 1:
        first_block_conv = MBInvertedConvLayer(
            in_channels=max(input_channel),
            out_channels=max(first_block_width),
            kernel_size=3,
            stride=1,
            expand_ratio=1,
            act_func='relu6',
        )
    else:
        first_block_conv = DynamicMBConvLayer(
            in_channel_list=input_channel,
            out_channel_list=first_block_width,
            kernel_size_list=3,
            expand_ratio_list=1,
            stride=1,
            act_func='relu6',
        )
    first_block = MobileInvertedResidualBlock(first_block_conv, None)

    input_channel = first_block_width

    # inverted residual blocks
    self.block_group_info = []
    blocks = [first_block]
    _block_index = 1

    stride_stages = [2, 2, 2, 1, 2, 1]
    if depth_list is None:
        n_block_list = [2, 3, 4, 3, 3, 1]
        self.depth_list = [4, 4]
        print('Use MobileNetV2 Depth Setting')
    else:
        n_block_list = [max(self.depth_list)] * 5 + [1]

    width_list = []
    for base_width in base_stage_width[2:-1]:
        width = [
            make_divisible(base_width * width_mult, 8)
            for width_mult in self.width_mult_list
        ]
        width_list.append(width)

    for width, n_block, s in zip(width_list, n_block_list, stride_stages):
        self.block_group_info.append([_block_index + i for i in range(n_block)])
        _block_index += n_block

        output_channel = width
        for i in range(n_block):
            if i == 0:
                stride = s
            else:
                stride = 1
            mobile_inverted_conv = DynamicMBConvLayer(
                in_channel_list=val2list(input_channel, 1),
                out_channel_list=val2list(output_channel, 1),
                kernel_size_list=ks_list,
                expand_ratio_list=expand_ratio_list,
                stride=stride,
                act_func='relu6',
            )
            if stride == 1 and input_channel == output_channel:
                shortcut = IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None
            mb_inverted_block = MobileInvertedResidualBlock(
                mobile_inverted_conv, shortcut)
            blocks.append(mb_inverted_block)
            input_channel = output_channel

    # 1x1_conv before global average pooling
    if no_mix_layer:
        # remove mix layer to reduce model size
        feature_mix_layer = None
        if len(self.width_mult_list) == 1:
            classifier = LinearLayer(max(input_channel), n_classes, dropout_rate=dropout_rate)
        else:
            classifier = DynamicLinearLayer(
                in_features_list=input_channel,
                out_features=n_classes,
                bias=True,
                dropout_rate=dropout_rate,
            )
    else:
        if len(last_channel) == 1:
            feature_mix_layer = ConvLayer(
                max(input_channel),
                max(last_channel),
                kernel_size=1,
                use_bn=True,
                act_func='relu6',
            )
            classifier = LinearLayer(max(last_channel), n_classes, dropout_rate=dropout_rate)
        else:
            feature_mix_layer = DynamicConvLayer(
                in_channel_list=input_channel,
                out_channel_list=last_channel,
                kernel_size=1,
                stride=1,
                act_func='relu6',
            )
            classifier = DynamicLinearLayer(
                in_features_list=last_channel,
                out_features=n_classes,
                bias=True,
                dropout_rate=dropout_rate,
            )

    super(OFAProxylessNASNets, self).__init__(first_conv, blocks, feature_mix_layer, classifier)

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])

    # runtime_depth
    self.runtime_depth = [
        len(block_idx) for block_idx in self.block_group_info
    ]
def active_mid_channels(self):
    return make_divisible(
        max(self.active_in_channels / self.reduction, self.min_dim), 1
    )
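# For reference, a minimal sketch of the make_divisible helper used
# throughout this file, following the common MobileNet convention.
# Assumption: this repo's actual implementation may differ (e.g. in how
# min_val is handled); this is illustrative only.
def make_divisible(v, divisor, min_val=None):
    if min_val is None:
        min_val = divisor
    new_v = max(min_val, int(v + divisor / 2) // divisor * divisor)
    # make sure rounding down does not remove more than 10% of the value
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v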